intel: define meta modes

Define VS_POINTS, FS_RECT, and DEPTH_STENCIL_RECT modes, meaning only VS, FS,
or depth/stencil is enabled respectively.

For now, FS_RECT is used for everything except for depth/stencil clear, which
uses DEPTH_STENCIL_RECT mode.  VS_POINTS mode is new and unused.
diff --git a/icd/intel/cmd_meta.c b/icd/intel/cmd_meta.c
index 1953fad..b4ca65d 100644
--- a/icd/intel/cmd_meta.c
+++ b/icd/intel/cmd_meta.c
@@ -369,6 +369,7 @@
     XGL_UINT i;
 
     memset(&meta, 0, sizeof(meta));
+    meta.mode = INTEL_CMD_META_FS_RECT;
 
     meta.shader_id = INTEL_DEV_META_FS_COPY_MEM;
     meta.height = 1;
@@ -430,6 +431,7 @@
     }
 
     memset(&meta, 0, sizeof(meta));
+    meta.mode = INTEL_CMD_META_FS_RECT;
 
     cmd_meta_set_src_for_img(cmd, src,
             (raw_copy) ? raw_format : src->layout.format,
@@ -487,6 +489,7 @@
     XGL_UINT i;
 
     memset(&meta, 0, sizeof(meta));
+    meta.mode = INTEL_CMD_META_FS_RECT;
 
     meta.shader_id = INTEL_DEV_META_FS_COPY_MEM_TO_IMG;
     meta.samples = img->samples;
@@ -536,6 +539,7 @@
     XGL_UINT i;
 
     memset(&meta, 0, sizeof(meta));
+    meta.mode = INTEL_CMD_META_FS_RECT;
 
     format = cmd_meta_img_raw_format(cmd, img->layout.format);
     cmd_meta_set_src_for_img(cmd, img, format, XGL_IMAGE_ASPECT_COLOR, &meta);
@@ -622,6 +626,7 @@
     format.numericFormat = XGL_NUM_FMT_UINT;
 
     memset(&meta, 0, sizeof(meta));
+    meta.mode = INTEL_CMD_META_FS_RECT;
 
     meta.shader_id = INTEL_DEV_META_FS_COPY_MEM;
 
@@ -658,6 +663,7 @@
     }
 
     memset(&meta, 0, sizeof(meta));
+    meta.mode = INTEL_CMD_META_FS_RECT;
 
     meta.shader_id = INTEL_DEV_META_FS_CLEAR_COLOR;
 
@@ -743,6 +749,7 @@
     XGL_UINT i;
 
     memset(&meta, 0, sizeof(meta));
+    meta.mode = INTEL_CMD_META_FS_RECT;
 
     meta.shader_id = INTEL_DEV_META_FS_CLEAR_COLOR;
     meta.samples = img->samples;
@@ -772,6 +779,7 @@
     XGL_UINT i;
 
     memset(&meta, 0, sizeof(meta));
+    meta.mode = INTEL_CMD_META_FS_RECT;
 
     meta.shader_id = INTEL_DEV_META_FS_CLEAR_COLOR;
     meta.samples = img->samples;
@@ -801,6 +809,7 @@
     XGL_UINT i;
 
     memset(&meta, 0, sizeof(meta));
+    meta.mode = INTEL_CMD_META_DEPTH_STENCIL_RECT;
 
     meta.shader_id = INTEL_DEV_META_FS_CLEAR_DEPTH;
     meta.samples = img->samples;
@@ -837,6 +846,7 @@
     }
 
     memset(&meta, 0, sizeof(meta));
+    meta.mode = INTEL_CMD_META_FS_RECT;
 
     switch (src->samples) {
     case 2:
diff --git a/icd/intel/cmd_pipeline.c b/icd/intel/cmd_pipeline.c
index 74e70e3..310093d 100644
--- a/icd/intel/cmd_pipeline.c
+++ b/icd/intel/cmd_pipeline.c
@@ -2007,7 +2007,7 @@
     cc_offset = 0;
     cc_vp_offset = 0;
 
-    if (meta->dst.valid) {
+    if (meta->mode == INTEL_CMD_META_FS_RECT) {
         /* BLEND_STATE */
         blend_offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_BLEND,
                 GEN6_ALIGNMENT_BLEND_STATE * 4, 2, &dw);
@@ -2015,27 +2015,29 @@
         dw[1] = GEN6_BLEND_DW1_COLORCLAMP_RTFORMAT | 0x3;
     }
 
-    if (meta->ds.state) {
-        const uint32_t blend_color[4] = { 0, 0, 0, 0 };
+    if (meta->mode != INTEL_CMD_META_VS_POINTS) {
+        if (meta->ds.state) {
+            const uint32_t blend_color[4] = { 0, 0, 0, 0 };
 
-        /* DEPTH_STENCIL_STATE */
-        ds_offset = gen6_DEPTH_STENCIL_STATE(cmd, meta->ds.state);
+            /* DEPTH_STENCIL_STATE */
+            ds_offset = gen6_DEPTH_STENCIL_STATE(cmd, meta->ds.state);
 
-        /* COLOR_CALC_STATE */
-        cc_offset = gen6_COLOR_CALC_STATE(cmd,
-                meta->ds.state->cmd_stencil_ref, blend_color);
+            /* COLOR_CALC_STATE */
+            cc_offset = gen6_COLOR_CALC_STATE(cmd,
+                    meta->ds.state->cmd_stencil_ref, blend_color);
 
-        /* CC_VIEWPORT */
-        cc_vp_offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_CC_VIEWPORT,
-                GEN6_ALIGNMENT_CC_VIEWPORT * 4, 2, &dw);
-        dw[0] = u_fui(0.0f);
-        dw[1] = u_fui(1.0f);
-    } else {
-        /* DEPTH_STENCIL_STATE */
-        ds_offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_DEPTH_STENCIL,
-                GEN6_ALIGNMENT_DEPTH_STENCIL_STATE * 4,
-                GEN6_DEPTH_STENCIL_STATE__SIZE, &dw);
-        memset(dw, 0, sizeof(*dw) * GEN6_DEPTH_STENCIL_STATE__SIZE);
+            /* CC_VIEWPORT */
+            cc_vp_offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_CC_VIEWPORT,
+                    GEN6_ALIGNMENT_CC_VIEWPORT * 4, 2, &dw);
+            dw[0] = u_fui(0.0f);
+            dw[1] = u_fui(1.0f);
+        } else {
+            /* DEPTH_STENCIL_STATE */
+            ds_offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_DEPTH_STENCIL,
+                    GEN6_ALIGNMENT_DEPTH_STENCIL_STATE * 4,
+                    GEN6_DEPTH_STENCIL_STATE__SIZE, &dw);
+            memset(dw, 0, sizeof(*dw) * GEN6_DEPTH_STENCIL_STATE__SIZE);
+        }
     }
 
     if (cmd_gen(cmd) >= INTEL_GEN(7)) {
@@ -2073,6 +2075,9 @@
 
     CMD_ASSERT(cmd, 6, 7.5);
 
+    if (meta->mode == INTEL_CMD_META_DEPTH_STENCIL_RECT)
+        return;
+
     /* SURFACE_STATEs */
     if (meta->src.valid) {
         offset = cmd_surface_write(cmd, INTEL_CMD_ITEM_SURFACE,
@@ -2110,12 +2115,16 @@
             2, binding_table);
 
     if (cmd_gen(cmd) >= INTEL_GEN(7)) {
-        gen7_3dstate_pointer(cmd,
-                GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS,
-                offset);
+        const int subop = (meta->mode == INTEL_CMD_META_VS_POINTS) ?
+            GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_VS :
+            GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS;
+        gen7_3dstate_pointer(cmd, subop, offset);
     } else {
         /* 3DSTATE_BINDING_TABLE_POINTERS */
-        gen6_3DSTATE_BINDING_TABLE_POINTERS(cmd, 0, 0, offset);
+        if (meta->mode == INTEL_CMD_META_VS_POINTS)
+            gen6_3DSTATE_BINDING_TABLE_POINTERS(cmd, offset, 0, 0);
+        else
+            gen6_3DSTATE_BINDING_TABLE_POINTERS(cmd, 0, 0, offset);
     }
 }
 
@@ -2134,6 +2143,7 @@
 
 static void gen7_meta_urb(struct intel_cmd *cmd)
 {
+    const struct intel_cmd_meta *meta = cmd->bind.meta;
     uint32_t *dw;
 
     CMD_ASSERT(cmd, 7, 7.5);
@@ -2142,7 +2152,7 @@
     cmd_batch_pointer(cmd, 10, &dw);
 
     dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_VS) | (2 - 2);
-    dw[1] = 0;
+    dw[1] = (meta->mode == INTEL_CMD_META_VS_POINTS);
     dw += 2;
 
     dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_HS) | (2 - 2);
@@ -2158,7 +2168,7 @@
     dw += 2;
 
     dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_PS) | (2 - 2);
-    dw[1] = 1;
+    dw[1] = (meta->mode == INTEL_CMD_META_FS_RECT);
 
     cmd_wa_gen7_pre_vs_depth_stall_write(cmd);
 
@@ -2193,44 +2203,64 @@
 
     CMD_ASSERT(cmd, 6, 7.5);
 
-    /* write vertices */
-    if (meta->shader_id == INTEL_DEV_META_FS_CLEAR_DEPTH) {
-        XGL_FLOAT vertices[3][3];
+    switch (meta->mode) {
+    case INTEL_CMD_META_VS_POINTS:
+        cmd_batch_pointer(cmd, 3, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (3 - 2);
+        dw[1] = GEN6_VE_STATE_DW0_VALID;
+        dw[2] = GEN6_VFCOMP_STORE_VID << GEN6_VE_STATE_DW1_COMP0__SHIFT |
+                GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT |
+                GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT |
+                GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT;
+        return;
+        break;
+    case INTEL_CMD_META_FS_RECT:
+        {
+            XGL_UINT vertices[3][2];
 
-        vertices[0][0] = (XGL_FLOAT) (meta->dst.x + meta->width);
-        vertices[0][1] = (XGL_FLOAT) (meta->dst.y + meta->height);
-        vertices[0][2] = u_uif(meta->clear_val[0]);
-        vertices[1][0] = (XGL_FLOAT) meta->dst.x;
-        vertices[1][1] = (XGL_FLOAT) (meta->dst.y + meta->height);
-        vertices[1][2] = u_uif(meta->clear_val[0]);
-        vertices[2][0] = (XGL_FLOAT) meta->dst.x;
-        vertices[2][1] = (XGL_FLOAT) meta->dst.y;
-        vertices[2][2] = u_uif(meta->clear_val[0]);
+            vertices[0][0] = meta->dst.x + meta->width;
+            vertices[0][1] = meta->dst.y + meta->height;
+            vertices[1][0] = meta->dst.x;
+            vertices[1][1] = meta->dst.y + meta->height;
+            vertices[2][0] = meta->dst.x;
+            vertices[2][1] = meta->dst.y;
 
-        vb_start = cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32,
-                sizeof(vertices) / 4, (const uint32_t *) vertices);
+            vb_start = cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32,
+                    sizeof(vertices) / 4, (const uint32_t *) vertices);
 
-        vb_end = vb_start + sizeof(vertices) - 1;
-        vb_stride = sizeof(vertices[0]);
-        ve_z_source = GEN6_VFCOMP_STORE_SRC;
-        ve_format = GEN6_FORMAT_R32G32B32_FLOAT;
-    } else {
-        XGL_UINT vertices[3][2];
+            vb_end = vb_start + sizeof(vertices) - 1;
+            vb_stride = sizeof(vertices[0]);
+            ve_z_source = GEN6_VFCOMP_STORE_0;
+            ve_format = GEN6_FORMAT_R32G32_USCALED;
+        }
+        break;
+    case INTEL_CMD_META_DEPTH_STENCIL_RECT:
+        {
+            XGL_FLOAT vertices[3][3];
 
-        vertices[0][0] = meta->dst.x + meta->width;
-        vertices[0][1] = meta->dst.y + meta->height;
-        vertices[1][0] = meta->dst.x;
-        vertices[1][1] = meta->dst.y + meta->height;
-        vertices[2][0] = meta->dst.x;
-        vertices[2][1] = meta->dst.y;
+            vertices[0][0] = (XGL_FLOAT) (meta->dst.x + meta->width);
+            vertices[0][1] = (XGL_FLOAT) (meta->dst.y + meta->height);
+            vertices[0][2] = u_uif(meta->clear_val[0]);
+            vertices[1][0] = (XGL_FLOAT) meta->dst.x;
+            vertices[1][1] = (XGL_FLOAT) (meta->dst.y + meta->height);
+            vertices[1][2] = u_uif(meta->clear_val[0]);
+            vertices[2][0] = (XGL_FLOAT) meta->dst.x;
+            vertices[2][1] = (XGL_FLOAT) meta->dst.y;
+            vertices[2][2] = u_uif(meta->clear_val[0]);
 
-        vb_start = cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32,
-                sizeof(vertices) / 4, (const uint32_t *) vertices);
+            vb_start = cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32,
+                    sizeof(vertices) / 4, (const uint32_t *) vertices);
 
-        vb_end = vb_start + sizeof(vertices) - 1;
-        vb_stride = sizeof(vertices[0]);
-        ve_z_source = GEN6_VFCOMP_STORE_0;
-        ve_format = GEN6_FORMAT_R32G32_USCALED;
+            vb_end = vb_start + sizeof(vertices) - 1;
+            vb_stride = sizeof(vertices[0]);
+            ve_z_source = GEN6_VFCOMP_STORE_SRC;
+            ve_format = GEN6_FORMAT_R32G32B32_FLOAT;
+        }
+        break;
+    default:
+        assert(!"unknown meta mode");
+        return;
+        break;
     }
 
     /* 3DSTATE_VERTEX_BUFFERS */
@@ -2250,7 +2280,7 @@
     /* 3DSTATE_VERTEX_ELEMENTS */
     cmd_batch_pointer(cmd, 5, &dw);
     dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (5 - 2);
-    dw[1] = GEN6_VE_STATE_DW0_VALID,
+    dw[1] = GEN6_VE_STATE_DW0_VALID;
     dw[2] = GEN6_VFCOMP_STORE_0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | /* Reserved */
             GEN6_VFCOMP_STORE_0 << GEN6_VE_STATE_DW1_COMP1__SHIFT | /* Render Target Array Index */
             GEN6_VFCOMP_STORE_0 << GEN6_VE_STATE_DW1_COMP2__SHIFT | /* Viewport Index */
@@ -2263,30 +2293,103 @@
             GEN6_VFCOMP_STORE_1_FP << GEN6_VE_STATE_DW1_COMP3__SHIFT;
 }
 
-static void gen6_meta_disabled(struct intel_cmd *cmd)
+static uint32_t gen6_meta_vs_constants(struct intel_cmd *cmd)
 {
     const struct intel_cmd_meta *meta = cmd->bind.meta;
+    /* one GPR */
+    XGL_UINT consts[8];
+    XGL_UINT const_count;
+
+    CMD_ASSERT(cmd, 6, 7.5);
+
+    switch (meta->shader_id) {
+    default:
+        assert(!"unknown meta shader id");
+        const_count = 0;
+        break;
+    }
+
+    /* this can be skipped but it makes state dumping prettier */
+    memset(&consts[const_count], 0, sizeof(consts[0]) * (8 - const_count));
+
+    return cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32, 8, consts);
+}
+
+static void gen6_meta_vs(struct intel_cmd *cmd)
+{
+    const struct intel_cmd_meta *meta = cmd->bind.meta;
+    const struct intel_pipeline_shader *sh =
+        intel_dev_get_meta_shader(cmd->dev, meta->shader_id);
+    uint32_t offset, *dw;
+
+    CMD_ASSERT(cmd, 6, 7.5);
+
+    if (meta->mode != INTEL_CMD_META_VS_POINTS) {
+        XGL_UINT cmd_len;
+
+        /* 3DSTATE_CONSTANT_VS */
+        cmd_len = (cmd_gen(cmd) >= INTEL_GEN(7)) ? 7 : 5;
+        cmd_batch_pointer(cmd, cmd_len, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | (cmd_len - 2);
+        memset(&dw[1], 0, sizeof(*dw) * (cmd_len - 1));
+
+        /* 3DSTATE_VS */
+        cmd_batch_pointer(cmd, 6, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (6 - 2);
+        memset(&dw[1], 0, sizeof(*dw) * (6 - 1));
+
+        return;
+    }
+
+    assert(meta->dst.valid && sh->uses == INTEL_SHADER_USE_VID);
+
+    /* 3DSTATE_CONSTANT_VS */
+    offset = gen6_meta_vs_constants(cmd);
+    if (cmd_gen(cmd) >= INTEL_GEN(7)) {
+        cmd_batch_pointer(cmd, 7, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | (7 - 2);
+        dw[1] = 1 << GEN7_PCB_ANY_DW1_PCB0_SIZE__SHIFT;
+        dw[2] = 0;
+        dw[3] = offset;
+        dw[4] = 0;
+        dw[5] = 0;
+        dw[6] = 0;
+    } else {
+        cmd_batch_pointer(cmd, 5, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | (5 - 2) |
+            GEN6_PCB_ANY_DW0_PCB0_VALID;
+        dw[1] = offset;
+        dw[2] = 0;
+        dw[3] = 0;
+        dw[4] = 0;
+    }
+
+    /* 3DSTATE_VS */
+    offset = emit_shader(cmd, sh);
+    cmd_batch_pointer(cmd, 6, &dw);
+    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (6 - 2);
+    dw[1] = offset;
+    dw[2] = GEN6_THREADDISP_SPF |
+            (sh->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+             sh->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+    dw[3] = 0;
+    dw[4] = sh->urb_grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
+            1 << GEN6_VS_DW4_URB_READ_LEN__SHIFT;
+
+    dw[5] = GEN6_VS_DW5_CACHE_DISABLE |
+            GEN6_VS_DW5_VS_ENABLE;
+    if (cmd_gen(cmd) >= INTEL_GEN(7.5))
+        dw[5] |= (70 - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
+    else
+        dw[5] |= (24 - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
+}
+
+static void gen6_meta_disabled(struct intel_cmd *cmd)
+{
     uint32_t *dw;
 
     CMD_ASSERT(cmd, 6, 6);
 
-    /* 3DSTATE_CONSTANT_VS */
-    cmd_batch_pointer(cmd, 5, &dw);
-    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | (5 - 2);
-    dw[1] = 0;
-    dw[2] = 0;
-    dw[3] = 0;
-    dw[4] = 0;
-
-    /* 3DSTATE_VS */
-    cmd_batch_pointer(cmd, 6, &dw);
-    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (6 - 2);
-    dw[1] = 0;
-    dw[2] = 0;
-    dw[3] = 0;
-    dw[4] = 0;
-    dw[5] = 0;
-
     /* 3DSTATE_CONSTANT_GS */
     cmd_batch_pointer(cmd, 5, &dw);
     dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) | (5 - 2);
@@ -2305,59 +2408,19 @@
     dw[5] = GEN6_GS_DW5_STATISTICS;
     dw[6] = 0;
 
-    /* 3DSTATE_CLIP */
-    cmd_batch_pointer(cmd, 4, &dw);
-    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (4 - 2);
-    dw[1] = 0;
-    dw[2] = 0;
-    dw[3] = 0;
-
     /* 3DSTATE_SF */
     cmd_batch_pointer(cmd, 20, &dw);
     dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (20 - 2);
     dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
     memset(&dw[2], 0, 18 * sizeof(*dw));
-
-    if (meta->shader_id == INTEL_DEV_META_FS_CLEAR_DEPTH) {
-        /* 3DSTATE_CONSTANT_PS */
-        cmd_batch_pointer(cmd, 5, &dw);
-        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | (5 - 2);
-        dw[1] = 0;
-        dw[2] = 0;
-        dw[3] = 0;
-        dw[4] = 0;
-
-        /* 3DSTATE_WM */
-        cmd_batch_pointer(cmd, 9, &dw);
-        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (9 - 2);
-        dw[1] = 0;
-        dw[2] = 0;
-        dw[3] = 0;
-        dw[4] = 0;
-        dw[5] = (40 - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
-        dw[6] = 0;
-        dw[7] = 0;
-        dw[8] = 0;
-    }
 }
 
 static void gen7_meta_disabled(struct intel_cmd *cmd)
 {
-    const struct intel_cmd_meta *meta = cmd->bind.meta;
     uint32_t *dw;
 
     CMD_ASSERT(cmd, 7, 7.5);
 
-    /* 3DSTATE_CONSTANT_VS */
-    cmd_batch_pointer(cmd, 7, &dw);
-    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | (7 - 2);
-    memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
-
-    /* 3DSTATE_VS */
-    cmd_batch_pointer(cmd, 6, &dw);
-    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (6 - 2);
-    memset(&dw[1], 0, sizeof(*dw) * (6 - 1));
-
     /* 3DSTATE_CONSTANT_HS */
     cmd_batch_pointer(cmd, 7, &dw);
     dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_CONSTANT_HS) | (7 - 2);
@@ -2398,11 +2461,6 @@
     dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (3 - 2);
     memset(&dw[1], 0, sizeof(*dw) * (3 - 1));
 
-    /* 3DSTATE_CLIP */
-    cmd_batch_pointer(cmd, 4, &dw);
-    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (4 - 2);
-    memset(&dw[1], 0, sizeof(*dw) * (4 - 1));
-
     /* 3DSTATE_SF */
     cmd_batch_pointer(cmd, 7, &dw);
     dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (7 - 2);
@@ -2413,30 +2471,24 @@
     dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (14 - 2);
     dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
     memset(&dw[2], 0, sizeof(*dw) * (14 - 2));
+}
 
-    if (meta->shader_id == INTEL_DEV_META_FS_CLEAR_DEPTH) {
-        /* 3DSTATE_WM */
-        cmd_batch_pointer(cmd, 3, &dw);
-        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (3 - 2);
-        memset(&dw[1], 0, sizeof(*dw) * (3 - 1));
+static void gen6_meta_clip(struct intel_cmd *cmd)
+{
+    const struct intel_cmd_meta *meta = cmd->bind.meta;
+    uint32_t *dw;
 
-        /* 3DSTATE_CONSTANT_GS */
-        cmd_batch_pointer(cmd, 7, &dw);
-        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | (7 - 2);
-        memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
-
-        /* 3DSTATE_PS */
-        cmd_batch_pointer(cmd, 8, &dw);
-        dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (8 - 2);
-        dw[1] = 0;
+    /* 3DSTATE_CLIP */
+    cmd_batch_pointer(cmd, 4, &dw);
+    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (4 - 2);
+    dw[1] = 0;
+    if (meta->mode == INTEL_CMD_META_VS_POINTS) {
+        dw[2] = GEN6_CLIP_DW2_CLIP_ENABLE |
+                GEN6_CLIP_DW2_CLIPMODE_REJECT_ALL;
+    } else {
         dw[2] = 0;
-        dw[3] = 0;
-        dw[4] = GEN7_PS_DW4_8_PIXEL_DISPATCH | /* required to avoid hangs */
-                (48 - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
-        dw[5] = 0;
-        dw[6] = 0;
-        dw[7] = 0;
     }
+    dw[3] = 0;
 }
 
 static void gen6_meta_wm(struct intel_cmd *cmd)
@@ -2566,8 +2618,30 @@
 
     CMD_ASSERT(cmd, 6, 6);
 
-    if (meta->shader_id == INTEL_DEV_META_FS_CLEAR_DEPTH)
+    if (meta->mode != INTEL_CMD_META_FS_RECT) {
+        /* 3DSTATE_CONSTANT_PS */
+        cmd_batch_pointer(cmd, 5, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | (5 - 2);
+        dw[1] = 0;
+        dw[2] = 0;
+        dw[3] = 0;
+        dw[4] = 0;
+
+        /* 3DSTATE_WM */
+        cmd_batch_pointer(cmd, 9, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (9 - 2);
+        dw[1] = 0;
+        dw[2] = 0;
+        dw[3] = 0;
+        dw[4] = 0;
+        dw[5] = (40 - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
+        dw[6] = 0;
+        dw[7] = 0;
+        dw[8] = 0;
+
         return;
+    }
+
     /* a normal color write */
     assert(meta->dst.valid && !sh->uses);
 
@@ -2619,8 +2693,32 @@
 
     CMD_ASSERT(cmd, 7, 7.5);
 
-    if (meta->shader_id == INTEL_DEV_META_FS_CLEAR_DEPTH)
+    if (meta->mode != INTEL_CMD_META_FS_RECT) {
+        /* 3DSTATE_WM */
+        cmd_batch_pointer(cmd, 3, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (3 - 2);
+        memset(&dw[1], 0, sizeof(*dw) * (3 - 1));
+
+        /* 3DSTATE_CONSTANT_PS */
+        cmd_batch_pointer(cmd, 7, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | (7 - 2);
+        memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
+
+        /* 3DSTATE_PS */
+        cmd_batch_pointer(cmd, 8, &dw);
+        dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (8 - 2);
+        dw[1] = 0;
+        dw[2] = 0;
+        dw[3] = 0;
+        dw[4] = GEN7_PS_DW4_8_PIXEL_DISPATCH | /* required to avoid hangs */
+                (48 - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+        dw[5] = 0;
+        dw[6] = 0;
+        dw[7] = 0;
+
         return;
+    }
+
     /* a normal color write */
     assert(meta->dst.valid && !sh->uses);
 
@@ -2899,7 +2997,9 @@
     if (cmd_gen(cmd) >= INTEL_GEN(7)) {
         gen7_meta_urb(cmd);
         gen6_meta_vf(cmd);
+        gen6_meta_vs(cmd);
         gen7_meta_disabled(cmd);
+        gen6_meta_clip(cmd);
         gen6_meta_wm(cmd);
         gen7_meta_ps(cmd);
         gen6_meta_depth_buffer(cmd);
@@ -2907,16 +3007,28 @@
         cmd_wa_gen7_post_command_cs_stall(cmd);
         cmd_wa_gen7_post_command_depth_stall(cmd);
 
-        gen7_3DPRIMITIVE(cmd, GEN6_3DPRIM_RECTLIST, false, 3, 0, 1, 0, 0);
+        if (meta->mode == INTEL_CMD_META_VS_POINTS) {
+            gen7_3DPRIMITIVE(cmd, GEN6_3DPRIM_POINTLIST, false,
+                    meta->width, 0, 1, 0, 0);
+        } else {
+            gen7_3DPRIMITIVE(cmd, GEN6_3DPRIM_RECTLIST, false, 3, 0, 1, 0, 0);
+        }
     } else {
         gen6_meta_urb(cmd);
         gen6_meta_vf(cmd);
+        gen6_meta_vs(cmd);
         gen6_meta_disabled(cmd);
+        gen6_meta_clip(cmd);
         gen6_meta_wm(cmd);
         gen6_meta_ps(cmd);
         gen6_meta_depth_buffer(cmd);
 
-        gen6_3DPRIMITIVE(cmd, GEN6_3DPRIM_RECTLIST, false, 3, 0, 1, 0, 0);
+        if (meta->mode == INTEL_CMD_META_VS_POINTS) {
+            gen6_3DPRIMITIVE(cmd, GEN6_3DPRIM_POINTLIST, false,
+                    meta->width, 0, 1, 0, 0);
+        } else {
+            gen6_3DPRIMITIVE(cmd, GEN6_3DPRIM_RECTLIST, false, 3, 0, 1, 0, 0);
+        }
     }
 
     cmd->bind.draw_count++;
diff --git a/icd/intel/cmd_priv.h b/icd/intel/cmd_priv.h
index ab71719..3d6f750 100644
--- a/icd/intel/cmd_priv.h
+++ b/icd/intel/cmd_priv.h
@@ -77,7 +77,28 @@
 
 struct intel_ds_view;
 
+enum intel_cmd_meta_mode {
+    /*
+     * Draw a POINTLIST of width vertices with only VS enabled.  The
+     * vertex ids run from 0 to (width - 1).
+     */
+    INTEL_CMD_META_VS_POINTS,
+
+    /*
+     * Draw a RECTLIST from (dst.x, dst.y) to (dst.x + width, dst.y + height)
+     * with only FS enabled.
+     */
+    INTEL_CMD_META_FS_RECT,
+
+    /*
+     * Draw a RECTLIST from (dst.x, dst.y) to (dst.x + width, dst.y + height)
+     * with only depth/stencil enabled.
+     */
+    INTEL_CMD_META_DEPTH_STENCIL_RECT,
+};
+
 struct intel_cmd_meta {
+    enum intel_cmd_meta_mode mode;
     enum intel_dev_meta_shader shader_id;
 
     struct {