intel: bump up max thread count for meta

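Use the per-generation, per-GT maximum thread counts when emitting the meta VS/WM/PS state instead of the hard-coded minimum values (70, 24, 40, 48).  The lookups are factored into cmd_vs_max_threads() and cmd_ps_max_threads(), which the regular pipeline paths now share.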
diff --git a/icd/intel/cmd_pipeline.c b/icd/intel/cmd_pipeline.c
index 310093d..69809bd 100644
--- a/icd/intel/cmd_pipeline.c
+++ b/icd/intel/cmd_pipeline.c
@@ -594,9 +594,37 @@
     dw[3] = dw3;
 }
 
+static int cmd_vs_max_threads(const struct intel_cmd *cmd)
+{
+    /* Max VS thread count by gen; the larger value is for the bigger GT. */
+    if (cmd_gen(cmd) == INTEL_GEN(7.5))
+        return (cmd->dev->gpu->gt < 2) ? 70 : 280;
+    else if (cmd_gen(cmd) == INTEL_GEN(7))
+        return (cmd->dev->gpu->gt != 2) ? 36 : 128;
+    else if (cmd_gen(cmd) == INTEL_GEN(6))
+        return (cmd->dev->gpu->gt != 2) ? 24 : 60;
+
+    /* unrecognized gen: fall back to a single thread */
+    return 1;
+}
+
+static int cmd_ps_max_threads(const struct intel_cmd *cmd)
+{
+    /* Max PS thread count by gen and GT; unrecognized gens get 4. */
+    if (cmd_gen(cmd) == INTEL_GEN(7.5)) {
+        if (cmd->dev->gpu->gt == 3)
+            return 408;
+        return (cmd->dev->gpu->gt == 2) ? 204 : 102;
+    }
+    if (cmd_gen(cmd) == INTEL_GEN(7))
+        return (cmd->dev->gpu->gt == 2) ? 172 : 48;
+    if (cmd_gen(cmd) == INTEL_GEN(6))
+        return (cmd->dev->gpu->gt == 2) ? 80 : 40;
+    return 4;
+}
+
 static void gen6_3DSTATE_WM(struct intel_cmd *cmd)
 {
-    const int max_threads = (cmd->dev->gpu->gt == 2) ? 80 : 40;
     const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
     const struct intel_pipeline_shader *fs = &pipeline->fs;
     const struct intel_msaa_state *msaa = cmd->bind.state.msaa;
@@ -615,7 +643,7 @@
           0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
           0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
 
-    dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT |
+    dw5 = (cmd_ps_max_threads(cmd) - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT |
           GEN6_WM_DW5_PS_ENABLE |
           GEN6_WM_DW5_8_PIXEL_DISPATCH;
 
@@ -709,6 +737,7 @@
     const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
     const struct intel_pipeline_shader *fs = &pipeline->fs;
     const struct intel_msaa_state *msaa = cmd->bind.state.msaa;
+    const int max_threads = cmd_ps_max_threads(cmd);
     const uint8_t cmd_len = 8;
     uint32_t dw0, dw2, dw4, dw5, *dw;
 
@@ -723,13 +752,9 @@
           GEN7_PS_DW4_8_PIXEL_DISPATCH;
 
     if (cmd_gen(cmd) >= INTEL_GEN(7.5)) {
-        const int max_threads =
-            (cmd->dev->gpu->gt == 3) ? 408 :
-            (cmd->dev->gpu->gt == 2) ? 204 : 102;
         dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
         dw4 |= msaa->cmd[msaa->cmd_len - 1] << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
     } else {
-        const int max_threads = (cmd->dev->gpu->gt == 2) ? 172 : 48;
         dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
     }
 
@@ -1703,10 +1728,11 @@
 {
     const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
     const struct intel_pipeline_shader *vs = &pipeline->vs;
+    const int max_threads = cmd_vs_max_threads(cmd);
     const uint8_t cmd_len = 6;
     const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
     uint32_t dw2, dw4, dw5, *dw;
-    int vue_read_len, max_threads;
+    int vue_read_len;
 
     CMD_ASSERT(cmd, 6, 7.5);
 
@@ -1734,21 +1760,6 @@
     dw5 = GEN6_VS_DW5_STATISTICS |
           GEN6_VS_DW5_VS_ENABLE;
 
-    switch (cmd_gen(cmd)) {
-    case INTEL_GEN(7.5):
-        max_threads = (cmd->dev->gpu->gt >= 2) ? 280 : 70;
-        break;
-    case INTEL_GEN(7):
-        max_threads = (cmd->dev->gpu->gt == 2) ? 128 : 36;
-        break;
-    case INTEL_GEN(6):
-        max_threads = (cmd->dev->gpu->gt == 2) ? 60 : 24;
-        break;
-    default:
-        max_threads = 1;
-        break;
-    }
-
     if (cmd_gen(cmd) >= INTEL_GEN(7.5))
         dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
     else
@@ -2320,6 +2331,7 @@
     const struct intel_cmd_meta *meta = cmd->bind.meta;
     const struct intel_pipeline_shader *sh =
         intel_dev_get_meta_shader(cmd->dev, meta->shader_id);
+    const int max_threads = cmd_vs_max_threads(cmd);
     uint32_t offset, *dw;
 
     CMD_ASSERT(cmd, 6, 7.5);
@@ -2379,9 +2391,9 @@
     dw[5] = GEN6_VS_DW5_CACHE_DISABLE |
             GEN6_VS_DW5_VS_ENABLE;
     if (cmd_gen(cmd) >= INTEL_GEN(7.5))
-        dw[5] |= (70 - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
+        dw[5] |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
     else
-        dw[5] |= (24 - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
+        dw[5] |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
 }
 
 static void gen6_meta_disabled(struct intel_cmd *cmd)
@@ -2614,6 +2626,7 @@
     const struct intel_cmd_meta *meta = cmd->bind.meta;
     const struct intel_pipeline_shader *sh =
         intel_dev_get_meta_shader(cmd->dev, meta->shader_id);
+    const int max_threads = cmd_ps_max_threads(cmd);
     uint32_t offset, *dw;
 
     CMD_ASSERT(cmd, 6, 6);
@@ -2634,7 +2647,7 @@
         dw[2] = 0;
         dw[3] = 0;
         dw[4] = 0;
-        dw[5] = (40 - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
+        dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
         dw[6] = 0;
         dw[7] = 0;
         dw[8] = 0;
@@ -2664,7 +2677,7 @@
              sh->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
     dw[3] = 0;
     dw[4] = sh->urb_grf_start << GEN6_WM_DW4_URB_GRF_START0__SHIFT;
-    dw[5] = (40 - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT |
+    dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT |
             GEN6_WM_DW5_PS_ENABLE |
             GEN6_WM_DW5_16_PIXEL_DISPATCH;
 
@@ -2689,6 +2702,7 @@
     const struct intel_cmd_meta *meta = cmd->bind.meta;
     const struct intel_pipeline_shader *sh =
         intel_dev_get_meta_shader(cmd->dev, meta->shader_id);
+    const int max_threads = cmd_ps_max_threads(cmd);
     uint32_t offset, *dw;
 
     CMD_ASSERT(cmd, 7, 7.5);
@@ -2711,7 +2725,7 @@
         dw[2] = 0;
         dw[3] = 0;
         dw[4] = GEN7_PS_DW4_8_PIXEL_DISPATCH | /* required to avoid hangs */
-                (48 - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+                (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
         dw[5] = 0;
         dw[6] = 0;
         dw[7] = 0;
@@ -2753,10 +2767,14 @@
 
     dw[4] = GEN7_PS_DW4_PUSH_CONSTANT_ENABLE |
             GEN7_PS_DW4_POSOFFSET_NONE |
-            GEN7_PS_DW4_16_PIXEL_DISPATCH |
-            (48 - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
-    if (cmd_gen(cmd) >= INTEL_GEN(7.5))
+            GEN7_PS_DW4_16_PIXEL_DISPATCH;
+
+    if (cmd_gen(cmd) >= INTEL_GEN(7.5)) {
+        dw[4] |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
         dw[4] |= ((1 << meta->samples) - 1) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
+    } else {
+        dw[4] |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+    }
 
     dw[5] = sh->urb_grf_start << GEN7_PS_DW5_URB_GRF_START0__SHIFT;
     dw[6] = 0;