intel: bump up max thread count for meta
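Use the per-gen/GT maximum VS and PS thread counts for the meta paths
instead of hard-coded constants. The lookups are factored out into
cmd_vs_max_threads() and cmd_ps_max_threads(), which the regular
3DSTATE_VS/3DSTATE_WM/3DSTATE_PS emitters now share.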
diff --git a/icd/intel/cmd_pipeline.c b/icd/intel/cmd_pipeline.c
index 310093d..69809bd 100644
--- a/icd/intel/cmd_pipeline.c
+++ b/icd/intel/cmd_pipeline.c
@@ -594,9 +594,37 @@
dw[3] = dw3;
}
+/*
+ * Look up the maximum VS thread count for the device behind cmd.
+ *
+ * The value is selected by cmd_gen(cmd) and, within a generation, by
+ * cmd->dev->gpu->gt.  Any cmd_gen() value other than 6, 7 or 7.5 yields 1.
+ */
+static int cmd_vs_max_threads(const struct intel_cmd *cmd)
+{
+    int max_threads;
+
+    switch (cmd_gen(cmd)) {
+    case INTEL_GEN(7.5):
+        if (cmd->dev->gpu->gt >= 2)
+            max_threads = 280;
+        else
+            max_threads = 70;
+        break;
+    case INTEL_GEN(7):
+        if (cmd->dev->gpu->gt == 2)
+            max_threads = 128;
+        else
+            max_threads = 36;
+        break;
+    case INTEL_GEN(6):
+        if (cmd->dev->gpu->gt == 2)
+            max_threads = 60;
+        else
+            max_threads = 24;
+        break;
+    default:
+        max_threads = 1;
+        break;
+    }
+
+    return max_threads;
+}
+
+/*
+ * Look up the maximum PS thread count for the device behind cmd.
+ *
+ * The value is selected by cmd_gen(cmd) and, within a generation, by
+ * cmd->dev->gpu->gt.  Any cmd_gen() value other than 6, 7 or 7.5 yields 4.
+ */
+static int cmd_ps_max_threads(const struct intel_cmd *cmd)
+{
+    int max_threads;
+
+    switch (cmd_gen(cmd)) {
+    case INTEL_GEN(7.5):
+        /* the only generation here with a distinct value for gt == 3 */
+        if (cmd->dev->gpu->gt == 3)
+            max_threads = 408;
+        else if (cmd->dev->gpu->gt == 2)
+            max_threads = 204;
+        else
+            max_threads = 102;
+        break;
+    case INTEL_GEN(7):
+        if (cmd->dev->gpu->gt == 2)
+            max_threads = 172;
+        else
+            max_threads = 48;
+        break;
+    case INTEL_GEN(6):
+        if (cmd->dev->gpu->gt == 2)
+            max_threads = 80;
+        else
+            max_threads = 40;
+        break;
+    default:
+        max_threads = 4;
+        break;
+    }
+
+    return max_threads;
+}
+
static void gen6_3DSTATE_WM(struct intel_cmd *cmd)
{
- const int max_threads = (cmd->dev->gpu->gt == 2) ? 80 : 40;
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_pipeline_shader *fs = &pipeline->fs;
const struct intel_msaa_state *msaa = cmd->bind.state.msaa;
@@ -615,7 +643,7 @@
0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
- dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT |
+ dw5 = (cmd_ps_max_threads(cmd) - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT |
GEN6_WM_DW5_PS_ENABLE |
GEN6_WM_DW5_8_PIXEL_DISPATCH;
@@ -709,6 +737,7 @@
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_pipeline_shader *fs = &pipeline->fs;
const struct intel_msaa_state *msaa = cmd->bind.state.msaa;
+ const int max_threads = cmd_ps_max_threads(cmd);
const uint8_t cmd_len = 8;
uint32_t dw0, dw2, dw4, dw5, *dw;
@@ -723,13 +752,9 @@
GEN7_PS_DW4_8_PIXEL_DISPATCH;
if (cmd_gen(cmd) >= INTEL_GEN(7.5)) {
- const int max_threads =
- (cmd->dev->gpu->gt == 3) ? 408 :
- (cmd->dev->gpu->gt == 2) ? 204 : 102;
dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
dw4 |= msaa->cmd[msaa->cmd_len - 1] << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
} else {
- const int max_threads = (cmd->dev->gpu->gt == 2) ? 172 : 48;
dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
}
@@ -1703,10 +1728,11 @@
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_pipeline_shader *vs = &pipeline->vs;
+ const int max_threads = cmd_vs_max_threads(cmd);
const uint8_t cmd_len = 6;
const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
uint32_t dw2, dw4, dw5, *dw;
- int vue_read_len, max_threads;
+ int vue_read_len;
CMD_ASSERT(cmd, 6, 7.5);
@@ -1734,21 +1760,6 @@
dw5 = GEN6_VS_DW5_STATISTICS |
GEN6_VS_DW5_VS_ENABLE;
- switch (cmd_gen(cmd)) {
- case INTEL_GEN(7.5):
- max_threads = (cmd->dev->gpu->gt >= 2) ? 280 : 70;
- break;
- case INTEL_GEN(7):
- max_threads = (cmd->dev->gpu->gt == 2) ? 128 : 36;
- break;
- case INTEL_GEN(6):
- max_threads = (cmd->dev->gpu->gt == 2) ? 60 : 24;
- break;
- default:
- max_threads = 1;
- break;
- }
-
if (cmd_gen(cmd) >= INTEL_GEN(7.5))
dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
else
@@ -2320,6 +2331,7 @@
const struct intel_cmd_meta *meta = cmd->bind.meta;
const struct intel_pipeline_shader *sh =
intel_dev_get_meta_shader(cmd->dev, meta->shader_id);
+ const int max_threads = cmd_vs_max_threads(cmd);
uint32_t offset, *dw;
CMD_ASSERT(cmd, 6, 7.5);
@@ -2379,9 +2391,9 @@
dw[5] = GEN6_VS_DW5_CACHE_DISABLE |
GEN6_VS_DW5_VS_ENABLE;
if (cmd_gen(cmd) >= INTEL_GEN(7.5))
- dw[5] |= (70 - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
+ dw[5] |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
else
- dw[5] |= (24 - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
+ dw[5] |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
}
static void gen6_meta_disabled(struct intel_cmd *cmd)
@@ -2614,6 +2626,7 @@
const struct intel_cmd_meta *meta = cmd->bind.meta;
const struct intel_pipeline_shader *sh =
intel_dev_get_meta_shader(cmd->dev, meta->shader_id);
+ const int max_threads = cmd_ps_max_threads(cmd);
uint32_t offset, *dw;
CMD_ASSERT(cmd, 6, 6);
@@ -2634,7 +2647,7 @@
dw[2] = 0;
dw[3] = 0;
dw[4] = 0;
- dw[5] = (40 - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
+ dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
dw[6] = 0;
dw[7] = 0;
dw[8] = 0;
@@ -2664,7 +2677,7 @@
sh->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
dw[3] = 0;
dw[4] = sh->urb_grf_start << GEN6_WM_DW4_URB_GRF_START0__SHIFT;
- dw[5] = (40 - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT |
+ dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT |
GEN6_WM_DW5_PS_ENABLE |
GEN6_WM_DW5_16_PIXEL_DISPATCH;
@@ -2689,6 +2702,7 @@
const struct intel_cmd_meta *meta = cmd->bind.meta;
const struct intel_pipeline_shader *sh =
intel_dev_get_meta_shader(cmd->dev, meta->shader_id);
+ const int max_threads = cmd_ps_max_threads(cmd);
uint32_t offset, *dw;
CMD_ASSERT(cmd, 7, 7.5);
@@ -2711,7 +2725,7 @@
dw[2] = 0;
dw[3] = 0;
dw[4] = GEN7_PS_DW4_8_PIXEL_DISPATCH | /* required to avoid hangs */
- (48 - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+ (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
dw[5] = 0;
dw[6] = 0;
dw[7] = 0;
@@ -2753,10 +2767,14 @@
dw[4] = GEN7_PS_DW4_PUSH_CONSTANT_ENABLE |
GEN7_PS_DW4_POSOFFSET_NONE |
- GEN7_PS_DW4_16_PIXEL_DISPATCH |
- (48 - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
- if (cmd_gen(cmd) >= INTEL_GEN(7.5))
+ GEN7_PS_DW4_16_PIXEL_DISPATCH;
+
+ if (cmd_gen(cmd) >= INTEL_GEN(7.5)) {
+ dw[4] |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
dw[4] |= ((1 << meta->samples) - 1) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
+ } else {
+ dw[4] |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+ }
dw[5] = sh->urb_grf_start << GEN7_PS_DW5_URB_GRF_START0__SHIFT;
dw[6] = 0;