panfrost: Set workgroups_z_shift to 32 for non-instanced graphics

This is a blob quirk; as far as I know, the hardware doesn't care.
But we're trying to be bit-identical to take as much entropy out of
traces as possible, so let's introduce the quirk.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index d0b2e13..50e70cd 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -119,7 +119,7 @@
 
         panfrost_pack_work_groups_compute(&payload->prefix,
                         info->grid[0], info->grid[1], info->grid[2],
-                        info->block[0], info->block[1], info->block[2]);
+                        info->block[0], info->block[1], info->block[2], false);
 
         /* Upload the payload */
 
diff --git a/src/panfrost/encoder/pan_encoder.h b/src/panfrost/encoder/pan_encoder.h
index 4d8ab61..aba3eba 100644
--- a/src/panfrost/encoder/pan_encoder.h
+++ b/src/panfrost/encoder/pan_encoder.h
@@ -39,7 +39,8 @@
         unsigned num_z,
         unsigned size_x,
         unsigned size_y,
-        unsigned size_z);
+        unsigned size_z,
+        bool quirk_graphics);
 
 void
 panfrost_pack_work_groups_fused(
diff --git a/src/panfrost/encoder/pan_invocation.c b/src/panfrost/encoder/pan_invocation.c
index 810fed3..96efd19 100644
--- a/src/panfrost/encoder/pan_invocation.c
+++ b/src/panfrost/encoder/pan_invocation.c
@@ -47,7 +47,8 @@
         unsigned num_z,
         unsigned size_x,
         unsigned size_y,
-        unsigned size_z)
+        unsigned size_z,
+        bool quirk_graphics)
 {
         /* First of all, all 6 values are off-by-one (strictly positive).
          * Account for that, first by ensuring all values are strictly positive
@@ -98,6 +99,13 @@
         out->workgroups_y_shift = shifts[4];
         out->workgroups_z_shift = shifts[5];
 
+        /* Quirk: for non-instanced graphics, the blob sets workgroups_z_shift
+         * = 32. This doesn't appear to matter to the hardware, but it's good
+         * to be bit-identical. */
+
+        if (quirk_graphics && (num_z <= 1))
+                out->workgroups_z_shift = 32;
+
         /* Special fields */
         out->workgroups_x_shift_2 = MAX2(out->workgroups_x_shift, 2);
         out->workgroups_x_shift_3 = out->workgroups_x_shift_2;
@@ -115,7 +123,7 @@
         unsigned size_y,
         unsigned size_z)
 {
-        panfrost_pack_work_groups_compute(vertex, num_x, num_y, num_z, size_x, size_y, size_z);
+        panfrost_pack_work_groups_compute(vertex, num_x, num_y, num_z, size_x, size_y, size_z, true);
 
         /* Copy results over */
         tiler->invocation_count = vertex->invocation_count;