panfrost: Set workgroups_z_shift to 32 for non-instanced graphics

This is a blob quirk; as far as I know, the hardware doesn't care.
But we're trying to be bit-identical to take as much entropy out of
traces as possible, so let's introduce the quirk.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
diff --git a/src/panfrost/encoder/pan_invocation.c b/src/panfrost/encoder/pan_invocation.c
index 810fed3..96efd19 100644
--- a/src/panfrost/encoder/pan_invocation.c
+++ b/src/panfrost/encoder/pan_invocation.c
@@ -47,7 +47,8 @@
         unsigned num_z,
         unsigned size_x,
         unsigned size_y,
-        unsigned size_z)
+        unsigned size_z,
+        bool quirk_graphics)
 {
         /* First of all, all 6 values are off-by-one (strictly positive).
          * Account for that, first by ensuring all values are strictly positive
@@ -98,6 +99,13 @@
         out->workgroups_y_shift = shifts[4];
         out->workgroups_z_shift = shifts[5];
 
+        /* Quirk: for non-instanced graphics, the blob sets workgroups_z_shift
+         * = 32. This doesn't appear to matter to the hardware, but it's good
+         * to be bit-identical. */
+
+        if (quirk_graphics && (num_z <= 1))
+                out->workgroups_z_shift = 32;
+
         /* Special fields */
         out->workgroups_x_shift_2 = MAX2(out->workgroups_x_shift, 2);
         out->workgroups_x_shift_3 = out->workgroups_x_shift_2;
@@ -115,7 +123,7 @@
         unsigned size_y,
         unsigned size_z)
 {
-        panfrost_pack_work_groups_compute(vertex, num_x, num_y, num_z, size_x, size_y, size_z);
+        panfrost_pack_work_groups_compute(vertex, num_x, num_y, num_z, size_x, size_y, size_z, true);
 
         /* Copy results over */
         tiler->invocation_count = vertex->invocation_count;