panfrost: XMLify invocations

Not so bad :)

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6476>
diff --git a/src/panfrost/lib/decode.c b/src/panfrost/lib/decode.c
index 78b0a54..c6e23b1 100644
--- a/src/panfrost/lib/decode.c
+++ b/src/panfrost/lib/decode.c
@@ -1248,21 +1248,17 @@
         /* Decode invocation_count. See the comment before the definition of
          * invocation_count for an explanation.
          */
+        struct MALI_INVOCATION invocation;
+        struct mali_invocation_packed invocation_packed = p->invocation;
+        MALI_INVOCATION_unpack((const uint8_t *) &invocation_packed, &invocation);
 
-        unsigned size_y_shift = bits(p->invocation_shifts, 0, 5);
-        unsigned size_z_shift = bits(p->invocation_shifts, 5, 10);
-        unsigned workgroups_x_shift = bits(p->invocation_shifts, 10, 16);
-        unsigned workgroups_y_shift = bits(p->invocation_shifts, 16, 22);
-        unsigned workgroups_z_shift = bits(p->invocation_shifts, 22, 28);
-        unsigned workgroups_x_shift_2 = bits(p->invocation_shifts, 28, 32);
+        unsigned size_x = bits(invocation.invocations, 0, invocation.size_y_shift) + 1;
+        unsigned size_y = bits(invocation.invocations, invocation.size_y_shift, invocation.size_z_shift) + 1;
+        unsigned size_z = bits(invocation.invocations, invocation.size_z_shift, invocation.workgroups_x_shift) + 1;
 
-        unsigned size_x = bits(p->invocation_count, 0, size_y_shift) + 1;
-        unsigned size_y = bits(p->invocation_count, size_y_shift, size_z_shift) + 1;
-        unsigned size_z = bits(p->invocation_count, size_z_shift, workgroups_x_shift) + 1;
-
-        unsigned groups_x = bits(p->invocation_count, workgroups_x_shift, workgroups_y_shift) + 1;
-        unsigned groups_y = bits(p->invocation_count, workgroups_y_shift, workgroups_z_shift) + 1;
-        unsigned groups_z = bits(p->invocation_count, workgroups_z_shift, 32) + 1;
+        unsigned groups_x = bits(invocation.invocations, invocation.workgroups_x_shift, invocation.workgroups_y_shift) + 1;
+        unsigned groups_y = bits(invocation.invocations, invocation.workgroups_y_shift, invocation.workgroups_z_shift) + 1;
+        unsigned groups_z = bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1;
 
         /* Even though we have this decoded, we want to ensure that the
          * representation is "unique" so we don't lose anything by printing only
@@ -1272,30 +1268,17 @@
          * decode and pack it ourselves! If it is bit exact with what we
          * decoded, we're good to go. */
 
-        struct mali_vertex_tiler_prefix ref;
+        struct mali_invocation_packed ref;
         panfrost_pack_work_groups_compute(&ref, groups_x, groups_y, groups_z, size_x, size_y, size_z, graphics);
 
-        bool canonical =
-                (p->invocation_count == ref.invocation_count) &&
-                (p->invocation_shifts == ref.invocation_shifts);
-
-        if (!canonical) {
+        if (memcmp(&ref, &invocation_packed, sizeof(ref))) {
                 pandecode_msg("XXX: non-canonical workgroups packing\n");
-                pandecode_msg("expected: %X, %X",
-                                ref.invocation_count,
-                                ref.invocation_shifts);
-
-                pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
-                pandecode_prop("size_y_shift = %d", size_y_shift);
-                pandecode_prop("size_z_shift = %d", size_z_shift);
-                pandecode_prop("workgroups_x_shift = %d", workgroups_x_shift);
-                pandecode_prop("workgroups_y_shift = %d", workgroups_y_shift);
-                pandecode_prop("workgroups_z_shift = %d", workgroups_z_shift);
-                pandecode_prop("workgroups_x_shift_2 = %d", workgroups_x_shift_2);
+                MALI_INVOCATION_print(pandecode_dump_stream, &invocation, 1 * 2);
         }
 
         /* Regardless, print the decode */
-        pandecode_msg("size (%d, %d, %d), count (%d, %d, %d)\n",
+        fprintf(pandecode_dump_stream,
+                        "Invocation (%d, %d, %d) x (%d, %d, %d)\n",
                         size_x, size_y, size_z,
                         groups_x, groups_y, groups_z);
 
diff --git a/src/panfrost/lib/pan_blit.c b/src/panfrost/lib/pan_blit.c
index f9ca87e..79a9120 100644
--- a/src/panfrost/lib/pan_blit.c
+++ b/src/panfrost/lib/pan_blit.c
@@ -346,6 +346,7 @@
         struct midgard_payload_vertex_tiler payload = {};
         struct mali_primitive_packed primitive;
         struct mali_draw_packed draw;
+        struct mali_invocation_packed invocation;
 
         pan_pack(&draw, DRAW, cfg) {
                 cfg.unknown_1 = 0x7;
@@ -365,10 +366,11 @@
                 cfg.unknown_3 = 6;
         }
 
-        memcpy(&payload.prefix.primitive, &primitive, MALI_DRAW_LENGTH);
-        memcpy(&payload.postfix, &draw, MALI_DRAW_LENGTH);
+        panfrost_pack_work_groups_compute(&invocation, 1, vertex_count, 1, 1, 1, 1, true);
 
-        panfrost_pack_work_groups_compute(&payload.prefix, 1, vertex_count, 1, 1, 1, 1, true);
+        payload.prefix.primitive = primitive;
+        memcpy(&payload.postfix, &draw, MALI_DRAW_LENGTH);
+        payload.prefix.invocation = invocation;
 
         panfrost_new_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &payload, sizeof(payload), true);
 }
diff --git a/src/panfrost/lib/pan_encoder.h b/src/panfrost/lib/pan_encoder.h
index 0471701..9433f02 100644
--- a/src/panfrost/lib/pan_encoder.h
+++ b/src/panfrost/lib/pan_encoder.h
@@ -34,7 +34,7 @@
 
 void
 panfrost_pack_work_groups_compute(
-        struct mali_vertex_tiler_prefix *out,
+        struct mali_invocation_packed *out,
         unsigned num_x,
         unsigned num_y,
         unsigned num_z,
@@ -43,17 +43,6 @@
         unsigned size_z,
         bool quirk_graphics);
 
-void
-panfrost_pack_work_groups_fused(
-        struct mali_vertex_tiler_prefix *vertex,
-        struct mali_vertex_tiler_prefix *tiler,
-        unsigned num_x,
-        unsigned num_y,
-        unsigned num_z,
-        unsigned size_x,
-        unsigned size_y,
-        unsigned size_z);
-
 /* Tiler structure size computation */
 
 unsigned
diff --git a/src/panfrost/lib/pan_invocation.c b/src/panfrost/lib/pan_invocation.c
index cfb5bec..4c0f645 100644
--- a/src/panfrost/lib/pan_invocation.c
+++ b/src/panfrost/lib/pan_invocation.c
@@ -41,7 +41,7 @@
 
 void
 panfrost_pack_work_groups_compute(
-        struct mali_vertex_tiler_prefix *out,
+        struct mali_invocation_packed *out,
         unsigned num_x,
         unsigned num_y,
         unsigned num_z,
@@ -77,53 +77,24 @@
                 shifts[i + 1] = shifts[i] + bit_count;
         }
 
-        /* Quirk: for non-instanced graphics, the blob sets workgroups_z_shift
-         * = 32. This doesn't appear to matter to the hardware, but it's good
-         * to be bit-identical. */
+        pan_pack(out, INVOCATION, cfg) {
+                cfg.invocations = packed;
+                cfg.size_y_shift = shifts[1];
+                cfg.size_z_shift = shifts[2];
+                cfg.workgroups_x_shift = shifts[3];
+                cfg.workgroups_y_shift = shifts[4];
+                cfg.workgroups_z_shift = shifts[5];
 
-        if (quirk_graphics && (num_z <= 1))
-                shifts[5] = 32;
+                /* Quirk: for non-instanced graphics, the blob sets
+                 * workgroups_z_shift = 32. This doesn't appear to matter to
+                 * the hardware, but it's good to be bit-identical. */
 
-        /* Quirk: for graphics, workgroups_x_shift_2 must be at least 2,
-         * whereas for OpenCL it is simply equal to workgroups_x_shift. For GL
-         * compute, it is always 2 if no barriers are in use, but is equal to
-         * workgroups_x_shift is barriers are in use. */
+                if (quirk_graphics && (num_z <= 1))
+                        cfg.workgroups_z_shift = 32;
 
-        unsigned shift_2 = shifts[3];
+                /* Quirk: for graphics, >= 2.  For compute, 2 without barriers
+                 * but equal to workgroups_x_shift with barriers */
 
-        if (quirk_graphics)
-                shift_2 = MAX2(shift_2, 2);
-
-        /* Pack them in */
-        uint32_t packed_shifts =
-                (shifts[1] << 0) |
-                (shifts[2] << 5) |
-                (shifts[3] << 10) |
-                (shifts[4] << 16) |
-                (shifts[5] << 22) |
-                (shift_2 << 28);
-
-        /* Upload the packed bitfields */
-        out->invocation_count = packed;
-        out->invocation_shifts = packed_shifts;
+                cfg.unknown_shift = quirk_graphics ? 2 : cfg.workgroups_x_shift;
+        }
 }
-
-/* Packs vertex/tiler descriptors simultaneously */
-void
-panfrost_pack_work_groups_fused(
-        struct mali_vertex_tiler_prefix *vertex,
-        struct mali_vertex_tiler_prefix *tiler,
-        unsigned num_x,
-        unsigned num_y,
-        unsigned num_z,
-        unsigned size_x,
-        unsigned size_y,
-        unsigned size_z)
-{
-        panfrost_pack_work_groups_compute(vertex, num_x, num_y, num_z, size_x, size_y, size_z, true);
-
-        /* Copy results over */
-        tiler->invocation_count = vertex->invocation_count;
-        tiler->invocation_shifts = vertex->invocation_shifts;
-}
-