panfrost: Pack invocation_shifts manually instead of a bit field
GCC generates exceptionally bad code for panfrost_pack_work_groups_fused
when the shifts are written through the bit field, so pack them into a
single 32-bit word by hand instead. (Even so, that routine is somehow
still hot.)
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3067>
diff --git a/src/panfrost/encoder/pan_invocation.c b/src/panfrost/encoder/pan_invocation.c
index 8fb1669..ecde3da 100644
--- a/src/panfrost/encoder/pan_invocation.c
+++ b/src/panfrost/encoder/pan_invocation.c
@@ -91,33 +91,38 @@
shifts[i + 1] = shifts[i] + bit_count;
}
- /* We're packed, so upload everything */
- out->invocation_count = packed;
- out->size_y_shift = shifts[1];
- out->size_z_shift = shifts[2];
- out->workgroups_x_shift = shifts[3];
- out->workgroups_y_shift = shifts[4];
- out->workgroups_z_shift = shifts[5];
-
/* Quirk: for non-instanced graphics, the blob sets workgroups_z_shift
* = 32. This doesn't appear to matter to the hardware, but it's good
* to be bit-identical. */
if (quirk_graphics && (num_z <= 1))
- out->workgroups_z_shift = 32;
+ shifts[5] = 32;
/* Quirk: for graphics, workgroups_x_shift_2 must be at least 2,
* whereas for OpenCL it is simply equal to workgroups_x_shift. For GL
* compute, it seems it might *always* be 2, but this is suspicious and
* needs further investigation. (I'm probably just using GL wrong). */
+ unsigned shift_2 = shifts[3];
+
if (quirk_graphics)
- out->workgroups_x_shift_2 = MAX2(out->workgroups_x_shift, 2);
- else
- out->workgroups_x_shift_2 = out->workgroups_x_shift;
+ shift_2 = MAX2(shift_2, 2);
+
+ /* Pack them in */
+ uint32_t packed_shifts =
+ (shifts[1] << 0) |
+ (shifts[2] << 5) |
+ (shifts[3] << 10) |
+ (shifts[4] << 16) |
+ (shifts[5] << 22) |
+ (shift_2 << 28);
+
+ /* Upload the packed bitfields */
+ out->invocation_count = packed;
+ out->invocation_shifts = packed_shifts;
/* TODO: Compute workgroups_x_shift_3 */
- out->workgroups_x_shift_3 = out->workgroups_x_shift_2;
+ out->workgroups_x_shift_3 = shift_2;
}
/* Packs vertex/tiler descriptors simultaneously */
@@ -136,12 +141,7 @@
/* Copy results over */
tiler->invocation_count = vertex->invocation_count;
- tiler->size_y_shift = vertex->size_y_shift;
- tiler->size_z_shift = vertex->size_z_shift;
- tiler->workgroups_x_shift = vertex->workgroups_x_shift;
- tiler->workgroups_x_shift_2 = vertex->workgroups_x_shift_2;
- tiler->workgroups_y_shift = vertex->workgroups_y_shift;
- tiler->workgroups_z_shift = vertex->workgroups_z_shift;
+ tiler->invocation_shifts = vertex->invocation_shifts;
/* Set special fields for each */
vertex->workgroups_x_shift_3 = 5;