aco: round bytes_written to dwords if larger than 4 bytes

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7276>
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 81e4072..2561bae 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -456,6 +456,7 @@
    default:
       break;
    }
+   bytes_written = bytes_written > 4 ? align(bytes_written, 4) : bytes_written;
    bytes_written = MAX2(bytes_written, instr_info.definition_size[(int)instr->opcode] / 8u);
 
    if (can_use_SDWA(chip, instr)) {