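radeonsi: Use util_memcpy_cpu_to_le32()

Replace the open-coded SI_BIG_ENDIAN branch used when copying the shader
binary into the mapped buffer (a per-dword util_cpu_to_le32() loop on
big-endian, plain memcpy() otherwise) with util_memcpy_cpu_to_le32() for
both the code and the rodata copies. The mapped pointer becomes
unsigned char * so that the rodata offset is advanced in bytes
(binary.code_size) rather than in dwords (binary.code_size / 4).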

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 2a7577f..3fcd314 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2553,7 +2553,7 @@
 {
 	unsigned r; /* llvm_compile result */
 	unsigned i;
-	uint32_t *ptr;
+	unsigned char *ptr;
 	struct radeon_shader_binary binary;
 	bool dump = r600_can_dump_shader(&sctx->screen->b,
 			shader->selector ? shader->selector->tokens : NULL);
@@ -2619,22 +2619,13 @@
 		return -ENOMEM;
 	}
 
-	ptr = (uint32_t*)sctx->b.ws->buffer_map(shader->bo->cs_buf, sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
-	if (SI_BIG_ENDIAN) {
-		for (i = 0; i < binary.code_size / 4; ++i) {
-			ptr[i] = util_cpu_to_le32((*(uint32_t*)(binary.code + i*4)));
-		}
-		ptr += (binary.code_size / 4);
-		for (i = 0; i < binary.rodata_size / 4; ++i) {
-			ptr[i] = util_cpu_to_le32((*(uint32_t*)(binary.rodata + i * 4)));
-		}
-	} else {
-		memcpy(ptr, binary.code, binary.code_size);
-		if (binary.rodata_size > 0) {
-			ptr += (binary.code_size / 4);
-			memcpy(ptr, binary.rodata, binary.rodata_size);
-		}
+	ptr = sctx->b.ws->buffer_map(shader->bo->cs_buf, sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+	util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size);
+	if (binary.rodata_size > 0) {
+		ptr += binary.code_size;
+		util_memcpy_cpu_to_le32(ptr, binary.rodata, binary.rodata_size);
 	}
+
 	sctx->b.ws->buffer_unmap(shader->bo->cs_buf);
 
 	free(binary.code);