aco: switch aco_print_asm to a FILE *

Streams carry persistent formatting state (fill, width, base) and are
(IMO) difficult to read for non-trivial usage. A FILE * is also more
consistent with NIR and the rest of ACO.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7166>
diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp
index 55ddd19..7a62fdb 100644
--- a/src/amd/compiler/aco_interface.cpp
+++ b/src/amd/compiler/aco_interface.cpp
@@ -28,7 +28,6 @@
 #include "vulkan/radv_shader_args.h"
 
 #include <iostream>
-#include <sstream>
 
 static aco_compiler_statistic_info statistic_infos[] = {
    [aco::statistic_hash] = {"Hash", "CRC32 hash of code and constant data"},
@@ -173,16 +172,25 @@
 
    std::string disasm;
    if (get_disasm) {
-      std::ostringstream stream;
-      if (aco::print_asm(program.get(), code, exec_size / 4u, stream)) {
-         std::cerr << "Failed to disassemble program:\n";
-         aco_print_program(program.get(), stderr);
-         std::cerr << stream.str() << std::endl;
-         abort();
+      char *data = NULL;
+      size_t disasm_size = 0;
+      FILE *f = open_memstream(&data, &disasm_size);
+      if (f) {
+         bool fail = aco::print_asm(program.get(), code, exec_size / 4u, f);
+         fputc(0, f);
+         fclose(f);
+
+         if (fail) {
+            fprintf(stderr, "Failed to disassemble program:\n");
+            aco_print_program(program.get(), stderr);
+            fputs(data, stderr);
+            abort();
+         }
       }
-      stream << '\0';
-      disasm = stream.str();
-      size += disasm.size();
+
+      disasm = std::string(data, data + disasm_size);
+      size += disasm_size;
+      free(data);
    }
 
    size_t stats_size = 0;
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 4535d26..10c78b3 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1748,7 +1748,7 @@
 void insert_NOPs(Program* program);
 unsigned emit_program(Program* program, std::vector<uint32_t>& code);
 bool print_asm(Program *program, std::vector<uint32_t>& binary,
-               unsigned exec_size, std::ostream& out);
+               unsigned exec_size, FILE *output);
 bool validate_ir(Program* program);
 bool validate_ra(Program* program);
 #ifndef NDEBUG
diff --git a/src/amd/compiler/aco_print_asm.cpp b/src/amd/compiler/aco_print_asm.cpp
index 663fa86..af12314 100644
--- a/src/amd/compiler/aco_print_asm.cpp
+++ b/src/amd/compiler/aco_print_asm.cpp
@@ -16,7 +16,7 @@
  * for GFX6-GFX7 if found on the system, this is better than nothing.
 */
 bool print_asm_gfx6_gfx7(Program *program, std::vector<uint32_t>& binary,
-                         std::ostream& out)
+                         FILE *output)
 {
    char path[] = "/tmp/fileXXXXXX";
    char line[2048], command[128];
@@ -72,13 +72,13 @@
    p = popen(command, "r");
    if (p) {
       if (!fgets(line, sizeof(line), p)) {
-         out << "clrxdisasm not found\n";
+         fprintf(output, "clrxdisasm not found\n");
          pclose(p);
          goto fail;
       }
 
       do {
-         out << line;
+         fputs(line, output);
       } while (fgets(line, sizeof(line), p));
 
       pclose(p);
@@ -94,7 +94,7 @@
 
 std::pair<bool, size_t> disasm_instr(chip_class chip, LLVMDisasmContextRef disasm,
                                      uint32_t *binary, unsigned exec_size, size_t pos,
-                                     std::ostream& out)
+                                     char *outline, unsigned outline_size)
 {
    /* mask out src2 on v_writelane_b32 */
    if (((chip == GFX8 || chip == GFX9) && (binary[pos] & 0xffff8000) == 0xd28a0000) ||
@@ -102,10 +102,9 @@
       binary[pos+1] = binary[pos+1] & 0xF803FFFF;
    }
 
-   char outline[1024];
    size_t l = LLVMDisasmInstruction(disasm, (uint8_t *) &binary[pos],
                                     (exec_size - pos) * sizeof(uint32_t), pos * 4,
-                                    outline, sizeof(outline));
+                                    outline, outline_size);
 
    if (chip >= GFX10 && l == 8 &&
        ((binary[pos] & 0xffff0000) == 0xd7610000) &&
@@ -122,34 +121,32 @@
         (chip <= GFX9 && (binary[pos] & 0xffff8000) == 0xd1268000) || /* v_add_u16_e64 + clamp */
         (chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd76d8000) || /* v_add3_u32 + clamp */
         (chip == GFX9 && (binary[pos] & 0xffff8000) == 0xd1ff8000)) /* v_add3_u32 + clamp */) {
-      out << "\tinteger addition + clamp";
+      strcpy(outline, "\tinteger addition + clamp");
       bool has_literal = chip >= GFX10 &&
                          (((binary[pos+1] & 0x1ff) == 0xff) || (((binary[pos+1] >> 9) & 0x1ff) == 0xff));
       size = 2 + has_literal;
    } else if (chip >= GFX10 && l == 4 && ((binary[pos] & 0xfe0001ff) == 0x020000f9)) {
-      out << "\tv_cndmask_b32 + sdwa";
+      strcpy(outline, "\tv_cndmask_b32 + sdwa");
       size = 2;
    } else if (!l) {
-      out << "(invalid instruction)";
+      strcpy(outline, "(invalid instruction)");
       size = 1;
       invalid = true;
    } else {
-      out << outline;
       assert(l % 4 == 0);
       size = l / 4;
    }
-   out << std::right;
 
    return std::make_pair(invalid, size);
 }
 } /* end namespace */
 
 bool print_asm(Program *program, std::vector<uint32_t>& binary,
-               unsigned exec_size, std::ostream& out)
+               unsigned exec_size, FILE *output)
 {
    if (program->chip_class <= GFX7) {
       /* Do not abort if clrxdisasm isn't found. */
-      print_asm_gfx6_gfx7(program, binary, out);
+      print_asm_gfx6_gfx7(program, binary, output);
       return false;
    }
 
@@ -201,58 +198,51 @@
          pos += prev_size;
          continue;
       } else {
-         if (repeat_count) {
-            out << std::left << std::setw(0) << std::dec << std::setfill(' ') << "\t(then repeated " << repeat_count << " times)" << std::endl;
-         }
+         if (repeat_count)
+            fprintf(output, "\t(then repeated %u times)\n", repeat_count);
          repeat_count = 0;
       }
 
       while (next_block < program->blocks.size() && pos == program->blocks[next_block].offset) {
          if (referenced_blocks[next_block])
-            out << "BB" << std::dec << next_block << ":" << std::endl;
+            fprintf(output, "BB%u:\n", next_block);
          next_block++;
       }
 
-      const int align_width = 60;
-      out << std::left << std::setw(align_width) << std::setfill(' ');
-
+      char outline[1024];
       std::pair<bool, size_t> res = disasm_instr(
-         program->chip_class, disasm, binary.data(), exec_size, pos, out);
+         program->chip_class, disasm, binary.data(), exec_size, pos, outline, sizeof(outline));
       invalid |= res.first;
 
-      out << std::right;
+      fprintf(output, "%-60s ;", outline);
 
-      out << " ;";
       for (unsigned i = 0; i < res.second; i++)
-         out << " " << std::setfill('0') << std::setw(8) << std::hex << binary[pos + i];
-      out << std::endl;
+         fprintf(output, " %.8x", binary[pos + i]);
+      fputc('\n', output);
 
       prev_size = res.second;
       prev_pos = pos;
       pos += res.second;
    }
-   out << std::setfill(' ') << std::setw(0) << std::dec;
    assert(next_block == program->blocks.size());
 
    LLVMDisasmDispose(disasm);
 
    if (program->constant_data.size()) {
-      out << std::endl << "/* constant data */" << std::endl;
+      fputs("\n/* constant data */\n", output);
       for (unsigned i = 0; i < program->constant_data.size(); i += 32) {
-         out << '[' << std::setw(6) << std::setfill('0') << std::dec << i << ']';
+         fprintf(output, "[%.6u]", i);
          unsigned line_size = std::min<size_t>(program->constant_data.size() - i, 32);
          for (unsigned j = 0; j < line_size; j += 4) {
             unsigned size = std::min<size_t>(program->constant_data.size() - (i + j), 4);
             uint32_t v = 0;
             memcpy(&v, &program->constant_data[i + j], size);
-            out << " " << std::setw(8) << std::setfill('0') << std::hex << v;
+            fprintf(output, " %.8x", v);
          }
-         out << std::endl;
+         fputc('\n', output);
       }
    }
 
-   out << std::setfill(' ') << std::setw(0) << std::dec;
-
    return invalid;
 }
 
diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp
index 295108e..05395cd 100644
--- a/src/amd/compiler/tests/helpers.cpp
+++ b/src/amd/compiler/tests/helpers.cpp
@@ -187,10 +187,7 @@
    } else if (program->chip_class == GFX10 && LLVM_VERSION_MAJOR < 9) {
       skip_test("LLVM 9 needed for GFX10 disassembly");
    } else if (program->chip_class >= GFX8) {
-      std::ostringstream ss;
-      print_asm(program.get(), binary, exec_size / 4u, ss);
-
-      fputs(ss.str().c_str(), output);
+      print_asm(program.get(), binary, exec_size / 4u, output);
    } else {
       //TODO: maybe we should use CLRX and skip this test if it's not available?
       for (uint32_t dword : binary)