R600/SI: Fix trailing whitespace emitted after s_waitcnt operands

llvm-svn: 218486
diff --git a/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index fa7e0de..6d893b2 100644
--- a/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -493,12 +493,26 @@
   unsigned Vmcnt = SImm16 & 0xF;
   unsigned Expcnt = (SImm16 >> 4) & 0xF;
   unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
-  if (Vmcnt != 0xF)
-    O << "vmcnt(" << Vmcnt << ") ";
-  if (Expcnt != 0x7)
-    O << "expcnt(" << Expcnt << ") ";
-  if (Lgkmcnt != 0x7)
+
+  bool NeedSpace = false;
+
+  if (Vmcnt != 0xF) {
+    O << "vmcnt(" << Vmcnt << ')';
+    NeedSpace = true;
+  }
+
+  if (Expcnt != 0x7) {
+    if (NeedSpace)
+      O << ' ';
+    O << "expcnt(" << Expcnt << ')';
+    NeedSpace = true;
+  }
+
+  if (Lgkmcnt != 0x7) {
+    if (NeedSpace)
+      O << ' ';
     O << "lgkmcnt(" << Lgkmcnt << ')';
+  }
 }
 
 #include "AMDGPUGenAsmWriter.inc"
diff --git a/llvm/test/CodeGen/R600/wait.ll b/llvm/test/CodeGen/R600/wait.ll
index 17e0b82..b0b7e91 100644
--- a/llvm/test/CodeGen/R600/wait.ll
+++ b/llvm/test/CodeGen/R600/wait.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s
 
 ; CHECK-LABEL: @main
 ; CHECK: S_LOAD_DWORDX4
 ; CHECK: S_LOAD_DWORDX4
-; CHECK: S_WAITCNT lgkmcnt(0)
-; CHECK: S_WAITCNT vmcnt(0)
-; CHECK: S_WAITCNT expcnt(0) lgkmcnt(0)
+; CHECK: S_WAITCNT lgkmcnt(0){{$}}
+; CHECK: S_WAITCNT vmcnt(0){{$}}
+; CHECK: S_WAITCNT expcnt(0) lgkmcnt(0){{$}}
 define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
 main_body:
   %tmp = getelementptr <16 x i8> addrspace(2)* %arg3, i32 0