Call EmitFunctionHeader just before EmitFunctionBody.

This avoids switching to the .AMDGPU.config section and back, and so
avoids hardcoding the section that it switches back to.

llvm-svn: 232479
diff --git a/llvm/test/CodeGen/R600/hsa.ll b/llvm/test/CodeGen/R600/hsa.ll
index ff75b90..f911339 100644
--- a/llvm/test/CodeGen/R600/hsa.ll
+++ b/llvm/test/CodeGen/R600/hsa.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
 
-; HSA: {{^}}simple:
 ; HSA: .section        .hsa.version
 ; HSA-NEXT: .ascii  "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
+; HSA: {{^}}simple:
 ; Make sure we are setting the ATC bit:
 ; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
 ; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0