Call EmitFunctionHeader just before EmitFunctionBody.

This avoids switching to the .AMDGPU.config section and back, and
avoids hardcoding the section to switch back to.

llvm-svn: 232479
diff --git a/llvm/test/CodeGen/R600/elf.ll b/llvm/test/CodeGen/R600/elf.ll
index f801b3f..127da2c 100644
--- a/llvm/test/CodeGen/R600/elf.ll
+++ b/llvm/test/CodeGen/R600/elf.ll
@@ -13,12 +13,12 @@
 ; ELF: Name: test
 ; ELF: Binding: Global
 
-; CONFIG: .align 256
-; CONFIG: test:
 ; CONFIG: .section .AMDGPU.config
 ; CONFIG-NEXT: .long   45096
 ; TYPICAL-NEXT: .long   0
 ; TONGA-NEXT: .long   576
+; CONFIG: .align 256
+; CONFIG: test:
 define void @test(i32 %p) #0 {
    %i = add i32 %p, 2
    %r = bitcast i32 %i to float