Call EmitFunctionHeader just before EmitFunctionBody.

This avoids switching to the .AMDGPU.config section and back, and
avoids hardcoding the section it switches back to.

llvm-svn: 232479
diff --git a/llvm/test/CodeGen/R600/lds-size.ll b/llvm/test/CodeGen/R600/lds-size.ll
index 5287723..3e83286 100644
--- a/llvm/test/CodeGen/R600/lds-size.ll
+++ b/llvm/test/CodeGen/R600/lds-size.ll
@@ -3,9 +3,9 @@
 ; This test makes sure we do not double count global values when they are
 ; used in different basic blocks.
 
-; CHECK-LABEL: {{^}}test:
 ; CHECK: .long   166120
 ; CHECK-NEXT: .long   1
+; CHECK-LABEL: {{^}}test:
 @lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
 
 define void @test(i32 addrspace(1)* %out, i32 %cond) {