R600/SI: Use v_cvt_f32_ubyte* instructions This eliminates extra extract instructions when loading an i8 vector to a float vector. llvm-svn: 210666

commit: 364a6747aa06ead940783e99f09e4759d271bdb3 [log] [tgz]
author: Matt Arsenault <Matthew.Arsenault@amd.com> Wed Jun 11 17:50:44 2014 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> Wed Jun 11 17:50:44 2014 +0000
tree: 63a1323b8c9a0dea49763830e795eaf8d24bc210
parent: 11e0876bb2098ad734e9f5bbae31ce6ebb9f84cf [diff] [blame]
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
new file mode 100644
index 0000000..6facb47
--- /dev/null
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll

@@ -0,0 +1,42 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.cvt.f32.ubyte0(i32) nounwind readnone
+declare float @llvm.AMDGPU.cvt.f32.ubyte1(i32) nounwind readnone
+declare float @llvm.AMDGPU.cvt.f32.ubyte2(i32) nounwind readnone
+declare float @llvm.AMDGPU.cvt.f32.ubyte3(i32) nounwind readnone
+
+; SI-LABEL: @test_unpack_byte0_to_float:
+; SI: V_CVT_F32_UBYTE0
+define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_unpack_byte1_to_float:
+; SI: V_CVT_F32_UBYTE1
+define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_unpack_byte2_to_float:
+; SI: V_CVT_F32_UBYTE2
+define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_unpack_byte3_to_float:
+; SI: V_CVT_F32_UBYTE3
+define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
commit	364a6747aa06ead940783e99f09e4759d271bdb3	[log] [tgz]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	Wed Jun 11 17:50:44 2014 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	Wed Jun 11 17:50:44 2014 +0000
tree	63a1323b8c9a0dea49763830e795eaf8d24bc210
parent	11e0876bb2098ad734e9f5bbae31ce6ebb9f84cf [diff] [blame]