R600/SI: Use v_cvt_f32_ubyte* instructions
This eliminates extra extract instructions when loading an i8 vector to
a float vector.
llvm-svn: 210666
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 7d37d10..9af1a70 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -978,10 +978,18 @@
 defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32",
   [(set f64:$dst, (fextend f32:$src0))]
 >;
-//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
-//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
-//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
-//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
+defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0",
+  [(set f32:$dst, (AMDGPUcvt_f32_ubyte0 i32:$src0))]
+>;
+defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1",
+  [(set f32:$dst, (AMDGPUcvt_f32_ubyte1 i32:$src0))]
+>;
+defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2",
+  [(set f32:$dst, (AMDGPUcvt_f32_ubyte2 i32:$src0))]
+>;
+defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3",
+  [(set f32:$dst, (AMDGPUcvt_f32_ubyte3 i32:$src0))]
+>;
 defm V_CVT_U32_F64 : VOP1_32_64 <0x00000015, "V_CVT_U32_F64",
   [(set i32:$dst, (fp_to_uint f64:$src0))]
 >;