//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the Hexagon Vector instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

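// Short-hand pattern leaves that tie each vector type used below to the
// register class holding it: predicate registers for the i1 vectors, 32-bit
// registers for 4-byte vectors, and 64-bit register pairs for 8-byte vectors.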
def V2I1:  PatLeaf<(v2i1 PredRegs:$R)>;
def V4I1:  PatLeaf<(v4i1 PredRegs:$R)>;
def V8I1:  PatLeaf<(v8i1 PredRegs:$R)>;
def V4I8:  PatLeaf<(v4i8 IntRegs:$R)>;
def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
def V8I8:  PatLeaf<(v8i8 DoubleRegs:$R)>;
def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;


multiclass bitconvert_32<ValueType a, ValueType b> {
  def : Pat <(b (bitconvert (a IntRegs:$src))),
             (b IntRegs:$src)>;
  def : Pat <(a (bitconvert (b IntRegs:$src))),
             (a IntRegs:$src)>;
}

multiclass bitconvert_64<ValueType a, ValueType b> {
  def : Pat <(b (bitconvert (a DoubleRegs:$src))),
             (b DoubleRegs:$src)>;
  def : Pat <(a (bitconvert (b DoubleRegs:$src))),
             (a DoubleRegs:$src)>;
}

// Bit convert vector types.
defm : bitconvert_32<v4i8, i32>;
defm : bitconvert_32<v2i16, i32>;
defm : bitconvert_32<v2i16, v4i8>;

defm : bitconvert_64<v8i8, i64>;
defm : bitconvert_64<v4i16, i64>;
defm : bitconvert_64<v2i32, i64>;
defm : bitconvert_64<v8i8, v4i16>;
defm : bitconvert_64<v8i8, v2i32>;
defm : bitconvert_64<v4i16, v2i32>;
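
// As an illustration, "defm : bitconvert_32<v4i8, i32>" above expands to the
// two anonymous patterns
//   def : Pat<(i32 (bitconvert (v4i8 IntRegs:$src))), (i32 IntRegs:$src)>;
//   def : Pat<(v4i8 (bitconvert (i32 IntRegs:$src))), (v4i8 IntRegs:$src)>;
// i.e. a bitcast between same-sized scalar and vector types is just a reuse
// of the same register and needs no instruction.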

// Vector shift support. Vector shifts in Hexagon are represented quite
// differently from LLVM's internal form. LLVM expects a vector shift to have
// the form
//   <VT> = SHL/SRA/SRL <VT> by <VT>
// whereas the Hexagon instructions use a scalar shift amount:
//   <VT> = SHL/SRA/SRL <VT> by <IT/i32>
// As a result, special care is needed to guarantee correctness and
// performance.
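//
// For example, a uniform arithmetic right shift of a v4i16 value by 3
// typically reaches instruction selection with the scalar amount splatted
// into a vector, roughly
//   (sra (v4i16 DoubleRegs:$Rs), (v4i16 (HexagonVSPLATH 3)))
// and the patterns further below match that splatted form onto the u4
// immediate operand of S2_asr_i_vh.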
class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
  : S_2OpInstImm<Str, MajOp, MinOp, u4Imm,
      [(set (v4i16 DoubleRegs:$dst),
            (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> {
  bits<4> src2;
  let Inst{11-8} = src2;
}

class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
  : S_2OpInstImm<Str, MajOp, MinOp, u5Imm,
      [(set (v2i32 DoubleRegs:$dst),
            (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> {
  bits<5> src2;
  let Inst{12-8} = src2;
}

def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
          (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;

def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
          (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;

def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;

def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;


def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;

// Replicate the low 8 bits of a 32-bit input register into each of the four
// bytes of a 32-bit destination register.
def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;

// Replicate the low 16 bits of a 32-bit input register into each of the four
// halfwords of a 64-bit destination register.
def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
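
// For instance (illustrative values): with Rs = 0x12345678, S2_vsplatrb
// produces 0x78787878, and S2_vsplatrh produces 0x5678567856785678 in the
// 64-bit destination.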


class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
  : Pat <(Op Type:$Rss, Type:$Rtt),
         (MI Type:$Rss, Type:$Rtt)>;

def: VArith_pat <A2_vaddub, add, V8I8>;
def: VArith_pat <A2_vaddh,  add, V4I16>;
def: VArith_pat <A2_vaddw,  add, V2I32>;
def: VArith_pat <A2_vsubub, sub, V8I8>;
def: VArith_pat <A2_vsubh,  sub, V4I16>;
def: VArith_pat <A2_vsubw,  sub, V2I32>;

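// Bitwise operations act on each lane independently, so the ordinary 32-bit
// and 64-bit scalar logical instructions implement the vector and/or/xor
// forms directly.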
def: VArith_pat <A2_and,  and, V2I16>;
def: VArith_pat <A2_xor,  xor, V2I16>;
def: VArith_pat <A2_or,   or,  V2I16>;

def: VArith_pat <A2_andp, and, V8I8>;
def: VArith_pat <A2_andp, and, V4I16>;
def: VArith_pat <A2_andp, and, V2I32>;
def: VArith_pat <A2_orp,  or,  V8I8>;
def: VArith_pat <A2_orp,  or,  V4I16>;
def: VArith_pat <A2_orp,  or,  V2I32>;
def: VArith_pat <A2_xorp, xor, V8I8>;
def: VArith_pat <A2_xorp, xor, V4I16>;
def: VArith_pat <A2_xorp, xor, V2I32>;

def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
                                                    (i32 u5ImmPred:$c))))),
         (S2_asr_i_vw V2I32:$b, imm:$c)>;
def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
                                                    (i32 u5ImmPred:$c))))),
         (S2_lsr_i_vw V2I32:$b, imm:$c)>;
def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
                                                    (i32 u5ImmPred:$c))))),
         (S2_asl_i_vw V2I32:$b, imm:$c)>;

def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
         (S2_asr_i_vh V4I16:$b, imm:$c)>;
def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
         (S2_lsr_i_vh V4I16:$b, imm:$c)>;
def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
         (S2_asl_i_vh V4I16:$b, imm:$c)>;


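// Hexagon-specific shift nodes: the shift amount is a single scalar (i32)
// operand rather than a vector, matching the instruction forms above.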
def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;

def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;

def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
         (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
         (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
         (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
         (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
         (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
         (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;

// Vector shift words by register
def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>;
def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>;

// Vector shift halfwords by register
def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;

class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
  : Pat <(Op Value:$Rs, I32:$Rt),
         (MI Value:$Rs, I32:$Rt)>;

def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;


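// Hexagon-specific vector compare nodes. Each one compares two 64-bit vectors
// and, per the type profiles that follow, produces a single i1 result.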
def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;

def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;


class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
  : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
         (MI Value:$Rs, Value:$Rt)>;

def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;

def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;

def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;


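// The same compare instructions can also produce a per-lane predicate result
// (v2i1/v4i1) directly from generic IR setcc nodes.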
class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
  : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
         (MI InVal:$Rs, InVal:$Rt)>;

def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;

def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;


// Hexagon does not have a vector multiply with C (wrap-around) semantics.
// Instead, emit a pseudo instruction that is expanded by the
// ExpandPostRAPseudos pass into two scalar MPYI instructions.
let isPseudo = 1 in
def VMULW : PseudoM<(outs DoubleRegs:$Rd),
      (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
      ".error \"Should never try to emit VMULW\"",
      [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;

let isPseudo = 1 in
def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
      (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
      ".error \"Should never try to emit VMULW_ACC\"",
      [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
      "$Rd = $Rx">;
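
// Illustrative sketch of the post-RA expansion mentioned above: with .lo/.hi
// denoting the two 32-bit halves of a register pair,
//   $Rd = VMULW $Rs, $Rt
// becomes two scalar multiplies, one per lane, roughly
//   Rd.lo = mpyi(Rs.lo, Rt.lo)
//   Rd.hi = mpyi(Rs.hi, Rt.hi)
// and VMULW_ACC is handled analogously, adding the accumulator per lane.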

// Add two v4i8 vectors: Hexagon has no instruction for this, so use the
// 64-bit v8i8 add and keep only the low half of the result.
def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
         (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;

// Subtract two v4i8 vectors: likewise, use the 64-bit v8i8 subtract and keep
// only the low half of the result.
def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
         (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
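
// Worked example for one byte lane: 0xff + 0x02 wraps to 0x01 in A2_vaddub,
// matching i8 addition. Byte lanes do not carry into each other, and the
// zero-extended upper four bytes just compute 0 +/- 0 before being dropped
// by LoReg.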

//
// No 32-bit vector mux.
//
def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;

//
// 64-bit vector mux.
//
def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
         (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
         (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
         (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;

//
// No 32-bit vector compare.
//
def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
         (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
         (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
         (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;

def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
         (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
         (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
         (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;


class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
                    ValueType CmpTy>
  : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
        (InvMI Value:$Rt, Value:$Rs)>;

// Map a compare operation onto the converse comparison instruction with the
// operands swapped, e.g. x < y --> cmp.gt(y,x).
def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;

def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;

// Map from vcmpne(Rss) -> !vcmpw.eq(Rss):
// rs != rt -> !(rs == rt).
def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;


// Truncate: from vector B copy all 'E'ven 'B'yte elements:
// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
def: Pat<(v4i8 (trunc V4I16:$Rs)),
         (S2_vtrunehb V4I16:$Rs)>;

// Truncate: from vector B copy all 'O'dd 'B'yte elements:
// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
// S2_vtrunohb

// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
// S2_vtrunewh

def: Pat<(v2i16 (trunc V2I32:$Rs)),
         (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;


def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;

def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;

def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;

// Sign extends a v2i8 into a v2i32.
def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
         (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;

// Sign extends a v2i16 into a v2i32.
def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
         (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;


// Multiplying two v2i16 values produces a v2i32. We use the saturating
// multiply here because Hexagon does not provide a non-saturating vector
// multiply, and saturation cannot affect a result held in double the
// precision of the operands.

// Multiply two v2i16 vectors: Hexagon has no multiply with C (wrap-around)
// semantics for this type, so the pattern uses the halfword multiply vmpyh,
// which takes two v2i16 values and returns a v2i32. The result is then
// truncated back to v2i16, which reproduces the wrap-around semantics of
// unsigned multiplication in C.
def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
                      (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;

def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
         (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
                             (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
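
// Worked example for one halfword lane: in C, (uint16_t)0xffff * 2 wraps to
// 0xfffe. vmpyh computes the full signed 32-bit product (-1) * 2 =
// 0xfffffffe, and keeping only the even (low) halfword of that word via
// S2_vtrunewh yields the same 0xfffe, so the wrap-around result is preserved.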

// Multiplies two v4i16 vectors.
def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
         (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
                      (vmpyh (LoReg $Rs), (LoReg $Rt)))>;

def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
  (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
               (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;

// Multiplies two v4i8 vectors.
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
         (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
     Requires<[HasV5T]>;

def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
         (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;

// Multiplies two v8i8 vectors.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
         (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
                      (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
     Requires<[HasV5T]>;

def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
         (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
                      (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;


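// HexagonISD shuffle nodes operate on 64-bit register pairs; they are matched
// onto the S2_shuff* instructions via the ShufflePat class below.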
class shuffler<SDNode Op, string Str>
  : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
          "$a = " # Str # "($b, $c)",
          [(set (i64 DoubleRegs:$a),
                (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
          "", S_3op_tc_1_SLOT23>;

def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;

def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;

class ShufflePat<InstHexagon MI, SDNode Op>
  : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
        (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;

// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
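
// Following the mapping above, with illustrative inputs
// $b = 0x0706050403020100 and $c = 0x1716151413121110, shuffeb($b, $c) yields
// 0x0616041402120010: even result bytes come from the even bytes of $c, odd
// result bytes from the even bytes of $b.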

// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;

// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;

// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;


// Truncated store from v4i16 to v4i8.
def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
                            (truncstore node:$val, node:$ptr),
    [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;

// Truncated store from v2i32 to v2i16.
def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr),
    [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;

def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
         (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
                                                      (LoReg $Rs))))>;

def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
         (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;


// Zero- and sign-extending loads from v2i8 into v2i16 and v2i32.
def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
    [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;

def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
    [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;

def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
         (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;

def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
         (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;

def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
         (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;

def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
         (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;