Scott Michel | 4d07fb7 | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 1 | //====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====// |
| 2 | // |
| 3 | // Cell SPU 64-bit operations |
| 4 | // |
Scott Michel | 4d07fb7 | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 5 | //===----------------------------------------------------------------------===// |
| 6 | |
Scott Michel | 06eabde | 2008-12-27 04:51:36 +0000 | [diff] [blame] | 7 | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| 8 | // 64-bit comparisons: |
| 9 | // |
| 10 | // 1. The instruction sequences for vector vice scalar differ by a |
Scott Michel | 4d07fb7 | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 11 | // constant. In the scalar case, we're only interested in the |
| 12 | // top two 32-bit slots, whereas we're interested in an exact |
| 13 | // all-four-slot match in the vector case. |
Scott Michel | 06eabde | 2008-12-27 04:51:36 +0000 | [diff] [blame] | 14 | // |
| 15 | // 2. There are no "immediate" forms, since loading 64-bit constants |
| 16 | // could be a constant pool load. |
| 17 | // |
| 18 | // 3. i64 setcc results are i32, which are subsequently converted to a FSM |
| 19 | // mask when used in a select pattern. |
| 20 | // |
Scott Michel | 4d07fb7 | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 21 | // 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO) |
| 22 | // [Note: this may be moot, since gb produces v4i32 or r32.] |
Scott Michel | 06eabde | 2008-12-27 04:51:36 +0000 | [diff] [blame] | 23 | // |
Scott Michel | e0168c1 | 2009-01-05 01:34:35 +0000 | [diff] [blame] | 24 | // 5. The code sequences for r64 and v2i64 are probably overly conservative, |
| 25 | // compared to the code that gcc produces. |
| 26 | // |
// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
Scott Michel | 06eabde | 2008-12-27 04:51:36 +0000 | [diff] [blame] | 28 | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| 29 | |
// selb instruction definition for i64. Note that the selection mask is
// a vector, produced by various forms of FSM. No selection pattern is
// attached here; the Pat definitions below supply the DAG patterns that
// instantiate this instruction.
def SELBr64_cond:
    SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
             [/* no pattern */]>;
Scott Michel | 06eabde | 2008-12-27 04:51:36 +0000 | [diff] [blame] | 35 | |
// The generic i64 select pattern, which assumes that the comparison result
// is in a 32-bit register that contains a select mask pattern (i.e., gather
// bits result).
//
// NOTE(review): the operand names read as swapped relative to ISD::select
// (the second select operand is the "taken when cond is true" value, here
// named $rFalse), but the SELB operand order compensates: FSM of a nonzero
// $rCond produces an all-ones mask, and SELB then selects its second
// register operand ($rFalse) — the DAG's true value. Net behavior is
// correct; only the names are misleading.
def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
          (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
| 42 | |
// select the negative condition: "compare" is the fragment for the POSITIVE
// condition. When the positive comparison is true the FSM mask is all ones
// and SELB takes its second register operand ($rFalse), which is exactly
// what the negated condition requires. No explicit inversion is needed.
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
      (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
Scott Michel | 06eabde | 2008-12-27 04:51:36 +0000 | [diff] [blame] | 47 | |
// setcc the negative condition: compute the positive comparison ("compare"),
// then invert its i32 result with XORI ..., -1.
class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(cond R64C:$rA, R64C:$rB),
      (XORIr32 compare.Fragment, -1)>;
Scott Michel | 06eabde | 2008-12-27 04:51:36 +0000 | [diff] [blame] | 52 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison: CEQ compares all four 32-bit slots of the promoted operands,
// GB gathers the four per-slot results into a 4-bit field, and
// CGTI ..., 0xb is true exactly when that field is 0xc..0xf — i.e. when
// both words holding the i64 (the two preferred slots) compared equal.
def CEQr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
                                           (ORv2i64_i64 R64C:$rB))), 0xb)>;

// The i64 seteq fragment that does the vector comparison: here
// CEQI ..., 0xf requires an exact all-four-slot match.
def CEQv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
Scott Michel | 06eabde | 2008-12-27 04:51:36 +0000 | [diff] [blame] | 63 | |
// i64 seteq (equality): the setcc result is i32, which is converted to a
// vector FSM mask when used in a select pattern.
//
// v2i64 seteq (equality): the setcc result is v4i32
multiclass CompareEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
  def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
}

// Instantiates I64EQr64, I64EQv2i64, I64EQr64mask, I64EQv2i64mask:
defm I64EQ: CompareEqual64;
| 79 | |
// seteq selection, scalar and vector forms:
def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;

// i64 setne: the negation of the seteq fragment.
def : I64SETCCNegCond<setne, I64EQr64>;
def : I64SELECTNegCond<setne, I64EQr64>;
Scott Michel | 06eabde | 2008-12-27 04:51:36 +0000 | [diff] [blame] | 86 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setugt/setule:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Per-slot unsigned greater-than of the two i64 operands promoted to
// vectors via ORv2i64_i64:
def CLGTr64ugt:
    CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

// Per-slot equality of the same promoted operands:
def CLGTr64eq:
    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

// 64-bit ugt composed from 32-bit pieces: hi_ugt | (hi_eq & lo_ugt).
// XSWD sign-extends the low word of each doubleword — since compare results
// are 0/~0 per word, this propagates the low-word ugt mask across the
// doubleword. SELB then takes that low-word result where the equality mask
// is set (high words equal) and the high-word ugt result elsewhere.
def CLGTr64compare:
    CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
                        (XSWDv2i64 CLGTr64ugt.Fragment),
                        CLGTr64eq.Fragment)>;
| 101 | |
// Per-slot unsigned greater-than, vector operands:
def CLGTv2i64ugt:
    CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;

// Per-slot equality, vector operands:
def CLGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

// v2i64 ugt: same SELB/XSWD composition as CLGTr64compare, but over the
// vector fragments.
// FIX: the XSWD operand previously reused CLGTr64ugt.Fragment — the SCALAR
// fragment over R64C registers — so the low-word leg compared the wrong
// operands. It must be the vector fragment CLGTv2i64ugt so the low-word
// ugt result of *these* vector operands is selected where the high words
// compare equal.
def CLGTv2i64compare:
    CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
                        CLGTv2i64eq.Fragment)>;
| 112 | |
// Unsigned greater-than fragments packaged for both scalar and vector use:
multiclass CompareLogicalGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
  def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;
}

// Instantiates I64LGTr64, I64LGTv2i64, I64LGTr64mask, I64LGTv2i64mask:
defm I64LGT: CompareLogicalGreaterThan64;
| 124 | |
// setugt selection, scalar and vector forms:
def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
          I64LGTv2i64.Fragment>;

// i64 setule: expressed as the negation of setugt. (The original comment
// here said "setult"; the patterns below are setule.)
def : I64SETCCNegCond<setule, I64LGTr64>;
def : I64SELECTNegCond<setule, I64LGTr64>;
Scott Michel | 0e2532a | 2009-01-05 04:05:53 +0000 | [diff] [blame] | 132 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setuge/setult:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Per-slot uge (ugt | eq), gathered with GB; CGTI ..., 0xb is true exactly
// when the gathered field is 0xc..0xf, i.e. when BOTH preferred slots (the
// two words of the i64) are uge.
// NOTE(review): requiring uge in both word slots rejects the case where the
// high word is strictly greater but the low word is smaller — suspect the
// correct 64-bit uge is (ugt64 | eq64), along the lines of CLGTr64compare.
// Confirm before relying on this sequence.
def CLGEr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
                                          CLGTr64eq.Fragment)), 0xb)>;

// Vector form: CEQI ..., 0xf requires all four slots uge.
// NOTE(review): same concern as CLGEr64compare, per doubleword.
def CLGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
                                          CLGTv2i64eq.Fragment)), 0xf)>;
| 144 | |
// Unsigned greater-or-equal fragments, scalar and vector:
multiclass CompareLogicalGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
  def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
}

// Instantiates I64LGEr64, I64LGEv2i64, I64LGEr64mask, I64LGEv2i64mask:
defm I64LGE: CompareLogicalGreaterEqual64;
| 156 | |
// setuge selection, scalar and vector forms:
def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
def : Pat<(setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
          I64LGEv2i64.Fragment>;

// i64 setult: expressed as the negation of setuge.
def : I64SETCCNegCond<setult, I64LGEr64>;
def : I64SELECTNegCond<setult, I64LGEr64>;
| 164 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setgt/setle:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Per-slot SIGNED greater-than of the promoted operands:
def CGTr64sgt:
    CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

// Per-slot equality of the promoted operands:
def CGTr64eq:
    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

// 64-bit sgt: hi_sgt | (hi_eq & lo_UGT). The high words compare signed,
// but where the high words are equal the low words carry no sign and must
// compare UNSIGNED.
// FIX: the XSWD operand previously used CGTr64sgt.Fragment, i.e. a signed
// low-word comparison, which mis-compares whenever the low words' top bits
// differ. Use the unsigned fragment CLGTr64ugt (defined above) instead;
// XSWD propagates its low-word mask across each doubleword and SELB picks
// it where the equality mask is set.
def CGTr64compare:
    CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
                        (XSWDv2i64 CLGTr64ugt.Fragment),
                        CGTr64eq.Fragment)>;
| 179 | |
// Per-slot signed greater-than, vector operands:
def CGTv2i64sgt:
    CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;

// Per-slot equality, vector operands:
def CGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

// v2i64 sgt: hi_sgt | (hi_eq & lo_UGT), per doubleword.
// FIX: the XSWD operand previously used CGTr64sgt.Fragment — the SCALAR
// signed fragment over R64C registers — which is doubly wrong for this
// pattern: wrong operands (scalar, not these vector registers) and wrong
// signedness (the low word of a doubleword must compare unsigned). Use the
// vector unsigned fragment CLGTv2i64ugt defined earlier in this file.
def CGTv2i64compare:
    CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
                        CGTv2i64eq.Fragment)>;
| 190 | |
// Signed greater-than fragments, packaged like the other Compare* classes:
multiclass CompareGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
  def v2i64: CodeFrag<CGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
}

// FIX: this previously read "defm I64GT: CompareLogicalGreaterThan64;",
// instantiating the UNSIGNED fragments — so the setgt/setle patterns below
// silently performed unsigned comparisons and the signed multiclass above
// was never used. Instantiate the signed multiclass:
defm I64GT: CompareGreaterThan64;
| 202 | |
// setgt selection, scalar and vector forms:
def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
          I64GTv2i64.Fragment>;

// i64 setle: expressed as the negation of setgt. (The original comment
// here said "setult"; the patterns below are setle.)
def : I64SETCCNegCond<setle, I64GTr64>;
def : I64SELECTNegCond<setle, I64GTr64>;
| 210 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setge/setlt:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Per-slot sge (sgt | eq), gathered with GB; CGTI ..., 0xb requires BOTH
// preferred slots (the two words of the i64) to be sge.
// NOTE(review): two concerns, mirroring CLGEr64compare: (1) requiring sge
// in both slots rejects hi-strictly-greater / lo-smaller cases — suspect
// the correct form is (sgt64 | eq64); (2) the low word of a doubleword
// should compare UNSIGNED, not with signed CGT. Confirm before relying on
// this sequence.
def CGEr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
                                          CGTr64eq.Fragment)), 0xb)>;

// Vector form: CEQI ..., 0xf requires all four slots sge.
// NOTE(review): same concerns as CGEr64compare, per doubleword.
def CGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
                                          CGTv2i64eq.Fragment)), 0xf)>;
| 222 | |
// Signed greater-or-equal fragments, scalar and vector:
multiclass CompareGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
  def v2i64: CodeFrag<CGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;
}

// Instantiates I64GEr64, I64GEv2i64, I64GEr64mask, I64GEv2i64mask:
defm I64GE: CompareGreaterEqual64;
| 234 | |
// setge selection, scalar and vector forms:
def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
          I64GEv2i64.Fragment>;

// i64 setlt: expressed as the negation of setge. (The original comment
// here said "setult"; the patterns below are setlt.)
def : I64SETCCNegCond<setlt, I64GEr64>;
def : I64SELECTNegCond<setlt, I64GEr64>;
Scott Michel | 750b93f | 2009-01-15 04:41:47 +0000 | [diff] [blame] | 242 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 add
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Carry-generate across the v4i32 slots:
class v2i64_add_cg<dag lhs, dag rhs>:
    CodeFrag<(CGv4i32 lhs, rhs)>;

// Add with extended carry: SHUFB rearranges the carry vector per $cg_mask
// (supplied as SPUadd64's third operand — presumably positioning each
// low-word carry against its high word; confirm against the SPUadd64
// lowering), then ADDX folds the carries into the per-slot sum.
class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
    CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;

// Convenience wrapper: carry-generate, then add-extended.
class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
    v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;

// Scalar i64 add: promote the R64C operands to v2i64, add, extract the
// result back to R64C.
def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

// Vector v2i64 add:
def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_add<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
| 266 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 subtraction
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Borrow-generate across the v4i32 slots:
class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;

// Subtract with extended borrow: SHUFB repositions the borrow vector per
// $bg_mask (third operand of SPUsub64, mirroring the add sequence above),
// then SFX completes the per-slot subtraction.
class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
    CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;

// Scalar i64 subtract: promote, subtract with borrow, extract back.
def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
                                              (ORv2i64_i64 R64C:$rB)>.Fragment,
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

// Vector v2i64 subtract:
def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_sub<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    v2i64_sub_bg<(v2i64 VECREG:$rA),
                                 (v2i64 VECREG:$rB)>.Fragment,
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
| 290 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 multiply
//
// Note: i64 multiply is simply the vector->scalar conversion of the
// full-on v2i64 multiply, since the entire vector has to be manipulated
// anyway.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Operand-masking helpers. SELB(x, 0, mask) zeroes the bytes whose FSMBI
// mask bits are set: 0x0f0f zeroes bytes 4-7/12-15 (the low word of each
// doubleword), keeping the high words; 0xf0f0 zeroes the high words,
// keeping the low words.
// NOTE(review): the *lo64/bhi64 classes name their parameter rB even when
// call sites pass rA — harmless, purely a local template-parameter name.
class v2i64_mul_ahi64<dag rA> :
    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_bhi64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_alo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_blo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

// Byte-shift helpers used to align 16-bit halves for the MPY* partial
// products (SHLQBYI shifts the quadword left by 2 resp. 4 bytes):
class v2i64_mul_ashlq2<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;

class v2i64_mul_ashlq4<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;

class v2i64_mul_bshlq2<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;

class v2i64_mul_bshlq4<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
| 322 | |
// Accumulates (with A, vector add) the cross partial products that land in
// the high word of each doubleword of the 64x64 product. The inline
// comments below name the 16-bit half-word operands of the first terms.
// NOTE(review): the term-by-term derivation is not re-verified here;
// confirm against the schoolbook 64x64 decomposition if touching this.
class v2i64_highprod<dag rA, dag rB>:
    CodeFrag<(Av4i32
                (Av4i32
                   (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
                              v2i64_mul_ahi64<rA>.Fragment),
                   (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
                              v2i64_mul_bshlq4<rB>.Fragment)),
                (Av4i32
                   (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
                              v2i64_mul_ashlq4<rA>.Fragment),
                   (Av4i32
                      (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                                 v2i64_mul_bhi64<rB>.Fragment),
                      (Av4i32
                         (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                                    v2i64_mul_bhi64<rB>.Fragment),
                         (Av4i32
                            (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                       v2i64_mul_bshlq2<rB>.Fragment),
                            (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                       v2i64_mul_bshlq2<rB>.Fragment))))))>;
| 344 | |
// Low-order partial products of the 64x64 multiply:

// a3 x b3: unsigned product of the masked low words.
class v2i64_mul_a3_b3<dag rA, dag rB>:
    CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
                        v2i64_mul_blo64<rB>.Fragment)>;

// a2 x b3 cross term: high-half unsigned product, shifted into position
// (SHLQBYI ..., 0x2) and masked with FSMBI 0xc3c3 to clear the bytes that
// do not belong to this term.
class v2i64_mul_a2_b3<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                           (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
                                        v2i64_mul_bshlq2<rB>.Fragment), 0x2),
                        (ILv4i32 0),
                        (FSMBIv4i32 0xc3c3))>;

// a3 x b2 cross term: mirror of a2 x b3 with the operand roles swapped.
class v2i64_mul_a3_b2<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                           (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
                                        v2i64_mul_ashlq2<rA>.Fragment), 0x2),
                        (ILv4i32 0),
                        (FSMBIv4i32 0xc3c3))>;

// Sum of the low-order partial products, carried across doubleword halves
// via the v2i64_add carry sequence (rCGmask positions the carries):
class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
                        v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
              v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
| 367 | |
// Full v2i64 multiply: the low-order sum plus the high-word products,
// the latter masked (SELB ... FSMBI 0x0f0f zeroes the low word of each
// doubleword) so only their high-word contribution is added in.
class v2i64_mul<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
              (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
                         (ILv4i32 0),
                         (FSMBIv4i32 0x0f0f)),
              rCGmask>;

// Scalar i64 multiply: promote to v2i64, multiply, extract back to R64C.
def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

// Vector v2i64 multiply:
def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
Scott Michel | 8c67fa4 | 2009-01-21 04:58:48 +0000 | [diff] [blame] | 384 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f64 comparisons
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// selb instruction definition for f64. (The original comment said "i64"
// and claimed the selection mask is a vector; this def actually takes the
// condition in a 32-bit register, R32C:$rC.) Note the select pattern is
// attached directly here, with the operand-order compensation described
// for the generic i64 select pattern above.
def SELBf64_cond:
    SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
             [(set R64FP:$rT,
                   (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;