//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
//
// Cell SPU math operations
//
// This target description file contains instruction sequences for various
// math operations, such as vector multiplies, i32 multiply, etc., for the
// SPU's i32, i16, i8 and corresponding vector types.
//
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
// purely and completely coincidental.
//===----------------------------------------------------------------------===//
| 12 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v16i8 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
| 16 | |
// Selection pattern: expand a v16i8 `mul` into halfword multiplies.
//
// Shape of the expansion, as written below:
//   * First OR operand: a SELBv4i32 that merges MPYv8i16($rA, $rB) with the
//     product of both operands after ROTMAHIv8i16 by 8 (that product is then
//     moved back up with SHLHIv8i16 by 8), under the byte mask FSMBIv8i16
//     0x2222. The merged value is ANDed with ILAv4i32 0x0000ffff.
//   * Second OR operand: the same SELB shape, but fed with operands that were
//     first run through ROTMAIv4i32_i32 by 16 and by 8; the merged value is
//     then shifted left by 16 with SHLIv4i32.
//   * ORv4i32 combines the two operands into the final vector.
//
// NOTE(review): presumably the first operand yields the two low-order byte
// products of every word and the second the two high-order byte products;
// the instruction semantics are defined outside this file -- confirm against
// the SPU instruction definitions before changing any shift count or mask.
def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
          (ORv4i32
            (ANDv4i32
              (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
                         (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
                                               (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
                         (FSMBIv8i16 0x2222)),
              (ILAv4i32 0x0000ffff)),
            (SHLIv4i32
              (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
                                   (ROTMAIv4i32_i32 VECREG:$rB, 16)),
                         (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
                                               (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
                         (FSMBIv8i16 0x2222)), 16))>;
| 31 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v8i16 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
| 35 | |
// Selection pattern: expand a v8i16 `mul`.
//
// SELBv8i16 merges two products under the byte mask FSMBIv8i16 0xcccc:
//   * MPYv8i16($rA, $rB), and
//   * MPYHHv8i16($rA, $rB) shifted left by 16 with SHLIv4i32.
//
// NOTE(review): presumably MPY supplies the product of the low halfword of
// each word and MPYHH the product of the high halfword, with the mask picking
// the upper two bytes of every word from the shifted operand -- confirm
// against the SPU instruction definitions.
def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
          (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
                     (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
                     (FSMBIv8i16 0xcccc))>;
| 40 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v4i32, v2i32, i32 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
| 44 | |
// Selection pattern: expand a v4i32 `mul` into three partial products that
// are summed with Av4i32:
//     MPYHv4i32($rA, $rB) + MPYHv4i32($rB, $rA) + MPYUv4i32($rA, $rB)
// MPYH is issued twice with the operands swapped. Each intermediate node is
// wrapped in an explicit (v4i32 ...) result-type annotation.
//
// NOTE(review): presumably the two MPYH terms are the high x low cross
// products and MPYU the unsigned low x low product, so the sum is the low
// 32 bits of the full product -- confirm against the SPU instruction
// definitions.
def MPYv4i32:
  Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
      (Av4i32
        (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
                       (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
        (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
Scott Michel | 02d711b | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 51 | |
// Selection pattern: expand a v2i32 `mul`. Same three-term shape as the
// v4i32 and i32 multiply patterns in this file, using the v2i32 variants:
//     MPYHv2i32($rA, $rB) + MPYHv2i32($rB, $rA) + MPYUv2i32($rA, $rB)
// summed with Av2i32. Each intermediate node carries an explicit
// (v2i32 ...) result-type annotation.
//
// NOTE(review): presumably the MPYH terms are the high x low cross products
// and MPYU the unsigned low x low product -- confirm against the SPU
// instruction definitions.
def MPYv2i32:
  Pat<(mul (v2i32 VECREG:$rA), (v2i32 VECREG:$rB)),
      (Av2i32
        (v2i32 (Av2i32 (v2i32 (MPYHv2i32 VECREG:$rA, VECREG:$rB)),
                       (v2i32 (MPYHv2i32 VECREG:$rB, VECREG:$rA)))),
        (v2i32 (MPYUv2i32 VECREG:$rA, VECREG:$rB)))>;
| 58 | |
| 59 | |
// Selection pattern: expand a scalar i32 `mul` on R32C registers into three
// partial products summed with Ar32:
//     MPYHr32($rA, $rB) + MPYHr32($rB, $rA) + MPYUr32($rA, $rB)
// MPYH is issued twice with the operands swapped.
//
// NOTE(review): presumably the MPYH terms are the high x low cross products
// and MPYU the unsigned low x low product, giving the low 32 bits of the
// full product -- confirm against the SPU instruction definitions.
def MPYi32:
  Pat<(mul R32C:$rA, R32C:$rB),
      (Ar32
        (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
              (MPYHr32 R32C:$rB, R32C:$rA)),
        (MPYUr32 R32C:$rA, R32C:$rB))>;
| 66 | |
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f32, v4f32 divide instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
| 70 | |
// Reciprocal estimate and interpolation.
// Passes the divisor $rB together with its FRESTf32 result to FIf32. The
// fragment is spliced into DivEstf32 and NRaphf32, so the operand name $rB
// must match the divisor name used by the pattern that consumes it.
def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
// Division estimate.
// FMf32 of the dividend $rA and the Interpf32 fragment (the interpolated
// reciprocal estimate of $rB); used as the starting quotient by NRaphf32.
def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
// Newton-Raphson iteration.
// FMAf32 takes three inputs: FNMSf32(DivEstf32, $rB, $rA), the Interpf32
// fragment, and the DivEstf32 fragment.
//
// NOTE(review): presumably FNMS(x, y, z) evaluates z - x*y, so the inner
// term is the residual $rA - estimate*$rB, and FMA adds residual*reciprocal
// to the estimate -- confirm operand order against the SPU instruction
// definitions.
def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
                               Interpf32.Fragment,
                               DivEstf32.Fragment)>;
// Epsilon addition.
// Applies AIf32 with immediate 1 to the NRaphf32 result, producing the
// alternative quotient that the f32 fdiv pattern may select.
//
// NOTE(review): presumably an integer add on the float's bit pattern, i.e. a
// one-ulp bump -- confirm against the AIf32 definition.
def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
| 81 | |
// Selection pattern: expand scalar f32 `fdiv` ($rA / $rB).
//
// SELBf32_cond chooses between the NRaphf32 result and the Epsilonf32 result.
// The condition is CGTIf32 of FNMSf32($rB, Epsilonf32, $rA) against -1.
// The fragments refer to the operands by name, so the source pattern must
// bind them as $rA (dividend) and $rB (divisor).
//
// NOTE(review): presumably the FNMS term is the residual $rA - $rB*bumped
// quotient, and the "> -1" integer compare tests that its sign bit is clear,
// in which case the bumped quotient is taken -- confirm against the SPU
// instruction definitions.
def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
          (SELBf32_cond NRaphf32.Fragment,
                        Epsilonf32.Fragment,
                        (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
Scott Michel | 02d711b | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 86 | |
// Reciprocal estimate and interpolation (v4f32).
// Passes the divisor vector $rB together with its FRESTv4f32 result to
// FIv4f32. The fragment is spliced into DivEstv4f32 and NRaphv4f32, so the
// operand name $rB must match the divisor name used by the consuming pattern.
def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
// Division estimate (v4f32).
// FMv4f32 of the dividend vector $rA and the Interpv4f32 fragment; used as
// the starting quotient by NRaphv4f32.
def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
// Newton-Raphson iteration (v4f32).
// FMAv4f32 takes three inputs: FNMSv4f32(DivEstv4f32, $rB, $rA), the
// Interpv4f32 fragment, and the DivEstv4f32 fragment.
//
// NOTE(review): presumably FNMS(x, y, z) evaluates z - x*y, so the inner
// term is the residual $rA - estimate*$rB, and FMA adds residual*reciprocal
// to the estimate -- confirm operand order against the SPU instruction
// definitions.
def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
                                              (v4f32 VECREG:$rB),
                                              (v4f32 VECREG:$rA)),
                                   Interpv4f32.Fragment,
                                   DivEstv4f32.Fragment)>;
// Epsilon addition (v4f32).
// Applies AIv4f32 with immediate 1 to the NRaphv4f32 result, producing the
// alternative quotient that the v4f32 fdiv pattern may select.
//
// NOTE(review): presumably an integer add on each lane's bit pattern, i.e. a
// one-ulp bump -- confirm against the AIv4f32 definition.
def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
| 99 | |
// Selection pattern: expand vector v4f32 `fdiv` ($rA / $rB).
//
// SELBv4f32_cond chooses between the NRaphv4f32 result and the Epsilonv4f32
// result. The condition is CGTIv4f32 of FNMSv4f32($rB, Epsilonv4f32, $rA)
// against -1. The fragments refer to the operands by name, so the source
// pattern must bind them as $rA (dividend) and $rB (divisor).
//
// NOTE(review): presumably the FNMS term is the per-lane residual
// $rA - $rB*bumped quotient, and the "> -1" integer compare tests that its
// sign bit is clear -- confirm against the SPU instruction definitions.
def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
          (SELBv4f32_cond NRaphv4f32.Fragment,
                          Epsilonv4f32.Fragment,
                          (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
                                                Epsilonv4f32.Fragment,
                                                (v4f32 VECREG:$rA)), -1))>;