//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
//
// Cell SPU math operations
//
// This target description file contains instruction sequences for various
// math operations, such as vector multiplies, i32 multiply, etc., for the
// SPU's i32, i16, i8 and corresponding vector types.
//
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
// purely and completely coincidental.
//
// Primary author: Scott Michel (scottm@aero.org)
//===----------------------------------------------------------------------===//

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v16i8 multiply instruction sequence:
//
// Expands (mul v16i8, v16i8) into a tree of MPYv8i16 multiplies whose
// results are merged back into one vector with ORv4i32:
//
//   - First OR operand: MPYv8i16 of the unmodified operands is merged
//     (SELBv4i32, mask FSMBIv8i16 0x2222) with MPYv8i16 of both operands
//     after ROTMAHIv8i16 by 8, itself passed through SHLHIv8i16 by 8.
//     The merged value is then ANDed with ILAv4i32 0x0000ffff.
//   - Second OR operand: the same kind of merge, but built from the
//     operands after ROTMAIv4i32_i32 by 16 and by 8, with the merged value
//     finally passed through SHLIv4i32 by 16.
//
// NOTE(review): the shift/rotate directions and the lanes selected by the
// 0x2222 mask come from the SPU instruction definitions, which are not
// part of this file - confirm against them before touching any immediate.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
          (ORv4i32
           (ANDv4i32
            (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
                       (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
                                             (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
                       (FSMBIv8i16 0x2222)),
            (ILAv4i32 0x0000ffff)),
           (SHLIv4i32
            (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
                                 (ROTMAIv4i32_i32 VECREG:$rB, 16)),
                       (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
                                             (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
                       (FSMBIv8i16 0x2222)), 16))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v8i16 multiply instruction sequence:
//
// Expands (mul v8i16, v8i16) into a SELBv8i16 between two products:
//   - MPYv8i16 rA, rB
//   - MPYHHv8i16 rA, rB, passed through SHLIv4i32 by 16
// using the mask generated by FSMBIv8i16 0xcccc.
//
// NOTE(review): presumably one product supplies one halfword of every
// 32-bit word and the shifted MPYHH product supplies the other; which is
// which depends on the SELB/FSMBI definitions - confirm.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
          (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
                     (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
                     (FSMBIv8i16 0xcccc))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v4i32, i32 multiply instruction sequence:
//
// A 32-bit multiply is selected as the sum (two Av4i32 adds) of three
// partial products:
//
//   MPYHv4i32(rA, rB) + MPYHv4i32(rB, rA) + MPYUv4i32(rA, rB)
//
// NOTE(review): presumably the two MPYH terms are the high-half x low-half
// cross products and MPYU is the unsigned low-half x low-half product -
// confirm against the SPU instruction definitions.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def MPYv4i32:
  Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
      (Av4i32
        (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
                (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
        (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;

// Scalar i32 multiply on R32C registers. It has the same three-term shape
// as the vector pattern MPYv4i32, using the r32 instruction forms:
//   MPYHr32(rA, rB) + MPYHr32(rB, rA) + MPYUr32(rA, rB)
def MPYi32:
  Pat<(mul R32C:$rA, R32C:$rB),
      (Ar32
        (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
              (MPYHr32 R32C:$rB, R32C:$rA)),
        (MPYUr32 R32C:$rA, R32C:$rB))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f32, v4f32 divide instruction sequence:
//
// The divide is built from reusable CodeFrag pieces. Each fragment refers
// to the operands $rA (dividend) and $rB (divisor) by name; they are bound
// by the fdiv Pat that splices the fragments in.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Reciprocal estimate and interpolation: FRESTf32 of the divisor $rB, fed
// together with $rB itself into FIf32.
def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
// Division estimate: the dividend $rA multiplied (FMf32) by the
// interpolated reciprocal of $rB from Interpf32.
def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
// Newton-Raphson iteration: refines DivEstf32 using the value produced by
// FNMSf32(DivEstf32, $rB, $rA), scaled by Interpf32 and accumulated onto
// DivEstf32 through FMAf32.
// NOTE(review): presumably FNMS(a, b, c) = c - a*b (the residual
// $rA - estimate*$rB) and FMA(a, b, c) = a*b + c - confirm operand order
// against the instruction definitions.
def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
                               Interpf32.Fragment,
                               DivEstf32.Fragment)>;
// Epsilon addition: the Newton-Raphson result with the immediate 1 added
// by AIf32.
// NOTE(review): presumably an integer add on the float's bit pattern, i.e.
// a one-ULP bump of the quotient - confirm.
def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;

// f32 divide: chooses (SELBf32_cond) between the Newton-Raphson quotient
// NRaphf32 and its bumped variant Epsilonf32. The selector is
// CGTIf32(FNMSf32($rB, Epsilonf32, $rA), -1).
// NOTE(review): presumably the bumped quotient is taken when the residual
// $rA - $rB*Epsilonf32 is still non-negative (compare > -1 on its bits) -
// confirm the SELB operand order before relying on this.
def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
          (SELBf32_cond NRaphf32.Fragment,
                        Epsilonf32.Fragment,
                        (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;

// Reciprocal estimate and interpolation (v4f32): FRESTv4f32 of the divisor
// $rB, fed together with $rB itself into FIv4f32. $rB is bound by the
// pattern that uses this fragment.
def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
// Division estimate (v4f32): the dividend $rA multiplied (FMv4f32) by the
// interpolated reciprocal of $rB from Interpv4f32.
def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
// Newton-Raphson iteration (v4f32): refines DivEstv4f32 using the value
// produced by FNMSv4f32(DivEstv4f32, $rB, $rA), scaled by Interpv4f32 and
// accumulated onto DivEstv4f32 through FMAv4f32.
// NOTE(review): presumably FNMS(a, b, c) = c - a*b and
// FMA(a, b, c) = a*b + c - confirm operand order against the instruction
// definitions.
def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
                                              (v4f32 VECREG:$rB),
                                              (v4f32 VECREG:$rA)),
                                   Interpv4f32.Fragment,
                                   DivEstv4f32.Fragment)>;
// Epsilon addition (v4f32): the Newton-Raphson result with the immediate 1
// added by AIv4f32.
// NOTE(review): presumably an integer add on each element's bit pattern,
// i.e. a one-ULP bump of the quotient - confirm.
def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;

// v4f32 divide: chooses (SELBv4f32_cond) between the Newton-Raphson
// quotient NRaphv4f32 and its bumped variant Epsilonv4f32. The selector is
// CGTIv4f32(FNMSv4f32($rB, Epsilonv4f32, $rA), -1).
// NOTE(review): presumably the bumped quotient is taken in lanes where the
// residual $rA - $rB*Epsilonv4f32 is still non-negative - confirm the SELB
// operand order before relying on this.
def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
          (SELBv4f32_cond NRaphv4f32.Fragment,
                          Epsilonv4f32.Fragment,
                          (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
                                                Epsilonv4f32.Fragment,
                                                (v4f32 VECREG:$rA)), -1))>;