Jia Liu | 31d157a | 2012-02-18 12:03:15 +0000 | [diff] [blame] | 1 | //===-- SPUMathInst.td - Cell SPU math operations ---------*- tablegen -*--===// |
Scott Michel | 02d711b | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 2 | // |
| 3 | // Cell SPU math operations |
| 4 | // |
| 5 | // This target description file contains instruction sequences for various |
| 6 | // math operations, such as vector multiplies, i32 multiply, etc., for the |
| 7 | // SPU's i32, i16, i8 and corresponding vector types. |
| 8 | // |
| 9 | // Any resemblance to libsimdmath or the Cell SDK simdmath library is |
| 10 | // purely and completely coincidental. |
Scott Michel | 02d711b | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| 14 | // v16i8 multiply instruction sequence: |
| 15 | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| 16 | |
// Selection pattern for `mul` on two v16i8 VECREG operands ($rA, $rB).
// The result is assembled from four MPYv8i16 products in two parallel halves
// that are joined by the outer ORv4i32:
//   * First half: SELBv4i32 merges MPYv8i16($rA, $rB) with the product of both
//     operands after ROTMAHIv8i16 by 8 (that product is then passed through
//     SHLHIv8i16 by 8). The merged value is ANDed with the constant
//     0x0000ffff loaded by ILAv4i32.
//   * Second half: the same two-product merge, except the operands are first
//     passed through ROTMAIv4i32_i32 (by 16 for the first product, by 8 for
//     the second). The merged value is then shifted left by 16 with SHLIv4i32.
// Both merges use FSMBIv8i16 0x2222 as the SELB selector operand.
// NOTE(review): presumably each half produces the byte products for one
// halfword of every 32-bit word and 0x2222 picks alternating bytes -- confirm
// against the SPU ISA definitions of mpy, rotmahi, rotmai, shlhi and fsmbi.
| 17 | def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)), |
| 18 | (ORv4i32 |
| 19 | (ANDv4i32 |
| 20 | (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB), |
| 21 | (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8), |
| 22 | (ROTMAHIv8i16 VECREG:$rB, 8)), 8), |
| 23 | (FSMBIv8i16 0x2222)), |
| 24 | (ILAv4i32 0x0000ffff)), |
| 25 | (SHLIv4i32 |
| 26 | (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16), |
| 27 | (ROTMAIv4i32_i32 VECREG:$rB, 16)), |
| 28 | (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8), |
| 29 | (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8), |
| 30 | (FSMBIv8i16 0x2222)), 16))>; |
| 31 | |
| 32 | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| 33 | // v8i16 multiply instruction sequence: |
| 34 | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| 35 | |
// Selection pattern for `mul` on two v8i16 VECREG operands ($rA, $rB).
// SELBv8i16 merges two values under the selector FSMBIv8i16 0xcccc:
//   * MPYv8i16($rA, $rB)
//   * MPYHHv8i16($rA, $rB) shifted left by 16 with SHLIv4i32
// NOTE(review): presumably MPY multiplies the low halfword of each word and
// MPYHH the high halfword, so the shift moves the high-halfword product back
// into the upper 16 bits before the merge -- confirm against the SPU ISA.
| 36 | def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), |
| 37 | (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB), |
| 38 | (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16), |
| 39 | (FSMBIv8i16 0xcccc))>; |
| 40 | |
| 41 | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
Kalle Raiskila | 86a7912 | 2010-08-18 10:04:39 +0000 | [diff] [blame] | 42 | // v4i32, i32 multiply instruction sequence: |
Scott Michel | 02d711b | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 43 | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| 44 | |
// Selection pattern for `mul` on two v4i32 VECREG operands ($rA, $rB).
// The product is the sum (two nested Av4i32 adds) of three partial products:
//   MPYHv4i32($rA, $rB) + MPYHv4i32($rB, $rA) + MPYUv4i32($rA, $rB)
// MPYH is applied once in each operand order. Every sub-result is explicitly
// typed as v4i32.
// NOTE(review): presumably MPYH yields (high half of first operand * low half
// of second) << 16 and MPYU the unsigned low-half * low-half product, which
// together give the low 32 bits of the full product -- confirm against the
// SPU ISA.
| 45 | def MPYv4i32: |
| 46 | Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), |
| 47 | (Av4i32 |
Chris Lattner | dd6fbd1 | 2010-03-08 18:59:49 +0000 | [diff] [blame] | 48 | (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)), |
| 49 | (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))), |
| 50 | (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>; |
Scott Michel | 02d711b | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 51 | |
// Selection pattern for scalar `mul` on two R32C operands ($rA, $rB).
// Same three-term shape as the v4i32 pattern, using the r32 instruction
// variants:
//   MPYHr32($rA, $rB) + MPYHr32($rB, $rA) + MPYUr32($rA, $rB)
// with the additions performed by two nested Ar32 nodes.
// NOTE(review): instruction semantics assumed as for the vector form
// (MPYH = high*low << 16, MPYU = unsigned low*low) -- confirm against the
// SPU ISA.
| 52 | def MPYi32: |
| 53 | Pat<(mul R32C:$rA, R32C:$rB), |
| 54 | (Ar32 |
| 55 | (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), |
| 56 | (MPYHr32 R32C:$rB, R32C:$rA)), |
| 57 | (MPYUr32 R32C:$rA, R32C:$rB))>; |
| 58 | |
| 59 | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| 60 | // f32, v4f32 divide instruction sequence: |
| 61 | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| 62 | |
// Reusable DAG fragments for scalar f32 division. Each fragment refers to the
// operands by the fixed names $rA and $rB (both R32FP), so any pattern that
// uses a fragment must bind operands with exactly those names.
// The fragments nest:
//   Interpf32  = FIf32($rB, FRESTf32($rB))
//   DivEstf32  = FMf32($rA, Interpf32)
//   NRaphf32   = FMAf32(FNMSf32(DivEstf32, $rB, $rA), Interpf32, DivEstf32)
//   Epsilonf32 = AIf32(NRaphf32, 1)
// NOTE(review): presumably FNMS(x, y, z) computes z - x*y, making the inner
// term the residual $rA - DivEst*$rB, and AIf32 ... 1 is an integer add that
// nudges the float's bit pattern by one unit -- confirm against the SPU ISA.
// NOTE(review): CodeFrag is defined outside this view; it looks like each
// `.Fragment` reference expands to the fragment's DAG at the point of use --
// confirm.
| 63 | // Reciprocal estimate and interpolation |
| 64 | def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>; |
| 65 | // Division estimate |
| 66 | def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>; |
| 67 | // Newton-Raphson iteration |
| 68 | def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA), |
Scott Michel | 19c10e6 | 2009-01-26 03:37:41 +0000 | [diff] [blame] | 69 | Interpf32.Fragment, |
| 70 | DivEstf32.Fragment)>; |
Scott Michel | 02d711b | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 71 | // Epsilon addition |
| 72 | def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>; |
| 73 | |
// Selection pattern for scalar `fdiv` on two R32FP operands ($rA / $rB).
// SELBf32_cond chooses between two candidate quotients:
//   * NRaphf32.Fragment   -- the refined quotient
//   * Epsilonf32.Fragment -- the refined quotient passed through AIf32 ..., 1
// The selector is CGTIf32(FNMSf32($rB, Epsilonf32, $rA), -1), i.e. a
// greater-than comparison of an FNMS of the epsilon-adjusted candidate
// against the immediate -1.
// NOTE(review): presumably the FNMS term is the residual
// $rA - $rB*Epsilon and the compare tests whether it is non-negative, in
// which case the epsilon-adjusted value is selected -- confirm the operand
// order of SELBf32_cond and the FNMS/CGTI semantics against their definitions.
| 74 | def : Pat<(fdiv R32FP:$rA, R32FP:$rB), |
Scott Michel | 19c10e6 | 2009-01-26 03:37:41 +0000 | [diff] [blame] | 75 | (SELBf32_cond NRaphf32.Fragment, |
| 76 | Epsilonf32.Fragment, |
| 77 | (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>; |
Scott Michel | 02d711b | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 78 | |
// Reusable DAG fragments for v4f32 division; the vector counterparts of the
// scalar f32 fragments. Each fragment refers to the operands by the fixed
// names $rA and $rB (both VECREG, explicitly typed v4f32), so any pattern that
// uses a fragment must bind operands with exactly those names.
// The fragments nest:
//   Interpv4f32  = FIv4f32($rB, FRESTv4f32($rB))
//   DivEstv4f32  = FMv4f32($rA, Interpv4f32)
//   NRaphv4f32   = FMAv4f32(FNMSv4f32(DivEstv4f32, $rB, $rA),
//                           Interpv4f32, DivEstv4f32)
//   Epsilonv4f32 = AIv4f32(NRaphv4f32, 1)
// NOTE(review): presumably FNMS(x, y, z) computes z - x*y and AIv4f32 ... 1 is
// an integer add on each element's bit pattern -- confirm against the SPU ISA.
| 79 | // Reciprocal estimate and interpolation |
| 80 | def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>; |
| 81 | // Division estimate |
| 82 | def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>; |
| 83 | // Newton-Raphson iteration |
| 84 | def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment, |
Scott Michel | 19c10e6 | 2009-01-26 03:37:41 +0000 | [diff] [blame] | 85 | (v4f32 VECREG:$rB), |
| 86 | (v4f32 VECREG:$rA)), |
| 87 | Interpv4f32.Fragment, |
| 88 | DivEstv4f32.Fragment)>; |
Scott Michel | 02d711b | 2008-12-30 23:28:25 +0000 | [diff] [blame] | 89 | // Epsilon addition |
| 90 | def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>; |
| 91 | |
// Selection pattern for `fdiv` on two v4f32 VECREG operands ($rA / $rB).
// SELBv4f32_cond chooses between two candidate quotients:
//   * NRaphv4f32.Fragment   -- the refined quotient
//   * Epsilonv4f32.Fragment -- the refined quotient passed through
//                              AIv4f32 ..., 1
// The selector is CGTIv4f32(FNMSv4f32($rB, Epsilonv4f32, $rA), -1), i.e. a
// greater-than comparison of an FNMS of the epsilon-adjusted candidate
// against the immediate -1.
// NOTE(review): presumably the selection is made independently per element
// and picks the epsilon-adjusted value where the residual
// $rA - $rB*Epsilon is non-negative -- confirm against the definitions of
// SELBv4f32_cond, FNMSv4f32 and CGTIv4f32.
| 92 | def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), |
Scott Michel | 19c10e6 | 2009-01-26 03:37:41 +0000 | [diff] [blame] | 93 | (SELBv4f32_cond NRaphv4f32.Fragment, |
| 94 | Epsilonv4f32.Fragment, |
| 95 | (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB), |
| 96 | Epsilonv4f32.Fragment, |
| 97 | (v4f32 VECREG:$rA)), -1))>; |