blob: 7205593b040854b1d31b7c5b67439c1344d4ba51 [file] [log] [blame]
//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
//
// Cell SPU math operations
//
// This target description file contains instruction sequences for various
// math operations, such as vector multiplies, i32 multiply, etc., for the
// SPU's i32, i16, i8 and corresponding vector types.
//
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
// purely and completely coincidental.
//===----------------------------------------------------------------------===//

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v16i8 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Selection pattern for (mul v16i8, v16i8), built entirely from 16-bit
// multiplies (MPYv8i16). The result is the OR of two halves:
//   * (AND (SELB p0, p1, mask), 0x0000ffff)
//       p0 = MPY(rA, rB)
//       p1 = SHLHI(MPY(ROTMAHI(rA, 8), ROTMAHI(rB, 8)), 8)
//   * (SHLI (SELB q0, q1, mask), 16)
//       q0 = MPY(ROTMAI(rA, 16), ROTMAI(rB, 16))
//       q1 = SHLHI(MPY(ROTMAI(rA, 8), ROTMAI(rB, 8)), 8)
// where mask = (FSMBIv8i16 0x2222) in both SELBs.
// NOTE(review): presumably the ROTMA* forms act as arithmetic right shifts
// that bring each byte lane into multiply position, and the 0x2222 byte mask
// interleaves the even/odd byte products -- confirm against the SPU ISA and
// the instruction definitions, which are not visible in this file.
def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
          (ORv4i32
           (ANDv4i32
            (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
                       (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
                                             (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
                       (FSMBIv8i16 0x2222)),
            (ILAv4i32 0x0000ffff)),
           (SHLIv4i32
            (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
                                 (ROTMAIv4i32_i32 VECREG:$rB, 16)),
                       (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
                                             (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
                       (FSMBIv8i16 0x2222)), 16))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v8i16 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Selection pattern for (mul v8i16, v8i16). SELB chooses, under the byte mask
// (FSMBIv8i16 0xcccc), between
//   * MPY(rA, rB), and
//   * MPYHH(rA, rB) shifted left by 16 within each 32-bit word (SHLIv4i32).
// NOTE(review): presumably MPY yields the products of the low halfwords and
// MPYHH those of the high halfwords, with the mask merging the two sets into
// alternating halfword lanes -- confirm against the SPU ISA.
def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
          (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
                     (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
                     (FSMBIv8i16 0xcccc))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v4i32, v2i32, i32 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Selection pattern for (mul v4i32, v4i32), expressed as the sum of three
// partial products:
//   MPYH(rA, rB) + MPYH(rB, rA) + MPYU(rA, rB)
// NOTE(review): presumably the usual 16x16 decomposition of a 32-bit multiply
// (two high*low cross terms plus the unsigned low*low product) -- confirm
// against the SPU ISA definitions of mpyh/mpyu.
def MPYv4i32:
  Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
      (Av4i32
        (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
                       (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
        (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;

// Selection pattern for (mul v2i32, v2i32), expressed as the sum of three
// partial products:
//   MPYH(rA, rB) + MPYH(rB, rA) + MPYU(rA, rB)
// NOTE(review): presumably the usual 16x16 decomposition of a 32-bit multiply
// (two high*low cross terms plus the unsigned low*low product) -- confirm
// against the SPU ISA definitions of mpyh/mpyu.
def MPYv2i32:
  Pat<(mul (v2i32 VECREG:$rA), (v2i32 VECREG:$rB)),
      (Av2i32
        (v2i32 (Av2i32 (v2i32 (MPYHv2i32 VECREG:$rA, VECREG:$rB)),
                       (v2i32 (MPYHv2i32 VECREG:$rB, VECREG:$rA)))),
        (v2i32 (MPYUv2i32 VECREG:$rA, VECREG:$rB)))>;


// Selection pattern for scalar (mul i32, i32) on R32C registers, expressed as
// the sum of three partial products:
//   MPYH(rA, rB) + MPYH(rB, rA) + MPYU(rA, rB)
// NOTE(review): presumably the usual 16x16 decomposition of a 32-bit multiply
// (two high*low cross terms plus the unsigned low*low product) -- confirm
// against the SPU ISA definitions of mpyh/mpyu.
def MPYi32:
  Pat<(mul R32C:$rA, R32C:$rB),
      (Ar32
        (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
              (MPYHr32 R32C:$rB, R32C:$rA)),
        (MPYUr32 R32C:$rA, R32C:$rB))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f32, v4f32 divide instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Reciprocal estimate and interpolation:
// FREST produces an estimate from $rB, which FI then combines with $rB.
// The fragment only refers to the divisor operand ($rB).
def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
// Division estimate:
// multiplies the dividend ($rA) by the interpolated reciprocal fragment.
def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
// Newton-Raphson iteration:
//   FMA(FNMS(DivEst, rB, rA), Interp, DivEst)
// NOTE(review): presumably FNMS(a, b, c) = c - a*b and FMA(a, b, c) = a*b + c,
// i.e. est + (rA - est*rB) * recip -- confirm against the SPU ISA.
def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
                               Interpf32.Fragment,
                               DivEstf32.Fragment)>;
// Epsilon addition:
// applies AI (add immediate) with the constant 1 to the Newton-Raphson result.
// NOTE(review): presumably an integer add on the float's bit pattern, i.e. a
// one-ulp bump of the quotient -- confirm against the AIf32 definition.
def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;

// Selection pattern for scalar (fdiv f32, f32).
// SELB picks between the Newton-Raphson quotient (NRaphf32) and the same value
// with the epsilon adjustment (Epsilonf32). The select condition is
//   CGTI(FNMS(rB, Epsilonf32, rA), -1)
// NOTE(review): presumably this tests whether the residual rA - rB*(q+eps) is
// non-negative when viewed as an integer, choosing the adjusted quotient in
// that case -- confirm operand order of SELBf32_cond and FNMS semantics.
def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
          (SELBf32_cond NRaphf32.Fragment,
                        Epsilonf32.Fragment,
                        (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;

// Reciprocal estimate and interpolation (v4f32):
// FREST produces an estimate from $rB, which FI then combines with $rB.
// The fragment only refers to the divisor operand ($rB).
def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB),
                                   (FRESTv4f32 (v4f32 VECREG:$rB)))>;
// Division estimate (v4f32):
// multiplies the dividend ($rA) by the interpolated reciprocal fragment.
def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
// Newton-Raphson iteration (v4f32):
//   FMA(FNMS(DivEst, rB, rA), Interp, DivEst)
// NOTE(review): presumably FNMS(a, b, c) = c - a*b and FMA(a, b, c) = a*b + c,
// i.e. est + (rA - est*rB) * recip -- confirm against the SPU ISA.
def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
                                              (v4f32 VECREG:$rB),
                                              (v4f32 VECREG:$rA)),
                                   Interpv4f32.Fragment,
                                   DivEstv4f32.Fragment)>;
// Epsilon addition (v4f32):
// applies AI (add immediate) with the constant 1 to the Newton-Raphson result.
// NOTE(review): presumably an integer add on each float's bit pattern, i.e. a
// one-ulp bump of the quotient -- confirm against the AIv4f32 definition.
def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;

// Selection pattern for vector (fdiv v4f32, v4f32).
// SELB picks between the Newton-Raphson quotient (NRaphv4f32) and the same
// value with the epsilon adjustment (Epsilonv4f32). The select condition is
//   CGTI(FNMS(rB, Epsilonv4f32, rA), -1)
// NOTE(review): presumably this tests, per element, whether the residual
// rA - rB*(q+eps) is non-negative when viewed as an integer, choosing the
// adjusted quotient in that case -- confirm operand order of SELBv4f32_cond
// and FNMS semantics.
def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
          (SELBv4f32_cond NRaphv4f32.Fragment,
                          Epsilonv4f32.Fragment,
                          (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
                                                Epsilonv4f32.Fragment,
                                                (v4f32 VECREG:$rA)), -1))>;