blob: 38279a0a9f81b19c3c8d62581c886a845740c7ef [file] [log] [blame]
Scott Michel02d711b2008-12-30 23:28:25 +00001//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
2//
3// Cell SPU math operations
4//
5// This target description file contains instruction sequences for various
6// math operations, such as vector multiplies, i32 multiply, etc., for the
7// SPU's i32, i16, i8 and corresponding vector types.
8//
9// Any resemblance to libsimdmath or the Cell SDK simdmath library is
10// purely and completely coincidental.
11//
12// Primary author: Scott Michel (scottm@aero.org)
13//===----------------------------------------------------------------------===//
14
15//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
16// v16i8 multiply instruction sequence:
17//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
18
// Selection pattern: `mul` of two v16i8 vector registers ($rA, $rB) is
// replaced by the instruction sequence below. Structure, outermost first:
//
//   ORv4i32 combines two partial results:
//     1. ANDv4i32 of a SELBv4i32 result with (ILAv4i32 0x0000ffff).
//        The SELB operands are (MPYv8i16 $rA, $rB) and a second MPYv8i16
//        whose inputs are both run through ROTMAHIv8i16 by 8 and whose
//        product is then run through SHLHIv8i16 by 8.
//     2. SHLIv4i32 by 16 of a second SELBv4i32 result.
//        Here the first MPYv8i16 takes $rA/$rB after ROTMAIv4i32_i32 by 16,
//        and the second takes them after ROTMAIv4i32_i32 by 8, again
//        followed by SHLHIv8i16 by 8.
//   Both SELBv4i32 nodes use (FSMBIv8i16 0x2222) as their select operand.
//
// NOTE(review): presumably part 1 yields the byte products that land in the
// low 16 bits of each 32-bit word and part 2 those for the high 16 bits,
// with 0x2222 choosing one byte per word from the shifted product -- confirm
// against the SPU ISA and the instruction definitions before changing any
// shift amount or mask.
19def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
20 (ORv4i32
21 (ANDv4i32
22 (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
23 (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
24 (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
25 (FSMBIv8i16 0x2222)),
26 (ILAv4i32 0x0000ffff)),
27 (SHLIv4i32
28 (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
29 (ROTMAIv4i32_i32 VECREG:$rB, 16)),
30 (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
31 (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
32 (FSMBIv8i16 0x2222)), 16))>;
33
34//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
35// v8i16 multiply instruction sequence:
36//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
37
// Selection pattern: `mul` of two v8i16 vector registers ($rA, $rB).
// SELBv8i16 merges two products of the same inputs:
//   - (MPYv8i16 $rA, $rB), used as-is;
//   - (MPYHHv8i16 $rA, $rB), run through SHLIv4i32 by 16.
// (FSMBIv8i16 0xcccc) supplies the select operand.
//
// NOTE(review): presumably MPY produces the products of the low halfword of
// each word and MPYHH those of the high halfword, and the 0xcccc mask takes
// the upper two bytes of every word from the shifted MPYHH result -- confirm
// against the SPU ISA.
38def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
39 (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
40 (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
41 (FSMBIv8i16 0xcccc))>;
42
43//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
44// v4i32, i32 multiply instruction sequence:
45//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
46
// Selection pattern: `mul` of two v4i32 vector registers ($rA, $rB).
// The result is the sum (two nested Av4i32) of three partial products:
//   MPYHv4i32($rA, $rB) + MPYHv4i32($rB, $rA) + MPYUv4i32($rA, $rB)
// Note the swapped operand order in the second MPYHv4i32.
//
// NOTE(review): presumably MPYH contributes the high-half x low-half cross
// term (already shifted into the upper 16 bits) and MPYU the unsigned
// low-half x low-half term, which together give the low 32 bits of the
// full product -- confirm against the SPU ISA.
47def MPYv4i32:
48 Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
49 (Av4i32
50 (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
51 (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
52 (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
53
// Selection pattern: `mul` of two scalar R32C registers ($rA, $rB).
// Same shape as the v4i32 pattern, using the r32 instruction variants:
//   MPYHr32($rA, $rB) + MPYHr32($rB, $rA) + MPYUr32($rA, $rB)
// summed with two nested Ar32. The second MPYHr32 has its operands swapped.
//
// NOTE(review): presumably the two MPYH terms are the 16-bit cross products
// and MPYU the unsigned low x low product -- confirm against the SPU ISA.
54def MPYi32:
55 Pat<(mul R32C:$rA, R32C:$rB),
56 (Ar32
57 (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
58 (MPYHr32 R32C:$rB, R32C:$rA)),
59 (MPYUr32 R32C:$rA, R32C:$rB))>;
60
61//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
62// f32, v4f32 divide instruction sequence:
63//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
64
// Building blocks for the f32 divide pattern. Each CodeFrag wraps a DAG that
// later definitions reuse through its `.Fragment` field; $rA is the dividend
// and $rB the divisor of the `fdiv` being matched.
// NOTE(review): CodeFrag is declared outside this section -- confirm that
// `.Fragment` splices the DAG in textually, so the shared sub-expressions are
// repeated in the final pattern.
//
// Interpf32: FIf32 applied to $rB and (FRESTf32 $rB).
65// Reciprocal estimate and interpolation
66def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
// DivEstf32: FMf32 of $rA and the Interpf32 result.
67// Division estimate
68def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
// NRaphf32: FMAf32 of (FNMSf32 DivEst, $rB, $rA), Interp and DivEst.
// NOTE(review): presumably (rA - DivEst * rB) * Interp + DivEst, i.e. the
// estimate corrected by its residual -- confirm FNMS/FMA operand semantics.
69// Newton-Raphson iteration
70def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
71 Interpf32.Fragment,
72 DivEstf32.Fragment)>;
// Epsilonf32: AIf32 of the NRaphf32 result with immediate 1.
// NOTE(review): presumably an integer add on the float's bit pattern, giving
// the next representable value -- confirm.
73// Epsilon addition
74def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
75
// Selection pattern: `fdiv` of two R32FP registers ($rA / $rB).
// SELBf32_cond chooses between the NRaphf32 result and the Epsilonf32 result.
// The condition is CGTIf32 comparing (FNMSf32 $rB, Epsilon, $rA) against -1.
//
// NOTE(review): presumably the FNMS term is the residual rA - rB * Epsilon,
// the integer "> -1" compare tests that its sign bit is clear, and the
// Epsilon result is chosen when that holds -- confirm SELB operand order and
// the compare semantics against the SPU ISA.
76def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
77 (SELBf32_cond NRaphf32.Fragment,
78 Epsilonf32.Fragment,
79 (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
80
// Building blocks for the v4f32 divide pattern; these mirror the f32
// fragments but use the v4f32 instruction variants on VECREG operands
// explicitly typed as v4f32. $rA is the dividend, $rB the divisor.
//
// Interpv4f32: FIv4f32 applied to $rB and (FRESTv4f32 $rB).
81// Reciprocal estimate and interpolation
82def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
// DivEstv4f32: FMv4f32 of $rA and the Interpv4f32 result.
83// Division estimate
84def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
// NRaphv4f32: FMAv4f32 of (FNMSv4f32 DivEst, $rB, $rA), Interp and DivEst.
// NOTE(review): presumably (rA - DivEst * rB) * Interp + DivEst per element
// -- confirm FNMS/FMA operand semantics.
85// Newton-Raphson iteration
86def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
87 (v4f32 VECREG:$rB),
88 (v4f32 VECREG:$rA)),
89 Interpv4f32.Fragment,
90 DivEstv4f32.Fragment)>;
// Epsilonv4f32: AIv4f32 of the NRaphv4f32 result with immediate 1.
// NOTE(review): presumably an integer add on each element's bit pattern
// -- confirm.
91// Epsilon addition
92def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
93
// Selection pattern: `fdiv` of two v4f32 vector registers ($rA / $rB).
// SELBv4f32_cond chooses between the NRaphv4f32 and Epsilonv4f32 results.
// The condition is CGTIv4f32 comparing (FNMSv4f32 $rB, Epsilon, $rA)
// against -1.
//
// NOTE(review): presumably the selection is per element: the Epsilon result
// is taken where the residual rA - rB * Epsilon has its sign bit clear --
// confirm SELB operand order and compare semantics against the SPU ISA.
94def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
95 (SELBv4f32_cond NRaphv4f32.Fragment,
96 Epsilonv4f32.Fragment,
97 (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
98 Epsilonv4f32.Fragment,
99 (v4f32 VECREG:$rA)), -1))>;