; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_256 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_512 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=FMA4 %s

; This test checks the fusing of MUL + SUB/ADD to FMSUBADD.
; Each function computes A*B, then A*B-C and A*B+C, and uses a shufflevector to
; take the even lanes from the add and the odd lanes from the sub.
; The assertions below currently record unfused codegen: separate vmul, vsub
; and vadd instructions followed by a blend/shuffle (or a masked vsub).

; <2 x double>: lane 0 = A*B + C (mask index 0, from %Add),
;               lane 1 = A*B - C (mask index 3, i.e. element 1 of %Sub).
; The +fma and +fma4 runs select the lanes with vblendpd; the +fma,+avx512f
; run is checked separately (FMA3_512) because it uses vmovsd instead.
define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd128:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; FMA3_256-NEXT:    vsubpd %xmm2, %xmm0, %xmm1
; FMA3_256-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; FMA3_256-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_pd128:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; FMA3_512-NEXT:    vsubpd %xmm2, %xmm0, %xmm1
; FMA3_512-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; FMA3_512-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd128:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    vsubpd %xmm2, %xmm0, %xmm1
; FMA4-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA4-NEXT:    retq
entry:
  %AB = fmul <2 x double> %A, %B
  %Sub = fsub <2 x double> %AB, %C
  %Add = fadd <2 x double> %AB, %C
  %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %subadd
}

; <4 x float>: even lanes = A*B + C (mask indices 0, 2 from %Add),
;              odd lanes  = A*B - C (mask indices 5, 7, i.e. elements 1, 3 of %Sub).
; Both FMA3 runs share the FMA3 prefix here; all configurations are checked
; for vmulps/vsubps/vaddps followed by a vblendps lane select.
define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps128:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA3-NEXT:    vsubps %xmm2, %xmm0, %xmm1
; FMA3-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; FMA3-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps128:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    vsubps %xmm2, %xmm0, %xmm1
; FMA4-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; FMA4-NEXT:    retq
entry:
  %AB = fmul <4 x float> %A, %B
  %Sub = fsub <4 x float> %AB, %C
  %Add = fadd <4 x float> %AB, %C
  %subadd = shufflevector <4 x float> %Add, <4 x float> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %subadd
}

; <4 x double>: even lanes = A*B + C (mask indices 0, 2 from %Add),
;               odd lanes  = A*B - C (mask indices 5, 7, i.e. elements 1, 3 of %Sub).
; Both FMA3 runs share the FMA3 prefix here; all configurations are checked
; for ymm vmulpd/vsubpd/vaddpd followed by a vblendpd lane select.
define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
; FMA3-LABEL: mul_subadd_pd256:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; FMA3-NEXT:    vsubpd %ymm2, %ymm0, %ymm1
; FMA3-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; FMA3-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd256:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    vsubpd %ymm2, %ymm0, %ymm1
; FMA4-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; FMA4-NEXT:    retq
entry:
  %AB = fmul <4 x double> %A, %B
  %Sub = fsub <4 x double> %AB, %C
  %Add = fadd <4 x double> %AB, %C
  %subadd = shufflevector <4 x double> %Add, <4 x double> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %subadd
}

; <8 x float>: even lanes = A*B + C (mask indices 0, 2, 4, 6 from %Add),
;              odd lanes  = A*B - C (mask indices 9, 11, 13, 15, i.e. elements
;              1, 3, 5, 7 of %Sub).
; Both FMA3 runs share the FMA3 prefix here; all configurations are checked
; for ymm vmulps/vsubps/vaddps followed by a vblendps lane select.
define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps256:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; FMA3-NEXT:    vsubps %ymm2, %ymm0, %ymm1
; FMA3-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; FMA3-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps256:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    vsubps %ymm2, %ymm0, %ymm1
; FMA4-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; FMA4-NEXT:    retq
entry:
  %AB = fmul <8 x float> %A, %B
  %Sub = fsub <8 x float> %AB, %C
  %Add = fadd <8 x float> %AB, %C
  %subadd = shufflevector <8 x float> %Add, <8 x float> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %subadd
}

; <8 x double>: even lanes = A*B + C (mask indices 0, 2, 4, 6 from %Add),
;               odd lanes  = A*B - C (mask indices 9, 11, 13, 15, i.e. elements
;               1, 3, 5, 7 of %Sub).
; The +fma and +fma4 runs are checked for the operation done on ymm registers
; in two halves (two vmulpd/vsubpd/vaddpd/vblendpd groups). The +fma,+avx512f
; run is checked for a single zmm sequence that selects lanes with vshufpd.
define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd512:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; FMA3_256-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; FMA3_256-NEXT:    vsubpd %ymm5, %ymm1, %ymm2
; FMA3_256-NEXT:    vsubpd %ymm4, %ymm0, %ymm3
; FMA3_256-NEXT:    vaddpd %ymm5, %ymm1, %ymm1
; FMA3_256-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3]
; FMA3_256-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA3_256-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3]
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_pd512:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; FMA3_512-NEXT:    vsubpd %zmm2, %zmm0, %zmm1
; FMA3_512-NEXT:    vaddpd %zmm2, %zmm0, %zmm0
; FMA3_512-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[7]
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd512:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    vsubpd %ymm5, %ymm1, %ymm2
; FMA4-NEXT:    vsubpd %ymm4, %ymm0, %ymm3
; FMA4-NEXT:    vaddpd %ymm5, %ymm1, %ymm1
; FMA4-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3]
; FMA4-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA4-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3]
; FMA4-NEXT:    retq
entry:
  %AB = fmul <8 x double> %A, %B
  %Sub = fsub <8 x double> %AB, %C
  %Add = fadd <8 x double> %AB, %C
  %subadd = shufflevector <8 x double> %Add, <8 x double> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %subadd
}

; <16 x float>: even lanes = A*B + C (even mask indices 0..14 from %Add),
;               odd lanes  = A*B - C (mask indices 17, 19, ..., 31, i.e. the
;               odd elements of %Sub).
; The +fma and +fma4 runs are checked for the operation done on ymm registers
; in two halves with vblendps. The +fma,+avx512f run is checked for a zmm
; vaddps followed by a vsubps under mask k1 = 0xAAAA (odd bits set), which
; overwrites only the odd lanes of the add result.
define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
; FMA3_256-LABEL: mul_subadd_ps512:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA3_256-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA3_256-NEXT:    vsubps %ymm5, %ymm1, %ymm2
; FMA3_256-NEXT:    vsubps %ymm4, %ymm0, %ymm3
; FMA3_256-NEXT:    vaddps %ymm5, %ymm1, %ymm1
; FMA3_256-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; FMA3_256-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA3_256-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7]
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_ps512:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vmulps %zmm1, %zmm0, %zmm1
; FMA3_512-NEXT:    vaddps %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; FMA3_512-NEXT:    kmovw %eax, %k1
; FMA3_512-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1}
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps512:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    vsubps %ymm5, %ymm1, %ymm2
; FMA4-NEXT:    vsubps %ymm4, %ymm0, %ymm3
; FMA4-NEXT:    vaddps %ymm5, %ymm1, %ymm1
; FMA4-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; FMA4-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA4-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7]
; FMA4-NEXT:    retq
entry:
  %AB = fmul <16 x float> %A, %B
  %Sub = fsub <16 x float> %AB, %C
  %Add = fadd <16 x float> %AB, %C
  %subadd = shufflevector <16 x float> %Add, <16 x float> %Sub, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %subadd
}

; Attribute group #0, applied to every function defined in this test:
; nounwind plus the "unsafe-fp-math"="true" string attribute.
attributes #0 = { nounwind "unsafe-fp-math"="true" }