; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_256 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_512 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=FMA4 %s

; This test checks that an FMUL feeding both an FSUB and an FADD, whose results
; are blended lane-by-lane with a shuffle, is fused into a single FMSUBADD.
7
; 128-bit double case: lane 0 is taken from (A*B)+C and lane 1 from (A*B)-C
; (shuffle mask <0, 3>). Each configuration is expected to emit exactly one
; FMSUBADD instruction on xmm registers.
define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd128:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_pd128:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd128:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <2 x double> %A, %B
  %Sub = fsub <2 x double> %AB, %C
  %Add = fadd <2 x double> %AB, %C
  %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %subadd
}
30
; 128-bit float case: even lanes are taken from (A*B)+C and odd lanes from
; (A*B)-C (shuffle mask <0, 5, 2, 7>). Both FMA3 configurations share one
; expectation; FMA4 uses the four-operand form.
define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps128:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps128:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <4 x float> %A, %B
  %Sub = fsub <4 x float> %AB, %C
  %Add = fadd <4 x float> %AB, %C
  %subadd = shufflevector <4 x float> %Add, <4 x float> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %subadd
}
48
; 256-bit double case: even lanes are taken from (A*B)+C and odd lanes from
; (A*B)-C (shuffle mask <0, 5, 2, 7>). A single FMSUBADD on ymm registers is
; expected for every configuration.
define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
; FMA3-LABEL: mul_subadd_pd256:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213pd %ymm2, %ymm1, %ymm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd256:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <4 x double> %A, %B
  %Sub = fsub <4 x double> %AB, %C
  %Add = fadd <4 x double> %AB, %C
  %subadd = shufflevector <4 x double> %Add, <4 x double> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %subadd
}
66
; 256-bit float case: even lanes are taken from (A*B)+C and odd lanes from
; (A*B)-C (shuffle mask <0, 9, 2, 11, 4, 13, 6, 15>). A single FMSUBADD on ymm
; registers is expected for every configuration.
define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps256:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213ps %ymm2, %ymm1, %ymm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps256:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <8 x float> %A, %B
  %Sub = fsub <8 x float> %AB, %C
  %Add = fadd <8 x float> %AB, %C
  %subadd = shufflevector <8 x float> %Add, <8 x float> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %subadd
}
84
; 512-bit double case: even lanes are taken from (A*B)+C and odd lanes from
; (A*B)-C (shuffle mask <0, 9, 2, 11, 4, 13, 6, 15>). With AVX512F a single zmm
; FMSUBADD is expected; the FMA3-only and FMA4 configurations are expected to
; emit two ymm FMSUBADDs, one per 256-bit half.
define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd512:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213pd %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT:    vfmsubadd213pd %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_pd512:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213pd %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd512:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
entry:
  %AB = fmul <8 x double> %A, %B
  %Sub = fsub <8 x double> %AB, %C
  %Add = fadd <8 x double> %AB, %C
  %subadd = shufflevector <8 x double> %Add, <8 x double> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %subadd
}
109
; 512-bit float case: even lanes are taken from (A*B)+C and odd lanes from
; (A*B)-C (the shuffle mask alternates index i from %Add with index 16+i from
; %Sub). With AVX512F a single zmm FMSUBADD is expected; the FMA3-only and FMA4
; configurations are expected to emit two ymm FMSUBADDs, one per 256-bit half.
define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
; FMA3_256-LABEL: mul_subadd_ps512:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213ps %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT:    vfmsubadd213ps %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_ps512:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213ps %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps512:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
entry:
  %AB = fmul <16 x float> %A, %B
  %Sub = fsub <16 x float> %AB, %C
  %Add = fadd <16 x float> %AB, %C
  %subadd = shufflevector <16 x float> %Add, <16 x float> %Sub, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %subadd
}
134
; Attribute group referenced as #0 by the test functions in this file.
; NOTE(review): "unsafe-fp-math" is presumably what allows the fmul to be
; fused with the fadd/fsub users - confirm before changing or removing it.
attributes #0 = { nounwind "unsafe-fp-math"="true" }