blob: 7bd8415e9e4908a76c255943e2460b1db5701a14 [file] [log] [blame]
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +00001//===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the implementation of the classes providing information
11// about existing X86 FMA3 opcodes, classifying and grouping them.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86InstrFMA3Info.h"
16#include "X86InstrInfo.h"
17#include "llvm/Support/ManagedStatic.h"
18#include "llvm/Support/Threading.h"
19
/// Once-flag passed to llvm::call_once() in initGroupsOnce() below, so that
/// the map 'OpcodeToGroup' is populated exactly once and its initialization
/// is thread safe.
LLVM_DEFINE_ONCE_FLAG(InitGroupsOnceFlag);
23
24static ManagedStatic<X86InstrFMA3Info> X86InstrFMA3InfoObj;
25X86InstrFMA3Info *X86InstrFMA3Info::getX86InstrFMA3Info() {
26 return &*X86InstrFMA3InfoObj;
27}
28
29void X86InstrFMA3Info::initRMGroup(const uint16_t *RegOpcodes,
30 const uint16_t *MemOpcodes, unsigned Attr) {
31 // Create a new instance of this class that would hold a group of FMA opcodes.
32 X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, MemOpcodes, Attr);
33
34 // Add the references from indvidual opcodes to the group holding them.
35 assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] &&
36 !OpcodeToGroup[RegOpcodes[2]] && !OpcodeToGroup[MemOpcodes[0]] &&
37 !OpcodeToGroup[MemOpcodes[1]] && !OpcodeToGroup[MemOpcodes[2]]) &&
38 "Duplication or rewrite of elements in OpcodeToGroup.");
39 OpcodeToGroup[RegOpcodes[0]] = G;
40 OpcodeToGroup[RegOpcodes[1]] = G;
41 OpcodeToGroup[RegOpcodes[2]] = G;
42 OpcodeToGroup[MemOpcodes[0]] = G;
43 OpcodeToGroup[MemOpcodes[1]] = G;
44 OpcodeToGroup[MemOpcodes[2]] = G;
45}
46
47void X86InstrFMA3Info::initRGroup(const uint16_t *RegOpcodes, unsigned Attr) {
48 // Create a new instance of this class that would hold a group of FMA opcodes.
49 X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, nullptr, Attr);
50
51 // Add the references from indvidual opcodes to the group holding them.
52 assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] &&
53 !OpcodeToGroup[RegOpcodes[2]]) &&
54 "Duplication or rewrite of elements in OpcodeToGroup.");
55 OpcodeToGroup[RegOpcodes[0]] = G;
56 OpcodeToGroup[RegOpcodes[1]] = G;
57 OpcodeToGroup[RegOpcodes[2]] = G;
58}
59
60void X86InstrFMA3Info::initMGroup(const uint16_t *MemOpcodes, unsigned Attr) {
61 // Create a new instance of this class that would hold a group of FMA opcodes.
62 X86InstrFMA3Group *G = new X86InstrFMA3Group(nullptr, MemOpcodes, Attr);
63
64 // Add the references from indvidual opcodes to the group holding them.
65 assert((!OpcodeToGroup[MemOpcodes[0]] && !OpcodeToGroup[MemOpcodes[1]] &&
66 !OpcodeToGroup[MemOpcodes[2]]) &&
67 "Duplication or rewrite of elements in OpcodeToGroup.");
68 OpcodeToGroup[MemOpcodes[0]] = G;
69 OpcodeToGroup[MemOpcodes[1]] = G;
70 OpcodeToGroup[MemOpcodes[2]] = G;
71}
72
// Helper macros that register one group of FMA3 opcodes. Each macro defines
// one or two function-local 'static const uint16_t[3]' arrays holding the
// X86:: opcodes in the order given by the arguments (132, 213, 231) and
// passes them to the matching init*Group() method.
//
// The array names are built by pasting 'Reg'/'Mem' with the FIRST opcode
// argument, so that argument must be unique among all expansions in the same
// scope. Each macro expands to several statements and already ends with ';',
// so it must only be used where a plain statement sequence is valid.

// Register + memory forms; initRMGroup() is called without an explicit
// attribute argument.
#define FMA3RM(R132, R213, R231, M132, M213, M231) \
  static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
  static const uint16_t Mem##R132[3] = {X86::M132, X86::M213, X86::M231}; \
  initRMGroup(Reg##R132, Mem##R132);

// Register + memory forms with the explicit attribute bits 'Attrs'.
#define FMA3RMA(R132, R213, R231, M132, M213, M231, Attrs) \
  static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
  static const uint16_t Mem##R132[3] = {X86::M132, X86::M213, X86::M231}; \
  initRMGroup(Reg##R132, Mem##R132, (Attrs));

// Register forms only; initRGroup() is called without an explicit attribute
// argument.
#define FMA3R(R132, R213, R231) \
  static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
  initRGroup(Reg##R132);

// Register forms only with the explicit attribute bits 'Attrs'.
#define FMA3RA(R132, R213, R231, Attrs) \
  static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
  initRGroup(Reg##R132, (Attrs));

// Memory forms only; initMGroup() is called without an explicit attribute
// argument.
#define FMA3M(M132, M213, M231) \
  static const uint16_t Mem##M132[3] = {X86::M132, X86::M213, X86::M231}; \
  initMGroup(Mem##M132);

// Memory forms only with the explicit attribute bits 'Attrs'.
#define FMA3MA(M132, M213, M231, Attrs) \
  static const uint16_t Mem##M132[3] = {X86::M132, X86::M213, X86::M231}; \
  initMGroup(Mem##M132, (Attrs));
98
// Registers the register/memory groups of operation 'Name' (e.g. VFMADD) for
// the opcode suffixes PS, PD, PSY and PDY.
#define FMA3_AVX2_VECTOR_GROUP(Name) \
  FMA3RM(Name##132PSr, Name##213PSr, Name##231PSr, \
         Name##132PSm, Name##213PSm, Name##231PSm); \
  FMA3RM(Name##132PDr, Name##213PDr, Name##231PDr, \
         Name##132PDm, Name##213PDm, Name##231PDm); \
  FMA3RM(Name##132PSYr, Name##213PSYr, Name##231PSYr, \
         Name##132PSYm, Name##213PSYm, Name##231PSYm); \
  FMA3RM(Name##132PDYr, Name##213PDYr, Name##231PDYr, \
         Name##132PDYm, Name##213PDYm, Name##231PDYm);

// Registers the register/memory groups of operation 'Name' for the opcode
// suffixes SS and SD, plus their '_Int' variants, which are tagged with the
// X86FMA3Intrinsic attribute.
#define FMA3_AVX2_SCALAR_GROUP(Name) \
  FMA3RM(Name##132SSr, Name##213SSr, Name##231SSr, \
         Name##132SSm, Name##213SSm, Name##231SSm); \
  FMA3RM(Name##132SDr, Name##213SDr, Name##231SDr, \
         Name##132SDm, Name##213SDm, Name##231SDm); \
  FMA3RMA(Name##132SSr_Int, Name##213SSr_Int, Name##231SSr_Int, \
          Name##132SSm_Int, Name##213SSm_Int, Name##231SSm_Int, \
          X86InstrFMA3Group::X86FMA3Intrinsic); \
  FMA3RMA(Name##132SDr_Int, Name##213SDr_Int, Name##231SDr_Int, \
          Name##132SDm_Int, Name##213SDm_Int, Name##231SDm_Int, \
          X86InstrFMA3Group::X86FMA3Intrinsic);

// Registers both the vector and the scalar AVX2 groups of operation 'Name'.
#define FMA3_AVX2_FULL_GROUP(Name) \
  FMA3_AVX2_VECTOR_GROUP(Name); \
  FMA3_AVX2_SCALAR_GROUP(Name);
124
// Registers the AVX512 vector groups of operation 'Name' (e.g. VFMADD).
// The expansion covers, in this order:
//  * register/memory groups for the suffixes PSZ128, PDZ128, PSZ256, PDZ256,
//    PSZ and PDZ: unmasked ('r'/'m'), then 'rk'/'mk' tagged
//    X86FMA3KMergeMasked, then 'rkz'/'mkz' tagged X86FMA3KZeroMasked;
//  * register-only groups 'rb', 'rbk' (X86FMA3KMergeMasked) and 'rbkz'
//    (X86FMA3KZeroMasked) for PSZ and PDZ;
//  * memory-only groups 'mb', 'mbk' (X86FMA3KMergeMasked) and 'mbkz'
//    (X86FMA3KZeroMasked) for all six suffixes.
#define FMA3_AVX512_VECTOR_GROUP(Name) \
  FMA3RM(Name##132PSZ128r, Name##213PSZ128r, Name##231PSZ128r, \
         Name##132PSZ128m, Name##213PSZ128m, Name##231PSZ128m); \
  FMA3RM(Name##132PDZ128r, Name##213PDZ128r, Name##231PDZ128r, \
         Name##132PDZ128m, Name##213PDZ128m, Name##231PDZ128m); \
  FMA3RM(Name##132PSZ256r, Name##213PSZ256r, Name##231PSZ256r, \
         Name##132PSZ256m, Name##213PSZ256m, Name##231PSZ256m); \
  FMA3RM(Name##132PDZ256r, Name##213PDZ256r, Name##231PDZ256r, \
         Name##132PDZ256m, Name##213PDZ256m, Name##231PDZ256m); \
  FMA3RM(Name##132PSZr, Name##213PSZr, Name##231PSZr, \
         Name##132PSZm, Name##213PSZm, Name##231PSZm); \
  FMA3RM(Name##132PDZr, Name##213PDZr, Name##231PDZr, \
         Name##132PDZm, Name##213PDZm, Name##231PDZm); \
  FMA3RMA(Name##132PSZ128rk, Name##213PSZ128rk, Name##231PSZ128rk, \
          Name##132PSZ128mk, Name##213PSZ128mk, Name##231PSZ128mk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PDZ128rk, Name##213PDZ128rk, Name##231PDZ128rk, \
          Name##132PDZ128mk, Name##213PDZ128mk, Name##231PDZ128mk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PSZ256rk, Name##213PSZ256rk, Name##231PSZ256rk, \
          Name##132PSZ256mk, Name##213PSZ256mk, Name##231PSZ256mk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PDZ256rk, Name##213PDZ256rk, Name##231PDZ256rk, \
          Name##132PDZ256mk, Name##213PDZ256mk, Name##231PDZ256mk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PSZrk, Name##213PSZrk, Name##231PSZrk, \
          Name##132PSZmk, Name##213PSZmk, Name##231PSZmk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PDZrk, Name##213PDZrk, Name##231PDZrk, \
          Name##132PDZmk, Name##213PDZmk, Name##231PDZmk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PSZ128rkz, Name##213PSZ128rkz, Name##231PSZ128rkz, \
          Name##132PSZ128mkz, Name##213PSZ128mkz, Name##231PSZ128mkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132PDZ128rkz, Name##213PDZ128rkz, Name##231PDZ128rkz, \
          Name##132PDZ128mkz, Name##213PDZ128mkz, Name##231PDZ128mkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132PSZ256rkz, Name##213PSZ256rkz, Name##231PSZ256rkz, \
          Name##132PSZ256mkz, Name##213PSZ256mkz, Name##231PSZ256mkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132PDZ256rkz, Name##213PDZ256rkz, Name##231PDZ256rkz, \
          Name##132PDZ256mkz, Name##213PDZ256mkz, Name##231PDZ256mkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132PSZrkz, Name##213PSZrkz, Name##231PSZrkz, \
          Name##132PSZmkz, Name##213PSZmkz, Name##231PSZmkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132PDZrkz, Name##213PDZrkz, Name##231PDZrkz, \
          Name##132PDZmkz, Name##213PDZmkz, Name##231PDZmkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3R(Name##132PSZrb, Name##213PSZrb, Name##231PSZrb); \
  FMA3R(Name##132PDZrb, Name##213PDZrb, Name##231PDZrb); \
  FMA3RA(Name##132PSZrbk, Name##213PSZrbk, Name##231PSZrbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RA(Name##132PDZrbk, Name##213PDZrbk, Name##231PDZrbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RA(Name##132PSZrbkz, Name##213PSZrbkz, Name##231PSZrbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RA(Name##132PDZrbkz, Name##213PDZrbkz, Name##231PDZrbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3M(Name##132PSZ128mb, Name##213PSZ128mb, Name##231PSZ128mb); \
  FMA3M(Name##132PDZ128mb, Name##213PDZ128mb, Name##231PDZ128mb); \
  FMA3M(Name##132PSZ256mb, Name##213PSZ256mb, Name##231PSZ256mb); \
  FMA3M(Name##132PDZ256mb, Name##213PDZ256mb, Name##231PDZ256mb); \
  FMA3M(Name##132PSZmb, Name##213PSZmb, Name##231PSZmb); \
  FMA3M(Name##132PDZmb, Name##213PDZmb, Name##231PDZmb); \
  FMA3MA(Name##132PSZ128mbk, Name##213PSZ128mbk, Name##231PSZ128mbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PDZ128mbk, Name##213PDZ128mbk, Name##231PDZ128mbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PSZ256mbk, Name##213PSZ256mbk, Name##231PSZ256mbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PDZ256mbk, Name##213PDZ256mbk, Name##231PDZ256mbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PSZmbk, Name##213PSZmbk, Name##231PSZmbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PDZmbk, Name##213PDZmbk, Name##231PDZmbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PSZ128mbkz, Name##213PSZ128mbkz, Name##231PSZ128mbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3MA(Name##132PDZ128mbkz, Name##213PDZ128mbkz, Name##231PDZ128mbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3MA(Name##132PSZ256mbkz, Name##213PSZ256mbkz, Name##231PSZ256mbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3MA(Name##132PDZ256mbkz, Name##213PDZ256mbkz, Name##231PDZ256mbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3MA(Name##132PSZmbkz, Name##213PSZmbkz, Name##231PSZmbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3MA(Name##132PDZmbkz, Name##213PDZmbkz, Name##231PDZmbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked);
214
// Registers the AVX512 scalar groups of operation 'Name' (e.g. VFMADD).
// The expansion covers, in this order:
//  * register/memory groups for the suffixes SSZ and SDZ without attributes;
//  * register/memory groups for the '_Int', '_Intk' and '_Intkz' variants,
//    tagged X86FMA3Intrinsic, X86FMA3Intrinsic | X86FMA3KMergeMasked and
//    X86FMA3Intrinsic | X86FMA3KZeroMasked respectively;
//  * register-only groups for the 'rb_Int', 'rb_Intk' and 'rb_Intkz'
//    variants with the same three attribute combinations.
#define FMA3_AVX512_SCALAR_GROUP(Name) \
  FMA3RM(Name##132SSZr, Name##213SSZr, Name##231SSZr, \
         Name##132SSZm, Name##213SSZm, Name##231SSZm); \
  FMA3RM(Name##132SDZr, Name##213SDZr, Name##231SDZr, \
         Name##132SDZm, Name##213SDZm, Name##231SDZm); \
  FMA3RMA(Name##132SSZr_Int, Name##213SSZr_Int, Name##231SSZr_Int, \
          Name##132SSZm_Int, Name##213SSZm_Int, Name##231SSZm_Int, \
          X86InstrFMA3Group::X86FMA3Intrinsic); \
  FMA3RMA(Name##132SDZr_Int, Name##213SDZr_Int, Name##231SDZr_Int, \
          Name##132SDZm_Int, Name##213SDZm_Int, Name##231SDZm_Int, \
          X86InstrFMA3Group::X86FMA3Intrinsic); \
  FMA3RMA(Name##132SSZr_Intk, Name##213SSZr_Intk, Name##231SSZr_Intk, \
          Name##132SSZm_Intk, Name##213SSZm_Intk, Name##231SSZm_Intk, \
          X86InstrFMA3Group::X86FMA3Intrinsic | \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132SDZr_Intk, Name##213SDZr_Intk, Name##231SDZr_Intk, \
          Name##132SDZm_Intk, Name##213SDZm_Intk, Name##231SDZm_Intk, \
          X86InstrFMA3Group::X86FMA3Intrinsic | \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132SSZr_Intkz, Name##213SSZr_Intkz, Name##231SSZr_Intkz, \
          Name##132SSZm_Intkz, Name##213SSZm_Intkz, Name##231SSZm_Intkz, \
          X86InstrFMA3Group::X86FMA3Intrinsic | \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132SDZr_Intkz, Name##213SDZr_Intkz, Name##231SDZr_Intkz, \
          Name##132SDZm_Intkz, Name##213SDZm_Intkz, Name##231SDZm_Intkz, \
          X86InstrFMA3Group::X86FMA3Intrinsic | \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RA(Name##132SSZrb_Int, Name##213SSZrb_Int, Name##231SSZrb_Int, \
         X86InstrFMA3Group::X86FMA3Intrinsic); \
  FMA3RA(Name##132SDZrb_Int, Name##213SDZrb_Int, Name##231SDZrb_Int, \
         X86InstrFMA3Group::X86FMA3Intrinsic); \
  FMA3RA(Name##132SSZrb_Intk, Name##213SSZrb_Intk, Name##231SSZrb_Intk, \
         X86InstrFMA3Group::X86FMA3Intrinsic | \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RA(Name##132SDZrb_Intk, Name##213SDZrb_Intk, Name##231SDZrb_Intk, \
         X86InstrFMA3Group::X86FMA3Intrinsic | \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RA(Name##132SSZrb_Intkz, Name##213SSZrb_Intkz, Name##231SSZrb_Intkz, \
         X86InstrFMA3Group::X86FMA3Intrinsic | \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RA(Name##132SDZrb_Intkz, Name##213SDZrb_Intkz, Name##231SDZrb_Intkz, \
         X86InstrFMA3Group::X86FMA3Intrinsic | \
         X86InstrFMA3Group::X86FMA3KZeroMasked);

// Registers both the vector and the scalar AVX512 groups of operation 'Name'.
#define FMA3_AVX512_FULL_GROUP(Name) \
  FMA3_AVX512_VECTOR_GROUP(Name); \
  FMA3_AVX512_SCALAR_GROUP(Name);
262
/// Populates 'OpcodeToGroup' with every FMA3 opcode group listed below.
///
/// This function does no synchronization of its own; in this file it is
/// invoked only from initGroupsOnce(), through llvm::call_once().
void X86InstrFMA3Info::initGroupsOnceImpl() {
  // AVX2: these four operations have both vector and scalar forms.
  FMA3_AVX2_FULL_GROUP(VFMADD);
  FMA3_AVX2_FULL_GROUP(VFMSUB);
  FMA3_AVX2_FULL_GROUP(VFNMADD);
  FMA3_AVX2_FULL_GROUP(VFNMSUB);

  // AVX2: only the vector groups are registered for these two operations.
  FMA3_AVX2_VECTOR_GROUP(VFMADDSUB);
  FMA3_AVX2_VECTOR_GROUP(VFMSUBADD);

  // AVX512: these four operations have both vector and scalar forms.
  FMA3_AVX512_FULL_GROUP(VFMADD);
  FMA3_AVX512_FULL_GROUP(VFMSUB);
  FMA3_AVX512_FULL_GROUP(VFNMADD);
  FMA3_AVX512_FULL_GROUP(VFNMSUB);

  // AVX512: only the vector groups are registered for these two operations.
  FMA3_AVX512_VECTOR_GROUP(VFMADDSUB);
  FMA3_AVX512_VECTOR_GROUP(VFMSUBADD);
}
280
281void X86InstrFMA3Info::initGroupsOnce() {
282 llvm::call_once(InitGroupsOnceFlag,
283 []() { getX86InstrFMA3Info()->initGroupsOnceImpl(); });
284}