blob: 03720d6ba5c30d924947046adeca75adee9d024d [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
Nicolas Capenscb986762017-01-20 11:34:37 -050015#include "Reactor.hpp"
Ben Claytoneb50d252019-04-15 13:50:01 -040016#include "Debug.hpp"
Ben Claytonac07ed82019-03-26 14:17:41 +000017#include "LLVMReactor.hpp"
18#include "LLVMReactorDebugInfo.hpp"
John Bauman89401822014-05-06 15:04:28 -040019
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040020#include "x86.hpp"
21#include "CPUID.hpp"
22#include "Thread.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040023#include "ExecutableMemory.hpp"
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040024#include "MutexLock.hpp"
25
26#undef min
27#undef max
28
Ben Clayton5875be52019-04-11 14:57:40 -040029#include "llvm/Analysis/LoopPass.h"
30#include "llvm/ExecutionEngine/ExecutionEngine.h"
31#include "llvm/ExecutionEngine/JITSymbol.h"
32#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
33#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
34#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
35#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
36#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
37#include "llvm/ExecutionEngine/SectionMemoryManager.h"
38#include "llvm/IR/Constants.h"
39#include "llvm/IR/DataLayout.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/GlobalVariable.h"
42#include "llvm/IR/IRBuilder.h"
43#include "llvm/IR/Intrinsics.h"
44#include "llvm/IR/LLVMContext.h"
45#include "llvm/IR/LegacyPassManager.h"
46#include "llvm/IR/Mangler.h"
47#include "llvm/IR/Module.h"
48#include "llvm/Support/Error.h"
49#include "llvm/Support/TargetSelect.h"
50#include "llvm/Target/TargetOptions.h"
51#include "llvm/Transforms/InstCombine/InstCombine.h"
52#include "llvm/Transforms/Scalar.h"
53#include "llvm/Transforms/Scalar/GVN.h"
Ben Clayton20507fa2019-04-20 01:40:15 -040054
Ben Clayton5875be52019-04-11 14:57:40 -040055#include "LLVMRoutine.hpp"
John Bauman89401822014-05-06 15:04:28 -040056
Ben Clayton5875be52019-04-11 14:57:40 -040057#define ARGS(...) {__VA_ARGS__}
58#define CreateCall2 CreateCall
59#define CreateCall3 CreateCall
Logan Chien0eedc8c2018-08-21 09:34:28 +080060
Ben Clayton5875be52019-04-11 14:57:40 -040061#include <unordered_map>
Logan Chien0eedc8c2018-08-21 09:34:28 +080062
John Bauman89401822014-05-06 15:04:28 -040063#include <fstream>
Ben Clayton1bc7ee92019-02-14 18:43:22 +000064#include <numeric>
65#include <thread>
John Bauman89401822014-05-06 15:04:28 -040066
Nicolas Capens47dc8672017-04-25 12:54:39 -040067#if defined(__i386__) || defined(__x86_64__)
68#include <xmmintrin.h>
69#endif
70
Logan Chien40a60052018-09-26 19:03:53 +080071#include <math.h>
72
Nicolas Capenscb122582014-05-06 23:34:44 -040073#if defined(__x86_64__) && defined(_WIN32)
John Bauman66b8ab22014-05-06 15:57:45 -040074extern "C" void X86CompilationCallback()
75{
Ben Claytoneb50d252019-04-15 13:50:01 -040076 UNIMPLEMENTED("X86CompilationCallback");
John Bauman66b8ab22014-05-06 15:57:45 -040077}
78#endif
79
Nicolas Capens48461502018-08-06 14:20:45 -040080namespace rr
Logan Chien52cde602018-09-03 19:37:57 +080081{
82 class LLVMReactorJIT;
83}
84
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -040085namespace
86{
Nicolas Capens48461502018-08-06 14:20:45 -040087 rr::LLVMReactorJIT *reactorJIT = nullptr;
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -040088 llvm::IRBuilder<> *builder = nullptr;
89 llvm::LLVMContext *context = nullptr;
90 llvm::Module *module = nullptr;
91 llvm::Function *function = nullptr;
Nicolas Capens3bbc5e12016-09-27 10:49:52 -040092
Ben Claytonac07ed82019-03-26 14:17:41 +000093#ifdef ENABLE_RR_DEBUG_INFO
94 std::unique_ptr<rr::DebugInfo> debugInfo;
95#endif
96
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040097 rr::MutexLock codegenMutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +080098
Ben Clayton60a3d6f2019-02-26 17:24:46 +000099#ifdef ENABLE_RR_PRINT
// Returns a copy of |str| in which every non-overlapping occurrence of
// |substr| is replaced by |replacement|, scanning left to right. Text
// inserted by a replacement is never rescanned.
//
// An empty |substr| matches at every position, which would make the search
// loop spin forever; in that case |str| is returned unchanged.
std::string replace(std::string str, const std::string& substr, const std::string& replacement)
{
	if(substr.empty())
	{
		return str;
	}

	size_t pos = 0;
	while((pos = str.find(substr, pos)) != std::string::npos)
	{
		str.replace(pos, substr.length(), replacement);
		pos += replacement.length();   // Continue after the inserted text.
	}
	return str;
}
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000109#endif // ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000110
Logan Chien0eedc8c2018-08-21 09:34:28 +0800111 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
112 {
113 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
114
115 llvm::VectorType *extTy =
116 llvm::VectorType::getExtendedElementVectorType(ty);
117 x = ::builder->CreateZExt(x, extTy);
118 y = ::builder->CreateZExt(y, extTy);
119
120 // (x + y + 1) >> 1
121 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
122 llvm::Value *res = ::builder->CreateAdd(x, y);
123 res = ::builder->CreateAdd(res, one);
124 res = ::builder->CreateLShr(res, one);
125 return ::builder->CreateTrunc(res, ty);
126 }
127
128 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800129 llvm::ICmpInst::Predicate pred)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800130 {
131 return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
132 }
133
134 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
Logan Chienb5ce5092018-09-27 18:45:58 +0800135 llvm::Value *y, llvm::Type *dstTy)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800136 {
137 return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
138 }
139
Logan Chiene3191012018-08-24 22:01:50 +0800140#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800141 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
142 {
143 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
144 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
145
146 llvm::Value *undef = llvm::UndefValue::get(srcTy);
147 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
148 std::iota(mask.begin(), mask.end(), 0);
149 llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
150
151 return sext ? ::builder->CreateSExt(v, dstTy)
Logan Chienb5ce5092018-09-27 18:45:58 +0800152 : ::builder->CreateZExt(v, dstTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800153 }
154
155 llvm::Value *lowerPABS(llvm::Value *v)
156 {
157 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
158 llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
159 llvm::Value *neg = ::builder->CreateNeg(v);
160 return ::builder->CreateSelect(cmp, v, neg);
161 }
162#endif // defined(__i386__) || defined(__x86_64__)
Logan Chiene3191012018-08-24 22:01:50 +0800163
164#if !defined(__i386__) && !defined(__x86_64__)
165 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800166 llvm::FCmpInst::Predicate pred)
Logan Chiene3191012018-08-24 22:01:50 +0800167 {
168 return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
169 }
170
Logan Chien83fc07a2018-09-26 22:14:00 +0800171 llvm::Value *lowerRound(llvm::Value *x)
172 {
173 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
174 ::module, llvm::Intrinsic::nearbyint, {x->getType()});
175 return ::builder->CreateCall(nearbyint, ARGS(x));
176 }
177
Logan Chien2faa24a2018-09-26 19:59:32 +0800178 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
179 {
180 return ::builder->CreateFPToSI(lowerRound(x), ty);
181 }
182
Logan Chien40a60052018-09-26 19:03:53 +0800183 llvm::Value *lowerFloor(llvm::Value *x)
184 {
185 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
186 ::module, llvm::Intrinsic::floor, {x->getType()});
187 return ::builder->CreateCall(floor, ARGS(x));
188 }
189
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800190 llvm::Value *lowerTrunc(llvm::Value *x)
191 {
192 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
193 ::module, llvm::Intrinsic::trunc, {x->getType()});
194 return ::builder->CreateCall(trunc, ARGS(x));
195 }
196
Logan Chiene3191012018-08-24 22:01:50 +0800197 // Packed add/sub saturatation
Logan Chien28794cf2018-09-26 18:58:03 +0800198 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
Logan Chiene3191012018-08-24 22:01:50 +0800199 {
Logan Chien28794cf2018-09-26 18:58:03 +0800200 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
201 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
202
203 unsigned numBits = ty->getScalarSizeInBits();
204
205 llvm::Value *max, *min, *extX, *extY;
206 if (isSigned)
207 {
208 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
209 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
210 extX = ::builder->CreateSExt(x, extTy);
211 extY = ::builder->CreateSExt(y, extTy);
212 }
213 else
214 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400215 ASSERT_MSG(numBits <= 64, "numBits: %d", int(numBits));
Logan Chien28794cf2018-09-26 18:58:03 +0800216 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
217 max = llvm::ConstantInt::get(extTy, maxVal, false);
218 min = llvm::ConstantInt::get(extTy, 0, false);
219 extX = ::builder->CreateZExt(x, extTy);
220 extY = ::builder->CreateZExt(y, extTy);
221 }
222
223 llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
224 : ::builder->CreateSub(extX, extY);
225
226 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
227 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
228
229 return ::builder->CreateTrunc(res, ty);
Logan Chiene3191012018-08-24 22:01:50 +0800230 }
231
232 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
233 {
Logan Chien28794cf2018-09-26 18:58:03 +0800234 return lowerPSAT(x, y, true, false);
Logan Chiene3191012018-08-24 22:01:50 +0800235 }
236
237 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
238 {
Logan Chien28794cf2018-09-26 18:58:03 +0800239 return lowerPSAT(x, y, true, true);
Logan Chiene3191012018-08-24 22:01:50 +0800240 }
241
242 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
243 {
Logan Chien28794cf2018-09-26 18:58:03 +0800244 return lowerPSAT(x, y, false, false);
Logan Chiene3191012018-08-24 22:01:50 +0800245 }
246
247 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
248 {
Logan Chien28794cf2018-09-26 18:58:03 +0800249 return lowerPSAT(x, y, false, true);
Logan Chiene3191012018-08-24 22:01:50 +0800250 }
251
252 llvm::Value *lowerSQRT(llvm::Value *x)
253 {
254 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
255 ::module, llvm::Intrinsic::sqrt, {x->getType()});
256 return ::builder->CreateCall(sqrt, ARGS(x));
257 }
258
259 llvm::Value *lowerRCP(llvm::Value *x)
260 {
261 llvm::Type *ty = x->getType();
262 llvm::Constant *one;
263 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
264 {
265 one = llvm::ConstantVector::getSplat(
266 vectorTy->getNumElements(),
267 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
268 }
269 else
270 {
271 one = llvm::ConstantFP::get(ty, 1);
272 }
273 return ::builder->CreateFDiv(one, x);
274 }
275
276 llvm::Value *lowerRSQRT(llvm::Value *x)
277 {
278 return lowerRCP(lowerSQRT(x));
279 }
280
281 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
282 {
283 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
284 llvm::Value *y = llvm::ConstantVector::getSplat(
285 ty->getNumElements(),
286 llvm::ConstantInt::get(ty->getElementType(), scalarY));
287 return ::builder->CreateShl(x, y);
288 }
289
290 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
291 {
292 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
293 llvm::Value *y = llvm::ConstantVector::getSplat(
294 ty->getNumElements(),
295 llvm::ConstantInt::get(ty->getElementType(), scalarY));
296 return ::builder->CreateAShr(x, y);
297 }
298
299 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
300 {
301 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
302 llvm::Value *y = llvm::ConstantVector::getSplat(
303 ty->getNumElements(),
304 llvm::ConstantInt::get(ty->getElementType(), scalarY));
305 return ::builder->CreateLShr(x, y);
306 }
307
308 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
309 {
310 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
311 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
312
313 llvm::Value *extX = ::builder->CreateSExt(x, extTy);
314 llvm::Value *extY = ::builder->CreateSExt(y, extTy);
315 llvm::Value *mult = ::builder->CreateMul(extX, extY);
316
317 llvm::Value *undef = llvm::UndefValue::get(extTy);
318
319 llvm::SmallVector<uint32_t, 16> evenIdx;
320 llvm::SmallVector<uint32_t, 16> oddIdx;
321 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
322 {
323 evenIdx.push_back(i);
324 oddIdx.push_back(i + 1);
325 }
326
327 llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
328 llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
329 return ::builder->CreateAdd(lhs, rhs);
330 }
331
Logan Chiene3191012018-08-24 22:01:50 +0800332 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
333 {
334 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
335 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
336
337 llvm::IntegerType *dstElemTy =
338 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
339
340 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
Ben Claytoneb50d252019-04-15 13:50:01 -0400341 ASSERT_MSG(truncNumBits < 64, "shift 64 must be handled separately. truncNumBits: %d", int(truncNumBits));
Logan Chiene3191012018-08-24 22:01:50 +0800342 llvm::Constant *max, *min;
343 if (isSigned)
344 {
345 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
346 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
347 }
348 else
349 {
350 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
351 min = llvm::ConstantInt::get(srcTy, 0, false);
352 }
353
354 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
355 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
356 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
357 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
358
359 x = ::builder->CreateTrunc(x, dstTy);
360 y = ::builder->CreateTrunc(y, dstTy);
361
362 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
363 std::iota(index.begin(), index.end(), 0);
364
365 return ::builder->CreateShuffleVector(x, y, index);
366 }
367
368 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
369 {
370 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
371 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
372 llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
373
374 llvm::Value *ret = ::builder->CreateZExt(
375 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
376 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
377 {
378 llvm::Value *elem = ::builder->CreateZExt(
379 ::builder->CreateExtractElement(cmp, i), retTy);
380 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
381 }
382 return ret;
383 }
384
385 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
386 {
387 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
388 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
389 llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
390
391 llvm::Value *ret = ::builder->CreateZExt(
392 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
393 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
394 {
395 llvm::Value *elem = ::builder->CreateZExt(
396 ::builder->CreateExtractElement(cmp, i), retTy);
397 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
398 }
399 return ret;
400 }
401#endif // !defined(__i386__) && !defined(__x86_64__)
Chris Forbese86b6dc2019-03-01 09:08:47 -0800402
403 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
404 {
405 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
406 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
407
408 llvm::Value *extX, *extY;
409 if (sext)
410 {
411 extX = ::builder->CreateSExt(x, extTy);
412 extY = ::builder->CreateSExt(y, extTy);
413 }
414 else
415 {
416 extX = ::builder->CreateZExt(x, extTy);
417 extY = ::builder->CreateZExt(y, extTy);
418 }
419
420 llvm::Value *mult = ::builder->CreateMul(extX, extY);
421
422 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
423 llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getBitWidth());
424 return ::builder->CreateTrunc(mulh, ty);
425 }
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400426}
427
Nicolas Capens48461502018-08-06 14:20:45 -0400428namespace rr
John Bauman89401822014-05-06 15:04:28 -0400429{
Ben Claytonc7904162019-04-17 17:35:48 -0400430 const Capabilities Caps =
431 {
432 true, // CallSupported
433 };
434
Logan Chien40a60052018-09-26 19:03:53 +0800435 class ExternalFunctionSymbolResolver
436 {
437 private:
438 using FunctionMap = std::unordered_map<std::string, void *>;
439 FunctionMap func_;
440
441 public:
442 ExternalFunctionSymbolResolver()
443 {
Ben Claytonac07ed82019-03-26 14:17:41 +0000444 struct F { static void nop() {} };
445 func_.emplace("nop", reinterpret_cast<void*>(F::nop));
446
Logan Chien40a60052018-09-26 19:03:53 +0800447 func_.emplace("floorf", reinterpret_cast<void*>(floorf));
Logan Chien83fc07a2018-09-26 22:14:00 +0800448 func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800449 func_.emplace("truncf", reinterpret_cast<void*>(truncf));
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000450 func_.emplace("printf", reinterpret_cast<void*>(printf));
451 func_.emplace("puts", reinterpret_cast<void*>(puts));
Chris Forbes1a4c7122019-03-15 14:50:47 -0700452 func_.emplace("fmodf", reinterpret_cast<void*>(fmodf));
Ben Claytona2c8b772019-04-09 13:42:36 -0400453 func_.emplace("sinf", reinterpret_cast<void*>(sinf));
Ben Clayton1b6f8c72019-04-09 13:47:43 -0400454 func_.emplace("cosf", reinterpret_cast<void*>(cosf));
Ben Claytonf9350d72019-04-09 14:19:02 -0400455 func_.emplace("asinf", reinterpret_cast<void*>(asinf));
Ben Claytoneafae472019-04-09 14:22:38 -0400456 func_.emplace("acosf", reinterpret_cast<void*>(acosf));
Ben Clayton749b4e02019-04-09 14:27:43 -0400457 func_.emplace("atanf", reinterpret_cast<void*>(atanf));
Ben Claytond9636972019-04-09 15:09:54 -0400458 func_.emplace("sinhf", reinterpret_cast<void*>(sinhf));
Ben Clayton900ea2c2019-04-09 15:25:36 -0400459 func_.emplace("coshf", reinterpret_cast<void*>(coshf));
Ben Clayton3928bd92019-04-09 15:27:41 -0400460 func_.emplace("tanhf", reinterpret_cast<void*>(tanhf));
Ben Claytonf6d77ab2019-04-09 15:30:04 -0400461 func_.emplace("asinhf", reinterpret_cast<void*>(asinhf));
Ben Clayton28ebcb02019-04-09 15:33:38 -0400462 func_.emplace("acoshf", reinterpret_cast<void*>(acoshf));
Ben Claytonfa6a5392019-04-09 15:35:24 -0400463 func_.emplace("atanhf", reinterpret_cast<void*>(atanhf));
Ben Claytona520c3e2019-04-09 15:43:45 -0400464 func_.emplace("atan2f", reinterpret_cast<void*>(atan2f));
Ben Claytonbfe94f02019-04-09 15:52:12 -0400465 func_.emplace("powf", reinterpret_cast<void*>(powf));
Ben Clayton242f0022019-04-09 16:00:53 -0400466 func_.emplace("expf", reinterpret_cast<void*>(expf));
Ben Clayton2c1da722019-04-09 16:03:03 -0400467 func_.emplace("logf", reinterpret_cast<void*>(logf));
Ben Claytonf40b56c2019-04-09 16:06:55 -0400468 func_.emplace("exp2f", reinterpret_cast<void*>(exp2f));
Ben Claytone17acfe2019-04-09 16:09:13 -0400469 func_.emplace("log2f", reinterpret_cast<void*>(log2f));
Ben Clayton14740062019-04-09 13:48:41 -0400470
471#ifdef __APPLE__
472 // LLVM uses this function on macOS for tan.
473 func_.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));
474#elif defined(__linux__)
475 func_.emplace("sincosf", reinterpret_cast<void*>(sincosf));
476#endif // __APPLE__
Logan Chien40a60052018-09-26 19:03:53 +0800477 }
478
479 void *findSymbol(const std::string &name) const
480 {
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000481 // Trim off any underscores from the start of the symbol. LLVM likes
482 // to append these on macOS.
483 const char* trimmed = name.c_str();
484 while (trimmed[0] == '_') { trimmed++; }
485
486 FunctionMap::const_iterator it = func_.find(trimmed);
Ben Claytoneb50d252019-04-15 13:50:01 -0400487 // Missing functions will likely make the module fail in exciting non-obvious ways.
488 ASSERT_MSG(it != func_.end(), "Missing external function: '%s'", name.c_str());
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000489 return it->second;
Logan Chien40a60052018-09-26 19:03:53 +0800490 }
491 };
492
Logan Chien0eedc8c2018-08-21 09:34:28 +0800493 class LLVMReactorJIT
494 {
495 private:
496 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
497 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
498
499 llvm::orc::ExecutionSession session;
Logan Chien40a60052018-09-26 19:03:53 +0800500 ExternalFunctionSymbolResolver externalSymbolResolver;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800501 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
502 std::unique_ptr<llvm::TargetMachine> targetMachine;
503 const llvm::DataLayout dataLayout;
504 ObjLayer objLayer;
505 CompileLayer compileLayer;
506 size_t emittedFunctionsNum;
507
508 public:
509 LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
510 const llvm::TargetOptions &targetOpts):
511 resolver(createLegacyLookupResolver(
512 session,
513 [this](const std::string &name) {
Logan Chien40a60052018-09-26 19:03:53 +0800514 void *func = externalSymbolResolver.findSymbol(name);
515 if (func != nullptr)
516 {
517 return llvm::JITSymbol(
518 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
519 }
520
Logan Chien0eedc8c2018-08-21 09:34:28 +0800521 return objLayer.findSymbol(name, true);
522 },
523 [](llvm::Error err) {
524 if (err)
525 {
526 // TODO: Log the symbol resolution errors.
527 return;
528 }
529 })),
530 targetMachine(llvm::EngineBuilder()
Ben Claytonac07ed82019-03-26 14:17:41 +0000531#ifdef ENABLE_RR_DEBUG_INFO
532 .setOptLevel(llvm::CodeGenOpt::None)
533#endif // ENABLE_RR_DEBUG_INFO
Logan Chien0eedc8c2018-08-21 09:34:28 +0800534 .setMArch(arch)
535 .setMAttrs(mattrs)
536 .setTargetOptions(targetOpts)
537 .selectTarget()),
538 dataLayout(targetMachine->createDataLayout()),
539 objLayer(
540 session,
541 [this](llvm::orc::VModuleKey) {
542 return ObjLayer::Resources{
543 std::make_shared<llvm::SectionMemoryManager>(),
544 resolver};
Ben Claytonac07ed82019-03-26 14:17:41 +0000545 },
546 ObjLayer::NotifyLoadedFtor(),
547 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
548#ifdef ENABLE_RR_DEBUG_INFO
549 if (debugInfo != nullptr)
550 {
551 debugInfo->NotifyObjectEmitted(Obj, L);
552 }
553#endif // ENABLE_RR_DEBUG_INFO
554 },
555 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
556#ifdef ENABLE_RR_DEBUG_INFO
557 if (debugInfo != nullptr)
558 {
559 debugInfo->NotifyFreeingObject(Obj);
560 }
561#endif // ENABLE_RR_DEBUG_INFO
562 }
563 ),
Logan Chien0eedc8c2018-08-21 09:34:28 +0800564 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
565 emittedFunctionsNum(0)
566 {
567 }
568
569 void startSession()
570 {
571 ::module = new llvm::Module("", *::context);
572 }
573
574 void endSession()
575 {
576 ::function = nullptr;
577 ::module = nullptr;
578 }
579
580 LLVMRoutine *acquireRoutine(llvm::Function *func)
581 {
582 std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
583 func->setName(name);
584 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
585 func->setDoesNotThrow();
586
587 std::unique_ptr<llvm::Module> mod(::module);
588 ::module = nullptr;
589 mod->setDataLayout(dataLayout);
590
591 auto moduleKey = session.allocateVModule();
592 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
593
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400594 std::string mangledName;
595 {
596 llvm::raw_string_ostream mangledNameStream(mangledName);
597 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
598 }
599
600 llvm::JITSymbol symbol = compileLayer.findSymbolIn(moduleKey, mangledName, false);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800601
602 llvm::Expected<llvm::JITTargetAddress> expectAddr = symbol.getAddress();
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400603 if(!expectAddr)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800604 {
605 return nullptr;
606 }
607
608 void *addr = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
609 return new LLVMRoutine(addr, releaseRoutineCallback, this, moduleKey);
610 }
611
612 void optimize(llvm::Module *module)
613 {
Ben Claytonac07ed82019-03-26 14:17:41 +0000614#ifdef ENABLE_RR_DEBUG_INFO
615 if (debugInfo != nullptr)
616 {
617 return; // Don't optimize if we're generating debug info.
618 }
619#endif // ENABLE_RR_DEBUG_INFO
620
Logan Chien0eedc8c2018-08-21 09:34:28 +0800621 std::unique_ptr<llvm::legacy::PassManager> passManager(
622 new llvm::legacy::PassManager());
623
624 passManager->add(llvm::createSROAPass());
625
626 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
627 {
628 switch(optimization[pass])
629 {
630 case Disabled: break;
631 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
632 case LICM: passManager->add(llvm::createLICMPass()); break;
633 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
634 case GVN: passManager->add(llvm::createGVNPass()); break;
635 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
636 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
637 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
638 case SCCP: passManager->add(llvm::createSCCPPass()); break;
639 case ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
640 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400641 UNREACHABLE("optimization[pass]: %d, pass: %d", int(optimization[pass]), int(pass));
Logan Chien0eedc8c2018-08-21 09:34:28 +0800642 }
643 }
644
645 passManager->run(*::module);
646 }
647
648 private:
649 void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
650 {
651 llvm::cantFail(compileLayer.removeModule(moduleKey));
652 }
653
654 static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
655 {
656 jit->releaseRoutineModule(moduleKey);
657 }
658 };
Logan Chien52cde602018-09-03 19:37:57 +0800659
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400660 Optimization optimization[10] = {InstructionCombining, Disabled};
John Bauman89401822014-05-06 15:04:28 -0400661
// Abstract Type* values are LLVM type pointers, with one exception: 64-bit
// vectors are emulated with 128-bit ones. That avoids MMX on x86 and VFP on
// ARM, and removes the cost of converting them to explicit 128-bit vectors.
// Because genuine LLVM types are pointers, the emulated types can be encoded
// as abstract pointers holding the small enum values below.
enum InternalType : uintptr_t
{
	// Emulated types:
	Type_v2i32,
	Type_v4i16,
	Type_v2i16,
	Type_v8i8,
	Type_v4i8,
	Type_v2f32,
	EmulatedTypeCount,

	// Result of asInternalType() when the abstract Type* is not one of the
	// emulated types and must be interpreted as an LLVM type pointer:
	Type_LLVM
};
681
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500682 inline InternalType asInternalType(Type *type)
683 {
684 InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
685 return (t < EmulatedTypeCount) ? t : Type_LLVM;
686 }
687
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400688 llvm::Type *T(Type *t)
689 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500690 // Use 128-bit vectors to implement logically shorter ones.
691 switch(asInternalType(t))
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400692 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500693 case Type_v2i32: return T(Int4::getType());
694 case Type_v4i16: return T(Short8::getType());
695 case Type_v2i16: return T(Short8::getType());
696 case Type_v8i8: return T(Byte16::getType());
697 case Type_v4i8: return T(Byte16::getType());
698 case Type_v2f32: return T(Float4::getType());
699 case Type_LLVM: return reinterpret_cast<llvm::Type*>(t);
Ben Claytoneb50d252019-04-15 13:50:01 -0400700 default:
701 UNREACHABLE("asInternalType(t): %d", int(asInternalType(t)));
702 return nullptr;
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400703 }
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400704 }
705
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500706 Type *T(InternalType t)
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400707 {
708 return reinterpret_cast<Type*>(t);
709 }
710
Nicolas Capensac230122016-09-20 14:30:06 -0400711 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
712 {
713 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
714 }
715
Logan Chien191b3052018-08-31 16:57:15 +0800716 inline llvm::BasicBlock *B(BasicBlock *t)
717 {
718 return reinterpret_cast<llvm::BasicBlock*>(t);
719 }
720
Nicolas Capensc8b67a42016-09-25 15:02:52 -0400721 inline BasicBlock *B(llvm::BasicBlock *t)
722 {
723 return reinterpret_cast<BasicBlock*>(t);
724 }
725
Nicolas Capens01a97962017-07-28 17:30:51 -0400726 static size_t typeSize(Type *type)
727 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500728 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -0400729 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500730 case Type_v2i32: return 8;
731 case Type_v4i16: return 8;
732 case Type_v2i16: return 4;
733 case Type_v8i8: return 8;
734 case Type_v4i8: return 4;
735 case Type_v2f32: return 8;
736 case Type_LLVM:
Nicolas Capens01a97962017-07-28 17:30:51 -0400737 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500738 llvm::Type *t = T(type);
Nicolas Capens01a97962017-07-28 17:30:51 -0400739
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500740 if(t->isPointerTy())
741 {
742 return sizeof(void*);
743 }
744
745 // At this point we should only have LLVM 'primitive' types.
746 unsigned int bits = t->getPrimitiveSizeInBits();
Ben Claytoneb50d252019-04-15 13:50:01 -0400747 ASSERT_MSG(bits != 0, "bits: %d", int(bits));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500748
749 // TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
750 // but are typically stored as one byte. The DataLayout structure should
751 // be used here and many other places if this assumption fails.
752 return (bits + 7) / 8;
753 }
754 break;
755 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400756 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500757 return 0;
758 }
Nicolas Capens01a97962017-07-28 17:30:51 -0400759 }
760
Nicolas Capens69674fb2017-09-01 11:08:44 -0400761 static unsigned int elementCount(Type *type)
762 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500763 switch(asInternalType(type))
Nicolas Capens69674fb2017-09-01 11:08:44 -0400764 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500765 case Type_v2i32: return 2;
766 case Type_v4i16: return 4;
767 case Type_v2i16: return 2;
768 case Type_v8i8: return 8;
769 case Type_v4i8: return 4;
770 case Type_v2f32: return 2;
771 case Type_LLVM: return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
Ben Claytoneb50d252019-04-15 13:50:01 -0400772 default:
773 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
774 return 0;
Nicolas Capens69674fb2017-09-01 11:08:44 -0400775 }
Nicolas Capens69674fb2017-09-01 11:08:44 -0400776 }
777
Nicolas Capens86509d92019-03-21 13:23:50 -0400778 static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
779 {
Nicolas Capens86509d92019-03-21 13:23:50 -0400780 if(!atomic)
781 {
782 return llvm::AtomicOrdering::NotAtomic;
783 }
784
785 switch(memoryOrder)
786 {
787 case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic; // https://llvm.org/docs/Atomics.html#monotonic
788 case std::memory_order_consume: return llvm::AtomicOrdering::Acquire; // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
789 case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
790 case std::memory_order_release: return llvm::AtomicOrdering::Release;
791 case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
792 case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
Ben Claytoneb50d252019-04-15 13:50:01 -0400793 default:
794 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
795 return llvm::AtomicOrdering::AcquireRelease;
Nicolas Capens86509d92019-03-21 13:23:50 -0400796 }
797 }
798
John Bauman89401822014-05-06 15:04:28 -0400799 Nucleus::Nucleus()
800 {
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400801 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400802
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400803 llvm::InitializeNativeTarget();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800804 llvm::InitializeNativeTargetAsmPrinter();
805 llvm::InitializeNativeTargetAsmParser();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800806
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400807 if(!::context)
John Bauman89401822014-05-06 15:04:28 -0400808 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400809 ::context = new llvm::LLVMContext();
John Bauman89401822014-05-06 15:04:28 -0400810 }
811
John Bauman89401822014-05-06 15:04:28 -0400812 #if defined(__x86_64__)
Logan Chien52cde602018-09-03 19:37:57 +0800813 static const char arch[] = "x86-64";
Logan Chiene3191012018-08-24 22:01:50 +0800814 #elif defined(__i386__)
Logan Chien52cde602018-09-03 19:37:57 +0800815 static const char arch[] = "x86";
Logan Chiene3191012018-08-24 22:01:50 +0800816 #elif defined(__aarch64__)
817 static const char arch[] = "arm64";
818 #elif defined(__arm__)
819 static const char arch[] = "arm";
Gordana Cmiljanovic082dfec2018-10-19 11:36:15 +0200820 #elif defined(__mips__)
Gordana Cmiljanovic20622c02018-11-05 15:00:11 +0100821 #if defined(__mips64)
822 static const char arch[] = "mips64el";
823 #else
824 static const char arch[] = "mipsel";
825 #endif
Logan Chiene3191012018-08-24 22:01:50 +0800826 #else
827 #error "unknown architecture"
John Bauman89401822014-05-06 15:04:28 -0400828 #endif
829
Logan Chien52cde602018-09-03 19:37:57 +0800830 llvm::SmallVector<std::string, 1> mattrs;
Logan Chiene3191012018-08-24 22:01:50 +0800831#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800832 mattrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
833 mattrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
834 mattrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
835 mattrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
836 mattrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
837 mattrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
Logan Chien0eedc8c2018-08-21 09:34:28 +0800838 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
Logan Chiene3191012018-08-24 22:01:50 +0800839#elif defined(__arm__)
840#if __ARM_ARCH >= 8
841 mattrs.push_back("+armv8-a");
842#else
843 // armv7-a requires compiler-rt routines; otherwise, compiled kernel
844 // might fail to link.
845#endif
846#endif
John Bauman89401822014-05-06 15:04:28 -0400847
Logan Chien0eedc8c2018-08-21 09:34:28 +0800848 llvm::TargetOptions targetOpts;
Nicolas Capensa7643812018-09-13 14:20:06 -0400849 targetOpts.UnsafeFPMath = false;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800850 // targetOpts.NoInfsFPMath = true;
851 // targetOpts.NoNaNsFPMath = true;
Logan Chien52cde602018-09-03 19:37:57 +0800852
853 if(!::reactorJIT)
854 {
Logan Chien0eedc8c2018-08-21 09:34:28 +0800855 ::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
Logan Chien52cde602018-09-03 19:37:57 +0800856 }
857
858 ::reactorJIT->startSession();
John Bauman89401822014-05-06 15:04:28 -0400859
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400860 if(!::builder)
John Bauman89401822014-05-06 15:04:28 -0400861 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400862 ::builder = new llvm::IRBuilder<>(*::context);
John Bauman89401822014-05-06 15:04:28 -0400863 }
864 }
865
866 Nucleus::~Nucleus()
867 {
Logan Chien52cde602018-09-03 19:37:57 +0800868 ::reactorJIT->endSession();
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400869
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400870 ::codegenMutex.unlock();
John Bauman89401822014-05-06 15:04:28 -0400871 }
872
Chris Forbes878d4b02019-01-21 10:48:35 -0800873 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
John Bauman89401822014-05-06 15:04:28 -0400874 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400875 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
John Bauman19bac1e2014-05-06 15:23:49 -0400876 {
Nicolas Capensac230122016-09-20 14:30:06 -0400877 llvm::Type *type = ::function->getReturnType();
John Bauman19bac1e2014-05-06 15:23:49 -0400878
879 if(type->isVoidTy())
880 {
881 createRetVoid();
882 }
883 else
884 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400885 createRet(V(llvm::UndefValue::get(type)));
John Bauman19bac1e2014-05-06 15:23:49 -0400886 }
887 }
John Bauman89401822014-05-06 15:04:28 -0400888
889 if(false)
890 {
Ben Clayton5875be52019-04-11 14:57:40 -0400891 std::error_code error;
892 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400893 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -0400894 }
895
896 if(runOptimizations)
897 {
898 optimize();
899 }
900
901 if(false)
902 {
Ben Clayton5875be52019-04-11 14:57:40 -0400903 std::error_code error;
904 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400905 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -0400906 }
907
Ben Claytonac07ed82019-03-26 14:17:41 +0000908#ifdef ENABLE_RR_DEBUG_INFO
909 if (debugInfo != nullptr)
910 {
911 debugInfo->Finalize();
912 }
913#endif // ENABLE_RR_DEBUG_INFO
914
Logan Chien52cde602018-09-03 19:37:57 +0800915 LLVMRoutine *routine = ::reactorJIT->acquireRoutine(::function);
John Bauman89401822014-05-06 15:04:28 -0400916
John Bauman89401822014-05-06 15:04:28 -0400917 return routine;
918 }
919
920 void Nucleus::optimize()
921 {
Logan Chien52cde602018-09-03 19:37:57 +0800922 ::reactorJIT->optimize(::module);
John Bauman89401822014-05-06 15:04:28 -0400923 }
924
John Bauman19bac1e2014-05-06 15:23:49 -0400925 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
John Bauman89401822014-05-06 15:04:28 -0400926 {
927 // Need to allocate it in the entry block for mem2reg to work
Nicolas Capensc8b67a42016-09-25 15:02:52 -0400928 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
John Bauman89401822014-05-06 15:04:28 -0400929
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400930 llvm::Instruction *declaration;
John Bauman89401822014-05-06 15:04:28 -0400931
932 if(arraySize)
933 {
Logan Chien0eedc8c2018-08-21 09:34:28 +0800934 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
John Bauman89401822014-05-06 15:04:28 -0400935 }
936 else
937 {
Logan Chien0eedc8c2018-08-21 09:34:28 +0800938 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
John Bauman89401822014-05-06 15:04:28 -0400939 }
940
941 entryBlock.getInstList().push_front(declaration);
942
Nicolas Capens19336542016-09-26 10:32:29 -0400943 return V(declaration);
John Bauman89401822014-05-06 15:04:28 -0400944 }
945
946 BasicBlock *Nucleus::createBasicBlock()
947 {
Logan Chien191b3052018-08-31 16:57:15 +0800948 return B(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -0400949 }
950
951 BasicBlock *Nucleus::getInsertBlock()
952 {
Nicolas Capensc8b67a42016-09-25 15:02:52 -0400953 return B(::builder->GetInsertBlock());
John Bauman89401822014-05-06 15:04:28 -0400954 }
955
956 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
957 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400958 // assert(::builder->GetInsertBlock()->back().isTerminator());
Nicolas Capens0192d152019-03-27 14:46:07 -0400959
960 Variable::materializeAll();
961
Logan Chien191b3052018-08-31 16:57:15 +0800962 ::builder->SetInsertPoint(B(basicBlock));
John Bauman89401822014-05-06 15:04:28 -0400963 }
964
Nicolas Capensac230122016-09-20 14:30:06 -0400965 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
John Bauman89401822014-05-06 15:04:28 -0400966 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400967 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400968 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
969 ::function->setCallingConv(llvm::CallingConv::C);
John Bauman89401822014-05-06 15:04:28 -0400970
Ben Clayton5875be52019-04-11 14:57:40 -0400971 #if defined(_WIN32)
Nicolas Capens52551d12018-09-13 14:30:56 -0400972 // FIXME(capn):
973 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
974 // having a trap which allows the OS to grow the stack. For functions with a stack frame
975 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
976 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
977 // the stack and ensure all pages have been committed. This is currently broken in LLVM
978 // JIT, but we can prevent emitting the stack probe call:
979 ::function->addFnAttr("stack-probe-size", "1048576");
980 #endif
981
Ben Claytonac07ed82019-03-26 14:17:41 +0000982#ifdef ENABLE_RR_DEBUG_INFO
983 ::debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(::builder, ::context, ::module, ::function));
984#endif // ENABLE_RR_DEBUG_INFO
985
Logan Chien191b3052018-08-31 16:57:15 +0800986 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -0400987 }
988
Nicolas Capens19336542016-09-26 10:32:29 -0400989 Value *Nucleus::getArgument(unsigned int index)
John Bauman89401822014-05-06 15:04:28 -0400990 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400991 llvm::Function::arg_iterator args = ::function->arg_begin();
John Bauman89401822014-05-06 15:04:28 -0400992
993 while(index)
994 {
995 args++;
996 index--;
997 }
998
Nicolas Capens19336542016-09-26 10:32:29 -0400999 return V(&*args);
John Bauman89401822014-05-06 15:04:28 -04001000 }
1001
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001002 void Nucleus::createRetVoid()
John Bauman89401822014-05-06 15:04:28 -04001003 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001004 RR_DEBUG_INFO_UPDATE_LOC();
1005
Nicolas Capens0192d152019-03-27 14:46:07 -04001006 // Code generated after this point is unreachable, so any variables
1007 // being read can safely return an undefined value. We have to avoid
1008 // materializing variables after the terminator ret instruction.
1009 Variable::killUnmaterialized();
1010
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001011 ::builder->CreateRetVoid();
John Bauman89401822014-05-06 15:04:28 -04001012 }
1013
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001014 void Nucleus::createRet(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001015 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001016 RR_DEBUG_INFO_UPDATE_LOC();
1017
Nicolas Capens0192d152019-03-27 14:46:07 -04001018 // Code generated after this point is unreachable, so any variables
1019 // being read can safely return an undefined value. We have to avoid
1020 // materializing variables after the terminator ret instruction.
1021 Variable::killUnmaterialized();
1022
Logan Chien191b3052018-08-31 16:57:15 +08001023 ::builder->CreateRet(V(v));
John Bauman89401822014-05-06 15:04:28 -04001024 }
1025
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001026 void Nucleus::createBr(BasicBlock *dest)
John Bauman89401822014-05-06 15:04:28 -04001027 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001028 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001029 Variable::materializeAll();
1030
Logan Chien191b3052018-08-31 16:57:15 +08001031 ::builder->CreateBr(B(dest));
John Bauman89401822014-05-06 15:04:28 -04001032 }
1033
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001034 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001035 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001036 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001037 Variable::materializeAll();
Logan Chien191b3052018-08-31 16:57:15 +08001038 ::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
John Bauman89401822014-05-06 15:04:28 -04001039 }
1040
1041 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1042 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001043 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001044 return V(::builder->CreateAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001045 }
1046
1047 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1048 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001049 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001050 return V(::builder->CreateSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001051 }
1052
1053 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1054 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001055 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001056 return V(::builder->CreateMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001057 }
1058
1059 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1060 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001061 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001062 return V(::builder->CreateUDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001063 }
1064
1065 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1066 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001067 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001068 return V(::builder->CreateSDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001069 }
1070
1071 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1072 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001073 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001074 return V(::builder->CreateFAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001075 }
1076
1077 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1078 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001079 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001080 return V(::builder->CreateFSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001081 }
1082
1083 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1084 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001085 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001086 return V(::builder->CreateFMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001087 }
1088
1089 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1090 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001091 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001092 return V(::builder->CreateFDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001093 }
1094
1095 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1096 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001097 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001098 return V(::builder->CreateURem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001099 }
1100
1101 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1102 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001103 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001104 return V(::builder->CreateSRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001105 }
1106
1107 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1108 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001109 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001110 return V(::builder->CreateFRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001111 }
1112
1113 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1114 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001115 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001116 return V(::builder->CreateShl(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001117 }
1118
1119 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1120 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001121 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001122 return V(::builder->CreateLShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001123 }
1124
1125 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1126 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001127 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001128 return V(::builder->CreateAShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001129 }
1130
1131 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1132 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001133 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001134 return V(::builder->CreateAnd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001135 }
1136
1137 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1138 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001139 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001140 return V(::builder->CreateOr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001141 }
1142
1143 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1144 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001145 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001146 return V(::builder->CreateXor(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001147 }
1148
Nicolas Capens19336542016-09-26 10:32:29 -04001149 Value *Nucleus::createNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001150 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001151 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001152 return V(::builder->CreateNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001153 }
1154
Nicolas Capens19336542016-09-26 10:32:29 -04001155 Value *Nucleus::createFNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001156 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001157 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001158 return V(::builder->CreateFNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001159 }
1160
Nicolas Capens19336542016-09-26 10:32:29 -04001161 Value *Nucleus::createNot(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001162 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001163 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001164 return V(::builder->CreateNot(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001165 }
1166
Nicolas Capens86509d92019-03-21 13:23:50 -04001167 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001168 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001169 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001170 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001171 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001172 case Type_v2i32:
1173 case Type_v4i16:
1174 case Type_v8i8:
1175 case Type_v2f32:
1176 return createBitCast(
1177 createInsertElement(
1178 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
Nicolas Capens86509d92019-03-21 13:23:50 -04001179 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001180 0),
1181 type);
1182 case Type_v2i16:
1183 case Type_v4i8:
1184 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001185 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001186 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Nicolas Capens86509d92019-03-21 13:23:50 -04001187 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001188 i = createZExt(i, Long::getType());
1189 Value *v = createInsertElement(u, i, 0);
1190 return createBitCast(v, type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001191 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001192 // Fallthrough to non-emulated case.
1193 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001194 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001195 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens86509d92019-03-21 13:23:50 -04001196 auto load = new llvm::LoadInst(V(ptr), "", isVolatile, alignment);
1197 load->setAtomic(atomicOrdering(atomic, memoryOrder));
1198
1199 return V(::builder->Insert(load));
1200 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001201 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001202 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1203 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001204 }
John Bauman89401822014-05-06 15:04:28 -04001205 }
1206
Nicolas Capens86509d92019-03-21 13:23:50 -04001207 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001208 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001209 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001210 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001211 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001212 case Type_v2i32:
1213 case Type_v4i16:
1214 case Type_v8i8:
1215 case Type_v2f32:
1216 createStore(
1217 createExtractElement(
1218 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1219 createBitCast(ptr, Pointer<Long>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001220 Long::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001221 return value;
1222 case Type_v2i16:
1223 case Type_v4i8:
1224 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001225 {
Logan Chien191b3052018-08-31 16:57:15 +08001226 createStore(
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001227 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1228 createBitCast(ptr, Pointer<Int>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001229 Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens01a97962017-07-28 17:30:51 -04001230 return value;
Nicolas Capens01a97962017-07-28 17:30:51 -04001231 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001232 // Fallthrough to non-emulated case.
1233 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001234 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001235 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens86509d92019-03-21 13:23:50 -04001236 auto store = ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
1237 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1238
1239 return value;
1240 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001241 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001242 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1243 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001244 }
John Bauman89401822014-05-06 15:04:28 -04001245 }
1246
Nicolas Capensd294def2017-01-26 17:44:37 -08001247 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
John Bauman89401822014-05-06 15:04:28 -04001248 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001249 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001250 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens01a97962017-07-28 17:30:51 -04001251 if(sizeof(void*) == 8)
Nicolas Capensd294def2017-01-26 17:44:37 -08001252 {
Ben Claytonb1243732019-02-27 23:56:18 +00001253 // LLVM manual: "When indexing into an array, pointer or vector,
1254 // integers of any width are allowed, and they are not required to
1255 // be constant. These integers are treated as signed values where
1256 // relevant."
1257 //
1258 // Thus if we want indexes to be treated as unsigned we have to
1259 // zero-extend them ourselves.
1260 //
1261 // Note that this is not because we want to address anywhere near
1262 // 4 GB of data. Instead this is important for performance because
1263 // x86 supports automatic zero-extending of 32-bit registers to
1264 // 64-bit. Thus when indexing into an array using a uint32 is
1265 // actually faster than an int32.
1266 index = unsignedIndex ?
1267 createZExt(index, Long::getType()) :
1268 createSExt(index, Long::getType());
Nicolas Capens01a97962017-07-28 17:30:51 -04001269 }
Ben Claytonb1243732019-02-27 23:56:18 +00001270
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001271 // For non-emulated types we can rely on LLVM's GEP to calculate the
1272 // effective address correctly.
1273 if(asInternalType(type) == Type_LLVM)
Nicolas Capens01a97962017-07-28 17:30:51 -04001274 {
Ben Claytonb1243732019-02-27 23:56:18 +00001275 return V(::builder->CreateGEP(V(ptr), V(index)));
Nicolas Capensd294def2017-01-26 17:44:37 -08001276 }
1277
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001278 // For emulated types we have to multiply the index by the intended
1279 // type size ourselves to obain the byte offset.
Ben Claytonb1243732019-02-27 23:56:18 +00001280 index = (sizeof(void*) == 8) ?
1281 createMul(index, createConstantLong((int64_t)typeSize(type))) :
1282 createMul(index, createConstantInt((int)typeSize(type)));
1283
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001284 // Cast to a byte pointer, apply the byte offset, and cast back to the
1285 // original pointer type.
Logan Chien191b3052018-08-31 16:57:15 +08001286 return createBitCast(
1287 V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
1288 T(llvm::PointerType::get(T(type), 0)));
John Bauman89401822014-05-06 15:04:28 -04001289 }
1290
Chris Forbes17813932019-04-18 11:45:54 -07001291 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
John Bauman19bac1e2014-05-06 15:23:49 -04001292 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001293 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbes17813932019-04-18 11:45:54 -07001294 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1295 }
1296
1297 Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1298 {
1299 RR_DEBUG_INFO_UPDATE_LOC();
1300 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::And, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1301 }
1302
1303 Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1304 {
1305 RR_DEBUG_INFO_UPDATE_LOC();
1306 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Or, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1307 }
1308
1309 Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1310 {
1311 RR_DEBUG_INFO_UPDATE_LOC();
1312 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xor, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1313 }
1314
1315 Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1316 {
1317 RR_DEBUG_INFO_UPDATE_LOC();
1318 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Min, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1319 }
1320
1321 Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1322 {
1323 RR_DEBUG_INFO_UPDATE_LOC();
1324 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Max, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1325 }
1326
1327 Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1328 {
1329 RR_DEBUG_INFO_UPDATE_LOC();
1330 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
John Bauman19bac1e2014-05-06 15:23:49 -04001331 }
1332
Chris Forbesa16238d2019-04-18 16:31:54 -07001333 Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1334 {
1335 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbesc9ca99e2019-04-19 07:53:34 -07001336 // Note: AtomicCmpXchgInstruction returns a 2-member struct containing {result, success-flag}, not the result directly.
Chris Forbesa16238d2019-04-18 16:31:54 -07001337 return V(::builder->CreateExtractValue(
1338 ::builder->CreateAtomicCmpXchg(V(ptr), V(compare), V(value), atomicOrdering(true, memoryOrderEqual), atomicOrdering(true, memoryOrderUnequal)),
1339 llvm::ArrayRef<unsigned>(0u)));
1340 }
1341
Nicolas Capens19336542016-09-26 10:32:29 -04001342 Value *Nucleus::createTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001343 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001344 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001345 return V(::builder->CreateTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001346 }
1347
Nicolas Capens19336542016-09-26 10:32:29 -04001348 Value *Nucleus::createZExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001349 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001350 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001351 return V(::builder->CreateZExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001352 }
1353
Nicolas Capens19336542016-09-26 10:32:29 -04001354 Value *Nucleus::createSExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001355 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001356 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001357 return V(::builder->CreateSExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001358 }
1359
Nicolas Capens19336542016-09-26 10:32:29 -04001360 Value *Nucleus::createFPToSI(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001361 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001362 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001363 return V(::builder->CreateFPToSI(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001364 }
1365
Nicolas Capens19336542016-09-26 10:32:29 -04001366 Value *Nucleus::createSIToFP(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001367 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001368 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001369 return V(::builder->CreateSIToFP(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001370 }
1371
Nicolas Capens19336542016-09-26 10:32:29 -04001372 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001373 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001374 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001375 return V(::builder->CreateFPTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001376 }
1377
Nicolas Capens19336542016-09-26 10:32:29 -04001378 Value *Nucleus::createFPExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001379 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001380 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001381 return V(::builder->CreateFPExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001382 }
1383
Nicolas Capens19336542016-09-26 10:32:29 -04001384 Value *Nucleus::createBitCast(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001385 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001386 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04001387 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1388 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1389 // reading back as the destination type.
Logan Chien191b3052018-08-31 16:57:15 +08001390 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001391 {
1392 Value *readAddress = allocateStackVariable(destType);
Logan Chien191b3052018-08-31 16:57:15 +08001393 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1394 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001395 return createLoad(readAddress, destType);
1396 }
Logan Chien191b3052018-08-31 16:57:15 +08001397 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001398 {
Logan Chien191b3052018-08-31 16:57:15 +08001399 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1400 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001401 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1402 return createLoad(readAddress, destType);
1403 }
1404
Logan Chien191b3052018-08-31 16:57:15 +08001405 return V(::builder->CreateBitCast(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001406 }
1407
John Bauman89401822014-05-06 15:04:28 -04001408 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1409 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001410 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001411 return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001412 }
1413
1414 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1415 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001416 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001417 return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001418 }
1419
1420 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1421 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001422 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001423 return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001424 }
1425
1426 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1427 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001428 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001429 return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001430 }
1431
1432 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1433 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001434 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001435 return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001436 }
1437
1438 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1439 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001440 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001441 return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001442 }
1443
1444 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1445 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001446 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001447 return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001448 }
1449
1450 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1451 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001452 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001453 return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001454 }
1455
1456 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1457 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001458 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001459 return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001460 }
1461
1462 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1463 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001464 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001465 return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001466 }
1467
1468 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1469 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001470 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001471 return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001472 }
1473
1474 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1475 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001476 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001477 return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001478 }
1479
1480 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1481 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001482 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001483 return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001484 }
1485
1486 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1487 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001488 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001489 return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001490 }
1491
1492 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1493 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001494 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001495 return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001496 }
1497
1498 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1499 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001500 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001501 return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001502 }
1503
1504 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1505 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001506 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001507 return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001508 }
1509
1510 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1511 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001512 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001513 return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001514 }
1515
1516 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1517 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001518 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001519 return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001520 }
1521
1522 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1523 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001524 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001525 return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001526 }
1527
1528 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1529 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001530 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001531 return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001532 }
1533
1534 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1535 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001536 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001537 return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001538 }
1539
1540 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1541 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001542 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001543 return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001544 }
1545
1546 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1547 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001548 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton71008d82019-03-05 17:17:59 +00001549 return V(::builder->CreateFCmpUNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001550 }
1551
Nicolas Capense95d5342016-09-30 11:37:28 -04001552 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
John Bauman89401822014-05-06 15:04:28 -04001553 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001554 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001555 ASSERT(V(vector)->getType()->getContainedType(0) == T(type));
Logan Chien191b3052018-08-31 16:57:15 +08001556 return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001557 }
1558
1559 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1560 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001561 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001562 return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001563 }
1564
Logan Chien191b3052018-08-31 16:57:15 +08001565 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
John Bauman89401822014-05-06 15:04:28 -04001566 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001567 RR_DEBUG_INFO_UPDATE_LOC();
1568
Logan Chien191b3052018-08-31 16:57:15 +08001569 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
Nicolas Capense89cd582016-09-30 14:23:47 -04001570 const int maxSize = 16;
1571 llvm::Constant *swizzle[maxSize];
Ben Claytoneb50d252019-04-15 13:50:01 -04001572 ASSERT(size <= maxSize);
Nicolas Capense89cd582016-09-30 14:23:47 -04001573
1574 for(int i = 0; i < size; i++)
1575 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001576 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
Nicolas Capense89cd582016-09-30 14:23:47 -04001577 }
1578
1579 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
1580
Logan Chien191b3052018-08-31 16:57:15 +08001581 return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
John Bauman89401822014-05-06 15:04:28 -04001582 }
1583
Logan Chien191b3052018-08-31 16:57:15 +08001584 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001585 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001586 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001587 return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
John Bauman89401822014-05-06 15:04:28 -04001588 }
1589
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001590 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
John Bauman89401822014-05-06 15:04:28 -04001591 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001592 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001593 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
John Bauman89401822014-05-06 15:04:28 -04001594 }
1595
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001596 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
John Bauman89401822014-05-06 15:04:28 -04001597 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001598 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001599 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
1600 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
John Bauman89401822014-05-06 15:04:28 -04001601 }
1602
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001603 void Nucleus::createUnreachable()
John Bauman89401822014-05-06 15:04:28 -04001604 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001605 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001606 ::builder->CreateUnreachable();
John Bauman89401822014-05-06 15:04:28 -04001607 }
1608
Nicolas Capensac230122016-09-20 14:30:06 -04001609 Type *Nucleus::getPointerType(Type *ElementType)
John Bauman89401822014-05-06 15:04:28 -04001610 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001611 return T(llvm::PointerType::get(T(ElementType), 0));
John Bauman89401822014-05-06 15:04:28 -04001612 }
1613
Nicolas Capens13ac2322016-10-13 14:52:12 -04001614 Value *Nucleus::createNullValue(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001615 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001616 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001617 return V(llvm::Constant::getNullValue(T(Ty)));
John Bauman89401822014-05-06 15:04:28 -04001618 }
1619
Nicolas Capens13ac2322016-10-13 14:52:12 -04001620 Value *Nucleus::createConstantLong(int64_t i)
John Bauman89401822014-05-06 15:04:28 -04001621 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001622 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001623 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001624 }
1625
Nicolas Capens13ac2322016-10-13 14:52:12 -04001626 Value *Nucleus::createConstantInt(int i)
John Bauman89401822014-05-06 15:04:28 -04001627 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001628 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001629 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001630 }
1631
Nicolas Capens13ac2322016-10-13 14:52:12 -04001632 Value *Nucleus::createConstantInt(unsigned int i)
John Bauman89401822014-05-06 15:04:28 -04001633 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001634 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001635 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001636 }
1637
Nicolas Capens13ac2322016-10-13 14:52:12 -04001638 Value *Nucleus::createConstantBool(bool b)
John Bauman89401822014-05-06 15:04:28 -04001639 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001640 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001641 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
John Bauman89401822014-05-06 15:04:28 -04001642 }
1643
Nicolas Capens13ac2322016-10-13 14:52:12 -04001644 Value *Nucleus::createConstantByte(signed char i)
John Bauman89401822014-05-06 15:04:28 -04001645 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001646 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001647 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001648 }
1649
Nicolas Capens13ac2322016-10-13 14:52:12 -04001650 Value *Nucleus::createConstantByte(unsigned char i)
John Bauman89401822014-05-06 15:04:28 -04001651 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001652 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001653 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001654 }
1655
Nicolas Capens13ac2322016-10-13 14:52:12 -04001656 Value *Nucleus::createConstantShort(short i)
John Bauman89401822014-05-06 15:04:28 -04001657 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001658 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001659 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001660 }
1661
Nicolas Capens13ac2322016-10-13 14:52:12 -04001662 Value *Nucleus::createConstantShort(unsigned short i)
John Bauman89401822014-05-06 15:04:28 -04001663 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001664 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001665 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001666 }
1667
Nicolas Capens13ac2322016-10-13 14:52:12 -04001668 Value *Nucleus::createConstantFloat(float x)
John Bauman89401822014-05-06 15:04:28 -04001669 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001670 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001671 return V(llvm::ConstantFP::get(T(Float::getType()), x));
John Bauman89401822014-05-06 15:04:28 -04001672 }
1673
Nicolas Capens13ac2322016-10-13 14:52:12 -04001674 Value *Nucleus::createNullPointer(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001675 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001676 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001677 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
John Bauman89401822014-05-06 15:04:28 -04001678 }
1679
Nicolas Capens13ac2322016-10-13 14:52:12 -04001680 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
John Bauman89401822014-05-06 15:04:28 -04001681 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001682 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001683 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1684 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001685 ASSERT(numElements <= 16 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001686 llvm::Constant *constantVector[16];
1687
Nicolas Capens69674fb2017-09-01 11:08:44 -04001688 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001689 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001690 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001691 }
1692
Nicolas Capens69674fb2017-09-01 11:08:44 -04001693 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
Nicolas Capens13ac2322016-10-13 14:52:12 -04001694 }
1695
1696 Value *Nucleus::createConstantVector(const double *constants, Type *type)
1697 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001698 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001699 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1700 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001701 ASSERT(numElements <= 8 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001702 llvm::Constant *constantVector[8];
1703
Nicolas Capens69674fb2017-09-01 11:08:44 -04001704 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001705 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001706 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001707 }
1708
Nicolas Capens69674fb2017-09-01 11:08:44 -04001709 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
John Bauman89401822014-05-06 15:04:28 -04001710 }
1711
John Bauman19bac1e2014-05-06 15:23:49 -04001712 Type *Void::getType()
John Bauman89401822014-05-06 15:04:28 -04001713 {
Nicolas Capensac230122016-09-20 14:30:06 -04001714 return T(llvm::Type::getVoidTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04001715 }
1716
John Bauman19bac1e2014-05-06 15:23:49 -04001717 Type *Bool::getType()
John Bauman89401822014-05-06 15:04:28 -04001718 {
Nicolas Capensac230122016-09-20 14:30:06 -04001719 return T(llvm::Type::getInt1Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001720 }
1721
John Bauman19bac1e2014-05-06 15:23:49 -04001722 Type *Byte::getType()
John Bauman89401822014-05-06 15:04:28 -04001723 {
Nicolas Capensac230122016-09-20 14:30:06 -04001724 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001725 }
1726
John Bauman19bac1e2014-05-06 15:23:49 -04001727 Type *SByte::getType()
John Bauman89401822014-05-06 15:04:28 -04001728 {
Nicolas Capensac230122016-09-20 14:30:06 -04001729 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001730 }
1731
John Bauman19bac1e2014-05-06 15:23:49 -04001732 Type *Short::getType()
John Bauman89401822014-05-06 15:04:28 -04001733 {
Nicolas Capensac230122016-09-20 14:30:06 -04001734 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001735 }
1736
John Bauman19bac1e2014-05-06 15:23:49 -04001737 Type *UShort::getType()
John Bauman89401822014-05-06 15:04:28 -04001738 {
Nicolas Capensac230122016-09-20 14:30:06 -04001739 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001740 }
1741
John Bauman19bac1e2014-05-06 15:23:49 -04001742 Type *Byte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001743 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001744 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001745 }
1746
John Bauman19bac1e2014-05-06 15:23:49 -04001747 Type *SByte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001748 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001749 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001750 }
1751
John Bauman19bac1e2014-05-06 15:23:49 -04001752 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001753 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001754 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001755#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001756 return x86::paddusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001757#else
1758 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
1759#endif
John Bauman89401822014-05-06 15:04:28 -04001760 }
John Bauman66b8ab22014-05-06 15:57:45 -04001761
John Bauman19bac1e2014-05-06 15:23:49 -04001762 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001763 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001764 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001765#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001766 return x86::psubusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001767#else
1768 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
1769#endif
John Bauman89401822014-05-06 15:04:28 -04001770 }
1771
John Bauman19bac1e2014-05-06 15:23:49 -04001772 RValue<Int> SignMask(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04001773 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001774 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001775#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001776 return x86::pmovmskb(x);
Logan Chiene3191012018-08-24 22:01:50 +08001777#else
1778 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
1779#endif
John Bauman89401822014-05-06 15:04:28 -04001780 }
1781
John Bauman19bac1e2014-05-06 15:23:49 -04001782// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001783// {
Logan Chiene3191012018-08-24 22:01:50 +08001784//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001785// return x86::pcmpgtb(x, y); // FIXME: Signedness
Logan Chiene3191012018-08-24 22:01:50 +08001786//#else
1787// return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
1788//#endif
John Bauman89401822014-05-06 15:04:28 -04001789// }
John Bauman66b8ab22014-05-06 15:57:45 -04001790
John Bauman19bac1e2014-05-06 15:23:49 -04001791 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001792 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001793 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001794#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001795 return x86::pcmpeqb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001796#else
1797 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
1798#endif
John Bauman89401822014-05-06 15:04:28 -04001799 }
1800
John Bauman19bac1e2014-05-06 15:23:49 -04001801 Type *Byte8::getType()
John Bauman89401822014-05-06 15:04:28 -04001802 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001803 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04001804 }
1805
John Bauman19bac1e2014-05-06 15:23:49 -04001806 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001807 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001808 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001809#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001810 return x86::paddsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001811#else
1812 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
1813#endif
John Bauman89401822014-05-06 15:04:28 -04001814 }
John Bauman66b8ab22014-05-06 15:57:45 -04001815
John Bauman19bac1e2014-05-06 15:23:49 -04001816 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001817 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001818 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001819#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001820 return x86::psubsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001821#else
1822 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
1823#endif
John Bauman89401822014-05-06 15:04:28 -04001824 }
1825
John Bauman19bac1e2014-05-06 15:23:49 -04001826 RValue<Int> SignMask(RValue<SByte8> x)
John Bauman89401822014-05-06 15:04:28 -04001827 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001828 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001829#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001830 return x86::pmovmskb(As<Byte8>(x));
Logan Chiene3191012018-08-24 22:01:50 +08001831#else
1832 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
1833#endif
John Bauman89401822014-05-06 15:04:28 -04001834 }
1835
John Bauman19bac1e2014-05-06 15:23:49 -04001836 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001837 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001838 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001839#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001840 return x86::pcmpgtb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001841#else
1842 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
1843#endif
John Bauman89401822014-05-06 15:04:28 -04001844 }
John Bauman66b8ab22014-05-06 15:57:45 -04001845
John Bauman19bac1e2014-05-06 15:23:49 -04001846 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001847 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001848 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001849#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001850 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
Logan Chiene3191012018-08-24 22:01:50 +08001851#else
1852 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
1853#endif
John Bauman89401822014-05-06 15:04:28 -04001854 }
1855
John Bauman19bac1e2014-05-06 15:23:49 -04001856 Type *SByte8::getType()
John Bauman89401822014-05-06 15:04:28 -04001857 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001858 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04001859 }
1860
John Bauman19bac1e2014-05-06 15:23:49 -04001861 Type *Byte16::getType()
John Bauman89401822014-05-06 15:04:28 -04001862 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001863 return T(llvm::VectorType::get(T(Byte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04001864 }
1865
John Bauman19bac1e2014-05-06 15:23:49 -04001866 Type *SByte16::getType()
John Bauman89401822014-05-06 15:04:28 -04001867 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001868 return T(llvm::VectorType::get(T(SByte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04001869 }
1870
Nicolas Capens16b5f152016-10-13 13:39:01 -04001871 Type *Short2::getType()
1872 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001873 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04001874 }
1875
Nicolas Capens16b5f152016-10-13 13:39:01 -04001876 Type *UShort2::getType()
1877 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001878 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04001879 }
1880
John Bauman19bac1e2014-05-06 15:23:49 -04001881 Short4::Short4(RValue<Int4> cast)
John Bauman89401822014-05-06 15:04:28 -04001882 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001883 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04001884 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
John Bauman89401822014-05-06 15:04:28 -04001885 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
1886
Nicolas Capens01a97962017-07-28 17:30:51 -04001887 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
1888 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
John Bauman89401822014-05-06 15:04:28 -04001889
John Bauman66b8ab22014-05-06 15:57:45 -04001890 storeValue(short4);
John Bauman89401822014-05-06 15:04:28 -04001891 }
1892
John Bauman19bac1e2014-05-06 15:23:49 -04001893// Short4::Short4(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04001894// {
1895// }
1896
John Bauman19bac1e2014-05-06 15:23:49 -04001897 Short4::Short4(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04001898 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001899 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04001900 Int4 v4i32 = Int4(cast);
Logan Chiene3191012018-08-24 22:01:50 +08001901#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001902 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
Logan Chiene3191012018-08-24 22:01:50 +08001903#else
1904 Value *v = v4i32.loadValue();
1905 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
1906#endif
John Bauman66b8ab22014-05-06 15:57:45 -04001907
1908 storeValue(As<Short4>(Int2(v4i32)).value);
John Bauman89401822014-05-06 15:04:28 -04001909 }
1910
John Bauman19bac1e2014-05-06 15:23:49 -04001911 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04001912 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001913 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001914#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001915 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
1916
1917 return x86::psllw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08001918#else
1919 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
1920#endif
John Bauman89401822014-05-06 15:04:28 -04001921 }
1922
John Bauman19bac1e2014-05-06 15:23:49 -04001923 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04001924 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001925 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001926#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001927 return x86::psraw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08001928#else
1929 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
1930#endif
John Bauman89401822014-05-06 15:04:28 -04001931 }
1932
John Bauman19bac1e2014-05-06 15:23:49 -04001933 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001934 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001935 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001936#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001937 return x86::pmaxsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001938#else
1939 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
1940#endif
John Bauman89401822014-05-06 15:04:28 -04001941 }
1942
John Bauman19bac1e2014-05-06 15:23:49 -04001943 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001944 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001945 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001946#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001947 return x86::pminsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001948#else
1949 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
1950#endif
John Bauman89401822014-05-06 15:04:28 -04001951 }
1952
John Bauman19bac1e2014-05-06 15:23:49 -04001953 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001954 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001955 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001956#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001957 return x86::paddsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001958#else
1959 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
1960#endif
John Bauman89401822014-05-06 15:04:28 -04001961 }
1962
John Bauman19bac1e2014-05-06 15:23:49 -04001963 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001964 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001965 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001966#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001967 return x86::psubsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001968#else
1969 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
1970#endif
John Bauman89401822014-05-06 15:04:28 -04001971 }
1972
John Bauman19bac1e2014-05-06 15:23:49 -04001973 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001974 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001975 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001976#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001977 return x86::pmulhw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001978#else
1979 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
1980#endif
John Bauman89401822014-05-06 15:04:28 -04001981 }
1982
John Bauman19bac1e2014-05-06 15:23:49 -04001983 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001984 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001985 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001986#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001987 return x86::pmaddwd(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001988#else
1989 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
1990#endif
John Bauman89401822014-05-06 15:04:28 -04001991 }
1992
Nicolas Capens33438a62017-09-27 11:47:35 -04001993 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001994 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001995 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001996#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04001997 auto result = x86::packsswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001998#else
1999 auto result = V(lowerPack(V(x.value), V(y.value), true));
2000#endif
Nicolas Capens01a97962017-07-28 17:30:51 -04002001 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
John Bauman89401822014-05-06 15:04:28 -04002002 }
2003
Nicolas Capens33438a62017-09-27 11:47:35 -04002004 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2005 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002006 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002007#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002008 auto result = x86::packuswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002009#else
2010 auto result = V(lowerPack(V(x.value), V(y.value), false));
2011#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002012 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
2013 }
2014
John Bauman19bac1e2014-05-06 15:23:49 -04002015 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002016 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002017 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002018#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002019 return x86::pcmpgtw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002020#else
2021 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
2022#endif
John Bauman89401822014-05-06 15:04:28 -04002023 }
2024
John Bauman19bac1e2014-05-06 15:23:49 -04002025 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002026 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002027 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002028#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002029 return x86::pcmpeqw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002030#else
2031 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
2032#endif
John Bauman89401822014-05-06 15:04:28 -04002033 }
2034
John Bauman19bac1e2014-05-06 15:23:49 -04002035 Type *Short4::getType()
John Bauman89401822014-05-06 15:04:28 -04002036 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002037 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002038 }
2039
John Bauman19bac1e2014-05-06 15:23:49 -04002040 UShort4::UShort4(RValue<Float4> cast, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04002041 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002042 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002043 if(saturate)
2044 {
Logan Chiena8385ed2018-09-26 19:22:54 +08002045#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002046 if(CPUID::supportsSSE4_1())
2047 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002048 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
Nicolas Capens33438a62017-09-27 11:47:35 -04002049 *this = As<Short4>(PackUnsigned(int4, int4));
John Bauman89401822014-05-06 15:04:28 -04002050 }
2051 else
Logan Chiena8385ed2018-09-26 19:22:54 +08002052#endif
John Bauman89401822014-05-06 15:04:28 -04002053 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002054 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
John Bauman89401822014-05-06 15:04:28 -04002055 }
2056 }
2057 else
2058 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002059 *this = Short4(Int4(cast));
John Bauman89401822014-05-06 15:04:28 -04002060 }
2061 }
2062
John Bauman19bac1e2014-05-06 15:23:49 -04002063 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002064 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002065 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002066#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002067 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2068
2069 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002070#else
2071 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
2072#endif
John Bauman89401822014-05-06 15:04:28 -04002073 }
2074
John Bauman19bac1e2014-05-06 15:23:49 -04002075 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002076 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002077 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002078#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002079 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
2080
2081 return x86::psrlw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002082#else
2083 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2084#endif
John Bauman89401822014-05-06 15:04:28 -04002085 }
2086
John Bauman19bac1e2014-05-06 15:23:49 -04002087 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002088 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002089 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002090 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002091 }
2092
John Bauman19bac1e2014-05-06 15:23:49 -04002093 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002094 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002095 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002096 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002097 }
2098
John Bauman19bac1e2014-05-06 15:23:49 -04002099 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002100 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002101 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002102#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002103 return x86::paddusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002104#else
2105 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2106#endif
John Bauman89401822014-05-06 15:04:28 -04002107 }
2108
John Bauman19bac1e2014-05-06 15:23:49 -04002109 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002110 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002111 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002112#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002113 return x86::psubusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002114#else
2115 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2116#endif
John Bauman89401822014-05-06 15:04:28 -04002117 }
2118
John Bauman19bac1e2014-05-06 15:23:49 -04002119 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002120 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002121 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002122#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002123 return x86::pmulhuw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002124#else
2125 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2126#endif
John Bauman89401822014-05-06 15:04:28 -04002127 }
2128
John Bauman19bac1e2014-05-06 15:23:49 -04002129 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002130 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002131 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002132#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002133 return x86::pavgw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002134#else
2135 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
2136#endif
John Bauman89401822014-05-06 15:04:28 -04002137 }
2138
John Bauman19bac1e2014-05-06 15:23:49 -04002139 Type *UShort4::getType()
John Bauman89401822014-05-06 15:04:28 -04002140 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002141 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002142 }
2143
John Bauman19bac1e2014-05-06 15:23:49 -04002144 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002145 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002146 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002147#if defined(__i386__) || defined(__x86_64__)
2148 return x86::psllw(lhs, rhs);
2149#else
2150 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
2151#endif
John Bauman89401822014-05-06 15:04:28 -04002152 }
2153
John Bauman19bac1e2014-05-06 15:23:49 -04002154 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002155 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002156 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002157#if defined(__i386__) || defined(__x86_64__)
2158 return x86::psraw(lhs, rhs);
2159#else
2160 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
2161#endif
John Bauman89401822014-05-06 15:04:28 -04002162 }
2163
John Bauman19bac1e2014-05-06 15:23:49 -04002164 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002165 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002166 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002167#if defined(__i386__) || defined(__x86_64__)
2168 return x86::pmaddwd(x, y);
2169#else
2170 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
2171#endif
John Bauman89401822014-05-06 15:04:28 -04002172 }
2173
John Bauman19bac1e2014-05-06 15:23:49 -04002174 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002175 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002176 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002177#if defined(__i386__) || defined(__x86_64__)
2178 return x86::pmulhw(x, y);
2179#else
2180 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2181#endif
John Bauman89401822014-05-06 15:04:28 -04002182 }
2183
John Bauman19bac1e2014-05-06 15:23:49 -04002184 Type *Short8::getType()
John Bauman89401822014-05-06 15:04:28 -04002185 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002186 return T(llvm::VectorType::get(T(Short::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002187 }
2188
John Bauman19bac1e2014-05-06 15:23:49 -04002189 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002190 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002191 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002192#if defined(__i386__) || defined(__x86_64__)
2193 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
2194#else
2195 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
2196#endif
John Bauman89401822014-05-06 15:04:28 -04002197 }
2198
John Bauman19bac1e2014-05-06 15:23:49 -04002199 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002200 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002201 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002202#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002203 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
Logan Chiene3191012018-08-24 22:01:50 +08002204#else
2205 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
2206#endif
John Bauman89401822014-05-06 15:04:28 -04002207 }
2208
John Bauman19bac1e2014-05-06 15:23:49 -04002209 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
John Bauman89401822014-05-06 15:04:28 -04002210 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002211 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense89cd582016-09-30 14:23:47 -04002212 int pshufb[16] =
2213 {
2214 select0 + 0,
2215 select0 + 1,
2216 select1 + 0,
2217 select1 + 1,
2218 select2 + 0,
2219 select2 + 1,
2220 select3 + 0,
2221 select3 + 1,
2222 select4 + 0,
2223 select4 + 1,
2224 select5 + 0,
2225 select5 + 1,
2226 select6 + 0,
2227 select6 + 1,
2228 select7 + 0,
2229 select7 + 1,
2230 };
John Bauman89401822014-05-06 15:04:28 -04002231
2232 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
Nicolas Capense89cd582016-09-30 14:23:47 -04002233 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
John Bauman89401822014-05-06 15:04:28 -04002234 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
2235
2236 return RValue<UShort8>(short8);
2237 }
2238
John Bauman19bac1e2014-05-06 15:23:49 -04002239 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04002240 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002241 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002242#if defined(__i386__) || defined(__x86_64__)
2243 return x86::pmulhuw(x, y);
2244#else
2245 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2246#endif
John Bauman89401822014-05-06 15:04:28 -04002247 }
2248
John Bauman19bac1e2014-05-06 15:23:49 -04002249 Type *UShort8::getType()
John Bauman89401822014-05-06 15:04:28 -04002250 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002251 return T(llvm::VectorType::get(T(UShort::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002252 }
2253
Nicolas Capens96d4e092016-11-18 14:22:38 -05002254 RValue<Int> operator++(Int &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002255 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002256 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002257 RValue<Int> res = val;
2258
Logan Chien191b3052018-08-31 16:57:15 +08002259 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002260 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002261
2262 return res;
2263 }
2264
Nicolas Capens96d4e092016-11-18 14:22:38 -05002265 const Int &operator++(Int &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002266 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002267 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002268 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002269 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002270
2271 return val;
2272 }
2273
Nicolas Capens96d4e092016-11-18 14:22:38 -05002274 RValue<Int> operator--(Int &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002275 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002276 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002277 RValue<Int> res = val;
2278
Logan Chien191b3052018-08-31 16:57:15 +08002279 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002280 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002281
2282 return res;
2283 }
2284
Nicolas Capens96d4e092016-11-18 14:22:38 -05002285 const Int &operator--(Int &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002286 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002287 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002288 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002289 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002290
2291 return val;
2292 }
2293
John Bauman19bac1e2014-05-06 15:23:49 -04002294 RValue<Int> RoundInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002295 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002296 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002297#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002298 return x86::cvtss2si(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002299#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002300 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002301#endif
John Bauman89401822014-05-06 15:04:28 -04002302 }
2303
John Bauman19bac1e2014-05-06 15:23:49 -04002304 Type *Int::getType()
John Bauman89401822014-05-06 15:04:28 -04002305 {
Nicolas Capensac230122016-09-20 14:30:06 -04002306 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002307 }
2308
John Bauman19bac1e2014-05-06 15:23:49 -04002309 Type *Long::getType()
John Bauman89401822014-05-06 15:04:28 -04002310 {
Nicolas Capensac230122016-09-20 14:30:06 -04002311 return T(llvm::Type::getInt64Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002312 }
2313
John Bauman19bac1e2014-05-06 15:23:49 -04002314 UInt::UInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002315 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002316 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002317 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2318 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
John Bauman89401822014-05-06 15:04:28 -04002319
Alexis Hetu764d1422016-09-28 08:44:22 -04002320 // Smallest positive value representable in UInt, but not in Int
2321 const unsigned int ustart = 0x80000000u;
2322 const float ustartf = float(ustart);
2323
2324 // If the value is negative, store 0, otherwise store the result of the conversion
2325 storeValue((~(As<Int>(cast) >> 31) &
2326 // Check if the value can be represented as an Int
2327 IfThenElse(cast >= ustartf,
2328 // If the value is too large, subtract ustart and re-add it after conversion.
2329 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2330 // Otherwise, just convert normally
2331 Int(cast))).value);
John Bauman89401822014-05-06 15:04:28 -04002332 }
2333
Nicolas Capens96d4e092016-11-18 14:22:38 -05002334 RValue<UInt> operator++(UInt &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002335 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002336 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002337 RValue<UInt> res = val;
2338
Logan Chien191b3052018-08-31 16:57:15 +08002339 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002340 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002341
2342 return res;
2343 }
2344
Nicolas Capens96d4e092016-11-18 14:22:38 -05002345 const UInt &operator++(UInt &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002346 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002347 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002348 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002349 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002350
2351 return val;
2352 }
2353
Nicolas Capens96d4e092016-11-18 14:22:38 -05002354 RValue<UInt> operator--(UInt &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002355 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002356 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002357 RValue<UInt> res = val;
2358
Logan Chien191b3052018-08-31 16:57:15 +08002359 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002360 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002361
2362 return res;
2363 }
2364
Nicolas Capens96d4e092016-11-18 14:22:38 -05002365 const UInt &operator--(UInt &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002366 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002367 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002368 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002369 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002370
2371 return val;
2372 }
2373
John Bauman19bac1e2014-05-06 15:23:49 -04002374// RValue<UInt> RoundUInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002375// {
Logan Chiene3191012018-08-24 22:01:50 +08002376//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002377// return x86::cvtss2si(val); // FIXME: Unsigned
Logan Chiene3191012018-08-24 22:01:50 +08002378//#else
2379// return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
2380//#endif
John Bauman89401822014-05-06 15:04:28 -04002381// }
2382
John Bauman19bac1e2014-05-06 15:23:49 -04002383 Type *UInt::getType()
John Bauman89401822014-05-06 15:04:28 -04002384 {
Nicolas Capensac230122016-09-20 14:30:06 -04002385 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002386 }
2387
John Bauman19bac1e2014-05-06 15:23:49 -04002388// Int2::Int2(RValue<Int> cast)
2389// {
John Bauman19bac1e2014-05-06 15:23:49 -04002390// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2391// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
John Bauman66b8ab22014-05-06 15:57:45 -04002392//
Nicolas Capense89cd582016-09-30 14:23:47 -04002393// int shuffle[2] = {0, 0};
2394// Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
John Bauman19bac1e2014-05-06 15:23:49 -04002395//
John Bauman66b8ab22014-05-06 15:57:45 -04002396// storeValue(replicate);
John Bauman19bac1e2014-05-06 15:23:49 -04002397// }
John Bauman89401822014-05-06 15:04:28 -04002398
John Bauman19bac1e2014-05-06 15:23:49 -04002399 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002400 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002401 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002402#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002403 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
2404
2405 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002406#else
2407 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
2408#endif
John Bauman89401822014-05-06 15:04:28 -04002409 }
2410
John Bauman19bac1e2014-05-06 15:23:49 -04002411 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002412 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002413 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002414#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002415 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
2416
2417 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002418#else
2419 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
2420#endif
John Bauman89401822014-05-06 15:04:28 -04002421 }
2422
John Bauman19bac1e2014-05-06 15:23:49 -04002423 Type *Int2::getType()
John Bauman89401822014-05-06 15:04:28 -04002424 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002425 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002426 }
2427
John Bauman19bac1e2014-05-06 15:23:49 -04002428 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002429 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002430 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002431#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002432 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
2433
2434 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002435#else
2436 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
2437#endif
John Bauman89401822014-05-06 15:04:28 -04002438 }
2439
John Bauman19bac1e2014-05-06 15:23:49 -04002440 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002441 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002442 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002443#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002444 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
2445
2446 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002447#else
2448 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
2449#endif
John Bauman89401822014-05-06 15:04:28 -04002450 }
2451
John Bauman19bac1e2014-05-06 15:23:49 -04002452 Type *UInt2::getType()
John Bauman89401822014-05-06 15:04:28 -04002453 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002454 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002455 }
2456
Nicolas Capenscb986762017-01-20 11:34:37 -05002457 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002458 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002459 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002460#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002461 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002462 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002463 *this = x86::pmovzxbd(As<Byte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002464 }
2465 else
Logan Chiene3191012018-08-24 22:01:50 +08002466#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002467 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002468 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Nicolas Capens01a97962017-07-28 17:30:51 -04002469 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002470 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002471
Nicolas Capense89cd582016-09-30 14:23:47 -04002472 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002473 Value *c = Nucleus::createBitCast(b, Short8::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002474 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002475
Nicolas Capens01a97962017-07-28 17:30:51 -04002476 *this = As<Int4>(d);
2477 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002478 }
2479
Nicolas Capenscb986762017-01-20 11:34:37 -05002480 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002481 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002482 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002483#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002484 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002485 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002486 *this = x86::pmovsxbd(As<SByte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002487 }
2488 else
Logan Chiene3191012018-08-24 22:01:50 +08002489#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002490 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002491 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2492 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
2493 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002494
Nicolas Capense89cd582016-09-30 14:23:47 -04002495 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002496 Value *c = Nucleus::createBitCast(b, Short8::getType());
2497 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002498
Nicolas Capens01a97962017-07-28 17:30:51 -04002499 *this = As<Int4>(d) >> 24;
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002500 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002501 }
2502
Nicolas Capenscb986762017-01-20 11:34:37 -05002503 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002504 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002505 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002506#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002507 if(CPUID::supportsSSE4_1())
2508 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002509 *this = x86::pmovsxwd(As<Short8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002510 }
2511 else
Logan Chiene3191012018-08-24 22:01:50 +08002512#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002513 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002514 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002515 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2516 *this = As<Int4>(c) >> 16;
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002517 }
2518 }
2519
Nicolas Capenscb986762017-01-20 11:34:37 -05002520 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002521 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002522 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002523#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002524 if(CPUID::supportsSSE4_1())
2525 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002526 *this = x86::pmovzxwd(As<UShort8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002527 }
2528 else
Logan Chiene3191012018-08-24 22:01:50 +08002529#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002530 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002531 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002532 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2533 *this = As<Int4>(c);
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002534 }
2535 }
2536
Nicolas Capenscb986762017-01-20 11:34:37 -05002537 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002538 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002539 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002540 Value *vector = loadValue();
2541 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2542
Nicolas Capense89cd582016-09-30 14:23:47 -04002543 int swizzle[4] = {0, 0, 0, 0};
2544 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002545
2546 storeValue(replicate);
2547 }
2548
John Bauman19bac1e2014-05-06 15:23:49 -04002549 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002550 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002551 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002552#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002553 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002554#else
2555 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
2556#endif
John Bauman89401822014-05-06 15:04:28 -04002557 }
2558
John Bauman19bac1e2014-05-06 15:23:49 -04002559 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002560 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002561 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002562#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002563 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002564#else
2565 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2566#endif
John Bauman89401822014-05-06 15:04:28 -04002567 }
2568
John Bauman19bac1e2014-05-06 15:23:49 -04002569 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2570 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002571 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002572 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002573 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2574 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2575 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002576 }
2577
2578 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2579 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002580 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002581 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2582 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2583 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
2584 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002585 }
2586
2587 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2588 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002589 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002590 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2591 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2592 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
2593 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002594 }
2595
2596 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2597 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002598 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002599 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2600 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2601 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2602 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002603 }
2604
2605 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2606 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002607 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002608 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2609 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2610 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
2611 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002612 }
2613
2614 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2615 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002616 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002617 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2618 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2619 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
2620 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002621 }
2622
2623 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2624 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002625 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002626#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002627 if(CPUID::supportsSSE4_1())
2628 {
2629 return x86::pmaxsd(x, y);
2630 }
2631 else
Logan Chiene3191012018-08-24 22:01:50 +08002632#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002633 {
2634 RValue<Int4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002635 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002636 }
2637 }
2638
2639 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2640 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002641 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002642#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002643 if(CPUID::supportsSSE4_1())
2644 {
2645 return x86::pminsd(x, y);
2646 }
2647 else
Logan Chiene3191012018-08-24 22:01:50 +08002648#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002649 {
2650 RValue<Int4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002651 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002652 }
2653 }
2654
2655 RValue<Int4> RoundInt(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002656 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002657 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002658#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002659 return x86::cvtps2dq(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002660#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002661 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002662#endif
John Bauman89401822014-05-06 15:04:28 -04002663 }
2664
Chris Forbese86b6dc2019-03-01 09:08:47 -08002665 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2666 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002667 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002668 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2669 return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2670 }
2671
2672 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2673 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002674 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002675 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2676 return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2677 }
2678
Nicolas Capens33438a62017-09-27 11:47:35 -04002679 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04002680 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002681 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002682#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002683 return x86::packssdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002684#else
2685 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
2686#endif
John Bauman89401822014-05-06 15:04:28 -04002687 }
2688
Nicolas Capens33438a62017-09-27 11:47:35 -04002689 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
2690 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002691 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002692#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002693 return x86::packusdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002694#else
2695 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
2696#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002697 }
2698
John Bauman19bac1e2014-05-06 15:23:49 -04002699 RValue<Int> SignMask(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04002700 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002701 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002702#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002703 return x86::movmskps(As<Float4>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002704#else
2705 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2706#endif
John Bauman89401822014-05-06 15:04:28 -04002707 }
2708
John Bauman19bac1e2014-05-06 15:23:49 -04002709 Type *Int4::getType()
John Bauman89401822014-05-06 15:04:28 -04002710 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002711 return T(llvm::VectorType::get(T(Int::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002712 }
2713
Nicolas Capenscb986762017-01-20 11:34:37 -05002714 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002715 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002716 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002717 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2718 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
John Bauman89401822014-05-06 15:04:28 -04002719
Alexis Hetu764d1422016-09-28 08:44:22 -04002720 // Smallest positive value representable in UInt, but not in Int
2721 const unsigned int ustart = 0x80000000u;
2722 const float ustartf = float(ustart);
2723
2724 // Check if the value can be represented as an Int
2725 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
2726 // If the value is too large, subtract ustart and re-add it after conversion.
2727 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
2728 // Otherwise, just convert normally
2729 (~uiValue & Int4(cast));
2730 // If the value is negative, store 0, otherwise store the result of the conversion
2731 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
John Bauman89401822014-05-06 15:04:28 -04002732 }
2733
John Bauman19bac1e2014-05-06 15:23:49 -04002734 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002735 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002736 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002737#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002738 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002739#else
2740 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
2741#endif
John Bauman89401822014-05-06 15:04:28 -04002742 }
2743
John Bauman19bac1e2014-05-06 15:23:49 -04002744 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002745 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002746 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002747#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002748 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002749#else
2750 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2751#endif
John Bauman89401822014-05-06 15:04:28 -04002752 }
2753
John Bauman19bac1e2014-05-06 15:23:49 -04002754 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
2755 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002756 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002757 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002758 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2759 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2760 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002761 }
2762
2763 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
2764 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002765 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04002766 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
2767 }
2768
2769 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
2770 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002771 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002772 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2773 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2774 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
2775 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002776 }
2777
2778 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
2779 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002780 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04002781 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2782 }
2783
2784 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
2785 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002786 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002787 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2788 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2789 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
2790 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002791 }
2792
2793 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
2794 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002795 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04002796 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
2797 }
2798
2799 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
2800 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002801 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002802#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002803 if(CPUID::supportsSSE4_1())
2804 {
2805 return x86::pmaxud(x, y);
2806 }
2807 else
Logan Chiene3191012018-08-24 22:01:50 +08002808#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002809 {
2810 RValue<UInt4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002811 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002812 }
2813 }
2814
2815 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
2816 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002817 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002818#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002819 if(CPUID::supportsSSE4_1())
2820 {
2821 return x86::pminud(x, y);
2822 }
2823 else
Logan Chiene3191012018-08-24 22:01:50 +08002824#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002825 {
2826 RValue<UInt4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002827 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002828 }
2829 }
2830
John Bauman19bac1e2014-05-06 15:23:49 -04002831 Type *UInt4::getType()
John Bauman89401822014-05-06 15:04:28 -04002832 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002833 return T(llvm::VectorType::get(T(UInt::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002834 }
2835
Alexis Hetu734e2572018-12-20 14:00:49 -05002836 Type *Half::getType()
2837 {
2838 return T(llvm::Type::getInt16Ty(*::context));
2839 }
2840
Nicolas Capens05b3d662016-02-25 23:58:33 -05002841 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04002842 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002843 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002844#if defined(__i386__) || defined(__x86_64__)
2845 if(exactAtPow2)
2846 {
2847 // rcpss uses a piecewise-linear approximation which minimizes the relative error
2848 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
2849 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
2850 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04002851 return x86::rcpss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002852#else
2853 return As<Float>(V(lowerRCP(V(x.value))));
2854#endif
John Bauman89401822014-05-06 15:04:28 -04002855 }
John Bauman66b8ab22014-05-06 15:57:45 -04002856
John Bauman19bac1e2014-05-06 15:23:49 -04002857 RValue<Float> RcpSqrt_pp(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002858 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002859 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002860#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002861 return x86::rsqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002862#else
2863 return As<Float>(V(lowerRSQRT(V(x.value))));
2864#endif
John Bauman89401822014-05-06 15:04:28 -04002865 }
2866
John Bauman19bac1e2014-05-06 15:23:49 -04002867 RValue<Float> Sqrt(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002868 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002869 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002870#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002871 return x86::sqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002872#else
2873 return As<Float>(V(lowerSQRT(V(x.value))));
2874#endif
John Bauman89401822014-05-06 15:04:28 -04002875 }
2876
John Bauman19bac1e2014-05-06 15:23:49 -04002877 RValue<Float> Round(RValue<Float> x)
2878 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002879 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002880#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002881 if(CPUID::supportsSSE4_1())
2882 {
2883 return x86::roundss(x, 0);
2884 }
2885 else
2886 {
2887 return Float4(Round(Float4(x))).x;
2888 }
Logan Chien83fc07a2018-09-26 22:14:00 +08002889#else
2890 return RValue<Float>(V(lowerRound(V(x.value))));
2891#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002892 }
2893
2894 RValue<Float> Trunc(RValue<Float> x)
2895 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002896 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002897#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002898 if(CPUID::supportsSSE4_1())
2899 {
2900 return x86::roundss(x, 3);
2901 }
2902 else
2903 {
2904 return Float(Int(x)); // Rounded toward zero
2905 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08002906#else
2907 return RValue<Float>(V(lowerTrunc(V(x.value))));
2908#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002909 }
2910
2911 RValue<Float> Frac(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002912 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002913 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002914#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002915 if(CPUID::supportsSSE4_1())
2916 {
2917 return x - x86::floorss(x);
2918 }
2919 else
2920 {
John Bauman19bac1e2014-05-06 15:23:49 -04002921 return Float4(Frac(Float4(x))).x;
John Bauman89401822014-05-06 15:04:28 -04002922 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08002923#else
2924 // x - floor(x) can be 1.0 for very small negative x.
2925 // Clamp against the value just below 1.0.
2926 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
2927#endif
John Bauman89401822014-05-06 15:04:28 -04002928 }
2929
John Bauman19bac1e2014-05-06 15:23:49 -04002930 RValue<Float> Floor(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002931 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002932 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002933#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002934 if(CPUID::supportsSSE4_1())
2935 {
2936 return x86::floorss(x);
2937 }
2938 else
2939 {
2940 return Float4(Floor(Float4(x))).x;
2941 }
Logan Chien40a60052018-09-26 19:03:53 +08002942#else
2943 return RValue<Float>(V(lowerFloor(V(x.value))));
2944#endif
John Bauman89401822014-05-06 15:04:28 -04002945 }
2946
John Bauman19bac1e2014-05-06 15:23:49 -04002947 RValue<Float> Ceil(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002948 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002949 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002950#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002951 if(CPUID::supportsSSE4_1())
2952 {
2953 return x86::ceilss(x);
2954 }
2955 else
Logan Chiene3191012018-08-24 22:01:50 +08002956#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002957 {
2958 return Float4(Ceil(Float4(x))).x;
2959 }
John Bauman89401822014-05-06 15:04:28 -04002960 }
2961
John Bauman19bac1e2014-05-06 15:23:49 -04002962 Type *Float::getType()
John Bauman89401822014-05-06 15:04:28 -04002963 {
Nicolas Capensac230122016-09-20 14:30:06 -04002964 return T(llvm::Type::getFloatTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04002965 }
2966
John Bauman19bac1e2014-05-06 15:23:49 -04002967 Type *Float2::getType()
John Bauman89401822014-05-06 15:04:28 -04002968 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002969 return T(Type_v2f32);
John Bauman89401822014-05-06 15:04:28 -04002970 }
2971
Nicolas Capenscb986762017-01-20 11:34:37 -05002972 Float4::Float4(RValue<Float> rhs) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002973 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002974 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002975 Value *vector = loadValue();
John Bauman89401822014-05-06 15:04:28 -04002976 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2977
Nicolas Capense89cd582016-09-30 14:23:47 -04002978 int swizzle[4] = {0, 0, 0, 0};
2979 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
John Bauman89401822014-05-06 15:04:28 -04002980
John Bauman66b8ab22014-05-06 15:57:45 -04002981 storeValue(replicate);
John Bauman89401822014-05-06 15:04:28 -04002982 }
2983
John Bauman19bac1e2014-05-06 15:23:49 -04002984 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002985 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002986 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002987#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002988 return x86::maxps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002989#else
2990 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
2991#endif
John Bauman89401822014-05-06 15:04:28 -04002992 }
2993
John Bauman19bac1e2014-05-06 15:23:49 -04002994 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002995 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002996 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002997#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002998 return x86::minps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002999#else
3000 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
3001#endif
John Bauman89401822014-05-06 15:04:28 -04003002 }
3003
Nicolas Capens05b3d662016-02-25 23:58:33 -05003004 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003005 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003006 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003007#if defined(__i386__) || defined(__x86_64__)
3008 if(exactAtPow2)
3009 {
3010 // rcpps uses a piecewise-linear approximation which minimizes the relative error
3011 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3012 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3013 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003014 return x86::rcpps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003015#else
3016 return As<Float4>(V(lowerRCP(V(x.value))));
3017#endif
John Bauman89401822014-05-06 15:04:28 -04003018 }
John Bauman66b8ab22014-05-06 15:57:45 -04003019
John Bauman19bac1e2014-05-06 15:23:49 -04003020 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003021 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003022 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003023#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003024 return x86::rsqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003025#else
3026 return As<Float4>(V(lowerRSQRT(V(x.value))));
3027#endif
John Bauman89401822014-05-06 15:04:28 -04003028 }
3029
John Bauman19bac1e2014-05-06 15:23:49 -04003030 RValue<Float4> Sqrt(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003031 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003032 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003033#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003034 return x86::sqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003035#else
3036 return As<Float4>(V(lowerSQRT(V(x.value))));
3037#endif
John Bauman89401822014-05-06 15:04:28 -04003038 }
3039
John Bauman19bac1e2014-05-06 15:23:49 -04003040 RValue<Int> SignMask(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003041 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003042 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003043#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003044 return x86::movmskps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003045#else
3046 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
3047#endif
John Bauman89401822014-05-06 15:04:28 -04003048 }
3049
John Bauman19bac1e2014-05-06 15:23:49 -04003050 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003051 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003052 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003053 // return As<Int4>(x86::cmpeqps(x, y));
3054 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
3055 }
3056
John Bauman19bac1e2014-05-06 15:23:49 -04003057 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003058 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003059 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003060 // return As<Int4>(x86::cmpltps(x, y));
3061 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
3062 }
3063
John Bauman19bac1e2014-05-06 15:23:49 -04003064 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003065 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003066 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003067 // return As<Int4>(x86::cmpleps(x, y));
3068 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
3069 }
3070
John Bauman19bac1e2014-05-06 15:23:49 -04003071 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003072 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003073 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003074 // return As<Int4>(x86::cmpneqps(x, y));
3075 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
3076 }
3077
John Bauman19bac1e2014-05-06 15:23:49 -04003078 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003079 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003080 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003081 // return As<Int4>(x86::cmpnltps(x, y));
3082 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
3083 }
3084
John Bauman19bac1e2014-05-06 15:23:49 -04003085 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003086 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003087 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003088 // return As<Int4>(x86::cmpnleps(x, y));
3089 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
3090 }
3091
Ben Claytonec1aeb82019-03-04 19:33:27 +00003092 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3093 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003094 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003095 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUEQ(x.value, y.value), Int4::getType()));
3096 }
3097
3098 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3099 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003100 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003101 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULT(x.value, y.value), Int4::getType()));
3102 }
3103
3104 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3105 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003106 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003107 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULE(x.value, y.value), Int4::getType()));
3108 }
3109
3110 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3111 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003112 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003113 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUNE(x.value, y.value), Int4::getType()));
3114 }
3115
3116 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3117 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003118 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003119 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGE(x.value, y.value), Int4::getType()));
3120 }
3121
3122 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3123 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003124 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003125 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGT(x.value, y.value), Int4::getType()));
3126 }
3127
John Bauman19bac1e2014-05-06 15:23:49 -04003128 RValue<Float4> Round(RValue<Float4> x)
3129 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003130 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003131#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003132 if(CPUID::supportsSSE4_1())
3133 {
3134 return x86::roundps(x, 0);
3135 }
3136 else
3137 {
3138 return Float4(RoundInt(x));
3139 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003140#else
3141 return RValue<Float4>(V(lowerRound(V(x.value))));
3142#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003143 }
3144
3145 RValue<Float4> Trunc(RValue<Float4> x)
3146 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003147 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003148#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003149 if(CPUID::supportsSSE4_1())
3150 {
3151 return x86::roundps(x, 3);
3152 }
3153 else
3154 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003155 return Float4(Int4(x));
John Bauman19bac1e2014-05-06 15:23:49 -04003156 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003157#else
3158 return RValue<Float4>(V(lowerTrunc(V(x.value))));
3159#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003160 }
3161
3162 RValue<Float4> Frac(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003163 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003164 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb9230422017-07-17 10:27:33 -04003165 Float4 frc;
3166
Logan Chien40a60052018-09-26 19:03:53 +08003167#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003168 if(CPUID::supportsSSE4_1())
3169 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003170 frc = x - Floor(x);
John Bauman89401822014-05-06 15:04:28 -04003171 }
3172 else
3173 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003174 frc = x - Float4(Int4(x)); // Signed fractional part.
John Bauman89401822014-05-06 15:04:28 -04003175
Nicolas Capensb9230422017-07-17 10:27:33 -04003176 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
John Bauman89401822014-05-06 15:04:28 -04003177 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003178#else
3179 frc = x - Floor(x);
3180#endif
Nicolas Capensb9230422017-07-17 10:27:33 -04003181
3182 // x - floor(x) can be 1.0 for very small negative x.
3183 // Clamp against the value just below 1.0.
3184 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
John Bauman89401822014-05-06 15:04:28 -04003185 }
3186
John Bauman19bac1e2014-05-06 15:23:49 -04003187 RValue<Float4> Floor(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003188 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003189 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003190#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003191 if(CPUID::supportsSSE4_1())
3192 {
3193 return x86::floorps(x);
3194 }
3195 else
3196 {
John Bauman19bac1e2014-05-06 15:23:49 -04003197 return x - Frac(x);
John Bauman89401822014-05-06 15:04:28 -04003198 }
Logan Chien40a60052018-09-26 19:03:53 +08003199#else
3200 return RValue<Float4>(V(lowerFloor(V(x.value))));
3201#endif
John Bauman89401822014-05-06 15:04:28 -04003202 }
3203
John Bauman19bac1e2014-05-06 15:23:49 -04003204 RValue<Float4> Ceil(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003205 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003206 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003207#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003208 if(CPUID::supportsSSE4_1())
3209 {
3210 return x86::ceilps(x);
3211 }
3212 else
Logan Chiene3191012018-08-24 22:01:50 +08003213#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003214 {
3215 return -Floor(-x);
3216 }
John Bauman89401822014-05-06 15:04:28 -04003217 }
3218
Ben Claytona2c8b772019-04-09 13:42:36 -04003219 RValue<Float4> Sin(RValue<Float4> v)
3220 {
3221 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sin, { V(v.value)->getType() } );
3222 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3223 }
3224
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003225 RValue<Float4> Cos(RValue<Float4> v)
3226 {
3227 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cos, { V(v.value)->getType() } );
3228 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3229 }
3230
Ben Clayton14740062019-04-09 13:48:41 -04003231 RValue<Float4> Tan(RValue<Float4> v)
3232 {
3233 return Sin(v) / Cos(v);
3234 }
3235
Ben Claytoneafae472019-04-09 14:22:38 -04003236 static RValue<Float4> TransformFloat4PerElement(RValue<Float4> v, const char* name)
Ben Claytonf9350d72019-04-09 14:19:02 -04003237 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003238 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), ::llvm::ArrayRef<llvm::Type*>(T(Float::getType())), false);
Ben Claytoneafae472019-04-09 14:22:38 -04003239 auto func = ::module->getOrInsertFunction(name, funcTy);
Ben Claytonf9350d72019-04-09 14:19:02 -04003240 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3241 for (uint64_t i = 0; i < 4; i++)
3242 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003243 auto el = ::builder->CreateCall(func, V(Nucleus::createExtractElement(v.value, Float::getType(), i)));
3244 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytonf9350d72019-04-09 14:19:02 -04003245 }
3246 return RValue<Float4>(V(out));
3247 }
3248
Ben Claytoneafae472019-04-09 14:22:38 -04003249 RValue<Float4> Asin(RValue<Float4> v)
3250 {
3251 return TransformFloat4PerElement(v, "asinf");
3252 }
3253
3254 RValue<Float4> Acos(RValue<Float4> v)
3255 {
3256 return TransformFloat4PerElement(v, "acosf");
3257 }
3258
Ben Clayton749b4e02019-04-09 14:27:43 -04003259 RValue<Float4> Atan(RValue<Float4> v)
3260 {
3261 return TransformFloat4PerElement(v, "atanf");
3262 }
3263
Ben Claytond9636972019-04-09 15:09:54 -04003264 RValue<Float4> Sinh(RValue<Float4> v)
3265 {
3266 return TransformFloat4PerElement(v, "sinhf");
3267 }
3268
Ben Clayton900ea2c2019-04-09 15:25:36 -04003269 RValue<Float4> Cosh(RValue<Float4> v)
3270 {
3271 return TransformFloat4PerElement(v, "coshf");
3272 }
3273
Ben Clayton3928bd92019-04-09 15:27:41 -04003274 RValue<Float4> Tanh(RValue<Float4> v)
3275 {
3276 return TransformFloat4PerElement(v, "tanhf");
3277 }
3278
Ben Claytonf6d77ab2019-04-09 15:30:04 -04003279 RValue<Float4> Asinh(RValue<Float4> v)
3280 {
3281 return TransformFloat4PerElement(v, "asinhf");
3282 }
3283
Ben Clayton28ebcb02019-04-09 15:33:38 -04003284 RValue<Float4> Acosh(RValue<Float4> v)
3285 {
3286 return TransformFloat4PerElement(v, "acoshf");
3287 }
3288
Ben Claytonfa6a5392019-04-09 15:35:24 -04003289 RValue<Float4> Atanh(RValue<Float4> v)
3290 {
3291 return TransformFloat4PerElement(v, "atanhf");
3292 }
3293
Ben Claytona520c3e2019-04-09 15:43:45 -04003294 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3295 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003296 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3297 paramTys.push_back(T(Float::getType()));
3298 paramTys.push_back(T(Float::getType()));
3299 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), paramTys, false);
Ben Claytona520c3e2019-04-09 15:43:45 -04003300 auto func = ::module->getOrInsertFunction("atan2f", funcTy);
3301 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3302 for (uint64_t i = 0; i < 4; i++)
3303 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003304 auto el = ::builder->CreateCall2(func, ARGS(
3305 V(Nucleus::createExtractElement(x.value, Float::getType(), i)),
3306 V(Nucleus::createExtractElement(y.value, Float::getType(), i))
3307 ));
3308 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytona520c3e2019-04-09 15:43:45 -04003309 }
3310 return RValue<Float4>(V(out));
3311 }
3312
Ben Claytonbfe94f02019-04-09 15:52:12 -04003313 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3314 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003315 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3316 paramTys.push_back(T(Float4::getType()));
3317 paramTys.push_back(T(Float4::getType()));
3318 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::pow, paramTys);
3319 return RValue<Float4>(V(::builder->CreateCall2(func, ARGS(V(x.value), V(y.value)))));
Ben Claytonbfe94f02019-04-09 15:52:12 -04003320 }
3321
Ben Clayton242f0022019-04-09 16:00:53 -04003322 RValue<Float4> Exp(RValue<Float4> v)
3323 {
3324 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003325 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton242f0022019-04-09 16:00:53 -04003326 }
3327
Ben Clayton2c1da722019-04-09 16:03:03 -04003328 RValue<Float4> Log(RValue<Float4> v)
3329 {
3330 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003331 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton2c1da722019-04-09 16:03:03 -04003332 }
3333
Ben Claytonf40b56c2019-04-09 16:06:55 -04003334 RValue<Float4> Exp2(RValue<Float4> v)
3335 {
3336 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003337 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytonf40b56c2019-04-09 16:06:55 -04003338 }
3339
Ben Claytone17acfe2019-04-09 16:09:13 -04003340 RValue<Float4> Log2(RValue<Float4> v)
3341 {
3342 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003343 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytone17acfe2019-04-09 16:09:13 -04003344 }
3345
Ben Clayton60958262019-04-10 14:53:30 -04003346 RValue<UInt4> Ctlz(RValue<UInt4> v, bool isZeroUndef)
3347 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003348 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3349 paramTys.push_back(T(UInt4::getType()));
3350 paramTys.push_back(T(Bool::getType()));
3351 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::ctlz, paramTys);
3352 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton60958262019-04-10 14:53:30 -04003353 V(v.value),
3354 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003355 ))));
Ben Clayton60958262019-04-10 14:53:30 -04003356 }
3357
Ben Clayton3f007c42019-04-10 14:54:23 -04003358 RValue<UInt4> Cttz(RValue<UInt4> v, bool isZeroUndef)
3359 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003360 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3361 paramTys.push_back(T(UInt4::getType()));
3362 paramTys.push_back(T(Bool::getType()));
3363 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cttz, paramTys);
3364 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton3f007c42019-04-10 14:54:23 -04003365 V(v.value),
3366 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003367 ))));
Ben Clayton3f007c42019-04-10 14:54:23 -04003368 }
3369
John Bauman19bac1e2014-05-06 15:23:49 -04003370 Type *Float4::getType()
John Bauman89401822014-05-06 15:04:28 -04003371 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003372 return T(llvm::VectorType::get(T(Float::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003373 }
3374
John Bauman89401822014-05-06 15:04:28 -04003375 RValue<Long> Ticks()
3376 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003377 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003378 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
John Bauman89401822014-05-06 15:04:28 -04003379
Nicolas Capens2ab69ee2016-09-26 11:45:17 -04003380 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
John Bauman89401822014-05-06 15:04:28 -04003381 }
Ben Claytond853c122019-04-16 17:51:49 -04003382
3383 RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3384 {
3385 // Note: this should work for 32-bit pointers as well because 'inttoptr'
3386 // is defined to truncate (and zero extend) if necessary.
3387 auto ptrAsInt = ::llvm::ConstantInt::get(::llvm::Type::getInt64Ty(*::context), reinterpret_cast<uintptr_t>(ptr));
3388 return RValue<Pointer<Byte>>(V(::builder->CreateIntToPtr(ptrAsInt, T(Pointer<Byte>::getType()))));
3389 }
3390
3391 Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
3392 {
3393 ::llvm::SmallVector<::llvm::Type*, 8> paramTys;
3394 for (auto ty : argTys) { paramTys.push_back(T(ty)); }
3395 auto funcTy = ::llvm::FunctionType::get(T(retTy), paramTys, false);
3396
3397 auto funcPtrTy = funcTy->getPointerTo();
3398 auto funcPtr = ::builder->CreatePointerCast(V(fptr.value), funcPtrTy);
3399
3400 ::llvm::SmallVector<::llvm::Value*, 8> arguments;
3401 for (auto arg : args) { arguments.push_back(V(arg)); }
3402 return V(::builder->CreateCall(funcPtr, arguments));
3403 }
John Bauman89401822014-05-06 15:04:28 -04003404}
3405
Nicolas Capens48461502018-08-06 14:20:45 -04003406namespace rr
John Bauman89401822014-05-06 15:04:28 -04003407{
Logan Chiene3191012018-08-24 22:01:50 +08003408#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003409 namespace x86
3410 {
John Bauman19bac1e2014-05-06 15:23:49 -04003411 RValue<Int> cvtss2si(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003412 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003413 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
John Bauman66b8ab22014-05-06 15:57:45 -04003414
John Bauman89401822014-05-06 15:04:28 -04003415 Float4 vector;
3416 vector.x = val;
3417
Logan Chien813d5032018-08-31 17:19:45 +08003418 return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
John Bauman89401822014-05-06 15:04:28 -04003419 }
3420
John Bauman19bac1e2014-05-06 15:23:49 -04003421 RValue<Int4> cvtps2dq(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003422 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003423 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
John Bauman89401822014-05-06 15:04:28 -04003424
Logan Chien813d5032018-08-31 17:19:45 +08003425 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003426 }
3427
John Bauman19bac1e2014-05-06 15:23:49 -04003428 RValue<Float> rcpss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003429 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003430 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
John Bauman89401822014-05-06 15:04:28 -04003431
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003432 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003433
Logan Chien813d5032018-08-31 17:19:45 +08003434 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003435 }
3436
John Bauman19bac1e2014-05-06 15:23:49 -04003437 RValue<Float> sqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003438 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003439 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3440 return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003441 }
3442
John Bauman19bac1e2014-05-06 15:23:49 -04003443 RValue<Float> rsqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003444 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003445 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
John Bauman66b8ab22014-05-06 15:57:45 -04003446
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003447 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman89401822014-05-06 15:04:28 -04003448
Logan Chien813d5032018-08-31 17:19:45 +08003449 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003450 }
3451
John Bauman19bac1e2014-05-06 15:23:49 -04003452 RValue<Float4> rcpps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003453 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003454 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003455
Logan Chien813d5032018-08-31 17:19:45 +08003456 return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003457 }
3458
John Bauman19bac1e2014-05-06 15:23:49 -04003459 RValue<Float4> sqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003460 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003461 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
John Bauman66b8ab22014-05-06 15:57:45 -04003462
Logan Chien813d5032018-08-31 17:19:45 +08003463 return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003464 }
3465
John Bauman19bac1e2014-05-06 15:23:49 -04003466 RValue<Float4> rsqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003467 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003468 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003469
Logan Chien813d5032018-08-31 17:19:45 +08003470 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003471 }
3472
John Bauman19bac1e2014-05-06 15:23:49 -04003473 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003474 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003475 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
John Bauman89401822014-05-06 15:04:28 -04003476
Logan Chien813d5032018-08-31 17:19:45 +08003477 return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003478 }
3479
John Bauman19bac1e2014-05-06 15:23:49 -04003480 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003481 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003482 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
John Bauman89401822014-05-06 15:04:28 -04003483
Logan Chien813d5032018-08-31 17:19:45 +08003484 return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003485 }
3486
John Bauman19bac1e2014-05-06 15:23:49 -04003487 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003488 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003489 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
John Bauman89401822014-05-06 15:04:28 -04003490
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003491 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
John Bauman89401822014-05-06 15:04:28 -04003492 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
3493
Logan Chien813d5032018-08-31 17:19:45 +08003494 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003495 }
3496
John Bauman19bac1e2014-05-06 15:23:49 -04003497 RValue<Float> floorss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003498 {
3499 return roundss(val, 1);
3500 }
3501
John Bauman19bac1e2014-05-06 15:23:49 -04003502 RValue<Float> ceilss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003503 {
3504 return roundss(val, 2);
3505 }
3506
John Bauman19bac1e2014-05-06 15:23:49 -04003507 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003508 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003509 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
John Bauman89401822014-05-06 15:04:28 -04003510
Logan Chien813d5032018-08-31 17:19:45 +08003511 return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
John Bauman89401822014-05-06 15:04:28 -04003512 }
3513
John Bauman19bac1e2014-05-06 15:23:49 -04003514 RValue<Float4> floorps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003515 {
3516 return roundps(val, 1);
3517 }
3518
John Bauman19bac1e2014-05-06 15:23:49 -04003519 RValue<Float4> ceilps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003520 {
3521 return roundps(val, 2);
3522 }
3523
Alexis Hetu0f448072016-03-18 10:56:08 -04003524 RValue<Int4> pabsd(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04003525 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003526 return RValue<Int4>(V(lowerPABS(V(x.value))));
John Bauman89401822014-05-06 15:04:28 -04003527 }
3528
John Bauman19bac1e2014-05-06 15:23:49 -04003529 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003530 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003531 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
John Bauman89401822014-05-06 15:04:28 -04003532
Logan Chien813d5032018-08-31 17:19:45 +08003533 return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003534 }
John Bauman66b8ab22014-05-06 15:57:45 -04003535
John Bauman19bac1e2014-05-06 15:23:49 -04003536 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003537 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003538 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
John Bauman89401822014-05-06 15:04:28 -04003539
Logan Chien813d5032018-08-31 17:19:45 +08003540 return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003541 }
3542
John Bauman19bac1e2014-05-06 15:23:49 -04003543 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003544 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003545 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
John Bauman89401822014-05-06 15:04:28 -04003546
Logan Chien813d5032018-08-31 17:19:45 +08003547 return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003548 }
John Bauman66b8ab22014-05-06 15:57:45 -04003549
John Bauman19bac1e2014-05-06 15:23:49 -04003550 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003551 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003552 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
John Bauman89401822014-05-06 15:04:28 -04003553
Logan Chien813d5032018-08-31 17:19:45 +08003554 return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003555 }
3556
John Bauman19bac1e2014-05-06 15:23:49 -04003557 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003558 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003559 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
John Bauman89401822014-05-06 15:04:28 -04003560
Logan Chien813d5032018-08-31 17:19:45 +08003561 return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003562 }
John Bauman66b8ab22014-05-06 15:57:45 -04003563
John Bauman19bac1e2014-05-06 15:23:49 -04003564 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003565 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003566 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
John Bauman89401822014-05-06 15:04:28 -04003567
Logan Chien813d5032018-08-31 17:19:45 +08003568 return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003569 }
John Bauman66b8ab22014-05-06 15:57:45 -04003570
John Bauman19bac1e2014-05-06 15:23:49 -04003571 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003572 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003573 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
John Bauman89401822014-05-06 15:04:28 -04003574
Logan Chien813d5032018-08-31 17:19:45 +08003575 return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003576 }
John Bauman66b8ab22014-05-06 15:57:45 -04003577
John Bauman19bac1e2014-05-06 15:23:49 -04003578 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003579 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003580 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
John Bauman89401822014-05-06 15:04:28 -04003581
Logan Chien813d5032018-08-31 17:19:45 +08003582 return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
John Bauman19bac1e2014-05-06 15:23:49 -04003583 }
3584
3585 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003586 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003587 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
John Bauman89401822014-05-06 15:04:28 -04003588 }
3589
John Bauman19bac1e2014-05-06 15:23:49 -04003590 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003591 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003592 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman89401822014-05-06 15:04:28 -04003593 }
3594
John Bauman19bac1e2014-05-06 15:23:49 -04003595 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003596 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003597 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman89401822014-05-06 15:04:28 -04003598 }
3599
John Bauman19bac1e2014-05-06 15:23:49 -04003600 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003601 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003602 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003603 }
3604
John Bauman19bac1e2014-05-06 15:23:49 -04003605 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003606 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003607 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003608 }
3609
John Bauman19bac1e2014-05-06 15:23:49 -04003610 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003611 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003612 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003613 }
3614
John Bauman19bac1e2014-05-06 15:23:49 -04003615 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003616 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003617 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003618 }
3619
John Bauman19bac1e2014-05-06 15:23:49 -04003620 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
John Bauman89401822014-05-06 15:04:28 -04003621 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003622 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003623
Logan Chien813d5032018-08-31 17:19:45 +08003624 return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003625 }
3626
John Bauman19bac1e2014-05-06 15:23:49 -04003627 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003628 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003629 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003630
Logan Chien813d5032018-08-31 17:19:45 +08003631 return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003632 }
3633
John Bauman19bac1e2014-05-06 15:23:49 -04003634 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003635 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003636 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
John Bauman89401822014-05-06 15:04:28 -04003637
Logan Chien813d5032018-08-31 17:19:45 +08003638 return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003639 }
3640
Nicolas Capens33438a62017-09-27 11:47:35 -04003641 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003642 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003643 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
John Bauman89401822014-05-06 15:04:28 -04003644
Logan Chien813d5032018-08-31 17:19:45 +08003645 return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003646 }
3647
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003648 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003649 {
3650 if(CPUID::supportsSSE4_1())
3651 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003652 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
John Bauman66b8ab22014-05-06 15:57:45 -04003653
Logan Chien813d5032018-08-31 17:19:45 +08003654 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003655 }
3656 else
3657 {
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003658 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
3659 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
3660
3661 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
John Bauman89401822014-05-06 15:04:28 -04003662 }
3663 }
3664
John Bauman19bac1e2014-05-06 15:23:49 -04003665 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003666 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003667 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003668
Logan Chien813d5032018-08-31 17:19:45 +08003669 return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003670 }
3671
John Bauman19bac1e2014-05-06 15:23:49 -04003672 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003673 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003674 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003675
Logan Chien813d5032018-08-31 17:19:45 +08003676 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003677 }
3678
John Bauman19bac1e2014-05-06 15:23:49 -04003679 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003680 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003681 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003682
Logan Chien813d5032018-08-31 17:19:45 +08003683 return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003684 }
3685
John Bauman19bac1e2014-05-06 15:23:49 -04003686 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003687 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003688 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003689
Logan Chien813d5032018-08-31 17:19:45 +08003690 return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003691 }
3692
John Bauman19bac1e2014-05-06 15:23:49 -04003693 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003694 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003695 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003696
Logan Chien813d5032018-08-31 17:19:45 +08003697 return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003698 }
3699
John Bauman19bac1e2014-05-06 15:23:49 -04003700 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003701 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003702 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003703
Logan Chien813d5032018-08-31 17:19:45 +08003704 return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003705 }
3706
John Bauman19bac1e2014-05-06 15:23:49 -04003707 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003708 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003709 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003710
Logan Chien813d5032018-08-31 17:19:45 +08003711 return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003712 }
3713
John Bauman19bac1e2014-05-06 15:23:49 -04003714 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003715 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003716 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003717
Logan Chien813d5032018-08-31 17:19:45 +08003718 return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003719 }
3720
John Bauman19bac1e2014-05-06 15:23:49 -04003721 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003722 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003723 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003724
Logan Chien813d5032018-08-31 17:19:45 +08003725 return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003726 }
3727
John Bauman19bac1e2014-05-06 15:23:49 -04003728 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003729 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003730 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003731
Logan Chien813d5032018-08-31 17:19:45 +08003732 return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003733 }
3734
John Bauman19bac1e2014-05-06 15:23:49 -04003735 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003736 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003737 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003738
Logan Chien813d5032018-08-31 17:19:45 +08003739 return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003740 }
3741
John Bauman19bac1e2014-05-06 15:23:49 -04003742 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003743 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003744 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003745
Logan Chien813d5032018-08-31 17:19:45 +08003746 return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003747 }
3748
John Bauman19bac1e2014-05-06 15:23:49 -04003749 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
3750 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003751 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003752 }
3753
3754 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
3755 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003756 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003757 }
3758
3759 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
3760 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003761 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003762 }
3763
3764 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
3765 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003766 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003767 }
3768
3769 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003770 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003771 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04003772
Logan Chien813d5032018-08-31 17:19:45 +08003773 return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003774 }
3775
John Bauman19bac1e2014-05-06 15:23:49 -04003776 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003777 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003778 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04003779
Logan Chien813d5032018-08-31 17:19:45 +08003780 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003781 }
3782
John Bauman19bac1e2014-05-06 15:23:49 -04003783 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003784 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003785 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04003786
Logan Chien813d5032018-08-31 17:19:45 +08003787 return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003788 }
3789
John Bauman19bac1e2014-05-06 15:23:49 -04003790 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04003791 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003792 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04003793
Logan Chien813d5032018-08-31 17:19:45 +08003794 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003795 }
3796
John Bauman19bac1e2014-05-06 15:23:49 -04003797 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04003798 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003799 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04003800
Logan Chien813d5032018-08-31 17:19:45 +08003801 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003802 }
3803
John Bauman19bac1e2014-05-06 15:23:49 -04003804 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04003805 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003806 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04003807
Logan Chien813d5032018-08-31 17:19:45 +08003808 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003809 }
3810
John Bauman19bac1e2014-05-06 15:23:49 -04003811 RValue<Int> movmskps(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003812 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003813 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
John Bauman89401822014-05-06 15:04:28 -04003814
Logan Chien813d5032018-08-31 17:19:45 +08003815 return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
John Bauman89401822014-05-06 15:04:28 -04003816 }
3817
John Bauman19bac1e2014-05-06 15:23:49 -04003818 RValue<Int> pmovmskb(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04003819 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003820 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
John Bauman89401822014-05-06 15:04:28 -04003821
Logan Chien813d5032018-08-31 17:19:45 +08003822 return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
John Bauman89401822014-05-06 15:04:28 -04003823 }
3824
Nicolas Capens01a97962017-07-28 17:30:51 -04003825 RValue<Int4> pmovzxbd(RValue<Byte16> x)
John Bauman89401822014-05-06 15:04:28 -04003826 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003827 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04003828 }
3829
Nicolas Capens01a97962017-07-28 17:30:51 -04003830 RValue<Int4> pmovsxbd(RValue<SByte16> x)
John Bauman89401822014-05-06 15:04:28 -04003831 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003832 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04003833 }
3834
Nicolas Capens01a97962017-07-28 17:30:51 -04003835 RValue<Int4> pmovzxwd(RValue<UShort8> x)
John Bauman89401822014-05-06 15:04:28 -04003836 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003837 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04003838 }
3839
Nicolas Capens01a97962017-07-28 17:30:51 -04003840 RValue<Int4> pmovsxwd(RValue<Short8> x)
John Bauman89401822014-05-06 15:04:28 -04003841 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003842 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04003843 }
3844 }
Logan Chiene3191012018-08-24 22:01:50 +08003845#endif // defined(__i386__) || defined(__x86_64__)
Ben Clayton1bc7ee92019-02-14 18:43:22 +00003846
Ben Clayton60a3d6f2019-02-26 17:24:46 +00003847#ifdef ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +00003848 // extractAll returns a vector containing the extracted n scalar value of
3849 // the vector vec.
3850 static std::vector<Value*> extractAll(Value* vec, int n)
3851 {
3852 std::vector<Value*> elements;
3853 elements.reserve(n);
3854 for (int i = 0; i < n; i++)
3855 {
3856 auto el = V(::builder->CreateExtractElement(V(vec), i));
3857 elements.push_back(el);
3858 }
3859 return elements;
3860 }
3861
3862 // toDouble returns all the float values in vals extended to doubles.
3863 static std::vector<Value*> toDouble(const std::vector<Value*>& vals)
3864 {
3865 auto doubleTy = ::llvm::Type::getDoubleTy(*::context);
3866 std::vector<Value*> elements;
3867 elements.reserve(vals.size());
3868 for (auto v : vals)
3869 {
3870 elements.push_back(V(::builder->CreateFPExt(V(v), doubleTy)));
3871 }
3872 return elements;
3873 }
3874
3875 std::vector<Value*> PrintValue::Ty<Byte4>::val(const RValue<Byte4>& v) { return extractAll(v.value, 4); }
3876 std::vector<Value*> PrintValue::Ty<Int4>::val(const RValue<Int4>& v) { return extractAll(v.value, 4); }
3877 std::vector<Value*> PrintValue::Ty<UInt4>::val(const RValue<UInt4>& v) { return extractAll(v.value, 4); }
3878 std::vector<Value*> PrintValue::Ty<Short4>::val(const RValue<Short4>& v) { return extractAll(v.value, 4); }
3879 std::vector<Value*> PrintValue::Ty<UShort4>::val(const RValue<UShort4>& v) { return extractAll(v.value, 4); }
3880 std::vector<Value*> PrintValue::Ty<Float>::val(const RValue<Float>& v) { return toDouble({v.value}); }
3881 std::vector<Value*> PrintValue::Ty<Float4>::val(const RValue<Float4>& v) { return toDouble(extractAll(v.value, 4)); }
3882
3883 void Printv(const char* function, const char* file, int line, const char* fmt, std::initializer_list<PrintValue> args)
3884 {
3885 // LLVM types used below.
3886 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
3887 auto intTy = ::llvm::Type::getInt64Ty(*::context); // TODO: Natural int width.
3888 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
3889 auto funcTy = ::llvm::FunctionType::get(i32Ty, {i8PtrTy}, true);
3890
3891 auto func = ::module->getOrInsertFunction("printf", funcTy);
3892
3893 // Build the printf format message string.
3894 std::string str;
3895 if (file != nullptr) { str += (line > 0) ? "%s:%d " : "%s "; }
3896 if (function != nullptr) { str += "%s "; }
3897 str += fmt;
3898
3899 // Perform subsitution on all '{n}' bracketed indices in the format
3900 // message.
3901 int i = 0;
3902 for (const PrintValue& arg : args)
3903 {
3904 str = replace(str, "{" + std::to_string(i++) + "}", arg.format);
3905 }
3906
3907 ::llvm::SmallVector<::llvm::Value*, 8> vals;
3908
3909 // The format message is always the first argument.
3910 vals.push_back(::builder->CreateGlobalStringPtr(str));
3911
3912 // Add optional file, line and function info if provided.
3913 if (file != nullptr)
3914 {
3915 vals.push_back(::builder->CreateGlobalStringPtr(file));
3916 if (line > 0)
3917 {
3918 vals.push_back(::llvm::ConstantInt::get(intTy, line));
3919 }
3920 }
3921 if (function != nullptr)
3922 {
3923 vals.push_back(::builder->CreateGlobalStringPtr(function));
3924 }
3925
3926 // Add all format arguments.
3927 for (const PrintValue& arg : args)
3928 {
3929 for (auto val : arg.values)
3930 {
3931 vals.push_back(V(val));
3932 }
3933 }
3934
3935 ::builder->CreateCall(func, vals);
3936 }
3937#endif // ENABLE_RR_PRINT
3938
Ben Claytonac07ed82019-03-26 14:17:41 +00003939 void Break()
3940 {
3941 auto trap = ::llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::trap);
3942 builder->CreateCall(trap);
3943 }
3944
3945 void Nop()
3946 {
3947 auto voidTy = ::llvm::Type::getVoidTy(*context);
3948 auto funcTy = ::llvm::FunctionType::get(voidTy, {}, false);
3949 auto func = ::module->getOrInsertFunction("nop", funcTy);
3950 builder->CreateCall(func);
3951 }
3952
// EmitDebugLocation forwards to debugInfo->EmitLocation() when the build
// defines ENABLE_RR_DEBUG_INFO and a debugInfo object exists. Otherwise it
// does nothing.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (debugInfo == nullptr)
	{
		return;
	}

	debugInfo->EmitLocation();
#endif // ENABLE_RR_DEBUG_INFO
}
3962
3963 void EmitDebugVariable(Value* value)
3964 {
3965#ifdef ENABLE_RR_DEBUG_INFO
3966 if (debugInfo != nullptr)
3967 {
3968 debugInfo->EmitVariable(value);
3969 }
3970#endif // ENABLE_RR_DEBUG_INFO
3971 }
3972
// FlushDebug forwards to debugInfo->Flush() when the build defines
// ENABLE_RR_DEBUG_INFO and a debugInfo object exists. Otherwise it does
// nothing.
void FlushDebug()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (debugInfo == nullptr)
	{
		return;
	}

	debugInfo->Flush();
#endif // ENABLE_RR_DEBUG_INFO
}
3982
John Bauman89401822014-05-06 15:04:28 -04003983}