blob: c97e9d3f0c672d7451d4161061c8754ff15df7e0 [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
Nicolas Capenscb986762017-01-20 11:34:37 -050015#include "Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040016
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040017#include "x86.hpp"
18#include "CPUID.hpp"
19#include "Thread.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040020#include "ExecutableMemory.hpp"
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040021#include "MutexLock.hpp"
22
23#undef min
24#undef max
25
Nicolas Capensf417d9d2018-10-10 10:49:30 -040026#if REACTOR_LLVM_VERSION < 7
Logan Chien0eedc8c2018-08-21 09:34:28 +080027 #include "llvm/Analysis/LoopPass.h"
28 #include "llvm/Constants.h"
29 #include "llvm/Function.h"
30 #include "llvm/GlobalVariable.h"
31 #include "llvm/Intrinsics.h"
32 #include "llvm/LLVMContext.h"
33 #include "llvm/Module.h"
34 #include "llvm/PassManager.h"
35 #include "llvm/Support/IRBuilder.h"
36 #include "llvm/Support/TargetSelect.h"
37 #include "llvm/Target/TargetData.h"
38 #include "llvm/Target/TargetOptions.h"
39 #include "llvm/Transforms/Scalar.h"
40 #include "../lib/ExecutionEngine/JIT/JIT.h"
John Bauman89401822014-05-06 15:04:28 -040041
Logan Chien0eedc8c2018-08-21 09:34:28 +080042 #include "LLVMRoutine.hpp"
43 #include "LLVMRoutineManager.hpp"
44
45 #define ARGS(...) __VA_ARGS__
46#else
47 #include "llvm/Analysis/LoopPass.h"
48 #include "llvm/ExecutionEngine/ExecutionEngine.h"
49 #include "llvm/ExecutionEngine/JITSymbol.h"
50 #include "llvm/ExecutionEngine/Orc/CompileUtils.h"
51 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
52 #include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
53 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
54 #include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
55 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
56 #include "llvm/IR/Constants.h"
57 #include "llvm/IR/DataLayout.h"
58 #include "llvm/IR/Function.h"
59 #include "llvm/IR/GlobalVariable.h"
60 #include "llvm/IR/IRBuilder.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/LegacyPassManager.h"
Nicolas Capensadfbbcb2018-10-31 14:38:53 -040064 #include "llvm/IR/Mangler.h"
Logan Chien0eedc8c2018-08-21 09:34:28 +080065 #include "llvm/IR/Module.h"
66 #include "llvm/Support/Error.h"
67 #include "llvm/Support/TargetSelect.h"
68 #include "llvm/Target/TargetOptions.h"
69 #include "llvm/Transforms/InstCombine/InstCombine.h"
70 #include "llvm/Transforms/Scalar.h"
71 #include "llvm/Transforms/Scalar/GVN.h"
72
73 #include "LLVMRoutine.hpp"
74
75 #define ARGS(...) {__VA_ARGS__}
76 #define CreateCall2 CreateCall
77 #define CreateCall3 CreateCall
Logan Chien40a60052018-09-26 19:03:53 +080078
79 #include <unordered_map>
Logan Chien0eedc8c2018-08-21 09:34:28 +080080#endif
81
John Bauman89401822014-05-06 15:04:28 -040082#include <fstream>
Ben Clayton1bc7ee92019-02-14 18:43:22 +000083#include <numeric>
84#include <thread>
John Bauman89401822014-05-06 15:04:28 -040085
Nicolas Capens47dc8672017-04-25 12:54:39 -040086#if defined(__i386__) || defined(__x86_64__)
87#include <xmmintrin.h>
88#endif
89
Logan Chien40a60052018-09-26 19:03:53 +080090#include <math.h>
91
Nicolas Capenscb122582014-05-06 23:34:44 -040092#if defined(__x86_64__) && defined(_WIN32)
// Stub definition of X86CompilationCallback, only compiled for 64-bit x86
// Windows builds. It is not expected to be called: it asserts unconditionally.
// NOTE(review): presumably exists to satisfy a reference from LLVM's x86 JIT
// that has no implementation on this platform - confirm.
extern "C" void X86CompilationCallback()
{
	assert(false);   // UNIMPLEMENTED
}
97#endif
98
Nicolas Capensf417d9d2018-10-10 10:49:30 -040099#if REACTOR_LLVM_VERSION < 7
John Bauman89401822014-05-06 15:04:28 -0400100namespace llvm
101{
102 extern bool JITEmitDebugInfo;
103}
Logan Chien0eedc8c2018-08-21 09:34:28 +0800104#endif
John Bauman89401822014-05-06 15:04:28 -0400105
Nicolas Capens48461502018-08-06 14:20:45 -0400106namespace rr
Logan Chien52cde602018-09-03 19:37:57 +0800107{
108 class LLVMReactorJIT;
109}
110
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400111namespace
112{
// File-local code generation state. The lowering helpers in this file emit
// instructions through ::builder and declare intrinsics in ::module; the JIT
// classes create ::module from ::context at the start of a session and reset
// ::module and ::function to null at the end of it.
rr::LLVMReactorJIT *reactorJIT = nullptr;
llvm::IRBuilder<> *builder = nullptr;
llvm::LLVMContext *context = nullptr;
llvm::Module *module = nullptr;
llvm::Function *function = nullptr;

// NOTE(review): presumably serializes code generation across threads; its
// users are outside the visible part of the file - confirm.
rr::MutexLock codegenMutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800120
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000121#ifdef ENABLE_RR_PRINT
// Returns a copy of str in which every non-overlapping occurrence of substr,
// found scanning left to right, is replaced by replacement. Text inserted by
// a replacement is skipped and never rescanned.
//
// An empty substr matches at every position, which would make the scan loop
// forever, so str is returned unchanged in that case.
std::string replace(std::string str, const std::string& substr, const std::string& replacement)
{
	if(substr.empty())
	{
		return str;
	}

	size_t pos = 0;
	while((pos = str.find(substr, pos)) != std::string::npos)
	{
		str.replace(pos, substr.length(), replacement);
		pos += replacement.length();   // Continue after the inserted text.
	}
	return str;
}
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000131#endif // ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000132
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400133#if REACTOR_LLVM_VERSION >= 7
Logan Chien0eedc8c2018-08-21 09:34:28 +0800134 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
135 {
136 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
137
138 llvm::VectorType *extTy =
139 llvm::VectorType::getExtendedElementVectorType(ty);
140 x = ::builder->CreateZExt(x, extTy);
141 y = ::builder->CreateZExt(y, extTy);
142
143 // (x + y + 1) >> 1
144 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
145 llvm::Value *res = ::builder->CreateAdd(x, y);
146 res = ::builder->CreateAdd(res, one);
147 res = ::builder->CreateLShr(res, one);
148 return ::builder->CreateTrunc(res, ty);
149 }
150
151 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800152 llvm::ICmpInst::Predicate pred)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800153 {
154 return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
155 }
156
157 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
Logan Chienb5ce5092018-09-27 18:45:58 +0800158 llvm::Value *y, llvm::Type *dstTy)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800159 {
160 return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
161 }
162
Logan Chiene3191012018-08-24 22:01:50 +0800163#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800164 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
165 {
166 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
167 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
168
169 llvm::Value *undef = llvm::UndefValue::get(srcTy);
170 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
171 std::iota(mask.begin(), mask.end(), 0);
172 llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
173
174 return sext ? ::builder->CreateSExt(v, dstTy)
Logan Chienb5ce5092018-09-27 18:45:58 +0800175 : ::builder->CreateZExt(v, dstTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800176 }
177
178 llvm::Value *lowerPABS(llvm::Value *v)
179 {
180 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
181 llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
182 llvm::Value *neg = ::builder->CreateNeg(v);
183 return ::builder->CreateSelect(cmp, v, neg);
184 }
185#endif // defined(__i386__) || defined(__x86_64__)
Logan Chiene3191012018-08-24 22:01:50 +0800186
187#if !defined(__i386__) && !defined(__x86_64__)
188 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800189 llvm::FCmpInst::Predicate pred)
Logan Chiene3191012018-08-24 22:01:50 +0800190 {
191 return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
192 }
193
Logan Chien83fc07a2018-09-26 22:14:00 +0800194 llvm::Value *lowerRound(llvm::Value *x)
195 {
196 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
197 ::module, llvm::Intrinsic::nearbyint, {x->getType()});
198 return ::builder->CreateCall(nearbyint, ARGS(x));
199 }
200
Logan Chien2faa24a2018-09-26 19:59:32 +0800201 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
202 {
203 return ::builder->CreateFPToSI(lowerRound(x), ty);
204 }
205
Logan Chien40a60052018-09-26 19:03:53 +0800206 llvm::Value *lowerFloor(llvm::Value *x)
207 {
208 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
209 ::module, llvm::Intrinsic::floor, {x->getType()});
210 return ::builder->CreateCall(floor, ARGS(x));
211 }
212
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800213 llvm::Value *lowerTrunc(llvm::Value *x)
214 {
215 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
216 ::module, llvm::Intrinsic::trunc, {x->getType()});
217 return ::builder->CreateCall(trunc, ARGS(x));
218 }
219
Logan Chiene3191012018-08-24 22:01:50 +0800220 // Packed add/sub saturatation
Logan Chien28794cf2018-09-26 18:58:03 +0800221 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
Logan Chiene3191012018-08-24 22:01:50 +0800222 {
Logan Chien28794cf2018-09-26 18:58:03 +0800223 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
224 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
225
226 unsigned numBits = ty->getScalarSizeInBits();
227
228 llvm::Value *max, *min, *extX, *extY;
229 if (isSigned)
230 {
231 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
232 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
233 extX = ::builder->CreateSExt(x, extTy);
234 extY = ::builder->CreateSExt(y, extTy);
235 }
236 else
237 {
238 assert(numBits <= 64);
239 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
240 max = llvm::ConstantInt::get(extTy, maxVal, false);
241 min = llvm::ConstantInt::get(extTy, 0, false);
242 extX = ::builder->CreateZExt(x, extTy);
243 extY = ::builder->CreateZExt(y, extTy);
244 }
245
246 llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
247 : ::builder->CreateSub(extX, extY);
248
249 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
250 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
251
252 return ::builder->CreateTrunc(res, ty);
Logan Chiene3191012018-08-24 22:01:50 +0800253 }
254
255 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
256 {
Logan Chien28794cf2018-09-26 18:58:03 +0800257 return lowerPSAT(x, y, true, false);
Logan Chiene3191012018-08-24 22:01:50 +0800258 }
259
260 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
261 {
Logan Chien28794cf2018-09-26 18:58:03 +0800262 return lowerPSAT(x, y, true, true);
Logan Chiene3191012018-08-24 22:01:50 +0800263 }
264
265 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
266 {
Logan Chien28794cf2018-09-26 18:58:03 +0800267 return lowerPSAT(x, y, false, false);
Logan Chiene3191012018-08-24 22:01:50 +0800268 }
269
270 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
271 {
Logan Chien28794cf2018-09-26 18:58:03 +0800272 return lowerPSAT(x, y, false, true);
Logan Chiene3191012018-08-24 22:01:50 +0800273 }
274
275 llvm::Value *lowerSQRT(llvm::Value *x)
276 {
277 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
278 ::module, llvm::Intrinsic::sqrt, {x->getType()});
279 return ::builder->CreateCall(sqrt, ARGS(x));
280 }
281
282 llvm::Value *lowerRCP(llvm::Value *x)
283 {
284 llvm::Type *ty = x->getType();
285 llvm::Constant *one;
286 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
287 {
288 one = llvm::ConstantVector::getSplat(
289 vectorTy->getNumElements(),
290 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
291 }
292 else
293 {
294 one = llvm::ConstantFP::get(ty, 1);
295 }
296 return ::builder->CreateFDiv(one, x);
297 }
298
299 llvm::Value *lowerRSQRT(llvm::Value *x)
300 {
301 return lowerRCP(lowerSQRT(x));
302 }
303
304 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
305 {
306 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
307 llvm::Value *y = llvm::ConstantVector::getSplat(
308 ty->getNumElements(),
309 llvm::ConstantInt::get(ty->getElementType(), scalarY));
310 return ::builder->CreateShl(x, y);
311 }
312
313 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
314 {
315 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
316 llvm::Value *y = llvm::ConstantVector::getSplat(
317 ty->getNumElements(),
318 llvm::ConstantInt::get(ty->getElementType(), scalarY));
319 return ::builder->CreateAShr(x, y);
320 }
321
322 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
323 {
324 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
325 llvm::Value *y = llvm::ConstantVector::getSplat(
326 ty->getNumElements(),
327 llvm::ConstantInt::get(ty->getElementType(), scalarY));
328 return ::builder->CreateLShr(x, y);
329 }
330
331 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
332 {
333 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
334 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
335
336 llvm::Value *extX = ::builder->CreateSExt(x, extTy);
337 llvm::Value *extY = ::builder->CreateSExt(y, extTy);
338 llvm::Value *mult = ::builder->CreateMul(extX, extY);
339
340 llvm::Value *undef = llvm::UndefValue::get(extTy);
341
342 llvm::SmallVector<uint32_t, 16> evenIdx;
343 llvm::SmallVector<uint32_t, 16> oddIdx;
344 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
345 {
346 evenIdx.push_back(i);
347 oddIdx.push_back(i + 1);
348 }
349
350 llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
351 llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
352 return ::builder->CreateAdd(lhs, rhs);
353 }
354
Logan Chiene3191012018-08-24 22:01:50 +0800355 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
356 {
357 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
358 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
359
360 llvm::IntegerType *dstElemTy =
361 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
362
363 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
364 assert(truncNumBits < 64 && "shift 64 must be handled separately");
365 llvm::Constant *max, *min;
366 if (isSigned)
367 {
368 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
369 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
370 }
371 else
372 {
373 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
374 min = llvm::ConstantInt::get(srcTy, 0, false);
375 }
376
377 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
378 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
379 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
380 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
381
382 x = ::builder->CreateTrunc(x, dstTy);
383 y = ::builder->CreateTrunc(y, dstTy);
384
385 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
386 std::iota(index.begin(), index.end(), 0);
387
388 return ::builder->CreateShuffleVector(x, y, index);
389 }
390
391 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
392 {
393 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
394 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
395 llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
396
397 llvm::Value *ret = ::builder->CreateZExt(
398 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
399 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
400 {
401 llvm::Value *elem = ::builder->CreateZExt(
402 ::builder->CreateExtractElement(cmp, i), retTy);
403 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
404 }
405 return ret;
406 }
407
408 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
409 {
410 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
411 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
412 llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
413
414 llvm::Value *ret = ::builder->CreateZExt(
415 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
416 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
417 {
418 llvm::Value *elem = ::builder->CreateZExt(
419 ::builder->CreateExtractElement(cmp, i), retTy);
420 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
421 }
422 return ret;
423 }
424#endif // !defined(__i386__) && !defined(__x86_64__)
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400425#endif // REACTOR_LLVM_VERSION >= 7
Chris Forbese86b6dc2019-03-01 09:08:47 -0800426
427 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
428 {
429 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
430 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
431
432 llvm::Value *extX, *extY;
433 if (sext)
434 {
435 extX = ::builder->CreateSExt(x, extTy);
436 extY = ::builder->CreateSExt(y, extTy);
437 }
438 else
439 {
440 extX = ::builder->CreateZExt(x, extTy);
441 extY = ::builder->CreateZExt(y, extTy);
442 }
443
444 llvm::Value *mult = ::builder->CreateMul(extX, extY);
445
446 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
447 llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getBitWidth());
448 return ::builder->CreateTrunc(mulh, ty);
449 }
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400450}
451
Nicolas Capens48461502018-08-06 14:20:45 -0400452namespace rr
John Bauman89401822014-05-06 15:04:28 -0400453{
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400454#if REACTOR_LLVM_VERSION < 7
// JIT wrapper used when building against LLVM versions older than 7. A
// session creates a fresh ::module and an execution engine for it; routines
// are obtained from the engine through an LLVMRoutineManager.
class LLVMReactorJIT
{
private:
	std::string arch;                            // Target architecture passed to selectTarget().
	llvm::SmallVector<std::string, 16> mattrs;   // Target attribute strings passed to selectTarget().
	llvm::ExecutionEngine *executionEngine;      // Created by startSession(), deleted by endSession().
	LLVMRoutineManager *routineManager;          // Created by startSession() and handed to the JIT.

public:
	// Stores copies of the target description; no LLVM objects are created
	// until startSession().
	LLVMReactorJIT(const std::string &arch_,
	               const llvm::SmallVectorImpl<std::string> &mattrs_) :
		arch(arch_),
		mattrs(mattrs_.begin(), mattrs_.end()),
		executionEngine(nullptr),
		routineManager(nullptr)
	{
	}

	// Creates a new ::module in ::context, a routine manager, and a JIT
	// execution engine targeting arch/mattrs.
	// NOTE(review): 'error' is filled in by LLVM but never inspected.
	void startSession()
	{
		std::string error;

		::module = new llvm::Module("", *::context);

		routineManager = new LLVMRoutineManager();

		llvm::TargetMachine *targetMachine =
			llvm::EngineBuilder::selectTarget(
				::module, arch, "", mattrs, llvm::Reloc::Default,
				llvm::CodeModel::JITDefault, &error);

		executionEngine = llvm::JIT::createJIT(
			::module, &error, routineManager, llvm::CodeGenOpt::Aggressive,
			true, targetMachine);
	}

	// Deletes the execution engine and clears the session pointers.
	// NOTE(review): ::module and routineManager are not deleted here;
	// presumably the execution engine owns them - confirm.
	void endSession()
	{
		delete executionEngine;
		executionEngine = nullptr;
		routineManager = nullptr;

		::function = nullptr;
		::module = nullptr;
	}

	// Compiles the current function and returns the routine the routine
	// manager associates with its entry point.
	// NOTE(review): the 'func' parameter is ignored; the global ::function is
	// compiled instead - confirm callers always pass ::function.
	LLVMRoutine *acquireRoutine(llvm::Function *func)
	{
		void *entry = executionEngine->getPointerToFunction(::function);
		return routineManager->acquireRoutine(entry);
	}

	// Runs the optimization passes over ::module. The pass manager is built
	// once, on the first call, from the 'optimization' table (stopping at the
	// first Disabled entry) and is reused by every later call.
	// NOTE(review): the 'module' parameter is ignored in favor of ::module.
	void optimize(llvm::Module *module)
	{
		static llvm::PassManager *passManager = nullptr;

		if(!passManager)
		{
			passManager = new llvm::PassManager();

			passManager->add(new llvm::TargetData(*executionEngine->getTargetData()));
			passManager->add(llvm::createScalarReplAggregatesPass());

			for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
			{
				switch(optimization[pass])
				{
				case Disabled:             break;
				case CFGSimplification:    passManager->add(llvm::createCFGSimplificationPass());    break;
				case LICM:                 passManager->add(llvm::createLICMPass());                 break;
				case AggressiveDCE:        passManager->add(llvm::createAggressiveDCEPass());        break;
				case GVN:                  passManager->add(llvm::createGVNPass());                  break;
				case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
				case Reassociate:          passManager->add(llvm::createReassociatePass());          break;
				case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
				case SCCP:                 passManager->add(llvm::createSCCPPass());                 break;
				case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break;
				default:
					assert(false);
				}
			}
		}

		passManager->run(*::module);
	}
};
Logan Chien0eedc8c2018-08-21 09:34:28 +0800541#else
// Maps the names of the C library functions that generated code may call to
// their addresses in the host process.
class ExternalFunctionSymbolResolver
{
private:
	using FunctionMap = std::unordered_map<std::string, void *>;
	FunctionMap func_;

public:
	// Registers every supported external function under its unmangled name.
	ExternalFunctionSymbolResolver()
	{
		func_.emplace("floorf", reinterpret_cast<void*>(floorf));
		func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
		func_.emplace("truncf", reinterpret_cast<void*>(truncf));
		func_.emplace("printf", reinterpret_cast<void*>(printf));
		func_.emplace("puts", reinterpret_cast<void*>(puts));
		func_.emplace("fmodf", reinterpret_cast<void*>(fmodf));
		func_.emplace("sinf", reinterpret_cast<void*>(sinf));
		func_.emplace("cosf", reinterpret_cast<void*>(cosf));
		func_.emplace("asinf", reinterpret_cast<void*>(asinf));

#ifdef __APPLE__
		// LLVM uses this function on macOS for tan.
		func_.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));
#elif defined(__linux__)
		func_.emplace("sincosf", reinterpret_cast<void*>(sincosf));
#endif // __APPLE__
	}

	// Returns the address registered for 'name', ignoring any leading
	// underscores. An unknown name trips the assert in debug builds; in
	// builds with asserts disabled it returns nullptr so the caller can fall
	// back to another lookup instead of dereferencing the end iterator.
	void *findSymbol(const std::string &name) const
	{
		// Trim off any underscores from the start of the symbol. LLVM likes
		// to prepend these on macOS.
		const char* trimmed = name.c_str();
		while (trimmed[0] == '_') { trimmed++; }

		FunctionMap::const_iterator it = func_.find(trimmed);
		assert(it != func_.end()); // Missing functions will likely make the module fail in exciting non-obvious ways.
		if (it == func_.end())
		{
			return nullptr;
		}

		return it->second;
	}
};
581
Logan Chien0eedc8c2018-08-21 09:34:28 +0800582 class LLVMReactorJIT
583 {
584 private:
585 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
586 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
587
588 llvm::orc::ExecutionSession session;
Logan Chien40a60052018-09-26 19:03:53 +0800589 ExternalFunctionSymbolResolver externalSymbolResolver;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800590 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
591 std::unique_ptr<llvm::TargetMachine> targetMachine;
592 const llvm::DataLayout dataLayout;
593 ObjLayer objLayer;
594 CompileLayer compileLayer;
595 size_t emittedFunctionsNum;
596
597 public:
598 LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
599 const llvm::TargetOptions &targetOpts):
600 resolver(createLegacyLookupResolver(
601 session,
602 [this](const std::string &name) {
Logan Chien40a60052018-09-26 19:03:53 +0800603 void *func = externalSymbolResolver.findSymbol(name);
604 if (func != nullptr)
605 {
606 return llvm::JITSymbol(
607 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
608 }
609
Logan Chien0eedc8c2018-08-21 09:34:28 +0800610 return objLayer.findSymbol(name, true);
611 },
612 [](llvm::Error err) {
613 if (err)
614 {
615 // TODO: Log the symbol resolution errors.
616 return;
617 }
618 })),
619 targetMachine(llvm::EngineBuilder()
620 .setMArch(arch)
621 .setMAttrs(mattrs)
622 .setTargetOptions(targetOpts)
623 .selectTarget()),
624 dataLayout(targetMachine->createDataLayout()),
625 objLayer(
626 session,
627 [this](llvm::orc::VModuleKey) {
628 return ObjLayer::Resources{
629 std::make_shared<llvm::SectionMemoryManager>(),
630 resolver};
631 }),
632 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
633 emittedFunctionsNum(0)
634 {
635 }
636
637 void startSession()
638 {
639 ::module = new llvm::Module("", *::context);
640 }
641
642 void endSession()
643 {
644 ::function = nullptr;
645 ::module = nullptr;
646 }
647
648 LLVMRoutine *acquireRoutine(llvm::Function *func)
649 {
650 std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
651 func->setName(name);
652 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
653 func->setDoesNotThrow();
654
655 std::unique_ptr<llvm::Module> mod(::module);
656 ::module = nullptr;
657 mod->setDataLayout(dataLayout);
658
659 auto moduleKey = session.allocateVModule();
660 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
661
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400662 std::string mangledName;
663 {
664 llvm::raw_string_ostream mangledNameStream(mangledName);
665 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
666 }
667
668 llvm::JITSymbol symbol = compileLayer.findSymbolIn(moduleKey, mangledName, false);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800669
670 llvm::Expected<llvm::JITTargetAddress> expectAddr = symbol.getAddress();
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400671 if(!expectAddr)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800672 {
673 return nullptr;
674 }
675
676 void *addr = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
677 return new LLVMRoutine(addr, releaseRoutineCallback, this, moduleKey);
678 }
679
680 void optimize(llvm::Module *module)
681 {
682 std::unique_ptr<llvm::legacy::PassManager> passManager(
683 new llvm::legacy::PassManager());
684
685 passManager->add(llvm::createSROAPass());
686
687 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
688 {
689 switch(optimization[pass])
690 {
691 case Disabled: break;
692 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
693 case LICM: passManager->add(llvm::createLICMPass()); break;
694 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
695 case GVN: passManager->add(llvm::createGVNPass()); break;
696 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
697 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
698 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
699 case SCCP: passManager->add(llvm::createSCCPPass()); break;
700 case ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
701 default:
Logan Chienb5ce5092018-09-27 18:45:58 +0800702 assert(false);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800703 }
704 }
705
706 passManager->run(*::module);
707 }
708
709 private:
710 void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
711 {
712 llvm::cantFail(compileLayer.removeModule(moduleKey));
713 }
714
715 static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
716 {
717 jit->releaseRoutineModule(moduleKey);
718 }
719 };
720#endif
Logan Chien52cde602018-09-03 19:37:57 +0800721
// Ordered list of optimization passes applied by LLVMReactorJIT::optimize().
// The list is read from the start and ends at the first Disabled entry (or
// after 10 entries). By default only InstructionCombining is enabled.
Optimization optimization[10] = {InstructionCombining, Disabled};
John Bauman89401822014-05-06 15:04:28 -0400723
// The abstract Type* types are implemented as LLVM types, except that
// 64-bit vectors are emulated using 128-bit ones to avoid use of MMX in x86
// and VFP in ARM, and eliminate the overhead of converting them to explicit
// 128-bit ones. LLVM types are pointers, so we can represent emulated types
// as abstract pointers with small enum values.
//
// Note that the first emulated type has the value 0, so asInternalType()
// classifies a null Type* as Type_v2i32.
enum InternalType : uintptr_t
{
	// Emulated types:
	Type_v2i32,
	Type_v4i16,
	Type_v2i16,
	Type_v8i8,
	Type_v4i8,
	Type_v2f32,
	// Number of emulated types. Any Type* whose value is below this is
	// treated as one of the enumerators above rather than as a pointer.
	EmulatedTypeCount,
	// Returned by asInternalType() to indicate that the abstract Type*
	// should be interpreted as LLVM type pointer:
	Type_LLVM
};
743
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500744 inline InternalType asInternalType(Type *type)
745 {
746 InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
747 return (t < EmulatedTypeCount) ? t : Type_LLVM;
748 }
749
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400750 llvm::Type *T(Type *t)
751 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500752 // Use 128-bit vectors to implement logically shorter ones.
753 switch(asInternalType(t))
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400754 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500755 case Type_v2i32: return T(Int4::getType());
756 case Type_v4i16: return T(Short8::getType());
757 case Type_v2i16: return T(Short8::getType());
758 case Type_v8i8: return T(Byte16::getType());
759 case Type_v4i8: return T(Byte16::getType());
760 case Type_v2f32: return T(Float4::getType());
761 case Type_LLVM: return reinterpret_cast<llvm::Type*>(t);
762 default: assert(false); return nullptr;
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400763 }
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400764 }
765
Nicolas Capensac230122016-09-20 14:30:06 -0400766 inline Type *T(llvm::Type *t)
767 {
768 return reinterpret_cast<Type*>(t);
769 }
770
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500771 Type *T(InternalType t)
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400772 {
773 return reinterpret_cast<Type*>(t);
774 }
775
Logan Chien191b3052018-08-31 16:57:15 +0800776 inline llvm::Value *V(Value *t)
777 {
778 return reinterpret_cast<llvm::Value*>(t);
779 }
780
Nicolas Capens19336542016-09-26 10:32:29 -0400781 inline Value *V(llvm::Value *t)
782 {
783 return reinterpret_cast<Value*>(t);
784 }
785
Nicolas Capensac230122016-09-20 14:30:06 -0400786 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
787 {
788 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
789 }
790
Logan Chien191b3052018-08-31 16:57:15 +0800791 inline llvm::BasicBlock *B(BasicBlock *t)
792 {
793 return reinterpret_cast<llvm::BasicBlock*>(t);
794 }
795
Nicolas Capensc8b67a42016-09-25 15:02:52 -0400796 inline BasicBlock *B(llvm::BasicBlock *t)
797 {
798 return reinterpret_cast<BasicBlock*>(t);
799 }
800
Nicolas Capens01a97962017-07-28 17:30:51 -0400801 static size_t typeSize(Type *type)
802 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500803 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -0400804 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500805 case Type_v2i32: return 8;
806 case Type_v4i16: return 8;
807 case Type_v2i16: return 4;
808 case Type_v8i8: return 8;
809 case Type_v4i8: return 4;
810 case Type_v2f32: return 8;
811 case Type_LLVM:
Nicolas Capens01a97962017-07-28 17:30:51 -0400812 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500813 llvm::Type *t = T(type);
Nicolas Capens01a97962017-07-28 17:30:51 -0400814
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500815 if(t->isPointerTy())
816 {
817 return sizeof(void*);
818 }
819
820 // At this point we should only have LLVM 'primitive' types.
821 unsigned int bits = t->getPrimitiveSizeInBits();
822 assert(bits != 0);
823
824 // TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
825 // but are typically stored as one byte. The DataLayout structure should
826 // be used here and many other places if this assumption fails.
827 return (bits + 7) / 8;
828 }
829 break;
830 default:
831 assert(false);
832 return 0;
833 }
Nicolas Capens01a97962017-07-28 17:30:51 -0400834 }
835
Nicolas Capens69674fb2017-09-01 11:08:44 -0400836 static unsigned int elementCount(Type *type)
837 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500838 switch(asInternalType(type))
Nicolas Capens69674fb2017-09-01 11:08:44 -0400839 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500840 case Type_v2i32: return 2;
841 case Type_v4i16: return 4;
842 case Type_v2i16: return 2;
843 case Type_v8i8: return 8;
844 case Type_v4i8: return 4;
845 case Type_v2f32: return 2;
846 case Type_LLVM: return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
847 default: assert(false); return 0;
Nicolas Capens69674fb2017-09-01 11:08:44 -0400848 }
Nicolas Capens69674fb2017-09-01 11:08:44 -0400849 }
850
Nicolas Capens86509d92019-03-21 13:23:50 -0400851 static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
852 {
853 #if REACTOR_LLVM_VERSION < 7
854 return llvm::AtomicOrdering::NotAtomic;
855 #endif
856
857 if(!atomic)
858 {
859 return llvm::AtomicOrdering::NotAtomic;
860 }
861
862 switch(memoryOrder)
863 {
864 case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic; // https://llvm.org/docs/Atomics.html#monotonic
865 case std::memory_order_consume: return llvm::AtomicOrdering::Acquire; // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
866 case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
867 case std::memory_order_release: return llvm::AtomicOrdering::Release;
868 case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
869 case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
870 default: assert(false); return llvm::AtomicOrdering::AcquireRelease;
871 }
872 }
873
John Bauman89401822014-05-06 15:04:28 -0400874 Nucleus::Nucleus()
875 {
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400876 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400877
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400878 llvm::InitializeNativeTarget();
John Bauman89401822014-05-06 15:04:28 -0400879
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400880#if REACTOR_LLVM_VERSION >= 7
Logan Chien0eedc8c2018-08-21 09:34:28 +0800881 llvm::InitializeNativeTargetAsmPrinter();
882 llvm::InitializeNativeTargetAsmParser();
883#endif
884
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400885 if(!::context)
John Bauman89401822014-05-06 15:04:28 -0400886 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400887 ::context = new llvm::LLVMContext();
John Bauman89401822014-05-06 15:04:28 -0400888 }
889
John Bauman89401822014-05-06 15:04:28 -0400890 #if defined(__x86_64__)
Logan Chien52cde602018-09-03 19:37:57 +0800891 static const char arch[] = "x86-64";
Logan Chiene3191012018-08-24 22:01:50 +0800892 #elif defined(__i386__)
Logan Chien52cde602018-09-03 19:37:57 +0800893 static const char arch[] = "x86";
Logan Chiene3191012018-08-24 22:01:50 +0800894 #elif defined(__aarch64__)
895 static const char arch[] = "arm64";
896 #elif defined(__arm__)
897 static const char arch[] = "arm";
Gordana Cmiljanovic082dfec2018-10-19 11:36:15 +0200898 #elif defined(__mips__)
Gordana Cmiljanovic20622c02018-11-05 15:00:11 +0100899 #if defined(__mips64)
900 static const char arch[] = "mips64el";
901 #else
902 static const char arch[] = "mipsel";
903 #endif
Logan Chiene3191012018-08-24 22:01:50 +0800904 #else
905 #error "unknown architecture"
John Bauman89401822014-05-06 15:04:28 -0400906 #endif
907
Logan Chien52cde602018-09-03 19:37:57 +0800908 llvm::SmallVector<std::string, 1> mattrs;
Logan Chiene3191012018-08-24 22:01:50 +0800909#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800910 mattrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
911 mattrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
912 mattrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
913 mattrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
914 mattrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
915 mattrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400916#if REACTOR_LLVM_VERSION < 7
Logan Chien0eedc8c2018-08-21 09:34:28 +0800917 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
918#else
919 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
920#endif
Logan Chiene3191012018-08-24 22:01:50 +0800921#elif defined(__arm__)
922#if __ARM_ARCH >= 8
923 mattrs.push_back("+armv8-a");
924#else
925 // armv7-a requires compiler-rt routines; otherwise, compiled kernel
926 // might fail to link.
927#endif
928#endif
John Bauman89401822014-05-06 15:04:28 -0400929
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400930#if REACTOR_LLVM_VERSION < 7
Logan Chien52cde602018-09-03 19:37:57 +0800931 llvm::JITEmitDebugInfo = false;
932 llvm::UnsafeFPMath = true;
933 // llvm::NoInfsFPMath = true;
934 // llvm::NoNaNsFPMath = true;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800935#else
936 llvm::TargetOptions targetOpts;
Nicolas Capensa7643812018-09-13 14:20:06 -0400937 targetOpts.UnsafeFPMath = false;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800938 // targetOpts.NoInfsFPMath = true;
939 // targetOpts.NoNaNsFPMath = true;
940#endif
Logan Chien52cde602018-09-03 19:37:57 +0800941
942 if(!::reactorJIT)
943 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400944#if REACTOR_LLVM_VERSION < 7
Logan Chien52cde602018-09-03 19:37:57 +0800945 ::reactorJIT = new LLVMReactorJIT(arch, mattrs);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800946#else
947 ::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
948#endif
Logan Chien52cde602018-09-03 19:37:57 +0800949 }
950
951 ::reactorJIT->startSession();
John Bauman89401822014-05-06 15:04:28 -0400952
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400953 if(!::builder)
John Bauman89401822014-05-06 15:04:28 -0400954 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400955 ::builder = new llvm::IRBuilder<>(*::context);
John Bauman89401822014-05-06 15:04:28 -0400956 }
957 }
958
959 Nucleus::~Nucleus()
960 {
Logan Chien52cde602018-09-03 19:37:57 +0800961 ::reactorJIT->endSession();
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400962
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400963 ::codegenMutex.unlock();
John Bauman89401822014-05-06 15:04:28 -0400964 }
965
Chris Forbes878d4b02019-01-21 10:48:35 -0800966 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
John Bauman89401822014-05-06 15:04:28 -0400967 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400968 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
John Bauman19bac1e2014-05-06 15:23:49 -0400969 {
Nicolas Capensac230122016-09-20 14:30:06 -0400970 llvm::Type *type = ::function->getReturnType();
John Bauman19bac1e2014-05-06 15:23:49 -0400971
972 if(type->isVoidTy())
973 {
974 createRetVoid();
975 }
976 else
977 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400978 createRet(V(llvm::UndefValue::get(type)));
John Bauman19bac1e2014-05-06 15:23:49 -0400979 }
980 }
John Bauman89401822014-05-06 15:04:28 -0400981
982 if(false)
983 {
Nicolas Capens543629b2019-01-28 11:36:01 -0500984 #if REACTOR_LLVM_VERSION < 7
985 std::string error;
986 llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-unopt.txt").c_str(), error);
987 #else
988 std::error_code error;
989 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
990 #endif
991
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400992 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -0400993 }
994
995 if(runOptimizations)
996 {
997 optimize();
998 }
999
1000 if(false)
1001 {
Nicolas Capens543629b2019-01-28 11:36:01 -05001002 #if REACTOR_LLVM_VERSION < 7
1003 std::string error;
1004 llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-opt.txt").c_str(), error);
1005 #else
1006 std::error_code error;
1007 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
1008 #endif
1009
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001010 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001011 }
1012
Logan Chien52cde602018-09-03 19:37:57 +08001013 LLVMRoutine *routine = ::reactorJIT->acquireRoutine(::function);
John Bauman89401822014-05-06 15:04:28 -04001014
John Bauman89401822014-05-06 15:04:28 -04001015 return routine;
1016 }
1017
1018 void Nucleus::optimize()
1019 {
Logan Chien52cde602018-09-03 19:37:57 +08001020 ::reactorJIT->optimize(::module);
John Bauman89401822014-05-06 15:04:28 -04001021 }
1022
John Bauman19bac1e2014-05-06 15:23:49 -04001023 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
John Bauman89401822014-05-06 15:04:28 -04001024 {
1025 // Need to allocate it in the entry block for mem2reg to work
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001026 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
John Bauman89401822014-05-06 15:04:28 -04001027
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001028 llvm::Instruction *declaration;
John Bauman89401822014-05-06 15:04:28 -04001029
1030 if(arraySize)
1031 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04001032#if REACTOR_LLVM_VERSION < 7
Logan Chien191b3052018-08-31 16:57:15 +08001033 declaration = new llvm::AllocaInst(T(type), V(Nucleus::createConstantInt(arraySize)));
Logan Chien0eedc8c2018-08-21 09:34:28 +08001034#else
1035 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
1036#endif
John Bauman89401822014-05-06 15:04:28 -04001037 }
1038 else
1039 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04001040#if REACTOR_LLVM_VERSION < 7
Logan Chien191b3052018-08-31 16:57:15 +08001041 declaration = new llvm::AllocaInst(T(type), (llvm::Value*)nullptr);
Logan Chien0eedc8c2018-08-21 09:34:28 +08001042#else
1043 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
1044#endif
John Bauman89401822014-05-06 15:04:28 -04001045 }
1046
1047 entryBlock.getInstList().push_front(declaration);
1048
Nicolas Capens19336542016-09-26 10:32:29 -04001049 return V(declaration);
John Bauman89401822014-05-06 15:04:28 -04001050 }
1051
1052 BasicBlock *Nucleus::createBasicBlock()
1053 {
Logan Chien191b3052018-08-31 16:57:15 +08001054 return B(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001055 }
1056
1057 BasicBlock *Nucleus::getInsertBlock()
1058 {
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001059 return B(::builder->GetInsertBlock());
John Bauman89401822014-05-06 15:04:28 -04001060 }
1061
1062 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1063 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001064 // assert(::builder->GetInsertBlock()->back().isTerminator());
Nicolas Capens0192d152019-03-27 14:46:07 -04001065
1066 Variable::materializeAll();
1067
Logan Chien191b3052018-08-31 16:57:15 +08001068 ::builder->SetInsertPoint(B(basicBlock));
John Bauman89401822014-05-06 15:04:28 -04001069 }
1070
Nicolas Capensac230122016-09-20 14:30:06 -04001071 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
John Bauman89401822014-05-06 15:04:28 -04001072 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001073 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001074 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
1075 ::function->setCallingConv(llvm::CallingConv::C);
John Bauman89401822014-05-06 15:04:28 -04001076
Nicolas Capensf417d9d2018-10-10 10:49:30 -04001077 #if defined(_WIN32) && REACTOR_LLVM_VERSION >= 7
Nicolas Capens52551d12018-09-13 14:30:56 -04001078 // FIXME(capn):
1079 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
1080 // having a trap which allows the OS to grow the stack. For functions with a stack frame
1081 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
1082 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
1083 // the stack and ensure all pages have been committed. This is currently broken in LLVM
1084 // JIT, but we can prevent emitting the stack probe call:
1085 ::function->addFnAttr("stack-probe-size", "1048576");
1086 #endif
1087
Logan Chien191b3052018-08-31 16:57:15 +08001088 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001089 }
1090
Nicolas Capens19336542016-09-26 10:32:29 -04001091 Value *Nucleus::getArgument(unsigned int index)
John Bauman89401822014-05-06 15:04:28 -04001092 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001093 llvm::Function::arg_iterator args = ::function->arg_begin();
John Bauman89401822014-05-06 15:04:28 -04001094
1095 while(index)
1096 {
1097 args++;
1098 index--;
1099 }
1100
Nicolas Capens19336542016-09-26 10:32:29 -04001101 return V(&*args);
John Bauman89401822014-05-06 15:04:28 -04001102 }
1103
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001104 void Nucleus::createRetVoid()
John Bauman89401822014-05-06 15:04:28 -04001105 {
Nicolas Capens0192d152019-03-27 14:46:07 -04001106 // Code generated after this point is unreachable, so any variables
1107 // being read can safely return an undefined value. We have to avoid
1108 // materializing variables after the terminator ret instruction.
1109 Variable::killUnmaterialized();
1110
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001111 ::builder->CreateRetVoid();
John Bauman89401822014-05-06 15:04:28 -04001112 }
1113
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001114 void Nucleus::createRet(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001115 {
Nicolas Capens0192d152019-03-27 14:46:07 -04001116 // Code generated after this point is unreachable, so any variables
1117 // being read can safely return an undefined value. We have to avoid
1118 // materializing variables after the terminator ret instruction.
1119 Variable::killUnmaterialized();
1120
Logan Chien191b3052018-08-31 16:57:15 +08001121 ::builder->CreateRet(V(v));
John Bauman89401822014-05-06 15:04:28 -04001122 }
1123
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001124 void Nucleus::createBr(BasicBlock *dest)
John Bauman89401822014-05-06 15:04:28 -04001125 {
Nicolas Capens0192d152019-03-27 14:46:07 -04001126 Variable::materializeAll();
1127
Logan Chien191b3052018-08-31 16:57:15 +08001128 ::builder->CreateBr(B(dest));
John Bauman89401822014-05-06 15:04:28 -04001129 }
1130
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001131 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001132 {
Nicolas Capens0192d152019-03-27 14:46:07 -04001133 Variable::materializeAll();
1134
Logan Chien191b3052018-08-31 16:57:15 +08001135 ::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
John Bauman89401822014-05-06 15:04:28 -04001136 }
1137
1138 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1139 {
Logan Chien191b3052018-08-31 16:57:15 +08001140 return V(::builder->CreateAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001141 }
1142
1143 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1144 {
Logan Chien191b3052018-08-31 16:57:15 +08001145 return V(::builder->CreateSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001146 }
1147
1148 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1149 {
Logan Chien191b3052018-08-31 16:57:15 +08001150 return V(::builder->CreateMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001151 }
1152
1153 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1154 {
Logan Chien191b3052018-08-31 16:57:15 +08001155 return V(::builder->CreateUDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001156 }
1157
1158 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1159 {
Logan Chien191b3052018-08-31 16:57:15 +08001160 return V(::builder->CreateSDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001161 }
1162
1163 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1164 {
Logan Chien191b3052018-08-31 16:57:15 +08001165 return V(::builder->CreateFAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001166 }
1167
1168 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1169 {
Logan Chien191b3052018-08-31 16:57:15 +08001170 return V(::builder->CreateFSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001171 }
1172
1173 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1174 {
Logan Chien191b3052018-08-31 16:57:15 +08001175 return V(::builder->CreateFMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001176 }
1177
1178 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1179 {
Logan Chien191b3052018-08-31 16:57:15 +08001180 return V(::builder->CreateFDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001181 }
1182
1183 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1184 {
Logan Chien191b3052018-08-31 16:57:15 +08001185 return V(::builder->CreateURem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001186 }
1187
1188 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1189 {
Logan Chien191b3052018-08-31 16:57:15 +08001190 return V(::builder->CreateSRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001191 }
1192
1193 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1194 {
Logan Chien191b3052018-08-31 16:57:15 +08001195 return V(::builder->CreateFRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001196 }
1197
1198 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1199 {
Logan Chien191b3052018-08-31 16:57:15 +08001200 return V(::builder->CreateShl(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001201 }
1202
1203 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1204 {
Logan Chien191b3052018-08-31 16:57:15 +08001205 return V(::builder->CreateLShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001206 }
1207
1208 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1209 {
Logan Chien191b3052018-08-31 16:57:15 +08001210 return V(::builder->CreateAShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001211 }
1212
1213 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1214 {
Logan Chien191b3052018-08-31 16:57:15 +08001215 return V(::builder->CreateAnd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001216 }
1217
1218 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1219 {
Logan Chien191b3052018-08-31 16:57:15 +08001220 return V(::builder->CreateOr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001221 }
1222
1223 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1224 {
Logan Chien191b3052018-08-31 16:57:15 +08001225 return V(::builder->CreateXor(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001226 }
1227
Nicolas Capens19336542016-09-26 10:32:29 -04001228 Value *Nucleus::createNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001229 {
Logan Chien191b3052018-08-31 16:57:15 +08001230 return V(::builder->CreateNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001231 }
1232
Nicolas Capens19336542016-09-26 10:32:29 -04001233 Value *Nucleus::createFNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001234 {
Logan Chien191b3052018-08-31 16:57:15 +08001235 return V(::builder->CreateFNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001236 }
1237
Nicolas Capens19336542016-09-26 10:32:29 -04001238 Value *Nucleus::createNot(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001239 {
Logan Chien191b3052018-08-31 16:57:15 +08001240 return V(::builder->CreateNot(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001241 }
1242
Nicolas Capens86509d92019-03-21 13:23:50 -04001243 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001244 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001245 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001246 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001247 case Type_v2i32:
1248 case Type_v4i16:
1249 case Type_v8i8:
1250 case Type_v2f32:
1251 return createBitCast(
1252 createInsertElement(
1253 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
Nicolas Capens86509d92019-03-21 13:23:50 -04001254 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001255 0),
1256 type);
1257 case Type_v2i16:
1258 case Type_v4i8:
1259 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001260 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001261 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Nicolas Capens86509d92019-03-21 13:23:50 -04001262 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001263 i = createZExt(i, Long::getType());
1264 Value *v = createInsertElement(u, i, 0);
1265 return createBitCast(v, type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001266 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001267 // Fallthrough to non-emulated case.
1268 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001269 {
1270 assert(V(ptr)->getType()->getContainedType(0) == T(type));
1271 auto load = new llvm::LoadInst(V(ptr), "", isVolatile, alignment);
1272 load->setAtomic(atomicOrdering(atomic, memoryOrder));
1273
1274 return V(::builder->Insert(load));
1275 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001276 default:
1277 assert(false); return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001278 }
John Bauman89401822014-05-06 15:04:28 -04001279 }
1280
Nicolas Capens86509d92019-03-21 13:23:50 -04001281 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001282 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001283 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001284 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001285 case Type_v2i32:
1286 case Type_v4i16:
1287 case Type_v8i8:
1288 case Type_v2f32:
1289 createStore(
1290 createExtractElement(
1291 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1292 createBitCast(ptr, Pointer<Long>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001293 Long::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001294 return value;
1295 case Type_v2i16:
1296 case Type_v4i8:
1297 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001298 {
Logan Chien191b3052018-08-31 16:57:15 +08001299 createStore(
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001300 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1301 createBitCast(ptr, Pointer<Int>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001302 Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens01a97962017-07-28 17:30:51 -04001303 return value;
Nicolas Capens01a97962017-07-28 17:30:51 -04001304 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001305 // Fallthrough to non-emulated case.
1306 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001307 {
1308 assert(V(ptr)->getType()->getContainedType(0) == T(type));
1309 auto store = ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
1310 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1311
1312 return value;
1313 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001314 default:
1315 assert(false); return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001316 }
John Bauman89401822014-05-06 15:04:28 -04001317 }
1318
Nicolas Capensd294def2017-01-26 17:44:37 -08001319 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
John Bauman89401822014-05-06 15:04:28 -04001320 {
Ben Claytonb1243732019-02-27 23:56:18 +00001321 assert(V(ptr)->getType()->getContainedType(0) == T(type));
1322
Nicolas Capens01a97962017-07-28 17:30:51 -04001323 if(sizeof(void*) == 8)
Nicolas Capensd294def2017-01-26 17:44:37 -08001324 {
Ben Claytonb1243732019-02-27 23:56:18 +00001325 // LLVM manual: "When indexing into an array, pointer or vector,
1326 // integers of any width are allowed, and they are not required to
1327 // be constant. These integers are treated as signed values where
1328 // relevant."
1329 //
1330 // Thus if we want indexes to be treated as unsigned we have to
1331 // zero-extend them ourselves.
1332 //
1333 // Note that this is not because we want to address anywhere near
1334 // 4 GB of data. Instead this is important for performance because
1335 // x86 supports automatic zero-extending of 32-bit registers to
1336 // 64-bit. Thus when indexing into an array using a uint32 is
1337 // actually faster than an int32.
1338 index = unsignedIndex ?
1339 createZExt(index, Long::getType()) :
1340 createSExt(index, Long::getType());
Nicolas Capens01a97962017-07-28 17:30:51 -04001341 }
Ben Claytonb1243732019-02-27 23:56:18 +00001342
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001343 // For non-emulated types we can rely on LLVM's GEP to calculate the
1344 // effective address correctly.
1345 if(asInternalType(type) == Type_LLVM)
Nicolas Capens01a97962017-07-28 17:30:51 -04001346 {
Ben Claytonb1243732019-02-27 23:56:18 +00001347 return V(::builder->CreateGEP(V(ptr), V(index)));
Nicolas Capensd294def2017-01-26 17:44:37 -08001348 }
1349
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001350 // For emulated types we have to multiply the index by the intended
1351 // type size ourselves to obain the byte offset.
Ben Claytonb1243732019-02-27 23:56:18 +00001352 index = (sizeof(void*) == 8) ?
1353 createMul(index, createConstantLong((int64_t)typeSize(type))) :
1354 createMul(index, createConstantInt((int)typeSize(type)));
1355
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001356 // Cast to a byte pointer, apply the byte offset, and cast back to the
1357 // original pointer type.
Logan Chien191b3052018-08-31 16:57:15 +08001358 return createBitCast(
1359 V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
1360 T(llvm::PointerType::get(T(type), 0)));
John Bauman89401822014-05-06 15:04:28 -04001361 }
1362
John Bauman19bac1e2014-05-06 15:23:49 -04001363 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
1364 {
Logan Chien191b3052018-08-31 16:57:15 +08001365 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), llvm::AtomicOrdering::SequentiallyConsistent));
John Bauman19bac1e2014-05-06 15:23:49 -04001366 }
1367
Nicolas Capens19336542016-09-26 10:32:29 -04001368 Value *Nucleus::createTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001369 {
Logan Chien191b3052018-08-31 16:57:15 +08001370 return V(::builder->CreateTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001371 }
1372
Nicolas Capens19336542016-09-26 10:32:29 -04001373 Value *Nucleus::createZExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001374 {
Logan Chien191b3052018-08-31 16:57:15 +08001375 return V(::builder->CreateZExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001376 }
1377
Nicolas Capens19336542016-09-26 10:32:29 -04001378 Value *Nucleus::createSExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001379 {
Logan Chien191b3052018-08-31 16:57:15 +08001380 return V(::builder->CreateSExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001381 }
1382
Nicolas Capens19336542016-09-26 10:32:29 -04001383 Value *Nucleus::createFPToSI(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001384 {
Logan Chien191b3052018-08-31 16:57:15 +08001385 return V(::builder->CreateFPToSI(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001386 }
1387
Nicolas Capens19336542016-09-26 10:32:29 -04001388 Value *Nucleus::createSIToFP(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001389 {
Logan Chien191b3052018-08-31 16:57:15 +08001390 return V(::builder->CreateSIToFP(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001391 }
1392
Nicolas Capens19336542016-09-26 10:32:29 -04001393 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001394 {
Logan Chien191b3052018-08-31 16:57:15 +08001395 return V(::builder->CreateFPTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001396 }
1397
Nicolas Capens19336542016-09-26 10:32:29 -04001398 Value *Nucleus::createFPExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001399 {
Logan Chien191b3052018-08-31 16:57:15 +08001400 return V(::builder->CreateFPExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001401 }
1402
Nicolas Capens19336542016-09-26 10:32:29 -04001403 Value *Nucleus::createBitCast(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001404 {
Nicolas Capens01a97962017-07-28 17:30:51 -04001405 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1406 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1407 // reading back as the destination type.
Logan Chien191b3052018-08-31 16:57:15 +08001408 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001409 {
1410 Value *readAddress = allocateStackVariable(destType);
Logan Chien191b3052018-08-31 16:57:15 +08001411 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1412 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001413 return createLoad(readAddress, destType);
1414 }
Logan Chien191b3052018-08-31 16:57:15 +08001415 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001416 {
Logan Chien191b3052018-08-31 16:57:15 +08001417 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1418 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001419 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1420 return createLoad(readAddress, destType);
1421 }
1422
Logan Chien191b3052018-08-31 16:57:15 +08001423 return V(::builder->CreateBitCast(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001424 }
1425
John Bauman89401822014-05-06 15:04:28 -04001426 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1427 {
Logan Chien191b3052018-08-31 16:57:15 +08001428 return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001429 }
1430
1431 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1432 {
Logan Chien191b3052018-08-31 16:57:15 +08001433 return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001434 }
1435
1436 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1437 {
Logan Chien191b3052018-08-31 16:57:15 +08001438 return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001439 }
1440
1441 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1442 {
Logan Chien191b3052018-08-31 16:57:15 +08001443 return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001444 }
1445
1446 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1447 {
Logan Chien191b3052018-08-31 16:57:15 +08001448 return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001449 }
1450
1451 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1452 {
Logan Chien191b3052018-08-31 16:57:15 +08001453 return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001454 }
1455
1456 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1457 {
Logan Chien191b3052018-08-31 16:57:15 +08001458 return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001459 }
1460
1461 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1462 {
Logan Chien191b3052018-08-31 16:57:15 +08001463 return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001464 }
1465
1466 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1467 {
Logan Chien191b3052018-08-31 16:57:15 +08001468 return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001469 }
1470
1471 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1472 {
Logan Chien191b3052018-08-31 16:57:15 +08001473 return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001474 }
1475
1476 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1477 {
Logan Chien191b3052018-08-31 16:57:15 +08001478 return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001479 }
1480
1481 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1482 {
Logan Chien191b3052018-08-31 16:57:15 +08001483 return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001484 }
1485
1486 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1487 {
Logan Chien191b3052018-08-31 16:57:15 +08001488 return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001489 }
1490
1491 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1492 {
Logan Chien191b3052018-08-31 16:57:15 +08001493 return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001494 }
1495
1496 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1497 {
Logan Chien191b3052018-08-31 16:57:15 +08001498 return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001499 }
1500
1501 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1502 {
Logan Chien191b3052018-08-31 16:57:15 +08001503 return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001504 }
1505
1506 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1507 {
Logan Chien191b3052018-08-31 16:57:15 +08001508 return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001509 }
1510
1511 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1512 {
Logan Chien191b3052018-08-31 16:57:15 +08001513 return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001514 }
1515
1516 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1517 {
Logan Chien191b3052018-08-31 16:57:15 +08001518 return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001519 }
1520
1521 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1522 {
Logan Chien191b3052018-08-31 16:57:15 +08001523 return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001524 }
1525
1526 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1527 {
Logan Chien191b3052018-08-31 16:57:15 +08001528 return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001529 }
1530
1531 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1532 {
Logan Chien191b3052018-08-31 16:57:15 +08001533 return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001534 }
1535
1536 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1537 {
Logan Chien191b3052018-08-31 16:57:15 +08001538 return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001539 }
1540
1541 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1542 {
Ben Clayton71008d82019-03-05 17:17:59 +00001543 return V(::builder->CreateFCmpUNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001544 }
1545
Nicolas Capense95d5342016-09-30 11:37:28 -04001546 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
John Bauman89401822014-05-06 15:04:28 -04001547 {
Logan Chien191b3052018-08-31 16:57:15 +08001548 assert(V(vector)->getType()->getContainedType(0) == T(type));
1549 return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001550 }
1551
1552 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1553 {
Logan Chien191b3052018-08-31 16:57:15 +08001554 return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001555 }
1556
Logan Chien191b3052018-08-31 16:57:15 +08001557 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
John Bauman89401822014-05-06 15:04:28 -04001558 {
Logan Chien191b3052018-08-31 16:57:15 +08001559 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
Nicolas Capense89cd582016-09-30 14:23:47 -04001560 const int maxSize = 16;
1561 llvm::Constant *swizzle[maxSize];
1562 assert(size <= maxSize);
1563
1564 for(int i = 0; i < size; i++)
1565 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001566 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
Nicolas Capense89cd582016-09-30 14:23:47 -04001567 }
1568
1569 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
1570
Logan Chien191b3052018-08-31 16:57:15 +08001571 return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
John Bauman89401822014-05-06 15:04:28 -04001572 }
1573
Logan Chien191b3052018-08-31 16:57:15 +08001574 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001575 {
Logan Chien191b3052018-08-31 16:57:15 +08001576 return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
John Bauman89401822014-05-06 15:04:28 -04001577 }
1578
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001579 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
John Bauman89401822014-05-06 15:04:28 -04001580 {
Logan Chien191b3052018-08-31 16:57:15 +08001581 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
John Bauman89401822014-05-06 15:04:28 -04001582 }
1583
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001584 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
John Bauman89401822014-05-06 15:04:28 -04001585 {
Logan Chien191b3052018-08-31 16:57:15 +08001586 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
1587 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
John Bauman89401822014-05-06 15:04:28 -04001588 }
1589
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001590 void Nucleus::createUnreachable()
John Bauman89401822014-05-06 15:04:28 -04001591 {
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001592 ::builder->CreateUnreachable();
John Bauman89401822014-05-06 15:04:28 -04001593 }
1594
Nicolas Capensac230122016-09-20 14:30:06 -04001595 Type *Nucleus::getPointerType(Type *ElementType)
John Bauman89401822014-05-06 15:04:28 -04001596 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001597 return T(llvm::PointerType::get(T(ElementType), 0));
John Bauman89401822014-05-06 15:04:28 -04001598 }
1599
Nicolas Capens13ac2322016-10-13 14:52:12 -04001600 Value *Nucleus::createNullValue(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001601 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001602 return V(llvm::Constant::getNullValue(T(Ty)));
John Bauman89401822014-05-06 15:04:28 -04001603 }
1604
Nicolas Capens13ac2322016-10-13 14:52:12 -04001605 Value *Nucleus::createConstantLong(int64_t i)
John Bauman89401822014-05-06 15:04:28 -04001606 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001607 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001608 }
1609
Nicolas Capens13ac2322016-10-13 14:52:12 -04001610 Value *Nucleus::createConstantInt(int i)
John Bauman89401822014-05-06 15:04:28 -04001611 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001612 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001613 }
1614
Nicolas Capens13ac2322016-10-13 14:52:12 -04001615 Value *Nucleus::createConstantInt(unsigned int i)
John Bauman89401822014-05-06 15:04:28 -04001616 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001617 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001618 }
1619
Nicolas Capens13ac2322016-10-13 14:52:12 -04001620 Value *Nucleus::createConstantBool(bool b)
John Bauman89401822014-05-06 15:04:28 -04001621 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001622 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
John Bauman89401822014-05-06 15:04:28 -04001623 }
1624
Nicolas Capens13ac2322016-10-13 14:52:12 -04001625 Value *Nucleus::createConstantByte(signed char i)
John Bauman89401822014-05-06 15:04:28 -04001626 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001627 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001628 }
1629
Nicolas Capens13ac2322016-10-13 14:52:12 -04001630 Value *Nucleus::createConstantByte(unsigned char i)
John Bauman89401822014-05-06 15:04:28 -04001631 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001632 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001633 }
1634
Nicolas Capens13ac2322016-10-13 14:52:12 -04001635 Value *Nucleus::createConstantShort(short i)
John Bauman89401822014-05-06 15:04:28 -04001636 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001637 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001638 }
1639
Nicolas Capens13ac2322016-10-13 14:52:12 -04001640 Value *Nucleus::createConstantShort(unsigned short i)
John Bauman89401822014-05-06 15:04:28 -04001641 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001642 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001643 }
1644
Nicolas Capens13ac2322016-10-13 14:52:12 -04001645 Value *Nucleus::createConstantFloat(float x)
John Bauman89401822014-05-06 15:04:28 -04001646 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001647 return V(llvm::ConstantFP::get(T(Float::getType()), x));
John Bauman89401822014-05-06 15:04:28 -04001648 }
1649
Nicolas Capens13ac2322016-10-13 14:52:12 -04001650 Value *Nucleus::createNullPointer(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001651 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001652 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
John Bauman89401822014-05-06 15:04:28 -04001653 }
1654
Nicolas Capens13ac2322016-10-13 14:52:12 -04001655 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
John Bauman89401822014-05-06 15:04:28 -04001656 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001657 assert(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001658 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1659 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
1660 assert(numElements <= 16 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001661 llvm::Constant *constantVector[16];
1662
Nicolas Capens69674fb2017-09-01 11:08:44 -04001663 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001664 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001665 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001666 }
1667
Nicolas Capens69674fb2017-09-01 11:08:44 -04001668 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
Nicolas Capens13ac2322016-10-13 14:52:12 -04001669 }
1670
1671 Value *Nucleus::createConstantVector(const double *constants, Type *type)
1672 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001673 assert(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001674 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1675 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
1676 assert(numElements <= 8 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001677 llvm::Constant *constantVector[8];
1678
Nicolas Capens69674fb2017-09-01 11:08:44 -04001679 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001680 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001681 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001682 }
1683
Nicolas Capens69674fb2017-09-01 11:08:44 -04001684 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
John Bauman89401822014-05-06 15:04:28 -04001685 }
1686
John Bauman19bac1e2014-05-06 15:23:49 -04001687 Type *Void::getType()
John Bauman89401822014-05-06 15:04:28 -04001688 {
Nicolas Capensac230122016-09-20 14:30:06 -04001689 return T(llvm::Type::getVoidTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04001690 }
1691
John Bauman19bac1e2014-05-06 15:23:49 -04001692 Type *Bool::getType()
John Bauman89401822014-05-06 15:04:28 -04001693 {
Nicolas Capensac230122016-09-20 14:30:06 -04001694 return T(llvm::Type::getInt1Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001695 }
1696
John Bauman19bac1e2014-05-06 15:23:49 -04001697 Type *Byte::getType()
John Bauman89401822014-05-06 15:04:28 -04001698 {
Nicolas Capensac230122016-09-20 14:30:06 -04001699 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001700 }
1701
John Bauman19bac1e2014-05-06 15:23:49 -04001702 Type *SByte::getType()
John Bauman89401822014-05-06 15:04:28 -04001703 {
Nicolas Capensac230122016-09-20 14:30:06 -04001704 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001705 }
1706
John Bauman19bac1e2014-05-06 15:23:49 -04001707 Type *Short::getType()
John Bauman89401822014-05-06 15:04:28 -04001708 {
Nicolas Capensac230122016-09-20 14:30:06 -04001709 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001710 }
1711
John Bauman19bac1e2014-05-06 15:23:49 -04001712 Type *UShort::getType()
John Bauman89401822014-05-06 15:04:28 -04001713 {
Nicolas Capensac230122016-09-20 14:30:06 -04001714 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001715 }
1716
John Bauman19bac1e2014-05-06 15:23:49 -04001717 Type *Byte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001718 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001719 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001720 }
1721
John Bauman19bac1e2014-05-06 15:23:49 -04001722 Type *SByte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001723 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001724 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001725 }
1726
John Bauman19bac1e2014-05-06 15:23:49 -04001727 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001728 {
Logan Chiene3191012018-08-24 22:01:50 +08001729#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001730 return x86::paddusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001731#else
1732 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
1733#endif
John Bauman89401822014-05-06 15:04:28 -04001734 }
John Bauman66b8ab22014-05-06 15:57:45 -04001735
John Bauman19bac1e2014-05-06 15:23:49 -04001736 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001737 {
Logan Chiene3191012018-08-24 22:01:50 +08001738#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001739 return x86::psubusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001740#else
1741 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
1742#endif
John Bauman89401822014-05-06 15:04:28 -04001743 }
1744
John Bauman19bac1e2014-05-06 15:23:49 -04001745 RValue<Int> SignMask(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04001746 {
Logan Chiene3191012018-08-24 22:01:50 +08001747#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001748 return x86::pmovmskb(x);
Logan Chiene3191012018-08-24 22:01:50 +08001749#else
1750 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
1751#endif
John Bauman89401822014-05-06 15:04:28 -04001752 }
1753
John Bauman19bac1e2014-05-06 15:23:49 -04001754// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001755// {
Logan Chiene3191012018-08-24 22:01:50 +08001756//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001757// return x86::pcmpgtb(x, y); // FIXME: Signedness
Logan Chiene3191012018-08-24 22:01:50 +08001758//#else
1759// return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
1760//#endif
John Bauman89401822014-05-06 15:04:28 -04001761// }
John Bauman66b8ab22014-05-06 15:57:45 -04001762
John Bauman19bac1e2014-05-06 15:23:49 -04001763 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001764 {
Logan Chiene3191012018-08-24 22:01:50 +08001765#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001766 return x86::pcmpeqb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001767#else
1768 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
1769#endif
John Bauman89401822014-05-06 15:04:28 -04001770 }
1771
John Bauman19bac1e2014-05-06 15:23:49 -04001772 Type *Byte8::getType()
John Bauman89401822014-05-06 15:04:28 -04001773 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001774 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04001775 }
1776
John Bauman19bac1e2014-05-06 15:23:49 -04001777 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001778 {
Logan Chiene3191012018-08-24 22:01:50 +08001779#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001780 return x86::paddsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001781#else
1782 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
1783#endif
John Bauman89401822014-05-06 15:04:28 -04001784 }
John Bauman66b8ab22014-05-06 15:57:45 -04001785
John Bauman19bac1e2014-05-06 15:23:49 -04001786 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001787 {
Logan Chiene3191012018-08-24 22:01:50 +08001788#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001789 return x86::psubsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001790#else
1791 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
1792#endif
John Bauman89401822014-05-06 15:04:28 -04001793 }
1794
John Bauman19bac1e2014-05-06 15:23:49 -04001795 RValue<Int> SignMask(RValue<SByte8> x)
John Bauman89401822014-05-06 15:04:28 -04001796 {
Logan Chiene3191012018-08-24 22:01:50 +08001797#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001798 return x86::pmovmskb(As<Byte8>(x));
Logan Chiene3191012018-08-24 22:01:50 +08001799#else
1800 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
1801#endif
John Bauman89401822014-05-06 15:04:28 -04001802 }
1803
John Bauman19bac1e2014-05-06 15:23:49 -04001804 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001805 {
Logan Chiene3191012018-08-24 22:01:50 +08001806#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001807 return x86::pcmpgtb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001808#else
1809 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
1810#endif
John Bauman89401822014-05-06 15:04:28 -04001811 }
John Bauman66b8ab22014-05-06 15:57:45 -04001812
John Bauman19bac1e2014-05-06 15:23:49 -04001813 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001814 {
Logan Chiene3191012018-08-24 22:01:50 +08001815#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001816 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
Logan Chiene3191012018-08-24 22:01:50 +08001817#else
1818 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
1819#endif
John Bauman89401822014-05-06 15:04:28 -04001820 }
1821
John Bauman19bac1e2014-05-06 15:23:49 -04001822 Type *SByte8::getType()
John Bauman89401822014-05-06 15:04:28 -04001823 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001824 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04001825 }
1826
John Bauman19bac1e2014-05-06 15:23:49 -04001827 Type *Byte16::getType()
John Bauman89401822014-05-06 15:04:28 -04001828 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001829 return T(llvm::VectorType::get(T(Byte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04001830 }
1831
John Bauman19bac1e2014-05-06 15:23:49 -04001832 Type *SByte16::getType()
John Bauman89401822014-05-06 15:04:28 -04001833 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001834 return T(llvm::VectorType::get(T(SByte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04001835 }
1836
Nicolas Capens16b5f152016-10-13 13:39:01 -04001837 Type *Short2::getType()
1838 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001839 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04001840 }
1841
Nicolas Capens16b5f152016-10-13 13:39:01 -04001842 Type *UShort2::getType()
1843 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001844 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04001845 }
1846
John Bauman19bac1e2014-05-06 15:23:49 -04001847 Short4::Short4(RValue<Int4> cast)
John Bauman89401822014-05-06 15:04:28 -04001848 {
Nicolas Capens01a97962017-07-28 17:30:51 -04001849 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
John Bauman89401822014-05-06 15:04:28 -04001850 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
1851
Nicolas Capens01a97962017-07-28 17:30:51 -04001852 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
1853 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
John Bauman89401822014-05-06 15:04:28 -04001854
John Bauman66b8ab22014-05-06 15:57:45 -04001855 storeValue(short4);
John Bauman89401822014-05-06 15:04:28 -04001856 }
1857
John Bauman19bac1e2014-05-06 15:23:49 -04001858// Short4::Short4(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04001859// {
1860// }
1861
John Bauman19bac1e2014-05-06 15:23:49 -04001862 Short4::Short4(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04001863 {
John Bauman89401822014-05-06 15:04:28 -04001864 Int4 v4i32 = Int4(cast);
Logan Chiene3191012018-08-24 22:01:50 +08001865#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001866 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
Logan Chiene3191012018-08-24 22:01:50 +08001867#else
1868 Value *v = v4i32.loadValue();
1869 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
1870#endif
John Bauman66b8ab22014-05-06 15:57:45 -04001871
1872 storeValue(As<Short4>(Int2(v4i32)).value);
John Bauman89401822014-05-06 15:04:28 -04001873 }
1874
John Bauman19bac1e2014-05-06 15:23:49 -04001875 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04001876 {
Logan Chiene3191012018-08-24 22:01:50 +08001877#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001878 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
1879
1880 return x86::psllw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08001881#else
1882 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
1883#endif
John Bauman89401822014-05-06 15:04:28 -04001884 }
1885
John Bauman19bac1e2014-05-06 15:23:49 -04001886 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04001887 {
Logan Chiene3191012018-08-24 22:01:50 +08001888#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001889 return x86::psraw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08001890#else
1891 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
1892#endif
John Bauman89401822014-05-06 15:04:28 -04001893 }
1894
John Bauman19bac1e2014-05-06 15:23:49 -04001895 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001896 {
Logan Chiene3191012018-08-24 22:01:50 +08001897#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001898 return x86::pmaxsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001899#else
1900 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
1901#endif
John Bauman89401822014-05-06 15:04:28 -04001902 }
1903
John Bauman19bac1e2014-05-06 15:23:49 -04001904 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001905 {
Logan Chiene3191012018-08-24 22:01:50 +08001906#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001907 return x86::pminsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001908#else
1909 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
1910#endif
John Bauman89401822014-05-06 15:04:28 -04001911 }
1912
John Bauman19bac1e2014-05-06 15:23:49 -04001913 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001914 {
Logan Chiene3191012018-08-24 22:01:50 +08001915#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001916 return x86::paddsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001917#else
1918 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
1919#endif
John Bauman89401822014-05-06 15:04:28 -04001920 }
1921
John Bauman19bac1e2014-05-06 15:23:49 -04001922 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001923 {
Logan Chiene3191012018-08-24 22:01:50 +08001924#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001925 return x86::psubsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001926#else
1927 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
1928#endif
John Bauman89401822014-05-06 15:04:28 -04001929 }
1930
John Bauman19bac1e2014-05-06 15:23:49 -04001931 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001932 {
Logan Chiene3191012018-08-24 22:01:50 +08001933#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001934 return x86::pmulhw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001935#else
1936 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
1937#endif
John Bauman89401822014-05-06 15:04:28 -04001938 }
1939
John Bauman19bac1e2014-05-06 15:23:49 -04001940 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001941 {
Logan Chiene3191012018-08-24 22:01:50 +08001942#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001943 return x86::pmaddwd(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001944#else
1945 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
1946#endif
John Bauman89401822014-05-06 15:04:28 -04001947 }
1948
Nicolas Capens33438a62017-09-27 11:47:35 -04001949 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001950 {
Logan Chiene3191012018-08-24 22:01:50 +08001951#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04001952 auto result = x86::packsswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001953#else
1954 auto result = V(lowerPack(V(x.value), V(y.value), true));
1955#endif
Nicolas Capens01a97962017-07-28 17:30:51 -04001956 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
John Bauman89401822014-05-06 15:04:28 -04001957 }
1958
Nicolas Capens33438a62017-09-27 11:47:35 -04001959 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
1960 {
Logan Chiene3191012018-08-24 22:01:50 +08001961#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04001962 auto result = x86::packuswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001963#else
1964 auto result = V(lowerPack(V(x.value), V(y.value), false));
1965#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04001966 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
1967 }
1968
John Bauman19bac1e2014-05-06 15:23:49 -04001969 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001970 {
Logan Chiene3191012018-08-24 22:01:50 +08001971#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001972 return x86::pcmpgtw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001973#else
1974 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
1975#endif
John Bauman89401822014-05-06 15:04:28 -04001976 }
1977
John Bauman19bac1e2014-05-06 15:23:49 -04001978 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001979 {
Logan Chiene3191012018-08-24 22:01:50 +08001980#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001981 return x86::pcmpeqw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001982#else
1983 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
1984#endif
John Bauman89401822014-05-06 15:04:28 -04001985 }
1986
John Bauman19bac1e2014-05-06 15:23:49 -04001987 Type *Short4::getType()
John Bauman89401822014-05-06 15:04:28 -04001988 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001989 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04001990 }
1991
John Bauman19bac1e2014-05-06 15:23:49 -04001992 UShort4::UShort4(RValue<Float4> cast, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04001993 {
John Bauman89401822014-05-06 15:04:28 -04001994 if(saturate)
1995 {
Logan Chiena8385ed2018-09-26 19:22:54 +08001996#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001997 if(CPUID::supportsSSE4_1())
1998 {
Nicolas Capens01a97962017-07-28 17:30:51 -04001999 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
Nicolas Capens33438a62017-09-27 11:47:35 -04002000 *this = As<Short4>(PackUnsigned(int4, int4));
John Bauman89401822014-05-06 15:04:28 -04002001 }
2002 else
Logan Chiena8385ed2018-09-26 19:22:54 +08002003#endif
John Bauman89401822014-05-06 15:04:28 -04002004 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002005 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
John Bauman89401822014-05-06 15:04:28 -04002006 }
2007 }
2008 else
2009 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002010 *this = Short4(Int4(cast));
John Bauman89401822014-05-06 15:04:28 -04002011 }
2012 }
2013
John Bauman19bac1e2014-05-06 15:23:49 -04002014 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002015 {
Logan Chiene3191012018-08-24 22:01:50 +08002016#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002017 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2018
2019 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002020#else
2021 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
2022#endif
John Bauman89401822014-05-06 15:04:28 -04002023 }
2024
John Bauman19bac1e2014-05-06 15:23:49 -04002025 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002026 {
Logan Chiene3191012018-08-24 22:01:50 +08002027#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002028 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
2029
2030 return x86::psrlw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002031#else
2032 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2033#endif
John Bauman89401822014-05-06 15:04:28 -04002034 }
2035
John Bauman19bac1e2014-05-06 15:23:49 -04002036 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002037 {
John Bauman66b8ab22014-05-06 15:57:45 -04002038 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002039 }
2040
John Bauman19bac1e2014-05-06 15:23:49 -04002041 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002042 {
John Bauman66b8ab22014-05-06 15:57:45 -04002043 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002044 }
2045
John Bauman19bac1e2014-05-06 15:23:49 -04002046 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002047 {
Logan Chiene3191012018-08-24 22:01:50 +08002048#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002049 return x86::paddusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002050#else
2051 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2052#endif
John Bauman89401822014-05-06 15:04:28 -04002053 }
2054
John Bauman19bac1e2014-05-06 15:23:49 -04002055 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002056 {
Logan Chiene3191012018-08-24 22:01:50 +08002057#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002058 return x86::psubusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002059#else
2060 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2061#endif
John Bauman89401822014-05-06 15:04:28 -04002062 }
2063
John Bauman19bac1e2014-05-06 15:23:49 -04002064 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002065 {
Logan Chiene3191012018-08-24 22:01:50 +08002066#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002067 return x86::pmulhuw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002068#else
2069 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2070#endif
John Bauman89401822014-05-06 15:04:28 -04002071 }
2072
John Bauman19bac1e2014-05-06 15:23:49 -04002073 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002074 {
Logan Chiene3191012018-08-24 22:01:50 +08002075#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002076 return x86::pavgw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002077#else
2078 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
2079#endif
John Bauman89401822014-05-06 15:04:28 -04002080 }
2081
John Bauman19bac1e2014-05-06 15:23:49 -04002082 Type *UShort4::getType()
John Bauman89401822014-05-06 15:04:28 -04002083 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002084 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002085 }
2086
John Bauman19bac1e2014-05-06 15:23:49 -04002087 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002088 {
Logan Chiene3191012018-08-24 22:01:50 +08002089#if defined(__i386__) || defined(__x86_64__)
2090 return x86::psllw(lhs, rhs);
2091#else
2092 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
2093#endif
John Bauman89401822014-05-06 15:04:28 -04002094 }
2095
John Bauman19bac1e2014-05-06 15:23:49 -04002096 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002097 {
Logan Chiene3191012018-08-24 22:01:50 +08002098#if defined(__i386__) || defined(__x86_64__)
2099 return x86::psraw(lhs, rhs);
2100#else
2101 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
2102#endif
John Bauman89401822014-05-06 15:04:28 -04002103 }
2104
John Bauman19bac1e2014-05-06 15:23:49 -04002105 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002106 {
Logan Chiene3191012018-08-24 22:01:50 +08002107#if defined(__i386__) || defined(__x86_64__)
2108 return x86::pmaddwd(x, y);
2109#else
2110 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
2111#endif
John Bauman89401822014-05-06 15:04:28 -04002112 }
2113
John Bauman19bac1e2014-05-06 15:23:49 -04002114 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002115 {
Logan Chiene3191012018-08-24 22:01:50 +08002116#if defined(__i386__) || defined(__x86_64__)
2117 return x86::pmulhw(x, y);
2118#else
2119 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2120#endif
John Bauman89401822014-05-06 15:04:28 -04002121 }
2122
John Bauman19bac1e2014-05-06 15:23:49 -04002123 Type *Short8::getType()
John Bauman89401822014-05-06 15:04:28 -04002124 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002125 return T(llvm::VectorType::get(T(Short::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002126 }
2127
John Bauman19bac1e2014-05-06 15:23:49 -04002128 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002129 {
Logan Chiene3191012018-08-24 22:01:50 +08002130#if defined(__i386__) || defined(__x86_64__)
2131 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
2132#else
2133 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
2134#endif
John Bauman89401822014-05-06 15:04:28 -04002135 }
2136
John Bauman19bac1e2014-05-06 15:23:49 -04002137 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002138 {
Logan Chiene3191012018-08-24 22:01:50 +08002139#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002140 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
Logan Chiene3191012018-08-24 22:01:50 +08002141#else
2142 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
2143#endif
John Bauman89401822014-05-06 15:04:28 -04002144 }
2145
John Bauman19bac1e2014-05-06 15:23:49 -04002146 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
John Bauman89401822014-05-06 15:04:28 -04002147 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002148 int pshufb[16] =
2149 {
2150 select0 + 0,
2151 select0 + 1,
2152 select1 + 0,
2153 select1 + 1,
2154 select2 + 0,
2155 select2 + 1,
2156 select3 + 0,
2157 select3 + 1,
2158 select4 + 0,
2159 select4 + 1,
2160 select5 + 0,
2161 select5 + 1,
2162 select6 + 0,
2163 select6 + 1,
2164 select7 + 0,
2165 select7 + 1,
2166 };
John Bauman89401822014-05-06 15:04:28 -04002167
2168 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
Nicolas Capense89cd582016-09-30 14:23:47 -04002169 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
John Bauman89401822014-05-06 15:04:28 -04002170 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
2171
2172 return RValue<UShort8>(short8);
2173 }
2174
John Bauman19bac1e2014-05-06 15:23:49 -04002175 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04002176 {
Logan Chiene3191012018-08-24 22:01:50 +08002177#if defined(__i386__) || defined(__x86_64__)
2178 return x86::pmulhuw(x, y);
2179#else
2180 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2181#endif
John Bauman89401822014-05-06 15:04:28 -04002182 }
2183
John Bauman19bac1e2014-05-06 15:23:49 -04002184 Type *UShort8::getType()
John Bauman89401822014-05-06 15:04:28 -04002185 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002186 return T(llvm::VectorType::get(T(UShort::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002187 }
2188
Nicolas Capens96d4e092016-11-18 14:22:38 -05002189 RValue<Int> operator++(Int &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002190 {
2191 RValue<Int> res = val;
2192
Logan Chien191b3052018-08-31 16:57:15 +08002193 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002194 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002195
2196 return res;
2197 }
2198
Nicolas Capens96d4e092016-11-18 14:22:38 -05002199 const Int &operator++(Int &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002200 {
Logan Chien191b3052018-08-31 16:57:15 +08002201 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002202 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002203
2204 return val;
2205 }
2206
Nicolas Capens96d4e092016-11-18 14:22:38 -05002207 RValue<Int> operator--(Int &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002208 {
2209 RValue<Int> res = val;
2210
Logan Chien191b3052018-08-31 16:57:15 +08002211 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002212 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002213
2214 return res;
2215 }
2216
Nicolas Capens96d4e092016-11-18 14:22:38 -05002217 const Int &operator--(Int &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002218 {
Logan Chien191b3052018-08-31 16:57:15 +08002219 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002220 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002221
2222 return val;
2223 }
2224
John Bauman19bac1e2014-05-06 15:23:49 -04002225 RValue<Int> RoundInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002226 {
Logan Chiene3191012018-08-24 22:01:50 +08002227#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002228 return x86::cvtss2si(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002229#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002230 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002231#endif
John Bauman89401822014-05-06 15:04:28 -04002232 }
2233
John Bauman19bac1e2014-05-06 15:23:49 -04002234 Type *Int::getType()
John Bauman89401822014-05-06 15:04:28 -04002235 {
Nicolas Capensac230122016-09-20 14:30:06 -04002236 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002237 }
2238
John Bauman19bac1e2014-05-06 15:23:49 -04002239 Type *Long::getType()
John Bauman89401822014-05-06 15:04:28 -04002240 {
Nicolas Capensac230122016-09-20 14:30:06 -04002241 return T(llvm::Type::getInt64Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002242 }
2243
John Bauman19bac1e2014-05-06 15:23:49 -04002244 UInt::UInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002245 {
Alexis Hetu764d1422016-09-28 08:44:22 -04002246 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2247 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
John Bauman89401822014-05-06 15:04:28 -04002248
Alexis Hetu764d1422016-09-28 08:44:22 -04002249 // Smallest positive value representable in UInt, but not in Int
2250 const unsigned int ustart = 0x80000000u;
2251 const float ustartf = float(ustart);
2252
2253 // If the value is negative, store 0, otherwise store the result of the conversion
2254 storeValue((~(As<Int>(cast) >> 31) &
2255 // Check if the value can be represented as an Int
2256 IfThenElse(cast >= ustartf,
2257 // If the value is too large, subtract ustart and re-add it after conversion.
2258 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2259 // Otherwise, just convert normally
2260 Int(cast))).value);
John Bauman89401822014-05-06 15:04:28 -04002261 }
2262
Nicolas Capens96d4e092016-11-18 14:22:38 -05002263 RValue<UInt> operator++(UInt &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002264 {
2265 RValue<UInt> res = val;
2266
Logan Chien191b3052018-08-31 16:57:15 +08002267 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002268 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002269
2270 return res;
2271 }
2272
Nicolas Capens96d4e092016-11-18 14:22:38 -05002273 const UInt &operator++(UInt &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002274 {
Logan Chien191b3052018-08-31 16:57:15 +08002275 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002276 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002277
2278 return val;
2279 }
2280
Nicolas Capens96d4e092016-11-18 14:22:38 -05002281 RValue<UInt> operator--(UInt &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002282 {
2283 RValue<UInt> res = val;
2284
Logan Chien191b3052018-08-31 16:57:15 +08002285 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002286 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002287
2288 return res;
2289 }
2290
Nicolas Capens96d4e092016-11-18 14:22:38 -05002291 const UInt &operator--(UInt &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002292 {
Logan Chien191b3052018-08-31 16:57:15 +08002293 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002294 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002295
2296 return val;
2297 }
2298
//	RValue<UInt> RoundUInt(RValue<Float> cast)
//	{
//#if defined(__i386__) || defined(__x86_64__)
//		return x86::cvtss2si(cast);   // FIXME: Unsigned
//#else
//		return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
//#endif
//	}
2307
John Bauman19bac1e2014-05-06 15:23:49 -04002308 Type *UInt::getType()
John Bauman89401822014-05-06 15:04:28 -04002309 {
Nicolas Capensac230122016-09-20 14:30:06 -04002310 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002311 }
2312
//	Int2::Int2(RValue<Int> cast)
//	{
//		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
//		Value *vector = Nucleus::createBitCast(extend, Int2::getType());
//
//		int shuffle[2] = {0, 0};
//		Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
//
//		storeValue(replicate);
//	}
John Bauman89401822014-05-06 15:04:28 -04002323
John Bauman19bac1e2014-05-06 15:23:49 -04002324 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002325 {
Logan Chiene3191012018-08-24 22:01:50 +08002326#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002327 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
2328
2329 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002330#else
2331 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
2332#endif
John Bauman89401822014-05-06 15:04:28 -04002333 }
2334
John Bauman19bac1e2014-05-06 15:23:49 -04002335 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002336 {
Logan Chiene3191012018-08-24 22:01:50 +08002337#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002338 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
2339
2340 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002341#else
2342 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
2343#endif
John Bauman89401822014-05-06 15:04:28 -04002344 }
2345
John Bauman19bac1e2014-05-06 15:23:49 -04002346 Type *Int2::getType()
John Bauman89401822014-05-06 15:04:28 -04002347 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002348 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002349 }
2350
John Bauman19bac1e2014-05-06 15:23:49 -04002351 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002352 {
Logan Chiene3191012018-08-24 22:01:50 +08002353#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002354 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
2355
2356 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002357#else
2358 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
2359#endif
John Bauman89401822014-05-06 15:04:28 -04002360 }
2361
John Bauman19bac1e2014-05-06 15:23:49 -04002362 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002363 {
Logan Chiene3191012018-08-24 22:01:50 +08002364#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002365 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
2366
2367 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002368#else
2369 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
2370#endif
John Bauman89401822014-05-06 15:04:28 -04002371 }
2372
John Bauman19bac1e2014-05-06 15:23:49 -04002373 Type *UInt2::getType()
John Bauman89401822014-05-06 15:04:28 -04002374 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002375 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002376 }
2377
Nicolas Capenscb986762017-01-20 11:34:37 -05002378 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002379 {
Logan Chiene3191012018-08-24 22:01:50 +08002380#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002381 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002382 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002383 *this = x86::pmovzxbd(As<Byte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002384 }
2385 else
Logan Chiene3191012018-08-24 22:01:50 +08002386#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002387 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002388 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Nicolas Capens01a97962017-07-28 17:30:51 -04002389 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002390 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002391
Nicolas Capense89cd582016-09-30 14:23:47 -04002392 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002393 Value *c = Nucleus::createBitCast(b, Short8::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002394 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002395
Nicolas Capens01a97962017-07-28 17:30:51 -04002396 *this = As<Int4>(d);
2397 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002398 }
2399
Nicolas Capenscb986762017-01-20 11:34:37 -05002400 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002401 {
Logan Chiene3191012018-08-24 22:01:50 +08002402#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002403 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002404 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002405 *this = x86::pmovsxbd(As<SByte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002406 }
2407 else
Logan Chiene3191012018-08-24 22:01:50 +08002408#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002409 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002410 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2411 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
2412 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002413
Nicolas Capense89cd582016-09-30 14:23:47 -04002414 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002415 Value *c = Nucleus::createBitCast(b, Short8::getType());
2416 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002417
Nicolas Capens01a97962017-07-28 17:30:51 -04002418 *this = As<Int4>(d) >> 24;
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002419 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002420 }
2421
Nicolas Capenscb986762017-01-20 11:34:37 -05002422 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002423 {
Logan Chiene3191012018-08-24 22:01:50 +08002424#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002425 if(CPUID::supportsSSE4_1())
2426 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002427 *this = x86::pmovsxwd(As<Short8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002428 }
2429 else
Logan Chiene3191012018-08-24 22:01:50 +08002430#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002431 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002432 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002433 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2434 *this = As<Int4>(c) >> 16;
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002435 }
2436 }
2437
Nicolas Capenscb986762017-01-20 11:34:37 -05002438 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002439 {
Logan Chiene3191012018-08-24 22:01:50 +08002440#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002441 if(CPUID::supportsSSE4_1())
2442 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002443 *this = x86::pmovzxwd(As<UShort8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002444 }
2445 else
Logan Chiene3191012018-08-24 22:01:50 +08002446#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002447 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002448 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002449 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2450 *this = As<Int4>(c);
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002451 }
2452 }
2453
Nicolas Capenscb986762017-01-20 11:34:37 -05002454 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002455 {
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002456 Value *vector = loadValue();
2457 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2458
Nicolas Capense89cd582016-09-30 14:23:47 -04002459 int swizzle[4] = {0, 0, 0, 0};
2460 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002461
2462 storeValue(replicate);
2463 }
2464
John Bauman19bac1e2014-05-06 15:23:49 -04002465 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002466 {
Logan Chiene3191012018-08-24 22:01:50 +08002467#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002468 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002469#else
2470 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
2471#endif
John Bauman89401822014-05-06 15:04:28 -04002472 }
2473
John Bauman19bac1e2014-05-06 15:23:49 -04002474 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002475 {
Logan Chiene3191012018-08-24 22:01:50 +08002476#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002477 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002478#else
2479 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2480#endif
John Bauman89401822014-05-06 15:04:28 -04002481 }
2482
John Bauman19bac1e2014-05-06 15:23:49 -04002483 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2484 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002485 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002486 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2487 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2488 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002489 }
2490
2491 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2492 {
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002493 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2494 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2495 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
2496 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002497 }
2498
2499 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2500 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002501 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2502 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2503 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
2504 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002505 }
2506
2507 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2508 {
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002509 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2510 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2511 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2512 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002513 }
2514
2515 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2516 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002517 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2518 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2519 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
2520 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002521 }
2522
2523 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2524 {
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002525 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2526 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2527 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
2528 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002529 }
2530
2531 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2532 {
Logan Chiene3191012018-08-24 22:01:50 +08002533#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002534 if(CPUID::supportsSSE4_1())
2535 {
2536 return x86::pmaxsd(x, y);
2537 }
2538 else
Logan Chiene3191012018-08-24 22:01:50 +08002539#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002540 {
2541 RValue<Int4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002542 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002543 }
2544 }
2545
2546 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2547 {
Logan Chiene3191012018-08-24 22:01:50 +08002548#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002549 if(CPUID::supportsSSE4_1())
2550 {
2551 return x86::pminsd(x, y);
2552 }
2553 else
Logan Chiene3191012018-08-24 22:01:50 +08002554#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002555 {
2556 RValue<Int4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002557 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002558 }
2559 }
2560
2561 RValue<Int4> RoundInt(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002562 {
Logan Chiene3191012018-08-24 22:01:50 +08002563#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002564 return x86::cvtps2dq(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002565#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002566 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002567#endif
John Bauman89401822014-05-06 15:04:28 -04002568 }
2569
Chris Forbese86b6dc2019-03-01 09:08:47 -08002570 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2571 {
2572 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2573 return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2574 }
2575
2576 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2577 {
2578 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2579 return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2580 }
2581
Nicolas Capens33438a62017-09-27 11:47:35 -04002582 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04002583 {
Logan Chiene3191012018-08-24 22:01:50 +08002584#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002585 return x86::packssdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002586#else
2587 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
2588#endif
John Bauman89401822014-05-06 15:04:28 -04002589 }
2590
Nicolas Capens33438a62017-09-27 11:47:35 -04002591 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
2592 {
Logan Chiene3191012018-08-24 22:01:50 +08002593#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002594 return x86::packusdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002595#else
2596 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
2597#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002598 }
2599
John Bauman19bac1e2014-05-06 15:23:49 -04002600 RValue<Int> SignMask(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04002601 {
Logan Chiene3191012018-08-24 22:01:50 +08002602#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002603 return x86::movmskps(As<Float4>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002604#else
2605 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2606#endif
John Bauman89401822014-05-06 15:04:28 -04002607 }
2608
John Bauman19bac1e2014-05-06 15:23:49 -04002609 Type *Int4::getType()
John Bauman89401822014-05-06 15:04:28 -04002610 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002611 return T(llvm::VectorType::get(T(Int::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002612 }
2613
Nicolas Capenscb986762017-01-20 11:34:37 -05002614 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002615 {
Alexis Hetu764d1422016-09-28 08:44:22 -04002616 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2617 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
John Bauman89401822014-05-06 15:04:28 -04002618
Alexis Hetu764d1422016-09-28 08:44:22 -04002619 // Smallest positive value representable in UInt, but not in Int
2620 const unsigned int ustart = 0x80000000u;
2621 const float ustartf = float(ustart);
2622
2623 // Check if the value can be represented as an Int
2624 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
2625 // If the value is too large, subtract ustart and re-add it after conversion.
2626 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
2627 // Otherwise, just convert normally
2628 (~uiValue & Int4(cast));
2629 // If the value is negative, store 0, otherwise store the result of the conversion
2630 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
John Bauman89401822014-05-06 15:04:28 -04002631 }
2632
John Bauman19bac1e2014-05-06 15:23:49 -04002633 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002634 {
Logan Chiene3191012018-08-24 22:01:50 +08002635#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002636 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002637#else
2638 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
2639#endif
John Bauman89401822014-05-06 15:04:28 -04002640 }
2641
John Bauman19bac1e2014-05-06 15:23:49 -04002642 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002643 {
Logan Chiene3191012018-08-24 22:01:50 +08002644#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002645 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002646#else
2647 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2648#endif
John Bauman89401822014-05-06 15:04:28 -04002649 }
2650
John Bauman19bac1e2014-05-06 15:23:49 -04002651 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
2652 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002653 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002654 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2655 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2656 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002657 }
2658
2659 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
2660 {
2661 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
2662 }
2663
2664 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
2665 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002666 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2667 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2668 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
2669 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002670 }
2671
2672 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
2673 {
2674 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2675 }
2676
2677 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
2678 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002679 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2680 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2681 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
2682 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002683 }
2684
2685 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
2686 {
2687 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
2688 }
2689
2690 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
2691 {
Logan Chiene3191012018-08-24 22:01:50 +08002692#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002693 if(CPUID::supportsSSE4_1())
2694 {
2695 return x86::pmaxud(x, y);
2696 }
2697 else
Logan Chiene3191012018-08-24 22:01:50 +08002698#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002699 {
2700 RValue<UInt4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002701 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002702 }
2703 }
2704
2705 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
2706 {
Logan Chiene3191012018-08-24 22:01:50 +08002707#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002708 if(CPUID::supportsSSE4_1())
2709 {
2710 return x86::pminud(x, y);
2711 }
2712 else
Logan Chiene3191012018-08-24 22:01:50 +08002713#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002714 {
2715 RValue<UInt4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002716 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002717 }
2718 }
2719
John Bauman19bac1e2014-05-06 15:23:49 -04002720 Type *UInt4::getType()
John Bauman89401822014-05-06 15:04:28 -04002721 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002722 return T(llvm::VectorType::get(T(UInt::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002723 }
2724
Alexis Hetu734e2572018-12-20 14:00:49 -05002725 Type *Half::getType()
2726 {
2727 return T(llvm::Type::getInt16Ty(*::context));
2728 }
2729
Nicolas Capens05b3d662016-02-25 23:58:33 -05002730 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04002731 {
Logan Chiene3191012018-08-24 22:01:50 +08002732#if defined(__i386__) || defined(__x86_64__)
2733 if(exactAtPow2)
2734 {
2735 // rcpss uses a piecewise-linear approximation which minimizes the relative error
2736 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
2737 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
2738 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04002739 return x86::rcpss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002740#else
2741 return As<Float>(V(lowerRCP(V(x.value))));
2742#endif
John Bauman89401822014-05-06 15:04:28 -04002743 }
John Bauman66b8ab22014-05-06 15:57:45 -04002744
John Bauman19bac1e2014-05-06 15:23:49 -04002745 RValue<Float> RcpSqrt_pp(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002746 {
Logan Chiene3191012018-08-24 22:01:50 +08002747#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002748 return x86::rsqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002749#else
2750 return As<Float>(V(lowerRSQRT(V(x.value))));
2751#endif
John Bauman89401822014-05-06 15:04:28 -04002752 }
2753
John Bauman19bac1e2014-05-06 15:23:49 -04002754 RValue<Float> Sqrt(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002755 {
Logan Chiene3191012018-08-24 22:01:50 +08002756#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002757 return x86::sqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002758#else
2759 return As<Float>(V(lowerSQRT(V(x.value))));
2760#endif
John Bauman89401822014-05-06 15:04:28 -04002761 }
2762
John Bauman19bac1e2014-05-06 15:23:49 -04002763 RValue<Float> Round(RValue<Float> x)
2764 {
Logan Chiene3191012018-08-24 22:01:50 +08002765#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002766 if(CPUID::supportsSSE4_1())
2767 {
2768 return x86::roundss(x, 0);
2769 }
2770 else
2771 {
2772 return Float4(Round(Float4(x))).x;
2773 }
Logan Chien83fc07a2018-09-26 22:14:00 +08002774#else
2775 return RValue<Float>(V(lowerRound(V(x.value))));
2776#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002777 }
2778
2779 RValue<Float> Trunc(RValue<Float> x)
2780 {
Logan Chiene3191012018-08-24 22:01:50 +08002781#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002782 if(CPUID::supportsSSE4_1())
2783 {
2784 return x86::roundss(x, 3);
2785 }
2786 else
2787 {
2788 return Float(Int(x)); // Rounded toward zero
2789 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08002790#else
2791 return RValue<Float>(V(lowerTrunc(V(x.value))));
2792#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002793 }
2794
2795 RValue<Float> Frac(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002796 {
Logan Chiene3191012018-08-24 22:01:50 +08002797#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002798 if(CPUID::supportsSSE4_1())
2799 {
2800 return x - x86::floorss(x);
2801 }
2802 else
2803 {
John Bauman19bac1e2014-05-06 15:23:49 -04002804 return Float4(Frac(Float4(x))).x;
John Bauman89401822014-05-06 15:04:28 -04002805 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08002806#else
2807 // x - floor(x) can be 1.0 for very small negative x.
2808 // Clamp against the value just below 1.0.
2809 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
2810#endif
John Bauman89401822014-05-06 15:04:28 -04002811 }
2812
John Bauman19bac1e2014-05-06 15:23:49 -04002813 RValue<Float> Floor(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002814 {
Logan Chiene3191012018-08-24 22:01:50 +08002815#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002816 if(CPUID::supportsSSE4_1())
2817 {
2818 return x86::floorss(x);
2819 }
2820 else
2821 {
2822 return Float4(Floor(Float4(x))).x;
2823 }
Logan Chien40a60052018-09-26 19:03:53 +08002824#else
2825 return RValue<Float>(V(lowerFloor(V(x.value))));
2826#endif
John Bauman89401822014-05-06 15:04:28 -04002827 }
2828
John Bauman19bac1e2014-05-06 15:23:49 -04002829 RValue<Float> Ceil(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002830 {
Logan Chiene3191012018-08-24 22:01:50 +08002831#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002832 if(CPUID::supportsSSE4_1())
2833 {
2834 return x86::ceilss(x);
2835 }
2836 else
Logan Chiene3191012018-08-24 22:01:50 +08002837#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002838 {
2839 return Float4(Ceil(Float4(x))).x;
2840 }
John Bauman89401822014-05-06 15:04:28 -04002841 }
2842
John Bauman19bac1e2014-05-06 15:23:49 -04002843 Type *Float::getType()
John Bauman89401822014-05-06 15:04:28 -04002844 {
Nicolas Capensac230122016-09-20 14:30:06 -04002845 return T(llvm::Type::getFloatTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04002846 }
2847
John Bauman19bac1e2014-05-06 15:23:49 -04002848 Type *Float2::getType()
John Bauman89401822014-05-06 15:04:28 -04002849 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002850 return T(Type_v2f32);
John Bauman89401822014-05-06 15:04:28 -04002851 }
2852
Nicolas Capenscb986762017-01-20 11:34:37 -05002853 Float4::Float4(RValue<Float> rhs) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002854 {
John Bauman66b8ab22014-05-06 15:57:45 -04002855 Value *vector = loadValue();
John Bauman89401822014-05-06 15:04:28 -04002856 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2857
Nicolas Capense89cd582016-09-30 14:23:47 -04002858 int swizzle[4] = {0, 0, 0, 0};
2859 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
John Bauman89401822014-05-06 15:04:28 -04002860
John Bauman66b8ab22014-05-06 15:57:45 -04002861 storeValue(replicate);
John Bauman89401822014-05-06 15:04:28 -04002862 }
2863
John Bauman19bac1e2014-05-06 15:23:49 -04002864 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002865 {
Logan Chiene3191012018-08-24 22:01:50 +08002866#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002867 return x86::maxps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002868#else
2869 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
2870#endif
John Bauman89401822014-05-06 15:04:28 -04002871 }
2872
John Bauman19bac1e2014-05-06 15:23:49 -04002873 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002874 {
Logan Chiene3191012018-08-24 22:01:50 +08002875#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002876 return x86::minps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002877#else
2878 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
2879#endif
John Bauman89401822014-05-06 15:04:28 -04002880 }
2881
Nicolas Capens05b3d662016-02-25 23:58:33 -05002882 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04002883 {
Logan Chiene3191012018-08-24 22:01:50 +08002884#if defined(__i386__) || defined(__x86_64__)
2885 if(exactAtPow2)
2886 {
2887 // rcpps uses a piecewise-linear approximation which minimizes the relative error
2888 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
2889 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
2890 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04002891 return x86::rcpps(x);
Logan Chiene3191012018-08-24 22:01:50 +08002892#else
2893 return As<Float4>(V(lowerRCP(V(x.value))));
2894#endif
John Bauman89401822014-05-06 15:04:28 -04002895 }
John Bauman66b8ab22014-05-06 15:57:45 -04002896
John Bauman19bac1e2014-05-06 15:23:49 -04002897 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04002898 {
Logan Chiene3191012018-08-24 22:01:50 +08002899#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002900 return x86::rsqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08002901#else
2902 return As<Float4>(V(lowerRSQRT(V(x.value))));
2903#endif
John Bauman89401822014-05-06 15:04:28 -04002904 }
2905
John Bauman19bac1e2014-05-06 15:23:49 -04002906 RValue<Float4> Sqrt(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04002907 {
Logan Chiene3191012018-08-24 22:01:50 +08002908#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002909 return x86::sqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08002910#else
2911 return As<Float4>(V(lowerSQRT(V(x.value))));
2912#endif
John Bauman89401822014-05-06 15:04:28 -04002913 }
2914
John Bauman19bac1e2014-05-06 15:23:49 -04002915 RValue<Int> SignMask(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04002916 {
Logan Chiene3191012018-08-24 22:01:50 +08002917#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002918 return x86::movmskps(x);
Logan Chiene3191012018-08-24 22:01:50 +08002919#else
2920 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
2921#endif
John Bauman89401822014-05-06 15:04:28 -04002922 }
2923
John Bauman19bac1e2014-05-06 15:23:49 -04002924 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002925 {
2926 // return As<Int4>(x86::cmpeqps(x, y));
2927 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
2928 }
2929
John Bauman19bac1e2014-05-06 15:23:49 -04002930 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002931 {
2932 // return As<Int4>(x86::cmpltps(x, y));
2933 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
2934 }
2935
John Bauman19bac1e2014-05-06 15:23:49 -04002936 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002937 {
2938 // return As<Int4>(x86::cmpleps(x, y));
2939 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
2940 }
2941
John Bauman19bac1e2014-05-06 15:23:49 -04002942 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002943 {
2944 // return As<Int4>(x86::cmpneqps(x, y));
2945 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
2946 }
2947
John Bauman19bac1e2014-05-06 15:23:49 -04002948 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002949 {
2950 // return As<Int4>(x86::cmpnltps(x, y));
2951 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
2952 }
2953
John Bauman19bac1e2014-05-06 15:23:49 -04002954 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002955 {
2956 // return As<Int4>(x86::cmpnleps(x, y));
2957 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
2958 }
2959
Ben Claytonec1aeb82019-03-04 19:33:27 +00002960 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
2961 {
2962 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUEQ(x.value, y.value), Int4::getType()));
2963 }
2964
2965 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
2966 {
2967 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULT(x.value, y.value), Int4::getType()));
2968 }
2969
2970 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
2971 {
2972 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULE(x.value, y.value), Int4::getType()));
2973 }
2974
2975 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
2976 {
2977 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUNE(x.value, y.value), Int4::getType()));
2978 }
2979
2980 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
2981 {
2982 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGE(x.value, y.value), Int4::getType()));
2983 }
2984
2985 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
2986 {
2987 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGT(x.value, y.value), Int4::getType()));
2988 }
2989
John Bauman19bac1e2014-05-06 15:23:49 -04002990 RValue<Float4> Round(RValue<Float4> x)
2991 {
Logan Chiene3191012018-08-24 22:01:50 +08002992#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002993 if(CPUID::supportsSSE4_1())
2994 {
2995 return x86::roundps(x, 0);
2996 }
2997 else
2998 {
2999 return Float4(RoundInt(x));
3000 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003001#else
3002 return RValue<Float4>(V(lowerRound(V(x.value))));
3003#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003004 }
3005
3006 RValue<Float4> Trunc(RValue<Float4> x)
3007 {
Logan Chiene3191012018-08-24 22:01:50 +08003008#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003009 if(CPUID::supportsSSE4_1())
3010 {
3011 return x86::roundps(x, 3);
3012 }
3013 else
3014 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003015 return Float4(Int4(x));
John Bauman19bac1e2014-05-06 15:23:49 -04003016 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003017#else
3018 return RValue<Float4>(V(lowerTrunc(V(x.value))));
3019#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003020 }
3021
3022 RValue<Float4> Frac(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003023 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003024 Float4 frc;
3025
Logan Chien40a60052018-09-26 19:03:53 +08003026#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003027 if(CPUID::supportsSSE4_1())
3028 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003029 frc = x - Floor(x);
John Bauman89401822014-05-06 15:04:28 -04003030 }
3031 else
3032 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003033 frc = x - Float4(Int4(x)); // Signed fractional part.
John Bauman89401822014-05-06 15:04:28 -04003034
Nicolas Capensb9230422017-07-17 10:27:33 -04003035 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
John Bauman89401822014-05-06 15:04:28 -04003036 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003037#else
3038 frc = x - Floor(x);
3039#endif
Nicolas Capensb9230422017-07-17 10:27:33 -04003040
3041 // x - floor(x) can be 1.0 for very small negative x.
3042 // Clamp against the value just below 1.0.
3043 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
John Bauman89401822014-05-06 15:04:28 -04003044 }
3045
John Bauman19bac1e2014-05-06 15:23:49 -04003046 RValue<Float4> Floor(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003047 {
Logan Chiene3191012018-08-24 22:01:50 +08003048#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003049 if(CPUID::supportsSSE4_1())
3050 {
3051 return x86::floorps(x);
3052 }
3053 else
3054 {
John Bauman19bac1e2014-05-06 15:23:49 -04003055 return x - Frac(x);
John Bauman89401822014-05-06 15:04:28 -04003056 }
Logan Chien40a60052018-09-26 19:03:53 +08003057#else
3058 return RValue<Float4>(V(lowerFloor(V(x.value))));
3059#endif
John Bauman89401822014-05-06 15:04:28 -04003060 }
3061
John Bauman19bac1e2014-05-06 15:23:49 -04003062 RValue<Float4> Ceil(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003063 {
Logan Chiene3191012018-08-24 22:01:50 +08003064#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003065 if(CPUID::supportsSSE4_1())
3066 {
3067 return x86::ceilps(x);
3068 }
3069 else
Logan Chiene3191012018-08-24 22:01:50 +08003070#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003071 {
3072 return -Floor(-x);
3073 }
John Bauman89401822014-05-06 15:04:28 -04003074 }
3075
Ben Claytona2c8b772019-04-09 13:42:36 -04003076 RValue<Float4> Sin(RValue<Float4> v)
3077 {
3078 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sin, { V(v.value)->getType() } );
3079 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3080 }
3081
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003082 RValue<Float4> Cos(RValue<Float4> v)
3083 {
3084 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cos, { V(v.value)->getType() } );
3085 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3086 }
3087
Ben Clayton14740062019-04-09 13:48:41 -04003088 RValue<Float4> Tan(RValue<Float4> v)
3089 {
3090 return Sin(v) / Cos(v);
3091 }
3092
Ben Claytonf9350d72019-04-09 14:19:02 -04003093 RValue<Float4> Asin(RValue<Float4> v)
3094 {
3095 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), {T(Float::getType())}, false);
3096 auto func = ::module->getOrInsertFunction("asinf", funcTy);
3097 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3098 for (uint64_t i = 0; i < 4; i++)
3099 {
3100 auto el = ::builder->CreateCall(func, ::builder->CreateExtractElement(V(v.value), i));
3101 out = ::builder->CreateInsertElement(out, el, i);
3102 }
3103 return RValue<Float4>(V(out));
3104 }
3105
John Bauman19bac1e2014-05-06 15:23:49 -04003106 Type *Float4::getType()
John Bauman89401822014-05-06 15:04:28 -04003107 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003108 return T(llvm::VectorType::get(T(Float::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003109 }
3110
John Bauman89401822014-05-06 15:04:28 -04003111 RValue<Long> Ticks()
3112 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003113 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
John Bauman89401822014-05-06 15:04:28 -04003114
Nicolas Capens2ab69ee2016-09-26 11:45:17 -04003115 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
John Bauman89401822014-05-06 15:04:28 -04003116 }
John Bauman89401822014-05-06 15:04:28 -04003117}
3118
Nicolas Capens48461502018-08-06 14:20:45 -04003119namespace rr
John Bauman89401822014-05-06 15:04:28 -04003120{
Logan Chiene3191012018-08-24 22:01:50 +08003121#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003122 namespace x86
3123 {
John Bauman19bac1e2014-05-06 15:23:49 -04003124 RValue<Int> cvtss2si(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003125 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003126 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
John Bauman66b8ab22014-05-06 15:57:45 -04003127
John Bauman89401822014-05-06 15:04:28 -04003128 Float4 vector;
3129 vector.x = val;
3130
Logan Chien813d5032018-08-31 17:19:45 +08003131 return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
John Bauman89401822014-05-06 15:04:28 -04003132 }
3133
John Bauman19bac1e2014-05-06 15:23:49 -04003134 RValue<Int4> cvtps2dq(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003135 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003136 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
John Bauman89401822014-05-06 15:04:28 -04003137
Logan Chien813d5032018-08-31 17:19:45 +08003138 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003139 }
3140
John Bauman19bac1e2014-05-06 15:23:49 -04003141 RValue<Float> rcpss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003142 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003143 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
John Bauman89401822014-05-06 15:04:28 -04003144
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003145 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003146
Logan Chien813d5032018-08-31 17:19:45 +08003147 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003148 }
3149
John Bauman19bac1e2014-05-06 15:23:49 -04003150 RValue<Float> sqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003151 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003152#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003153 llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss);
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003154 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003155
Logan Chien813d5032018-08-31 17:19:45 +08003156 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, ARGS(V(vector)))), Float::getType(), 0));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003157#else
3158 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3159 return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
3160#endif
John Bauman89401822014-05-06 15:04:28 -04003161 }
3162
John Bauman19bac1e2014-05-06 15:23:49 -04003163 RValue<Float> rsqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003164 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003165 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
John Bauman66b8ab22014-05-06 15:57:45 -04003166
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003167 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman89401822014-05-06 15:04:28 -04003168
Logan Chien813d5032018-08-31 17:19:45 +08003169 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003170 }
3171
John Bauman19bac1e2014-05-06 15:23:49 -04003172 RValue<Float4> rcpps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003173 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003174 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003175
Logan Chien813d5032018-08-31 17:19:45 +08003176 return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003177 }
3178
John Bauman19bac1e2014-05-06 15:23:49 -04003179 RValue<Float4> sqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003180 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003181#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003182 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps);
Logan Chien0eedc8c2018-08-21 09:34:28 +08003183#else
3184 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3185#endif
John Bauman66b8ab22014-05-06 15:57:45 -04003186
Logan Chien813d5032018-08-31 17:19:45 +08003187 return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003188 }
3189
John Bauman19bac1e2014-05-06 15:23:49 -04003190 RValue<Float4> rsqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003191 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003192 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003193
Logan Chien813d5032018-08-31 17:19:45 +08003194 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003195 }
3196
John Bauman19bac1e2014-05-06 15:23:49 -04003197 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003198 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003199 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
John Bauman89401822014-05-06 15:04:28 -04003200
Logan Chien813d5032018-08-31 17:19:45 +08003201 return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003202 }
3203
John Bauman19bac1e2014-05-06 15:23:49 -04003204 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003205 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003206 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
John Bauman89401822014-05-06 15:04:28 -04003207
Logan Chien813d5032018-08-31 17:19:45 +08003208 return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003209 }
3210
John Bauman19bac1e2014-05-06 15:23:49 -04003211 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003212 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003213 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
John Bauman89401822014-05-06 15:04:28 -04003214
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003215 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
John Bauman89401822014-05-06 15:04:28 -04003216 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
3217
Logan Chien813d5032018-08-31 17:19:45 +08003218 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003219 }
3220
John Bauman19bac1e2014-05-06 15:23:49 -04003221 RValue<Float> floorss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003222 {
3223 return roundss(val, 1);
3224 }
3225
John Bauman19bac1e2014-05-06 15:23:49 -04003226 RValue<Float> ceilss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003227 {
3228 return roundss(val, 2);
3229 }
3230
John Bauman19bac1e2014-05-06 15:23:49 -04003231 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003232 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003233 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
John Bauman89401822014-05-06 15:04:28 -04003234
Logan Chien813d5032018-08-31 17:19:45 +08003235 return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
John Bauman89401822014-05-06 15:04:28 -04003236 }
3237
John Bauman19bac1e2014-05-06 15:23:49 -04003238 RValue<Float4> floorps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003239 {
3240 return roundps(val, 1);
3241 }
3242
John Bauman19bac1e2014-05-06 15:23:49 -04003243 RValue<Float4> ceilps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003244 {
3245 return roundps(val, 2);
3246 }
3247
Alexis Hetu0f448072016-03-18 10:56:08 -04003248 RValue<Int4> pabsd(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04003249 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003250#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003251 llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128);
John Bauman89401822014-05-06 15:04:28 -04003252
Logan Chien813d5032018-08-31 17:19:45 +08003253 return RValue<Int4>(V(::builder->CreateCall(pabsd, ARGS(V(x.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003254#else
3255 return RValue<Int4>(V(lowerPABS(V(x.value))));
3256#endif
John Bauman89401822014-05-06 15:04:28 -04003257 }
3258
John Bauman19bac1e2014-05-06 15:23:49 -04003259 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003260 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003261 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
John Bauman89401822014-05-06 15:04:28 -04003262
Logan Chien813d5032018-08-31 17:19:45 +08003263 return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003264 }
John Bauman66b8ab22014-05-06 15:57:45 -04003265
John Bauman19bac1e2014-05-06 15:23:49 -04003266 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003267 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003268 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
John Bauman89401822014-05-06 15:04:28 -04003269
Logan Chien813d5032018-08-31 17:19:45 +08003270 return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003271 }
3272
John Bauman19bac1e2014-05-06 15:23:49 -04003273 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003274 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003275 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
John Bauman89401822014-05-06 15:04:28 -04003276
Logan Chien813d5032018-08-31 17:19:45 +08003277 return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003278 }
John Bauman66b8ab22014-05-06 15:57:45 -04003279
John Bauman19bac1e2014-05-06 15:23:49 -04003280 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003281 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003282 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
John Bauman89401822014-05-06 15:04:28 -04003283
Logan Chien813d5032018-08-31 17:19:45 +08003284 return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003285 }
3286
John Bauman19bac1e2014-05-06 15:23:49 -04003287 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003288 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003289 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
John Bauman89401822014-05-06 15:04:28 -04003290
Logan Chien813d5032018-08-31 17:19:45 +08003291 return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003292 }
John Bauman66b8ab22014-05-06 15:57:45 -04003293
John Bauman19bac1e2014-05-06 15:23:49 -04003294 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003295 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003296 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
John Bauman89401822014-05-06 15:04:28 -04003297
Logan Chien813d5032018-08-31 17:19:45 +08003298 return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003299 }
John Bauman66b8ab22014-05-06 15:57:45 -04003300
John Bauman19bac1e2014-05-06 15:23:49 -04003301 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003302 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003303 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
John Bauman89401822014-05-06 15:04:28 -04003304
Logan Chien813d5032018-08-31 17:19:45 +08003305 return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003306 }
John Bauman66b8ab22014-05-06 15:57:45 -04003307
John Bauman19bac1e2014-05-06 15:23:49 -04003308 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003309 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003310 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
John Bauman89401822014-05-06 15:04:28 -04003311
Logan Chien813d5032018-08-31 17:19:45 +08003312 return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
John Bauman19bac1e2014-05-06 15:23:49 -04003313 }
3314
3315 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003316 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003317#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003318 llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
John Bauman89401822014-05-06 15:04:28 -04003319
Logan Chien813d5032018-08-31 17:19:45 +08003320 return As<UShort4>(V(::builder->CreateCall2(pavgw, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003321#else
3322 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
3323#endif
John Bauman89401822014-05-06 15:04:28 -04003324 }
3325
John Bauman19bac1e2014-05-06 15:23:49 -04003326 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003327 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003328#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003329 llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
John Bauman89401822014-05-06 15:04:28 -04003330
Logan Chien813d5032018-08-31 17:19:45 +08003331 return As<Short4>(V(::builder->CreateCall2(pmaxsw, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003332#else
3333 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
3334#endif
John Bauman89401822014-05-06 15:04:28 -04003335 }
3336
John Bauman19bac1e2014-05-06 15:23:49 -04003337 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003338 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003339#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003340 llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
John Bauman89401822014-05-06 15:04:28 -04003341
Logan Chien813d5032018-08-31 17:19:45 +08003342 return As<Short4>(V(::builder->CreateCall2(pminsw, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003343#else
3344 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
3345#endif
John Bauman89401822014-05-06 15:04:28 -04003346 }
3347
John Bauman19bac1e2014-05-06 15:23:49 -04003348 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003349 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003350#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003351 llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
John Bauman89401822014-05-06 15:04:28 -04003352
Logan Chien813d5032018-08-31 17:19:45 +08003353 return As<Short4>(V(::builder->CreateCall2(pcmpgtw, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003354#else
3355 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
3356#endif
John Bauman89401822014-05-06 15:04:28 -04003357 }
3358
John Bauman19bac1e2014-05-06 15:23:49 -04003359 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003360 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003361#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003362 llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
John Bauman89401822014-05-06 15:04:28 -04003363
Logan Chien813d5032018-08-31 17:19:45 +08003364 return As<Short4>(V(::builder->CreateCall2(pcmpeqw, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003365#else
3366 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
3367#endif
John Bauman89401822014-05-06 15:04:28 -04003368 }
3369
John Bauman19bac1e2014-05-06 15:23:49 -04003370 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003371 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003372#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003373 llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
John Bauman89401822014-05-06 15:04:28 -04003374
Logan Chien813d5032018-08-31 17:19:45 +08003375 return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003376#else
3377 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
3378#endif
John Bauman89401822014-05-06 15:04:28 -04003379 }
3380
John Bauman19bac1e2014-05-06 15:23:49 -04003381 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003382 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003383#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003384 llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
John Bauman89401822014-05-06 15:04:28 -04003385
Logan Chien813d5032018-08-31 17:19:45 +08003386 return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003387#else
3388 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
3389#endif
John Bauman89401822014-05-06 15:04:28 -04003390 }
3391
John Bauman19bac1e2014-05-06 15:23:49 -04003392 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
John Bauman89401822014-05-06 15:04:28 -04003393 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003394 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003395
Logan Chien813d5032018-08-31 17:19:45 +08003396 return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003397 }
3398
John Bauman19bac1e2014-05-06 15:23:49 -04003399 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003400 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003401 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003402
Logan Chien813d5032018-08-31 17:19:45 +08003403 return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003404 }
3405
John Bauman19bac1e2014-05-06 15:23:49 -04003406 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003407 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003408 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
John Bauman89401822014-05-06 15:04:28 -04003409
Logan Chien813d5032018-08-31 17:19:45 +08003410 return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003411 }
3412
Nicolas Capens33438a62017-09-27 11:47:35 -04003413 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003414 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003415 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
John Bauman89401822014-05-06 15:04:28 -04003416
Logan Chien813d5032018-08-31 17:19:45 +08003417 return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003418 }
3419
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003420 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003421 {
3422 if(CPUID::supportsSSE4_1())
3423 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003424 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
John Bauman66b8ab22014-05-06 15:57:45 -04003425
Logan Chien813d5032018-08-31 17:19:45 +08003426 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003427 }
3428 else
3429 {
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003430 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
3431 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
3432
3433 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
John Bauman89401822014-05-06 15:04:28 -04003434 }
3435 }
3436
John Bauman19bac1e2014-05-06 15:23:49 -04003437 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003438 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003439 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003440
Logan Chien813d5032018-08-31 17:19:45 +08003441 return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003442 }
3443
John Bauman19bac1e2014-05-06 15:23:49 -04003444 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003445 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003446 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003447
Logan Chien813d5032018-08-31 17:19:45 +08003448 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003449 }
3450
John Bauman19bac1e2014-05-06 15:23:49 -04003451 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003452 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003453 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003454
Logan Chien813d5032018-08-31 17:19:45 +08003455 return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003456 }
3457
John Bauman19bac1e2014-05-06 15:23:49 -04003458 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003459 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003460 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003461
Logan Chien813d5032018-08-31 17:19:45 +08003462 return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003463 }
3464
John Bauman19bac1e2014-05-06 15:23:49 -04003465 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003466 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003467 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003468
Logan Chien813d5032018-08-31 17:19:45 +08003469 return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003470 }
3471
John Bauman19bac1e2014-05-06 15:23:49 -04003472 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003473 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003474 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003475
Logan Chien813d5032018-08-31 17:19:45 +08003476 return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003477 }
3478
John Bauman19bac1e2014-05-06 15:23:49 -04003479 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003480 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003481 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003482
Logan Chien813d5032018-08-31 17:19:45 +08003483 return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003484 }
3485
John Bauman19bac1e2014-05-06 15:23:49 -04003486 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003487 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003488 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003489
Logan Chien813d5032018-08-31 17:19:45 +08003490 return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003491 }
3492
John Bauman19bac1e2014-05-06 15:23:49 -04003493 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003494 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003495 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003496
Logan Chien813d5032018-08-31 17:19:45 +08003497 return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003498 }
3499
John Bauman19bac1e2014-05-06 15:23:49 -04003500 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003501 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003502 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003503
Logan Chien813d5032018-08-31 17:19:45 +08003504 return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003505 }
3506
John Bauman19bac1e2014-05-06 15:23:49 -04003507 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003508 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003509 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003510
Logan Chien813d5032018-08-31 17:19:45 +08003511 return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003512 }
3513
John Bauman19bac1e2014-05-06 15:23:49 -04003514 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003515 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003516 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003517
Logan Chien813d5032018-08-31 17:19:45 +08003518 return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003519 }
3520
John Bauman19bac1e2014-05-06 15:23:49 -04003521 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
3522 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003523#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003524 llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd);
John Bauman19bac1e2014-05-06 15:23:49 -04003525
Logan Chien813d5032018-08-31 17:19:45 +08003526 return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003527#else
3528 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
3529#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003530 }
3531
3532 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
3533 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003534#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003535 llvm::Function *pminsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd);
John Bauman19bac1e2014-05-06 15:23:49 -04003536
Logan Chien813d5032018-08-31 17:19:45 +08003537 return RValue<Int4>(V(::builder->CreateCall2(pminsd, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003538#else
3539 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
3540#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003541 }
3542
3543 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
3544 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003545#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003546 llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud);
John Bauman19bac1e2014-05-06 15:23:49 -04003547
Logan Chien813d5032018-08-31 17:19:45 +08003548 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003549#else
3550 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
3551#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003552 }
3553
3554 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
3555 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003556#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003557 llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud);
John Bauman19bac1e2014-05-06 15:23:49 -04003558
Logan Chien813d5032018-08-31 17:19:45 +08003559 return RValue<UInt4>(V(::builder->CreateCall2(pminud, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003560#else
3561 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
3562#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003563 }
3564
3565 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003566 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003567 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04003568
Logan Chien813d5032018-08-31 17:19:45 +08003569 return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003570 }
3571
John Bauman19bac1e2014-05-06 15:23:49 -04003572 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003573 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003574 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04003575
Logan Chien813d5032018-08-31 17:19:45 +08003576 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003577 }
3578
John Bauman19bac1e2014-05-06 15:23:49 -04003579 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003580 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003581 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04003582
Logan Chien813d5032018-08-31 17:19:45 +08003583 return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003584 }
3585
John Bauman19bac1e2014-05-06 15:23:49 -04003586 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04003587 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003588 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04003589
Logan Chien813d5032018-08-31 17:19:45 +08003590 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003591 }
3592
John Bauman19bac1e2014-05-06 15:23:49 -04003593 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04003594 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003595 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04003596
Logan Chien813d5032018-08-31 17:19:45 +08003597 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003598 }
3599
John Bauman19bac1e2014-05-06 15:23:49 -04003600 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04003601 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003602 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04003603
Logan Chien813d5032018-08-31 17:19:45 +08003604 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003605 }
3606
John Bauman19bac1e2014-05-06 15:23:49 -04003607 RValue<Int> movmskps(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003608 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003609 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
John Bauman89401822014-05-06 15:04:28 -04003610
Logan Chien813d5032018-08-31 17:19:45 +08003611 return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
John Bauman89401822014-05-06 15:04:28 -04003612 }
3613
John Bauman19bac1e2014-05-06 15:23:49 -04003614 RValue<Int> pmovmskb(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04003615 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003616 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
John Bauman89401822014-05-06 15:04:28 -04003617
Logan Chien813d5032018-08-31 17:19:45 +08003618 return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
John Bauman89401822014-05-06 15:04:28 -04003619 }
3620
Nicolas Capens01a97962017-07-28 17:30:51 -04003621 RValue<Int4> pmovzxbd(RValue<Byte16> x)
John Bauman89401822014-05-06 15:04:28 -04003622 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003623#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003624 llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd);
John Bauman66b8ab22014-05-06 15:57:45 -04003625
Logan Chien813d5032018-08-31 17:19:45 +08003626 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, ARGS(V(x.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003627#else
3628 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
3629#endif
John Bauman89401822014-05-06 15:04:28 -04003630 }
3631
Nicolas Capens01a97962017-07-28 17:30:51 -04003632 RValue<Int4> pmovsxbd(RValue<SByte16> x)
John Bauman89401822014-05-06 15:04:28 -04003633 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003634#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003635 llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd);
John Bauman66b8ab22014-05-06 15:57:45 -04003636
Logan Chien813d5032018-08-31 17:19:45 +08003637 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, ARGS(V(x.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003638#else
3639 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
3640#endif
John Bauman89401822014-05-06 15:04:28 -04003641 }
3642
Nicolas Capens01a97962017-07-28 17:30:51 -04003643 RValue<Int4> pmovzxwd(RValue<UShort8> x)
John Bauman89401822014-05-06 15:04:28 -04003644 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003645#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003646 llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd);
John Bauman66b8ab22014-05-06 15:57:45 -04003647
Logan Chien813d5032018-08-31 17:19:45 +08003648 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, ARGS(V(x.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003649#else
3650 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
3651#endif
John Bauman89401822014-05-06 15:04:28 -04003652 }
3653
Nicolas Capens01a97962017-07-28 17:30:51 -04003654 RValue<Int4> pmovsxwd(RValue<Short8> x)
John Bauman89401822014-05-06 15:04:28 -04003655 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003656#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003657 llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd);
John Bauman66b8ab22014-05-06 15:57:45 -04003658
Logan Chien813d5032018-08-31 17:19:45 +08003659 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, ARGS(V(x.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003660#else
3661 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
3662#endif
John Bauman89401822014-05-06 15:04:28 -04003663 }
3664 }
Logan Chiene3191012018-08-24 22:01:50 +08003665#endif // defined(__i386__) || defined(__x86_64__)
Ben Clayton1bc7ee92019-02-14 18:43:22 +00003666
Ben Clayton60a3d6f2019-02-26 17:24:46 +00003667#ifdef ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +00003668 // extractAll returns a vector containing the extracted n scalar value of
3669 // the vector vec.
3670 static std::vector<Value*> extractAll(Value* vec, int n)
3671 {
3672 std::vector<Value*> elements;
3673 elements.reserve(n);
3674 for (int i = 0; i < n; i++)
3675 {
3676 auto el = V(::builder->CreateExtractElement(V(vec), i));
3677 elements.push_back(el);
3678 }
3679 return elements;
3680 }
3681
3682 // toDouble returns all the float values in vals extended to doubles.
3683 static std::vector<Value*> toDouble(const std::vector<Value*>& vals)
3684 {
3685 auto doubleTy = ::llvm::Type::getDoubleTy(*::context);
3686 std::vector<Value*> elements;
3687 elements.reserve(vals.size());
3688 for (auto v : vals)
3689 {
3690 elements.push_back(V(::builder->CreateFPExt(V(v), doubleTy)));
3691 }
3692 return elements;
3693 }
3694
3695 std::vector<Value*> PrintValue::Ty<Byte4>::val(const RValue<Byte4>& v) { return extractAll(v.value, 4); }
3696 std::vector<Value*> PrintValue::Ty<Int4>::val(const RValue<Int4>& v) { return extractAll(v.value, 4); }
3697 std::vector<Value*> PrintValue::Ty<UInt4>::val(const RValue<UInt4>& v) { return extractAll(v.value, 4); }
3698 std::vector<Value*> PrintValue::Ty<Short4>::val(const RValue<Short4>& v) { return extractAll(v.value, 4); }
3699 std::vector<Value*> PrintValue::Ty<UShort4>::val(const RValue<UShort4>& v) { return extractAll(v.value, 4); }
3700 std::vector<Value*> PrintValue::Ty<Float>::val(const RValue<Float>& v) { return toDouble({v.value}); }
3701 std::vector<Value*> PrintValue::Ty<Float4>::val(const RValue<Float4>& v) { return toDouble(extractAll(v.value, 4)); }
3702
3703 void Printv(const char* function, const char* file, int line, const char* fmt, std::initializer_list<PrintValue> args)
3704 {
3705 // LLVM types used below.
3706 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
3707 auto intTy = ::llvm::Type::getInt64Ty(*::context); // TODO: Natural int width.
3708 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
3709 auto funcTy = ::llvm::FunctionType::get(i32Ty, {i8PtrTy}, true);
3710
3711 auto func = ::module->getOrInsertFunction("printf", funcTy);
3712
3713 // Build the printf format message string.
3714 std::string str;
3715 if (file != nullptr) { str += (line > 0) ? "%s:%d " : "%s "; }
3716 if (function != nullptr) { str += "%s "; }
3717 str += fmt;
3718
3719 // Perform subsitution on all '{n}' bracketed indices in the format
3720 // message.
3721 int i = 0;
3722 for (const PrintValue& arg : args)
3723 {
3724 str = replace(str, "{" + std::to_string(i++) + "}", arg.format);
3725 }
3726
3727 ::llvm::SmallVector<::llvm::Value*, 8> vals;
3728
3729 // The format message is always the first argument.
3730 vals.push_back(::builder->CreateGlobalStringPtr(str));
3731
3732 // Add optional file, line and function info if provided.
3733 if (file != nullptr)
3734 {
3735 vals.push_back(::builder->CreateGlobalStringPtr(file));
3736 if (line > 0)
3737 {
3738 vals.push_back(::llvm::ConstantInt::get(intTy, line));
3739 }
3740 }
3741 if (function != nullptr)
3742 {
3743 vals.push_back(::builder->CreateGlobalStringPtr(function));
3744 }
3745
3746 // Add all format arguments.
3747 for (const PrintValue& arg : args)
3748 {
3749 for (auto val : arg.values)
3750 {
3751 vals.push_back(V(val));
3752 }
3753 }
3754
3755 ::builder->CreateCall(func, vals);
3756 }
3757#endif // ENABLE_RR_PRINT
3758
John Bauman89401822014-05-06 15:04:28 -04003759}