blob: 11ea23ab7aba04d4fd5cd36947dc26be6fe30fb5 [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
Nicolas Capenscb986762017-01-20 11:34:37 -050015#include "Reactor.hpp"
Ben Claytoneb50d252019-04-15 13:50:01 -040016#include "Debug.hpp"
Ben Claytonac07ed82019-03-26 14:17:41 +000017#include "LLVMReactor.hpp"
18#include "LLVMReactorDebugInfo.hpp"
John Bauman89401822014-05-06 15:04:28 -040019
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040020#include "x86.hpp"
21#include "CPUID.hpp"
22#include "Thread.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040023#include "ExecutableMemory.hpp"
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040024#include "MutexLock.hpp"
25
26#undef min
27#undef max
28
Ben Clayton09a7f452019-04-25 15:22:43 +010029#if defined(__clang__)
// LLVM has occurrences of the extra-semi warning in its headers, which will be
// treated as an error in SwiftShader targets.
32#pragma clang diagnostic push
33#pragma clang diagnostic ignored "-Wextra-semi"
34#endif // defined(__clang__)
35
Ben Clayton5875be52019-04-11 14:57:40 -040036#include "llvm/Analysis/LoopPass.h"
37#include "llvm/ExecutionEngine/ExecutionEngine.h"
38#include "llvm/ExecutionEngine/JITSymbol.h"
39#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
40#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
41#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
42#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
43#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
44#include "llvm/ExecutionEngine/SectionMemoryManager.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DataLayout.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalVariable.h"
49#include "llvm/IR/IRBuilder.h"
50#include "llvm/IR/Intrinsics.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/LegacyPassManager.h"
53#include "llvm/IR/Mangler.h"
54#include "llvm/IR/Module.h"
55#include "llvm/Support/Error.h"
56#include "llvm/Support/TargetSelect.h"
57#include "llvm/Target/TargetOptions.h"
58#include "llvm/Transforms/InstCombine/InstCombine.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Scalar/GVN.h"
Ben Clayton20507fa2019-04-20 01:40:15 -040061
Ben Clayton09a7f452019-04-25 15:22:43 +010062#if defined(__clang__)
63#pragma clang diagnostic pop
64#endif // defined(__clang__)
65
Ben Clayton5875be52019-04-11 14:57:40 -040066#include "LLVMRoutine.hpp"
John Bauman89401822014-05-06 15:04:28 -040067
Ben Clayton5875be52019-04-11 14:57:40 -040068#define ARGS(...) {__VA_ARGS__}
69#define CreateCall2 CreateCall
70#define CreateCall3 CreateCall
Logan Chien0eedc8c2018-08-21 09:34:28 +080071
Ben Clayton5875be52019-04-11 14:57:40 -040072#include <unordered_map>
Logan Chien0eedc8c2018-08-21 09:34:28 +080073
John Bauman89401822014-05-06 15:04:28 -040074#include <fstream>
Ben Clayton1bc7ee92019-02-14 18:43:22 +000075#include <numeric>
76#include <thread>
John Bauman89401822014-05-06 15:04:28 -040077
Nicolas Capens47dc8672017-04-25 12:54:39 -040078#if defined(__i386__) || defined(__x86_64__)
79#include <xmmintrin.h>
80#endif
81
Logan Chien40a60052018-09-26 19:03:53 +080082#include <math.h>
83
Nicolas Capenscb122582014-05-06 23:34:44 -040084#if defined(__x86_64__) && defined(_WIN32)
// Stub with C linkage, compiled only when both __x86_64__ and _WIN32 are
// defined (see the surrounding #if). Its body only reports that the callback
// is unimplemented.
extern "C" void X86CompilationCallback()
{
    UNIMPLEMENTED("X86CompilationCallback");
}
89#endif
90
Nicolas Capens48461502018-08-06 14:20:45 -040091namespace rr
Logan Chien52cde602018-09-03 19:37:57 +080092{
93 class LLVMReactorJIT;
94}
95
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -040096namespace
97{
// File-local code generation state shared by the helpers below.
// Every pointer starts out null.
rr::LLVMReactorJIT *reactorJIT = nullptr;
llvm::IRBuilder<> *builder = nullptr;    // Used by the lower*() helpers to emit instructions.
llvm::LLVMContext *context = nullptr;
llvm::Module *module = nullptr;          // Module that intrinsic declarations are looked up in.
llvm::Function *function = nullptr;

#ifdef ENABLE_RR_DEBUG_INFO
// Only present when ENABLE_RR_DEBUG_INFO is defined; null means no debug
// info is being generated.
std::unique_ptr<rr::DebugInfo> debugInfo;
#endif

// NOTE(review): presumably serializes access to the state above - confirm
// against the code that locks it.
rr::MutexLock codegenMutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800109
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000110#ifdef ENABLE_RR_PRINT
// Returns a copy of `str` in which every non-overlapping occurrence of
// `substr` has been replaced by `replacement`, scanning left to right.
// Text inserted by a replacement is never rescanned.
// An empty `substr` matches nothing, so `str` is returned unchanged.
std::string replace(std::string str, const std::string& substr, const std::string& replacement)
{
    // std::string::find() reports a match for the empty string at every
    // position, which would make the loop below spin forever.
    if(substr.empty())
    {
        return str;
    }

    size_t pos = 0;
    while((pos = str.find(substr, pos)) != std::string::npos)
    {
        str.replace(pos, substr.length(), replacement);
        pos += replacement.length();   // Skip over the text just inserted.
    }
    return str;
}
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000120#endif // ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000121
Logan Chien0eedc8c2018-08-21 09:34:28 +0800122 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
123 {
124 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
125
126 llvm::VectorType *extTy =
127 llvm::VectorType::getExtendedElementVectorType(ty);
128 x = ::builder->CreateZExt(x, extTy);
129 y = ::builder->CreateZExt(y, extTy);
130
131 // (x + y + 1) >> 1
132 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
133 llvm::Value *res = ::builder->CreateAdd(x, y);
134 res = ::builder->CreateAdd(res, one);
135 res = ::builder->CreateLShr(res, one);
136 return ::builder->CreateTrunc(res, ty);
137 }
138
139 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800140 llvm::ICmpInst::Predicate pred)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800141 {
142 return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
143 }
144
145 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
Logan Chienb5ce5092018-09-27 18:45:58 +0800146 llvm::Value *y, llvm::Type *dstTy)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800147 {
148 return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
149 }
150
Logan Chiene3191012018-08-24 22:01:50 +0800151#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800152 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
153 {
154 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
155 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
156
157 llvm::Value *undef = llvm::UndefValue::get(srcTy);
158 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
159 std::iota(mask.begin(), mask.end(), 0);
160 llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
161
162 return sext ? ::builder->CreateSExt(v, dstTy)
Logan Chienb5ce5092018-09-27 18:45:58 +0800163 : ::builder->CreateZExt(v, dstTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800164 }
165
166 llvm::Value *lowerPABS(llvm::Value *v)
167 {
168 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
169 llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
170 llvm::Value *neg = ::builder->CreateNeg(v);
171 return ::builder->CreateSelect(cmp, v, neg);
172 }
173#endif // defined(__i386__) || defined(__x86_64__)
Logan Chiene3191012018-08-24 22:01:50 +0800174
175#if !defined(__i386__) && !defined(__x86_64__)
176 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800177 llvm::FCmpInst::Predicate pred)
Logan Chiene3191012018-08-24 22:01:50 +0800178 {
179 return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
180 }
181
Logan Chien83fc07a2018-09-26 22:14:00 +0800182 llvm::Value *lowerRound(llvm::Value *x)
183 {
184 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
185 ::module, llvm::Intrinsic::nearbyint, {x->getType()});
186 return ::builder->CreateCall(nearbyint, ARGS(x));
187 }
188
Logan Chien2faa24a2018-09-26 19:59:32 +0800189 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
190 {
191 return ::builder->CreateFPToSI(lowerRound(x), ty);
192 }
193
Logan Chien40a60052018-09-26 19:03:53 +0800194 llvm::Value *lowerFloor(llvm::Value *x)
195 {
196 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
197 ::module, llvm::Intrinsic::floor, {x->getType()});
198 return ::builder->CreateCall(floor, ARGS(x));
199 }
200
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800201 llvm::Value *lowerTrunc(llvm::Value *x)
202 {
203 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
204 ::module, llvm::Intrinsic::trunc, {x->getType()});
205 return ::builder->CreateCall(trunc, ARGS(x));
206 }
207
Logan Chiene3191012018-08-24 22:01:50 +0800208 // Packed add/sub saturatation
Logan Chien28794cf2018-09-26 18:58:03 +0800209 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
Logan Chiene3191012018-08-24 22:01:50 +0800210 {
Logan Chien28794cf2018-09-26 18:58:03 +0800211 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
212 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
213
214 unsigned numBits = ty->getScalarSizeInBits();
215
216 llvm::Value *max, *min, *extX, *extY;
217 if (isSigned)
218 {
219 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
220 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
221 extX = ::builder->CreateSExt(x, extTy);
222 extY = ::builder->CreateSExt(y, extTy);
223 }
224 else
225 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400226 ASSERT_MSG(numBits <= 64, "numBits: %d", int(numBits));
Logan Chien28794cf2018-09-26 18:58:03 +0800227 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
228 max = llvm::ConstantInt::get(extTy, maxVal, false);
229 min = llvm::ConstantInt::get(extTy, 0, false);
230 extX = ::builder->CreateZExt(x, extTy);
231 extY = ::builder->CreateZExt(y, extTy);
232 }
233
234 llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
235 : ::builder->CreateSub(extX, extY);
236
237 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
238 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
239
240 return ::builder->CreateTrunc(res, ty);
Logan Chiene3191012018-08-24 22:01:50 +0800241 }
242
243 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
244 {
Logan Chien28794cf2018-09-26 18:58:03 +0800245 return lowerPSAT(x, y, true, false);
Logan Chiene3191012018-08-24 22:01:50 +0800246 }
247
248 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
249 {
Logan Chien28794cf2018-09-26 18:58:03 +0800250 return lowerPSAT(x, y, true, true);
Logan Chiene3191012018-08-24 22:01:50 +0800251 }
252
253 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
254 {
Logan Chien28794cf2018-09-26 18:58:03 +0800255 return lowerPSAT(x, y, false, false);
Logan Chiene3191012018-08-24 22:01:50 +0800256 }
257
258 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
259 {
Logan Chien28794cf2018-09-26 18:58:03 +0800260 return lowerPSAT(x, y, false, true);
Logan Chiene3191012018-08-24 22:01:50 +0800261 }
262
263 llvm::Value *lowerSQRT(llvm::Value *x)
264 {
265 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
266 ::module, llvm::Intrinsic::sqrt, {x->getType()});
267 return ::builder->CreateCall(sqrt, ARGS(x));
268 }
269
270 llvm::Value *lowerRCP(llvm::Value *x)
271 {
272 llvm::Type *ty = x->getType();
273 llvm::Constant *one;
274 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
275 {
276 one = llvm::ConstantVector::getSplat(
277 vectorTy->getNumElements(),
278 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
279 }
280 else
281 {
282 one = llvm::ConstantFP::get(ty, 1);
283 }
284 return ::builder->CreateFDiv(one, x);
285 }
286
287 llvm::Value *lowerRSQRT(llvm::Value *x)
288 {
289 return lowerRCP(lowerSQRT(x));
290 }
291
292 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
293 {
294 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
295 llvm::Value *y = llvm::ConstantVector::getSplat(
296 ty->getNumElements(),
297 llvm::ConstantInt::get(ty->getElementType(), scalarY));
298 return ::builder->CreateShl(x, y);
299 }
300
301 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
302 {
303 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
304 llvm::Value *y = llvm::ConstantVector::getSplat(
305 ty->getNumElements(),
306 llvm::ConstantInt::get(ty->getElementType(), scalarY));
307 return ::builder->CreateAShr(x, y);
308 }
309
310 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
311 {
312 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
313 llvm::Value *y = llvm::ConstantVector::getSplat(
314 ty->getNumElements(),
315 llvm::ConstantInt::get(ty->getElementType(), scalarY));
316 return ::builder->CreateLShr(x, y);
317 }
318
319 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
320 {
321 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
322 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
323
324 llvm::Value *extX = ::builder->CreateSExt(x, extTy);
325 llvm::Value *extY = ::builder->CreateSExt(y, extTy);
326 llvm::Value *mult = ::builder->CreateMul(extX, extY);
327
328 llvm::Value *undef = llvm::UndefValue::get(extTy);
329
330 llvm::SmallVector<uint32_t, 16> evenIdx;
331 llvm::SmallVector<uint32_t, 16> oddIdx;
332 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
333 {
334 evenIdx.push_back(i);
335 oddIdx.push_back(i + 1);
336 }
337
338 llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
339 llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
340 return ::builder->CreateAdd(lhs, rhs);
341 }
342
Logan Chiene3191012018-08-24 22:01:50 +0800343 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
344 {
345 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
346 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
347
348 llvm::IntegerType *dstElemTy =
349 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
350
351 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
Ben Claytoneb50d252019-04-15 13:50:01 -0400352 ASSERT_MSG(truncNumBits < 64, "shift 64 must be handled separately. truncNumBits: %d", int(truncNumBits));
Logan Chiene3191012018-08-24 22:01:50 +0800353 llvm::Constant *max, *min;
354 if (isSigned)
355 {
356 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
357 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
358 }
359 else
360 {
361 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
362 min = llvm::ConstantInt::get(srcTy, 0, false);
363 }
364
365 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
366 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
367 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
368 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
369
370 x = ::builder->CreateTrunc(x, dstTy);
371 y = ::builder->CreateTrunc(y, dstTy);
372
373 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
374 std::iota(index.begin(), index.end(), 0);
375
376 return ::builder->CreateShuffleVector(x, y, index);
377 }
378
379 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
380 {
381 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
382 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
383 llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
384
385 llvm::Value *ret = ::builder->CreateZExt(
386 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
387 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
388 {
389 llvm::Value *elem = ::builder->CreateZExt(
390 ::builder->CreateExtractElement(cmp, i), retTy);
391 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
392 }
393 return ret;
394 }
395
396 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
397 {
398 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
399 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
400 llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
401
402 llvm::Value *ret = ::builder->CreateZExt(
403 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
404 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
405 {
406 llvm::Value *elem = ::builder->CreateZExt(
407 ::builder->CreateExtractElement(cmp, i), retTy);
408 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
409 }
410 return ret;
411 }
412#endif // !defined(__i386__) && !defined(__x86_64__)
Chris Forbese86b6dc2019-03-01 09:08:47 -0800413
414 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
415 {
416 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
417 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
418
419 llvm::Value *extX, *extY;
420 if (sext)
421 {
422 extX = ::builder->CreateSExt(x, extTy);
423 extY = ::builder->CreateSExt(y, extTy);
424 }
425 else
426 {
427 extX = ::builder->CreateZExt(x, extTy);
428 extY = ::builder->CreateZExt(y, extTy);
429 }
430
431 llvm::Value *mult = ::builder->CreateMul(extX, extY);
432
433 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
434 llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getBitWidth());
435 return ::builder->CreateTrunc(mulh, ty);
436 }
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400437}
438
Nicolas Capens48461502018-08-06 14:20:45 -0400439namespace rr
John Bauman89401822014-05-06 15:04:28 -0400440{
// Capabilities advertised by this Reactor backend. Fields are initialized
// positionally; each is labelled with the member it sets.
const Capabilities Caps =
{
    true, // CallSupported
};
445
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400446 static std::memory_order atomicOrdering(llvm::AtomicOrdering memoryOrder)
447 {
448 switch(memoryOrder)
449 {
450 case llvm::AtomicOrdering::Monotonic: return std::memory_order_relaxed; // https://llvm.org/docs/Atomics.html#monotonic
451 case llvm::AtomicOrdering::Acquire: return std::memory_order_acquire;
452 case llvm::AtomicOrdering::Release: return std::memory_order_release;
453 case llvm::AtomicOrdering::AcquireRelease: return std::memory_order_acq_rel;
454 case llvm::AtomicOrdering::SequentiallyConsistent: return std::memory_order_seq_cst;
455 default:
Ben Claytonfb280672019-04-25 11:16:15 +0100456 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400457 return std::memory_order_acq_rel;
458 }
459 }
460
461 static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
462 {
463 if(!atomic)
464 {
465 return llvm::AtomicOrdering::NotAtomic;
466 }
467
468 switch(memoryOrder)
469 {
470 case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic; // https://llvm.org/docs/Atomics.html#monotonic
471 case std::memory_order_consume: return llvm::AtomicOrdering::Acquire; // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
472 case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
473 case std::memory_order_release: return llvm::AtomicOrdering::Release;
474 case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
475 case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
476 default:
477 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
478 return llvm::AtomicOrdering::AcquireRelease;
479 }
480 }
481
482 template <typename T>
483 static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
484 {
485 *reinterpret_cast<T*>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), atomicOrdering(ordering));
486 }
487
488 template <typename T>
489 static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
490 {
491 std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), *reinterpret_cast<T*>(val), atomicOrdering(ordering));
492 }
493
Logan Chien40a60052018-09-26 19:03:53 +0800494 class ExternalFunctionSymbolResolver
495 {
496 private:
497 using FunctionMap = std::unordered_map<std::string, void *>;
498 FunctionMap func_;
499
500 public:
501 ExternalFunctionSymbolResolver()
502 {
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400503 struct Atomic
504 {
505 static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
506 {
507 switch (size)
508 {
509 case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
510 case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
511 case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
512 case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
513 default:
514 UNIMPLEMENTED("Atomic::load(size: %d)", int(size));
515 }
516 }
517 static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
518 {
519 switch (size)
520 {
521 case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
522 case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
523 case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
524 case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
525 default:
526 UNIMPLEMENTED("Atomic::store(size: %d)", int(size));
527 }
528 }
529 };
Ben Claytonac07ed82019-03-26 14:17:41 +0000530 struct F { static void nop() {} };
Ben Claytonac07ed82019-03-26 14:17:41 +0000531
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400532 func_.emplace("nop", reinterpret_cast<void*>(F::nop));
Logan Chien40a60052018-09-26 19:03:53 +0800533 func_.emplace("floorf", reinterpret_cast<void*>(floorf));
Logan Chien83fc07a2018-09-26 22:14:00 +0800534 func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800535 func_.emplace("truncf", reinterpret_cast<void*>(truncf));
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000536 func_.emplace("printf", reinterpret_cast<void*>(printf));
537 func_.emplace("puts", reinterpret_cast<void*>(puts));
Chris Forbes1a4c7122019-03-15 14:50:47 -0700538 func_.emplace("fmodf", reinterpret_cast<void*>(fmodf));
Ben Claytona2c8b772019-04-09 13:42:36 -0400539 func_.emplace("sinf", reinterpret_cast<void*>(sinf));
Ben Clayton1b6f8c72019-04-09 13:47:43 -0400540 func_.emplace("cosf", reinterpret_cast<void*>(cosf));
Ben Claytonf9350d72019-04-09 14:19:02 -0400541 func_.emplace("asinf", reinterpret_cast<void*>(asinf));
Ben Claytoneafae472019-04-09 14:22:38 -0400542 func_.emplace("acosf", reinterpret_cast<void*>(acosf));
Ben Clayton749b4e02019-04-09 14:27:43 -0400543 func_.emplace("atanf", reinterpret_cast<void*>(atanf));
Ben Claytond9636972019-04-09 15:09:54 -0400544 func_.emplace("sinhf", reinterpret_cast<void*>(sinhf));
Ben Clayton900ea2c2019-04-09 15:25:36 -0400545 func_.emplace("coshf", reinterpret_cast<void*>(coshf));
Ben Clayton3928bd92019-04-09 15:27:41 -0400546 func_.emplace("tanhf", reinterpret_cast<void*>(tanhf));
Ben Claytonf6d77ab2019-04-09 15:30:04 -0400547 func_.emplace("asinhf", reinterpret_cast<void*>(asinhf));
Ben Clayton28ebcb02019-04-09 15:33:38 -0400548 func_.emplace("acoshf", reinterpret_cast<void*>(acoshf));
Ben Claytonfa6a5392019-04-09 15:35:24 -0400549 func_.emplace("atanhf", reinterpret_cast<void*>(atanhf));
Ben Claytona520c3e2019-04-09 15:43:45 -0400550 func_.emplace("atan2f", reinterpret_cast<void*>(atan2f));
Ben Claytonbfe94f02019-04-09 15:52:12 -0400551 func_.emplace("powf", reinterpret_cast<void*>(powf));
Ben Clayton242f0022019-04-09 16:00:53 -0400552 func_.emplace("expf", reinterpret_cast<void*>(expf));
Ben Clayton2c1da722019-04-09 16:03:03 -0400553 func_.emplace("logf", reinterpret_cast<void*>(logf));
Ben Claytonf40b56c2019-04-09 16:06:55 -0400554 func_.emplace("exp2f", reinterpret_cast<void*>(exp2f));
Ben Claytone17acfe2019-04-09 16:09:13 -0400555 func_.emplace("log2f", reinterpret_cast<void*>(log2f));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400556 func_.emplace("atomic_load", reinterpret_cast<void*>(Atomic::load));
557 func_.emplace("atomic_store", reinterpret_cast<void*>(Atomic::store));
Ben Clayton14740062019-04-09 13:48:41 -0400558
559#ifdef __APPLE__
Ben Clayton14740062019-04-09 13:48:41 -0400560 func_.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));
561#elif defined(__linux__)
562 func_.emplace("sincosf", reinterpret_cast<void*>(sincosf));
563#endif // __APPLE__
Logan Chien40a60052018-09-26 19:03:53 +0800564 }
565
566 void *findSymbol(const std::string &name) const
567 {
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000568 // Trim off any underscores from the start of the symbol. LLVM likes
569 // to append these on macOS.
570 const char* trimmed = name.c_str();
571 while (trimmed[0] == '_') { trimmed++; }
572
573 FunctionMap::const_iterator it = func_.find(trimmed);
Ben Claytoneb50d252019-04-15 13:50:01 -0400574 // Missing functions will likely make the module fail in exciting non-obvious ways.
575 ASSERT_MSG(it != func_.end(), "Missing external function: '%s'", name.c_str());
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000576 return it->second;
Logan Chien40a60052018-09-26 19:03:53 +0800577 }
578 };
579
Logan Chien0eedc8c2018-08-21 09:34:28 +0800580 class LLVMReactorJIT
581 {
582 private:
583 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
584 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
585
586 llvm::orc::ExecutionSession session;
Logan Chien40a60052018-09-26 19:03:53 +0800587 ExternalFunctionSymbolResolver externalSymbolResolver;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800588 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
589 std::unique_ptr<llvm::TargetMachine> targetMachine;
590 const llvm::DataLayout dataLayout;
591 ObjLayer objLayer;
592 CompileLayer compileLayer;
593 size_t emittedFunctionsNum;
594
595 public:
596 LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
597 const llvm::TargetOptions &targetOpts):
598 resolver(createLegacyLookupResolver(
599 session,
600 [this](const std::string &name) {
Logan Chien40a60052018-09-26 19:03:53 +0800601 void *func = externalSymbolResolver.findSymbol(name);
602 if (func != nullptr)
603 {
604 return llvm::JITSymbol(
605 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
606 }
607
Logan Chien0eedc8c2018-08-21 09:34:28 +0800608 return objLayer.findSymbol(name, true);
609 },
610 [](llvm::Error err) {
611 if (err)
612 {
613 // TODO: Log the symbol resolution errors.
614 return;
615 }
616 })),
617 targetMachine(llvm::EngineBuilder()
Ben Claytonac07ed82019-03-26 14:17:41 +0000618#ifdef ENABLE_RR_DEBUG_INFO
619 .setOptLevel(llvm::CodeGenOpt::None)
620#endif // ENABLE_RR_DEBUG_INFO
Logan Chien0eedc8c2018-08-21 09:34:28 +0800621 .setMArch(arch)
622 .setMAttrs(mattrs)
623 .setTargetOptions(targetOpts)
624 .selectTarget()),
625 dataLayout(targetMachine->createDataLayout()),
626 objLayer(
627 session,
628 [this](llvm::orc::VModuleKey) {
629 return ObjLayer::Resources{
630 std::make_shared<llvm::SectionMemoryManager>(),
631 resolver};
Ben Claytonac07ed82019-03-26 14:17:41 +0000632 },
633 ObjLayer::NotifyLoadedFtor(),
634 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
635#ifdef ENABLE_RR_DEBUG_INFO
636 if (debugInfo != nullptr)
637 {
638 debugInfo->NotifyObjectEmitted(Obj, L);
639 }
640#endif // ENABLE_RR_DEBUG_INFO
641 },
642 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
643#ifdef ENABLE_RR_DEBUG_INFO
644 if (debugInfo != nullptr)
645 {
646 debugInfo->NotifyFreeingObject(Obj);
647 }
648#endif // ENABLE_RR_DEBUG_INFO
649 }
650 ),
Logan Chien0eedc8c2018-08-21 09:34:28 +0800651 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
652 emittedFunctionsNum(0)
653 {
654 }
655
656 void startSession()
657 {
658 ::module = new llvm::Module("", *::context);
659 }
660
661 void endSession()
662 {
663 ::function = nullptr;
664 ::module = nullptr;
665 }
666
667 LLVMRoutine *acquireRoutine(llvm::Function *func)
668 {
669 std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
670 func->setName(name);
671 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
672 func->setDoesNotThrow();
673
674 std::unique_ptr<llvm::Module> mod(::module);
675 ::module = nullptr;
676 mod->setDataLayout(dataLayout);
677
678 auto moduleKey = session.allocateVModule();
679 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
680
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400681 std::string mangledName;
682 {
683 llvm::raw_string_ostream mangledNameStream(mangledName);
684 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
685 }
686
687 llvm::JITSymbol symbol = compileLayer.findSymbolIn(moduleKey, mangledName, false);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800688
689 llvm::Expected<llvm::JITTargetAddress> expectAddr = symbol.getAddress();
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400690 if(!expectAddr)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800691 {
692 return nullptr;
693 }
694
695 void *addr = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
696 return new LLVMRoutine(addr, releaseRoutineCallback, this, moduleKey);
697 }
698
699 void optimize(llvm::Module *module)
700 {
Ben Claytonac07ed82019-03-26 14:17:41 +0000701#ifdef ENABLE_RR_DEBUG_INFO
702 if (debugInfo != nullptr)
703 {
704 return; // Don't optimize if we're generating debug info.
705 }
706#endif // ENABLE_RR_DEBUG_INFO
707
Logan Chien0eedc8c2018-08-21 09:34:28 +0800708 std::unique_ptr<llvm::legacy::PassManager> passManager(
709 new llvm::legacy::PassManager());
710
711 passManager->add(llvm::createSROAPass());
712
713 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
714 {
715 switch(optimization[pass])
716 {
717 case Disabled: break;
718 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
719 case LICM: passManager->add(llvm::createLICMPass()); break;
720 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
721 case GVN: passManager->add(llvm::createGVNPass()); break;
722 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
723 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
724 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
725 case SCCP: passManager->add(llvm::createSCCPPass()); break;
726 case ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
727 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400728 UNREACHABLE("optimization[pass]: %d, pass: %d", int(optimization[pass]), int(pass));
Logan Chien0eedc8c2018-08-21 09:34:28 +0800729 }
730 }
731
732 passManager->run(*::module);
733 }
734
735 private:
736 void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
737 {
738 llvm::cantFail(compileLayer.removeModule(moduleKey));
739 }
740
741 static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
742 {
743 jit->releaseRoutineModule(moduleKey);
744 }
745 };
Logan Chien52cde602018-09-03 19:37:57 +0800746
// Optimization passes scheduled by LLVMReactorJIT::optimize(), which walks
// this array in order and stops at the first Disabled entry (or after 10
// entries). By default only InstructionCombining is listed.
Optimization optimization[10] = {InstructionCombining, Disabled};
John Bauman89401822014-05-06 15:04:28 -0400748
// The abstract Type* types are implemented as LLVM types, except that
// 64-bit vectors are emulated using 128-bit ones to avoid use of MMX in x86
// and VFP in ARM, and eliminate the overhead of converting them to explicit
// 128-bit ones. LLVM types are pointers, so we can represent emulated types
// as abstract pointers with small enum values.
enum InternalType : uintptr_t
{
    // Emulated types:
    Type_v2i32,
    Type_v4i16,
    Type_v2i16,
    Type_v8i8,
    Type_v4i8,
    Type_v2f32,
    // Number of emulated types above; asInternalType() treats any value
    // below this as an emulated type.
    EmulatedTypeCount,
    // Returned by asInternalType() to indicate that the abstract Type*
    // should be interpreted as LLVM type pointer:
    Type_LLVM
};
768
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500769 inline InternalType asInternalType(Type *type)
770 {
771 InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
772 return (t < EmulatedTypeCount) ? t : Type_LLVM;
773 }
774
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400775 llvm::Type *T(Type *t)
776 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500777 // Use 128-bit vectors to implement logically shorter ones.
778 switch(asInternalType(t))
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400779 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500780 case Type_v2i32: return T(Int4::getType());
781 case Type_v4i16: return T(Short8::getType());
782 case Type_v2i16: return T(Short8::getType());
783 case Type_v8i8: return T(Byte16::getType());
784 case Type_v4i8: return T(Byte16::getType());
785 case Type_v2f32: return T(Float4::getType());
786 case Type_LLVM: return reinterpret_cast<llvm::Type*>(t);
Ben Claytoneb50d252019-04-15 13:50:01 -0400787 default:
788 UNREACHABLE("asInternalType(t): %d", int(asInternalType(t)));
789 return nullptr;
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400790 }
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400791 }
792
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500793 Type *T(InternalType t)
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400794 {
795 return reinterpret_cast<Type*>(t);
796 }
797
Nicolas Capensac230122016-09-20 14:30:06 -0400798 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
799 {
800 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
801 }
802
Logan Chien191b3052018-08-31 16:57:15 +0800803 inline llvm::BasicBlock *B(BasicBlock *t)
804 {
805 return reinterpret_cast<llvm::BasicBlock*>(t);
806 }
807
Nicolas Capensc8b67a42016-09-25 15:02:52 -0400808 inline BasicBlock *B(llvm::BasicBlock *t)
809 {
810 return reinterpret_cast<BasicBlock*>(t);
811 }
812
Nicolas Capens01a97962017-07-28 17:30:51 -0400813 static size_t typeSize(Type *type)
814 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500815 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -0400816 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500817 case Type_v2i32: return 8;
818 case Type_v4i16: return 8;
819 case Type_v2i16: return 4;
820 case Type_v8i8: return 8;
821 case Type_v4i8: return 4;
822 case Type_v2f32: return 8;
823 case Type_LLVM:
Nicolas Capens01a97962017-07-28 17:30:51 -0400824 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500825 llvm::Type *t = T(type);
Nicolas Capens01a97962017-07-28 17:30:51 -0400826
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500827 if(t->isPointerTy())
828 {
829 return sizeof(void*);
830 }
831
832 // At this point we should only have LLVM 'primitive' types.
833 unsigned int bits = t->getPrimitiveSizeInBits();
Ben Claytoneb50d252019-04-15 13:50:01 -0400834 ASSERT_MSG(bits != 0, "bits: %d", int(bits));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500835
836 // TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
837 // but are typically stored as one byte. The DataLayout structure should
838 // be used here and many other places if this assumption fails.
839 return (bits + 7) / 8;
840 }
841 break;
842 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400843 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500844 return 0;
845 }
Nicolas Capens01a97962017-07-28 17:30:51 -0400846 }
847
Nicolas Capens69674fb2017-09-01 11:08:44 -0400848 static unsigned int elementCount(Type *type)
849 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500850 switch(asInternalType(type))
Nicolas Capens69674fb2017-09-01 11:08:44 -0400851 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500852 case Type_v2i32: return 2;
853 case Type_v4i16: return 4;
854 case Type_v2i16: return 2;
855 case Type_v8i8: return 8;
856 case Type_v4i8: return 4;
857 case Type_v2f32: return 2;
858 case Type_LLVM: return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
Ben Claytoneb50d252019-04-15 13:50:01 -0400859 default:
860 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
861 return 0;
Nicolas Capens69674fb2017-09-01 11:08:44 -0400862 }
Nicolas Capens69674fb2017-09-01 11:08:44 -0400863 }
864
John Bauman89401822014-05-06 15:04:28 -0400865 Nucleus::Nucleus()
866 {
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400867 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400868
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400869 llvm::InitializeNativeTarget();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800870 llvm::InitializeNativeTargetAsmPrinter();
871 llvm::InitializeNativeTargetAsmParser();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800872
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400873 if(!::context)
John Bauman89401822014-05-06 15:04:28 -0400874 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400875 ::context = new llvm::LLVMContext();
John Bauman89401822014-05-06 15:04:28 -0400876 }
877
John Bauman89401822014-05-06 15:04:28 -0400878 #if defined(__x86_64__)
Logan Chien52cde602018-09-03 19:37:57 +0800879 static const char arch[] = "x86-64";
Logan Chiene3191012018-08-24 22:01:50 +0800880 #elif defined(__i386__)
Logan Chien52cde602018-09-03 19:37:57 +0800881 static const char arch[] = "x86";
Logan Chiene3191012018-08-24 22:01:50 +0800882 #elif defined(__aarch64__)
883 static const char arch[] = "arm64";
884 #elif defined(__arm__)
885 static const char arch[] = "arm";
Gordana Cmiljanovic082dfec2018-10-19 11:36:15 +0200886 #elif defined(__mips__)
Gordana Cmiljanovic20622c02018-11-05 15:00:11 +0100887 #if defined(__mips64)
888 static const char arch[] = "mips64el";
889 #else
890 static const char arch[] = "mipsel";
891 #endif
Logan Chiene3191012018-08-24 22:01:50 +0800892 #else
893 #error "unknown architecture"
John Bauman89401822014-05-06 15:04:28 -0400894 #endif
895
Ben Clayton0fc611f2019-04-18 11:23:27 -0400896 llvm::SmallVector<std::string, 8> mattrs;
897
898 llvm::StringMap<bool> features;
899 bool ok = llvm::sys::getHostCPUFeatures(features);
900 ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
901 for (auto &feature : features)
902 {
903 if (feature.second) { mattrs.push_back(feature.first()); }
904 }
905
906#if 0
Logan Chiene3191012018-08-24 22:01:50 +0800907#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800908 mattrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
909 mattrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
910 mattrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
911 mattrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
912 mattrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
913 mattrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
Logan Chien0eedc8c2018-08-21 09:34:28 +0800914 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
Logan Chiene3191012018-08-24 22:01:50 +0800915#elif defined(__arm__)
916#if __ARM_ARCH >= 8
917 mattrs.push_back("+armv8-a");
918#else
919 // armv7-a requires compiler-rt routines; otherwise, compiled kernel
920 // might fail to link.
921#endif
922#endif
Ben Clayton0fc611f2019-04-18 11:23:27 -0400923#endif
John Bauman89401822014-05-06 15:04:28 -0400924
Logan Chien0eedc8c2018-08-21 09:34:28 +0800925 llvm::TargetOptions targetOpts;
Nicolas Capensa7643812018-09-13 14:20:06 -0400926 targetOpts.UnsafeFPMath = false;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800927 // targetOpts.NoInfsFPMath = true;
928 // targetOpts.NoNaNsFPMath = true;
Logan Chien52cde602018-09-03 19:37:57 +0800929
930 if(!::reactorJIT)
931 {
Logan Chien0eedc8c2018-08-21 09:34:28 +0800932 ::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
Logan Chien52cde602018-09-03 19:37:57 +0800933 }
934
935 ::reactorJIT->startSession();
John Bauman89401822014-05-06 15:04:28 -0400936
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400937 if(!::builder)
John Bauman89401822014-05-06 15:04:28 -0400938 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400939 ::builder = new llvm::IRBuilder<>(*::context);
John Bauman89401822014-05-06 15:04:28 -0400940 }
941 }
942
943 Nucleus::~Nucleus()
944 {
Logan Chien52cde602018-09-03 19:37:57 +0800945 ::reactorJIT->endSession();
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400946
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400947 ::codegenMutex.unlock();
John Bauman89401822014-05-06 15:04:28 -0400948 }
949
Chris Forbes878d4b02019-01-21 10:48:35 -0800950 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
John Bauman89401822014-05-06 15:04:28 -0400951 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400952 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
John Bauman19bac1e2014-05-06 15:23:49 -0400953 {
Nicolas Capensac230122016-09-20 14:30:06 -0400954 llvm::Type *type = ::function->getReturnType();
John Bauman19bac1e2014-05-06 15:23:49 -0400955
956 if(type->isVoidTy())
957 {
958 createRetVoid();
959 }
960 else
961 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400962 createRet(V(llvm::UndefValue::get(type)));
John Bauman19bac1e2014-05-06 15:23:49 -0400963 }
964 }
John Bauman89401822014-05-06 15:04:28 -0400965
Ben Clayton97c13ad2019-05-02 11:59:30 +0100966#ifdef ENABLE_RR_DEBUG_INFO
967 if (debugInfo != nullptr)
968 {
969 debugInfo->Finalize();
970 }
971#endif // ENABLE_RR_DEBUG_INFO
972
John Bauman89401822014-05-06 15:04:28 -0400973 if(false)
974 {
Ben Clayton5875be52019-04-11 14:57:40 -0400975 std::error_code error;
976 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400977 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -0400978 }
979
980 if(runOptimizations)
981 {
982 optimize();
983 }
984
985 if(false)
986 {
Ben Clayton5875be52019-04-11 14:57:40 -0400987 std::error_code error;
988 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400989 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -0400990 }
991
Logan Chien52cde602018-09-03 19:37:57 +0800992 LLVMRoutine *routine = ::reactorJIT->acquireRoutine(::function);
John Bauman89401822014-05-06 15:04:28 -0400993
John Bauman89401822014-05-06 15:04:28 -0400994 return routine;
995 }
996
997 void Nucleus::optimize()
998 {
Logan Chien52cde602018-09-03 19:37:57 +0800999 ::reactorJIT->optimize(::module);
John Bauman89401822014-05-06 15:04:28 -04001000 }
1001
John Bauman19bac1e2014-05-06 15:23:49 -04001002 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
John Bauman89401822014-05-06 15:04:28 -04001003 {
1004 // Need to allocate it in the entry block for mem2reg to work
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001005 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
John Bauman89401822014-05-06 15:04:28 -04001006
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001007 llvm::Instruction *declaration;
John Bauman89401822014-05-06 15:04:28 -04001008
1009 if(arraySize)
1010 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001011 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
John Bauman89401822014-05-06 15:04:28 -04001012 }
1013 else
1014 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001015 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
John Bauman89401822014-05-06 15:04:28 -04001016 }
1017
1018 entryBlock.getInstList().push_front(declaration);
1019
Nicolas Capens19336542016-09-26 10:32:29 -04001020 return V(declaration);
John Bauman89401822014-05-06 15:04:28 -04001021 }
1022
1023 BasicBlock *Nucleus::createBasicBlock()
1024 {
Logan Chien191b3052018-08-31 16:57:15 +08001025 return B(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001026 }
1027
1028 BasicBlock *Nucleus::getInsertBlock()
1029 {
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001030 return B(::builder->GetInsertBlock());
John Bauman89401822014-05-06 15:04:28 -04001031 }
1032
1033 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1034 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001035 // assert(::builder->GetInsertBlock()->back().isTerminator());
Nicolas Capens0192d152019-03-27 14:46:07 -04001036
1037 Variable::materializeAll();
1038
Logan Chien191b3052018-08-31 16:57:15 +08001039 ::builder->SetInsertPoint(B(basicBlock));
John Bauman89401822014-05-06 15:04:28 -04001040 }
1041
Nicolas Capensac230122016-09-20 14:30:06 -04001042 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
John Bauman89401822014-05-06 15:04:28 -04001043 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001044 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001045 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
1046 ::function->setCallingConv(llvm::CallingConv::C);
John Bauman89401822014-05-06 15:04:28 -04001047
Ben Clayton5875be52019-04-11 14:57:40 -04001048 #if defined(_WIN32)
Nicolas Capens52551d12018-09-13 14:30:56 -04001049 // FIXME(capn):
1050 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
1051 // having a trap which allows the OS to grow the stack. For functions with a stack frame
1052 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
1053 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
1054 // the stack and ensure all pages have been committed. This is currently broken in LLVM
1055 // JIT, but we can prevent emitting the stack probe call:
1056 ::function->addFnAttr("stack-probe-size", "1048576");
1057 #endif
1058
Ben Claytonac07ed82019-03-26 14:17:41 +00001059#ifdef ENABLE_RR_DEBUG_INFO
1060 ::debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(::builder, ::context, ::module, ::function));
1061#endif // ENABLE_RR_DEBUG_INFO
1062
Logan Chien191b3052018-08-31 16:57:15 +08001063 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001064 }
1065
Nicolas Capens19336542016-09-26 10:32:29 -04001066 Value *Nucleus::getArgument(unsigned int index)
John Bauman89401822014-05-06 15:04:28 -04001067 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001068 llvm::Function::arg_iterator args = ::function->arg_begin();
John Bauman89401822014-05-06 15:04:28 -04001069
1070 while(index)
1071 {
1072 args++;
1073 index--;
1074 }
1075
Nicolas Capens19336542016-09-26 10:32:29 -04001076 return V(&*args);
John Bauman89401822014-05-06 15:04:28 -04001077 }
1078
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001079 void Nucleus::createRetVoid()
John Bauman89401822014-05-06 15:04:28 -04001080 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001081 RR_DEBUG_INFO_UPDATE_LOC();
1082
Ben Claytonc958b172019-05-02 12:20:59 +01001083 ASSERT_MSG(::function->getReturnType() == T(Void::getType()), "Return type mismatch");
1084
Nicolas Capens0192d152019-03-27 14:46:07 -04001085 // Code generated after this point is unreachable, so any variables
1086 // being read can safely return an undefined value. We have to avoid
1087 // materializing variables after the terminator ret instruction.
1088 Variable::killUnmaterialized();
1089
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001090 ::builder->CreateRetVoid();
John Bauman89401822014-05-06 15:04:28 -04001091 }
1092
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001093 void Nucleus::createRet(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001094 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001095 RR_DEBUG_INFO_UPDATE_LOC();
1096
Ben Claytonc958b172019-05-02 12:20:59 +01001097 ASSERT_MSG(::function->getReturnType() == V(v)->getType(), "Return type mismatch");
1098
Nicolas Capens0192d152019-03-27 14:46:07 -04001099 // Code generated after this point is unreachable, so any variables
1100 // being read can safely return an undefined value. We have to avoid
1101 // materializing variables after the terminator ret instruction.
1102 Variable::killUnmaterialized();
1103
Logan Chien191b3052018-08-31 16:57:15 +08001104 ::builder->CreateRet(V(v));
John Bauman89401822014-05-06 15:04:28 -04001105 }
1106
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001107 void Nucleus::createBr(BasicBlock *dest)
John Bauman89401822014-05-06 15:04:28 -04001108 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001109 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001110 Variable::materializeAll();
1111
Logan Chien191b3052018-08-31 16:57:15 +08001112 ::builder->CreateBr(B(dest));
John Bauman89401822014-05-06 15:04:28 -04001113 }
1114
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001115 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001116 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001117 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001118 Variable::materializeAll();
Logan Chien191b3052018-08-31 16:57:15 +08001119 ::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
John Bauman89401822014-05-06 15:04:28 -04001120 }
1121
1122 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1123 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001124 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001125 return V(::builder->CreateAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001126 }
1127
1128 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1129 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001130 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001131 return V(::builder->CreateSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001132 }
1133
1134 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1135 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001136 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001137 return V(::builder->CreateMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001138 }
1139
1140 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1141 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001142 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001143 return V(::builder->CreateUDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001144 }
1145
1146 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1147 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001148 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001149 return V(::builder->CreateSDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001150 }
1151
1152 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1153 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001154 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001155 return V(::builder->CreateFAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001156 }
1157
1158 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1159 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001160 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001161 return V(::builder->CreateFSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001162 }
1163
1164 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1165 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001166 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001167 return V(::builder->CreateFMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001168 }
1169
1170 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1171 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001172 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001173 return V(::builder->CreateFDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001174 }
1175
1176 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1177 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001178 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001179 return V(::builder->CreateURem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001180 }
1181
1182 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1183 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001184 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001185 return V(::builder->CreateSRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001186 }
1187
1188 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1189 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001190 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001191 return V(::builder->CreateFRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001192 }
1193
1194 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1195 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001196 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001197 return V(::builder->CreateShl(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001198 }
1199
1200 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1201 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001202 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001203 return V(::builder->CreateLShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001204 }
1205
1206 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1207 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001208 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001209 return V(::builder->CreateAShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001210 }
1211
1212 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1213 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001214 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001215 return V(::builder->CreateAnd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001216 }
1217
1218 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1219 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001220 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001221 return V(::builder->CreateOr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001222 }
1223
1224 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1225 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001226 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001227 return V(::builder->CreateXor(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001228 }
1229
Nicolas Capens19336542016-09-26 10:32:29 -04001230 Value *Nucleus::createNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001231 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001232 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001233 return V(::builder->CreateNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001234 }
1235
Nicolas Capens19336542016-09-26 10:32:29 -04001236 Value *Nucleus::createFNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001237 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001238 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001239 return V(::builder->CreateFNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001240 }
1241
Nicolas Capens19336542016-09-26 10:32:29 -04001242 Value *Nucleus::createNot(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001243 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001244 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001245 return V(::builder->CreateNot(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001246 }
1247
Nicolas Capens86509d92019-03-21 13:23:50 -04001248 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001249 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001250 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001251 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001252 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001253 case Type_v2i32:
1254 case Type_v4i16:
1255 case Type_v8i8:
1256 case Type_v2f32:
1257 return createBitCast(
1258 createInsertElement(
1259 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
Nicolas Capens86509d92019-03-21 13:23:50 -04001260 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001261 0),
1262 type);
1263 case Type_v2i16:
1264 case Type_v4i8:
1265 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001266 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001267 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Nicolas Capens86509d92019-03-21 13:23:50 -04001268 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001269 i = createZExt(i, Long::getType());
1270 Value *v = createInsertElement(u, i, 0);
1271 return createBitCast(v, type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001272 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001273 // Fallthrough to non-emulated case.
1274 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001275 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001276 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens86509d92019-03-21 13:23:50 -04001277 auto load = new llvm::LoadInst(V(ptr), "", isVolatile, alignment);
1278 load->setAtomic(atomicOrdering(atomic, memoryOrder));
1279
1280 return V(::builder->Insert(load));
1281 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001282 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001283 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1284 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001285 }
John Bauman89401822014-05-06 15:04:28 -04001286 }
1287
Nicolas Capens86509d92019-03-21 13:23:50 -04001288 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001289 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001290 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001291 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001292 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001293 case Type_v2i32:
1294 case Type_v4i16:
1295 case Type_v8i8:
1296 case Type_v2f32:
1297 createStore(
1298 createExtractElement(
1299 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1300 createBitCast(ptr, Pointer<Long>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001301 Long::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001302 return value;
1303 case Type_v2i16:
1304 case Type_v4i8:
1305 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001306 {
Logan Chien191b3052018-08-31 16:57:15 +08001307 createStore(
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001308 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1309 createBitCast(ptr, Pointer<Int>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001310 Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens01a97962017-07-28 17:30:51 -04001311 return value;
Nicolas Capens01a97962017-07-28 17:30:51 -04001312 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001313 // Fallthrough to non-emulated case.
1314 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001315 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001316 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens86509d92019-03-21 13:23:50 -04001317 auto store = ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
1318 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1319
1320 return value;
1321 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001322 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001323 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1324 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001325 }
John Bauman89401822014-05-06 15:04:28 -04001326 }
1327
Ben Clayton0fc611f2019-04-18 11:23:27 -04001328 Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment)
1329 {
1330 ASSERT(V(base)->getType()->isPointerTy());
1331 ASSERT(V(offsets)->getType()->isVectorTy());
1332 ASSERT(V(mask)->getType()->isVectorTy());
1333
1334 auto numEls = V(mask)->getType()->getVectorNumElements();
1335 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
1336 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
1337 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1338 auto i8PtrTy = i8Ty->getPointerTo();
1339 auto elPtrTy = T(elTy)->getPointerTo();
1340 auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
1341 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
1342 auto i8Base = ::builder->CreatePointerCast(V(base), i8PtrTy);
1343 auto i8Ptrs = ::builder->CreateGEP(i8Base, V(offsets));
1344 auto elPtrs = ::builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1345 auto i8Mask = ::builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
1346 auto passthrough = ::llvm::Constant::getNullValue(elVecTy);
1347 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
1348 auto func = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
1349 return V(::builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough }));
1350 }
1351
1352 void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment)
1353 {
1354 ASSERT(V(base)->getType()->isPointerTy());
1355 ASSERT(V(val)->getType()->isVectorTy());
1356 ASSERT(V(offsets)->getType()->isVectorTy());
1357 ASSERT(V(mask)->getType()->isVectorTy());
1358
1359 auto numEls = V(mask)->getType()->getVectorNumElements();
1360 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
1361 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
1362 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1363 auto i8PtrTy = i8Ty->getPointerTo();
1364 auto elVecTy = V(val)->getType();
1365 auto elTy = elVecTy->getVectorElementType();
1366 auto elPtrTy = elTy->getPointerTo();
1367 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
1368 auto i8Base = ::builder->CreatePointerCast(V(base), i8PtrTy);
1369 auto i8Ptrs = ::builder->CreateGEP(i8Base, V(offsets));
1370 auto elPtrs = ::builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1371 auto i8Mask = ::builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
1372 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
1373 auto func = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_scatter, { elVecTy, elPtrVecTy } );
1374 ::builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
1375 }
1376
Nicolas Capensd294def2017-01-26 17:44:37 -08001377 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
John Bauman89401822014-05-06 15:04:28 -04001378 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001379 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001380 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens01a97962017-07-28 17:30:51 -04001381 if(sizeof(void*) == 8)
Nicolas Capensd294def2017-01-26 17:44:37 -08001382 {
Ben Claytonb1243732019-02-27 23:56:18 +00001383 // LLVM manual: "When indexing into an array, pointer or vector,
1384 // integers of any width are allowed, and they are not required to
1385 // be constant. These integers are treated as signed values where
1386 // relevant."
1387 //
1388 // Thus if we want indexes to be treated as unsigned we have to
1389 // zero-extend them ourselves.
1390 //
1391 // Note that this is not because we want to address anywhere near
1392 // 4 GB of data. Instead this is important for performance because
1393 // x86 supports automatic zero-extending of 32-bit registers to
1394 // 64-bit. Thus when indexing into an array using a uint32 is
1395 // actually faster than an int32.
1396 index = unsignedIndex ?
1397 createZExt(index, Long::getType()) :
1398 createSExt(index, Long::getType());
Nicolas Capens01a97962017-07-28 17:30:51 -04001399 }
Ben Claytonb1243732019-02-27 23:56:18 +00001400
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001401 // For non-emulated types we can rely on LLVM's GEP to calculate the
1402 // effective address correctly.
1403 if(asInternalType(type) == Type_LLVM)
Nicolas Capens01a97962017-07-28 17:30:51 -04001404 {
Ben Claytonb1243732019-02-27 23:56:18 +00001405 return V(::builder->CreateGEP(V(ptr), V(index)));
Nicolas Capensd294def2017-01-26 17:44:37 -08001406 }
1407
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001408 // For emulated types we have to multiply the index by the intended
1409 // type size ourselves to obain the byte offset.
Ben Claytonb1243732019-02-27 23:56:18 +00001410 index = (sizeof(void*) == 8) ?
1411 createMul(index, createConstantLong((int64_t)typeSize(type))) :
1412 createMul(index, createConstantInt((int)typeSize(type)));
1413
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001414 // Cast to a byte pointer, apply the byte offset, and cast back to the
1415 // original pointer type.
Logan Chien191b3052018-08-31 16:57:15 +08001416 return createBitCast(
1417 V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
1418 T(llvm::PointerType::get(T(type), 0)));
John Bauman89401822014-05-06 15:04:28 -04001419 }
1420
Chris Forbes17813932019-04-18 11:45:54 -07001421 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
John Bauman19bac1e2014-05-06 15:23:49 -04001422 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001423 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbes17813932019-04-18 11:45:54 -07001424 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1425 }
1426
Chris Forbes707ed992019-04-18 18:17:35 -07001427 Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1428 {
1429 RR_DEBUG_INFO_UPDATE_LOC();
1430 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Sub, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1431 }
1432
Chris Forbes17813932019-04-18 11:45:54 -07001433 Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1434 {
1435 RR_DEBUG_INFO_UPDATE_LOC();
1436 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::And, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1437 }
1438
1439 Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1440 {
1441 RR_DEBUG_INFO_UPDATE_LOC();
1442 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Or, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1443 }
1444
1445 Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1446 {
1447 RR_DEBUG_INFO_UPDATE_LOC();
1448 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xor, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1449 }
1450
1451 Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1452 {
1453 RR_DEBUG_INFO_UPDATE_LOC();
1454 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Min, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1455 }
1456
1457 Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1458 {
1459 RR_DEBUG_INFO_UPDATE_LOC();
1460 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Max, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1461 }
1462
1463 Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1464 {
1465 RR_DEBUG_INFO_UPDATE_LOC();
1466 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
John Bauman19bac1e2014-05-06 15:23:49 -04001467 }
1468
Chris Forbesa16238d2019-04-18 16:31:54 -07001469 Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1470 {
1471 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbesc9ca99e2019-04-19 07:53:34 -07001472 // Note: AtomicCmpXchgInstruction returns a 2-member struct containing {result, success-flag}, not the result directly.
Chris Forbesa16238d2019-04-18 16:31:54 -07001473 return V(::builder->CreateExtractValue(
1474 ::builder->CreateAtomicCmpXchg(V(ptr), V(compare), V(value), atomicOrdering(true, memoryOrderEqual), atomicOrdering(true, memoryOrderUnequal)),
1475 llvm::ArrayRef<unsigned>(0u)));
1476 }
1477
Nicolas Capens19336542016-09-26 10:32:29 -04001478 Value *Nucleus::createTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001479 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001480 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001481 return V(::builder->CreateTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001482 }
1483
Nicolas Capens19336542016-09-26 10:32:29 -04001484 Value *Nucleus::createZExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001485 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001486 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001487 return V(::builder->CreateZExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001488 }
1489
Nicolas Capens19336542016-09-26 10:32:29 -04001490 Value *Nucleus::createSExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001491 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001492 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001493 return V(::builder->CreateSExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001494 }
1495
Nicolas Capens19336542016-09-26 10:32:29 -04001496 Value *Nucleus::createFPToSI(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001497 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001498 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001499 return V(::builder->CreateFPToSI(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001500 }
1501
Nicolas Capens19336542016-09-26 10:32:29 -04001502 Value *Nucleus::createSIToFP(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001503 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001504 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001505 return V(::builder->CreateSIToFP(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001506 }
1507
Nicolas Capens19336542016-09-26 10:32:29 -04001508 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001509 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001510 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001511 return V(::builder->CreateFPTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001512 }
1513
Nicolas Capens19336542016-09-26 10:32:29 -04001514 Value *Nucleus::createFPExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001515 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001516 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001517 return V(::builder->CreateFPExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001518 }
1519
Nicolas Capens19336542016-09-26 10:32:29 -04001520 Value *Nucleus::createBitCast(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001521 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001522 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04001523 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1524 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1525 // reading back as the destination type.
Logan Chien191b3052018-08-31 16:57:15 +08001526 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001527 {
1528 Value *readAddress = allocateStackVariable(destType);
Logan Chien191b3052018-08-31 16:57:15 +08001529 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1530 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001531 return createLoad(readAddress, destType);
1532 }
Logan Chien191b3052018-08-31 16:57:15 +08001533 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001534 {
Logan Chien191b3052018-08-31 16:57:15 +08001535 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1536 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001537 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1538 return createLoad(readAddress, destType);
1539 }
1540
Logan Chien191b3052018-08-31 16:57:15 +08001541 return V(::builder->CreateBitCast(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001542 }
1543
John Bauman89401822014-05-06 15:04:28 -04001544 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1545 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001546 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001547 return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001548 }
1549
1550 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1551 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001552 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001553 return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001554 }
1555
1556 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1557 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001558 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001559 return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001560 }
1561
1562 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1563 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001564 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001565 return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001566 }
1567
1568 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1569 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001570 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001571 return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001572 }
1573
1574 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1575 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001576 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001577 return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001578 }
1579
1580 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1581 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001582 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001583 return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001584 }
1585
1586 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1587 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001588 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001589 return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001590 }
1591
1592 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1593 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001594 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001595 return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001596 }
1597
1598 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1599 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001600 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001601 return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001602 }
1603
1604 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1605 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001606 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001607 return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001608 }
1609
1610 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1611 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001612 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001613 return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001614 }
1615
1616 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1617 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001618 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001619 return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001620 }
1621
1622 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1623 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001624 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001625 return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001626 }
1627
1628 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1629 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001630 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001631 return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001632 }
1633
1634 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1635 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001636 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001637 return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001638 }
1639
1640 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1641 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001642 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001643 return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001644 }
1645
1646 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1647 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001648 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001649 return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001650 }
1651
1652 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1653 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001654 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001655 return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001656 }
1657
1658 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1659 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001660 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001661 return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001662 }
1663
1664 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1665 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001666 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001667 return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001668 }
1669
1670 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1671 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001672 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001673 return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001674 }
1675
1676 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1677 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001678 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001679 return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001680 }
1681
1682 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1683 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001684 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton71008d82019-03-05 17:17:59 +00001685 return V(::builder->CreateFCmpUNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001686 }
1687
Nicolas Capense95d5342016-09-30 11:37:28 -04001688 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
John Bauman89401822014-05-06 15:04:28 -04001689 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001690 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001691 ASSERT(V(vector)->getType()->getContainedType(0) == T(type));
Logan Chien191b3052018-08-31 16:57:15 +08001692 return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001693 }
1694
1695 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1696 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001697 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001698 return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001699 }
1700
Logan Chien191b3052018-08-31 16:57:15 +08001701 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
John Bauman89401822014-05-06 15:04:28 -04001702 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001703 RR_DEBUG_INFO_UPDATE_LOC();
1704
Logan Chien191b3052018-08-31 16:57:15 +08001705 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
Nicolas Capense89cd582016-09-30 14:23:47 -04001706 const int maxSize = 16;
1707 llvm::Constant *swizzle[maxSize];
Ben Claytoneb50d252019-04-15 13:50:01 -04001708 ASSERT(size <= maxSize);
Nicolas Capense89cd582016-09-30 14:23:47 -04001709
1710 for(int i = 0; i < size; i++)
1711 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001712 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
Nicolas Capense89cd582016-09-30 14:23:47 -04001713 }
1714
1715 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
1716
Logan Chien191b3052018-08-31 16:57:15 +08001717 return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
John Bauman89401822014-05-06 15:04:28 -04001718 }
1719
Logan Chien191b3052018-08-31 16:57:15 +08001720 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001721 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001722 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001723 return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
John Bauman89401822014-05-06 15:04:28 -04001724 }
1725
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001726 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
John Bauman89401822014-05-06 15:04:28 -04001727 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001728 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001729 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
John Bauman89401822014-05-06 15:04:28 -04001730 }
1731
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001732 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
John Bauman89401822014-05-06 15:04:28 -04001733 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001734 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001735 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
1736 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
John Bauman89401822014-05-06 15:04:28 -04001737 }
1738
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001739 void Nucleus::createUnreachable()
John Bauman89401822014-05-06 15:04:28 -04001740 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001741 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001742 ::builder->CreateUnreachable();
John Bauman89401822014-05-06 15:04:28 -04001743 }
1744
Nicolas Capensac230122016-09-20 14:30:06 -04001745 Type *Nucleus::getPointerType(Type *ElementType)
John Bauman89401822014-05-06 15:04:28 -04001746 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001747 return T(llvm::PointerType::get(T(ElementType), 0));
John Bauman89401822014-05-06 15:04:28 -04001748 }
1749
Nicolas Capens13ac2322016-10-13 14:52:12 -04001750 Value *Nucleus::createNullValue(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001751 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001752 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001753 return V(llvm::Constant::getNullValue(T(Ty)));
John Bauman89401822014-05-06 15:04:28 -04001754 }
1755
Nicolas Capens13ac2322016-10-13 14:52:12 -04001756 Value *Nucleus::createConstantLong(int64_t i)
John Bauman89401822014-05-06 15:04:28 -04001757 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001758 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001759 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001760 }
1761
Nicolas Capens13ac2322016-10-13 14:52:12 -04001762 Value *Nucleus::createConstantInt(int i)
John Bauman89401822014-05-06 15:04:28 -04001763 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001764 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001765 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001766 }
1767
Nicolas Capens13ac2322016-10-13 14:52:12 -04001768 Value *Nucleus::createConstantInt(unsigned int i)
John Bauman89401822014-05-06 15:04:28 -04001769 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001770 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001771 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001772 }
1773
Nicolas Capens13ac2322016-10-13 14:52:12 -04001774 Value *Nucleus::createConstantBool(bool b)
John Bauman89401822014-05-06 15:04:28 -04001775 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001776 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001777 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
John Bauman89401822014-05-06 15:04:28 -04001778 }
1779
Nicolas Capens13ac2322016-10-13 14:52:12 -04001780 Value *Nucleus::createConstantByte(signed char i)
John Bauman89401822014-05-06 15:04:28 -04001781 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001782 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001783 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001784 }
1785
Nicolas Capens13ac2322016-10-13 14:52:12 -04001786 Value *Nucleus::createConstantByte(unsigned char i)
John Bauman89401822014-05-06 15:04:28 -04001787 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001788 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001789 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001790 }
1791
Nicolas Capens13ac2322016-10-13 14:52:12 -04001792 Value *Nucleus::createConstantShort(short i)
John Bauman89401822014-05-06 15:04:28 -04001793 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001794 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001795 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001796 }
1797
Nicolas Capens13ac2322016-10-13 14:52:12 -04001798 Value *Nucleus::createConstantShort(unsigned short i)
John Bauman89401822014-05-06 15:04:28 -04001799 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001800 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001801 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001802 }
1803
Nicolas Capens13ac2322016-10-13 14:52:12 -04001804 Value *Nucleus::createConstantFloat(float x)
John Bauman89401822014-05-06 15:04:28 -04001805 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001806 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001807 return V(llvm::ConstantFP::get(T(Float::getType()), x));
John Bauman89401822014-05-06 15:04:28 -04001808 }
1809
Nicolas Capens13ac2322016-10-13 14:52:12 -04001810 Value *Nucleus::createNullPointer(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001811 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001812 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001813 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
John Bauman89401822014-05-06 15:04:28 -04001814 }
1815
Nicolas Capens13ac2322016-10-13 14:52:12 -04001816 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
John Bauman89401822014-05-06 15:04:28 -04001817 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001818 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001819 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1820 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001821 ASSERT(numElements <= 16 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001822 llvm::Constant *constantVector[16];
1823
Nicolas Capens69674fb2017-09-01 11:08:44 -04001824 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001825 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001826 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001827 }
1828
Nicolas Capens69674fb2017-09-01 11:08:44 -04001829 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
Nicolas Capens13ac2322016-10-13 14:52:12 -04001830 }
1831
1832 Value *Nucleus::createConstantVector(const double *constants, Type *type)
1833 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001834 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001835 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1836 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001837 ASSERT(numElements <= 8 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001838 llvm::Constant *constantVector[8];
1839
Nicolas Capens69674fb2017-09-01 11:08:44 -04001840 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001841 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001842 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001843 }
1844
Nicolas Capens69674fb2017-09-01 11:08:44 -04001845 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
John Bauman89401822014-05-06 15:04:28 -04001846 }
1847
John Bauman19bac1e2014-05-06 15:23:49 -04001848 Type *Void::getType()
John Bauman89401822014-05-06 15:04:28 -04001849 {
Nicolas Capensac230122016-09-20 14:30:06 -04001850 return T(llvm::Type::getVoidTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04001851 }
1852
John Bauman19bac1e2014-05-06 15:23:49 -04001853 Type *Bool::getType()
John Bauman89401822014-05-06 15:04:28 -04001854 {
Nicolas Capensac230122016-09-20 14:30:06 -04001855 return T(llvm::Type::getInt1Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001856 }
1857
John Bauman19bac1e2014-05-06 15:23:49 -04001858 Type *Byte::getType()
John Bauman89401822014-05-06 15:04:28 -04001859 {
Nicolas Capensac230122016-09-20 14:30:06 -04001860 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001861 }
1862
John Bauman19bac1e2014-05-06 15:23:49 -04001863 Type *SByte::getType()
John Bauman89401822014-05-06 15:04:28 -04001864 {
Nicolas Capensac230122016-09-20 14:30:06 -04001865 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001866 }
1867
John Bauman19bac1e2014-05-06 15:23:49 -04001868 Type *Short::getType()
John Bauman89401822014-05-06 15:04:28 -04001869 {
Nicolas Capensac230122016-09-20 14:30:06 -04001870 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001871 }
1872
John Bauman19bac1e2014-05-06 15:23:49 -04001873 Type *UShort::getType()
John Bauman89401822014-05-06 15:04:28 -04001874 {
Nicolas Capensac230122016-09-20 14:30:06 -04001875 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001876 }
1877
John Bauman19bac1e2014-05-06 15:23:49 -04001878 Type *Byte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001879 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001880 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001881 }
1882
John Bauman19bac1e2014-05-06 15:23:49 -04001883 Type *SByte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001884 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001885 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001886 }
1887
John Bauman19bac1e2014-05-06 15:23:49 -04001888 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001889 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001890 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001891#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001892 return x86::paddusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001893#else
1894 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
1895#endif
John Bauman89401822014-05-06 15:04:28 -04001896 }
John Bauman66b8ab22014-05-06 15:57:45 -04001897
John Bauman19bac1e2014-05-06 15:23:49 -04001898 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001899 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001900 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001901#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001902 return x86::psubusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001903#else
1904 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
1905#endif
John Bauman89401822014-05-06 15:04:28 -04001906 }
1907
John Bauman19bac1e2014-05-06 15:23:49 -04001908 RValue<Int> SignMask(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04001909 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001910 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001911#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001912 return x86::pmovmskb(x);
Logan Chiene3191012018-08-24 22:01:50 +08001913#else
1914 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
1915#endif
John Bauman89401822014-05-06 15:04:28 -04001916 }
1917
John Bauman19bac1e2014-05-06 15:23:49 -04001918// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001919// {
Logan Chiene3191012018-08-24 22:01:50 +08001920//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001921// return x86::pcmpgtb(x, y); // FIXME: Signedness
Logan Chiene3191012018-08-24 22:01:50 +08001922//#else
1923// return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
1924//#endif
John Bauman89401822014-05-06 15:04:28 -04001925// }
John Bauman66b8ab22014-05-06 15:57:45 -04001926
John Bauman19bac1e2014-05-06 15:23:49 -04001927 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001928 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001929 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001930#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001931 return x86::pcmpeqb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001932#else
1933 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
1934#endif
John Bauman89401822014-05-06 15:04:28 -04001935 }
1936
John Bauman19bac1e2014-05-06 15:23:49 -04001937 Type *Byte8::getType()
John Bauman89401822014-05-06 15:04:28 -04001938 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001939 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04001940 }
1941
John Bauman19bac1e2014-05-06 15:23:49 -04001942 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001943 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001944 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001945#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001946 return x86::paddsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001947#else
1948 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
1949#endif
John Bauman89401822014-05-06 15:04:28 -04001950 }
John Bauman66b8ab22014-05-06 15:57:45 -04001951
John Bauman19bac1e2014-05-06 15:23:49 -04001952 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001953 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001954 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001955#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001956 return x86::psubsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001957#else
1958 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
1959#endif
John Bauman89401822014-05-06 15:04:28 -04001960 }
1961
John Bauman19bac1e2014-05-06 15:23:49 -04001962 RValue<Int> SignMask(RValue<SByte8> x)
John Bauman89401822014-05-06 15:04:28 -04001963 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001964 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001965#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001966 return x86::pmovmskb(As<Byte8>(x));
Logan Chiene3191012018-08-24 22:01:50 +08001967#else
1968 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
1969#endif
John Bauman89401822014-05-06 15:04:28 -04001970 }
1971
John Bauman19bac1e2014-05-06 15:23:49 -04001972 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001973 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001974 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001975#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001976 return x86::pcmpgtb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001977#else
1978 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
1979#endif
John Bauman89401822014-05-06 15:04:28 -04001980 }
John Bauman66b8ab22014-05-06 15:57:45 -04001981
John Bauman19bac1e2014-05-06 15:23:49 -04001982 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001983 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001984 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001985#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001986 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
Logan Chiene3191012018-08-24 22:01:50 +08001987#else
1988 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
1989#endif
John Bauman89401822014-05-06 15:04:28 -04001990 }
1991
John Bauman19bac1e2014-05-06 15:23:49 -04001992 Type *SByte8::getType()
John Bauman89401822014-05-06 15:04:28 -04001993 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001994 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04001995 }
1996
John Bauman19bac1e2014-05-06 15:23:49 -04001997 Type *Byte16::getType()
John Bauman89401822014-05-06 15:04:28 -04001998 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001999 return T(llvm::VectorType::get(T(Byte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002000 }
2001
John Bauman19bac1e2014-05-06 15:23:49 -04002002 Type *SByte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002003 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002004 return T(llvm::VectorType::get(T(SByte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002005 }
2006
Nicolas Capens16b5f152016-10-13 13:39:01 -04002007 Type *Short2::getType()
2008 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002009 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002010 }
2011
Nicolas Capens16b5f152016-10-13 13:39:01 -04002012 Type *UShort2::getType()
2013 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002014 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002015 }
2016
John Bauman19bac1e2014-05-06 15:23:49 -04002017 Short4::Short4(RValue<Int4> cast)
John Bauman89401822014-05-06 15:04:28 -04002018 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002019 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04002020 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
John Bauman89401822014-05-06 15:04:28 -04002021 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2022
Nicolas Capens01a97962017-07-28 17:30:51 -04002023 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2024 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
John Bauman89401822014-05-06 15:04:28 -04002025
John Bauman66b8ab22014-05-06 15:57:45 -04002026 storeValue(short4);
John Bauman89401822014-05-06 15:04:28 -04002027 }
2028
John Bauman19bac1e2014-05-06 15:23:49 -04002029// Short4::Short4(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002030// {
2031// }
2032
John Bauman19bac1e2014-05-06 15:23:49 -04002033 Short4::Short4(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002034 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002035 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002036 Int4 v4i32 = Int4(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002037#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002038 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
Logan Chiene3191012018-08-24 22:01:50 +08002039#else
2040 Value *v = v4i32.loadValue();
2041 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
2042#endif
John Bauman66b8ab22014-05-06 15:57:45 -04002043
2044 storeValue(As<Short4>(Int2(v4i32)).value);
John Bauman89401822014-05-06 15:04:28 -04002045 }
2046
John Bauman19bac1e2014-05-06 15:23:49 -04002047 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002048 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002049 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002050#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002051 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2052
2053 return x86::psllw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002054#else
2055 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
2056#endif
John Bauman89401822014-05-06 15:04:28 -04002057 }
2058
John Bauman19bac1e2014-05-06 15:23:49 -04002059 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002060 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002061 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002062#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002063 return x86::psraw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002064#else
2065 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2066#endif
John Bauman89401822014-05-06 15:04:28 -04002067 }
2068
John Bauman19bac1e2014-05-06 15:23:49 -04002069 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002070 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002071 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002072#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002073 return x86::pmaxsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002074#else
2075 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
2076#endif
John Bauman89401822014-05-06 15:04:28 -04002077 }
2078
John Bauman19bac1e2014-05-06 15:23:49 -04002079 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002080 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002081 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002082#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002083 return x86::pminsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002084#else
2085 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
2086#endif
John Bauman89401822014-05-06 15:04:28 -04002087 }
2088
John Bauman19bac1e2014-05-06 15:23:49 -04002089 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002090 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002091 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002092#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002093 return x86::paddsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002094#else
2095 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2096#endif
John Bauman89401822014-05-06 15:04:28 -04002097 }
2098
John Bauman19bac1e2014-05-06 15:23:49 -04002099 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002100 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002101 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002102#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002103 return x86::psubsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002104#else
2105 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2106#endif
John Bauman89401822014-05-06 15:04:28 -04002107 }
2108
John Bauman19bac1e2014-05-06 15:23:49 -04002109 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002110 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002111 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002112#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002113 return x86::pmulhw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002114#else
2115 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2116#endif
John Bauman89401822014-05-06 15:04:28 -04002117 }
2118
John Bauman19bac1e2014-05-06 15:23:49 -04002119 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002120 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002121 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002122#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002123 return x86::pmaddwd(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002124#else
2125 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
2126#endif
John Bauman89401822014-05-06 15:04:28 -04002127 }
2128
Nicolas Capens33438a62017-09-27 11:47:35 -04002129 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002130 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002131 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002132#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002133 auto result = x86::packsswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002134#else
2135 auto result = V(lowerPack(V(x.value), V(y.value), true));
2136#endif
Nicolas Capens01a97962017-07-28 17:30:51 -04002137 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
John Bauman89401822014-05-06 15:04:28 -04002138 }
2139
Nicolas Capens33438a62017-09-27 11:47:35 -04002140 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2141 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002142 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002143#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002144 auto result = x86::packuswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002145#else
2146 auto result = V(lowerPack(V(x.value), V(y.value), false));
2147#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002148 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
2149 }
2150
John Bauman19bac1e2014-05-06 15:23:49 -04002151 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002152 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002153 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002154#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002155 return x86::pcmpgtw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002156#else
2157 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
2158#endif
John Bauman89401822014-05-06 15:04:28 -04002159 }
2160
John Bauman19bac1e2014-05-06 15:23:49 -04002161 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002162 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002163 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002164#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002165 return x86::pcmpeqw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002166#else
2167 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
2168#endif
John Bauman89401822014-05-06 15:04:28 -04002169 }
2170
John Bauman19bac1e2014-05-06 15:23:49 -04002171 Type *Short4::getType()
John Bauman89401822014-05-06 15:04:28 -04002172 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002173 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002174 }
2175
John Bauman19bac1e2014-05-06 15:23:49 -04002176 UShort4::UShort4(RValue<Float4> cast, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04002177 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002178 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002179 if(saturate)
2180 {
Logan Chiena8385ed2018-09-26 19:22:54 +08002181#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002182 if(CPUID::supportsSSE4_1())
2183 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002184 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
Nicolas Capens33438a62017-09-27 11:47:35 -04002185 *this = As<Short4>(PackUnsigned(int4, int4));
John Bauman89401822014-05-06 15:04:28 -04002186 }
2187 else
Logan Chiena8385ed2018-09-26 19:22:54 +08002188#endif
John Bauman89401822014-05-06 15:04:28 -04002189 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002190 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
John Bauman89401822014-05-06 15:04:28 -04002191 }
2192 }
2193 else
2194 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002195 *this = Short4(Int4(cast));
John Bauman89401822014-05-06 15:04:28 -04002196 }
2197 }
2198
John Bauman19bac1e2014-05-06 15:23:49 -04002199 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002200 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002201 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002202#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002203 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2204
2205 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002206#else
2207 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
2208#endif
John Bauman89401822014-05-06 15:04:28 -04002209 }
2210
John Bauman19bac1e2014-05-06 15:23:49 -04002211 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002212 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002213 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002214#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002215 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
2216
2217 return x86::psrlw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002218#else
2219 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2220#endif
John Bauman89401822014-05-06 15:04:28 -04002221 }
2222
John Bauman19bac1e2014-05-06 15:23:49 -04002223 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002224 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002225 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002226 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002227 }
2228
John Bauman19bac1e2014-05-06 15:23:49 -04002229 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002230 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002231 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002232 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002233 }
2234
John Bauman19bac1e2014-05-06 15:23:49 -04002235 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002236 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002237 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002238#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002239 return x86::paddusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002240#else
2241 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2242#endif
John Bauman89401822014-05-06 15:04:28 -04002243 }
2244
John Bauman19bac1e2014-05-06 15:23:49 -04002245 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002246 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002247 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002248#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002249 return x86::psubusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002250#else
2251 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2252#endif
John Bauman89401822014-05-06 15:04:28 -04002253 }
2254
John Bauman19bac1e2014-05-06 15:23:49 -04002255 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002256 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002257 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002258#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002259 return x86::pmulhuw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002260#else
2261 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2262#endif
John Bauman89401822014-05-06 15:04:28 -04002263 }
2264
John Bauman19bac1e2014-05-06 15:23:49 -04002265 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002266 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002267 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002268#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002269 return x86::pavgw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002270#else
2271 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
2272#endif
John Bauman89401822014-05-06 15:04:28 -04002273 }
2274
John Bauman19bac1e2014-05-06 15:23:49 -04002275 Type *UShort4::getType()
John Bauman89401822014-05-06 15:04:28 -04002276 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002277 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002278 }
2279
John Bauman19bac1e2014-05-06 15:23:49 -04002280 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002281 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002282 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002283#if defined(__i386__) || defined(__x86_64__)
2284 return x86::psllw(lhs, rhs);
2285#else
2286 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
2287#endif
John Bauman89401822014-05-06 15:04:28 -04002288 }
2289
John Bauman19bac1e2014-05-06 15:23:49 -04002290 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002291 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002292 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002293#if defined(__i386__) || defined(__x86_64__)
2294 return x86::psraw(lhs, rhs);
2295#else
2296 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
2297#endif
John Bauman89401822014-05-06 15:04:28 -04002298 }
2299
John Bauman19bac1e2014-05-06 15:23:49 -04002300 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002301 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002302 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002303#if defined(__i386__) || defined(__x86_64__)
2304 return x86::pmaddwd(x, y);
2305#else
2306 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
2307#endif
John Bauman89401822014-05-06 15:04:28 -04002308 }
2309
John Bauman19bac1e2014-05-06 15:23:49 -04002310 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002311 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002312 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002313#if defined(__i386__) || defined(__x86_64__)
2314 return x86::pmulhw(x, y);
2315#else
2316 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2317#endif
John Bauman89401822014-05-06 15:04:28 -04002318 }
2319
John Bauman19bac1e2014-05-06 15:23:49 -04002320 Type *Short8::getType()
John Bauman89401822014-05-06 15:04:28 -04002321 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002322 return T(llvm::VectorType::get(T(Short::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002323 }
2324
John Bauman19bac1e2014-05-06 15:23:49 -04002325 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002326 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002327 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002328#if defined(__i386__) || defined(__x86_64__)
2329 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
2330#else
2331 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
2332#endif
John Bauman89401822014-05-06 15:04:28 -04002333 }
2334
John Bauman19bac1e2014-05-06 15:23:49 -04002335 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002336 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002337 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002338#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002339 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
Logan Chiene3191012018-08-24 22:01:50 +08002340#else
2341 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
2342#endif
John Bauman89401822014-05-06 15:04:28 -04002343 }
2344
John Bauman19bac1e2014-05-06 15:23:49 -04002345 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
John Bauman89401822014-05-06 15:04:28 -04002346 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002347 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense89cd582016-09-30 14:23:47 -04002348 int pshufb[16] =
2349 {
2350 select0 + 0,
2351 select0 + 1,
2352 select1 + 0,
2353 select1 + 1,
2354 select2 + 0,
2355 select2 + 1,
2356 select3 + 0,
2357 select3 + 1,
2358 select4 + 0,
2359 select4 + 1,
2360 select5 + 0,
2361 select5 + 1,
2362 select6 + 0,
2363 select6 + 1,
2364 select7 + 0,
2365 select7 + 1,
2366 };
John Bauman89401822014-05-06 15:04:28 -04002367
2368 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
Nicolas Capense89cd582016-09-30 14:23:47 -04002369 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
John Bauman89401822014-05-06 15:04:28 -04002370 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
2371
2372 return RValue<UShort8>(short8);
2373 }
2374
John Bauman19bac1e2014-05-06 15:23:49 -04002375 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04002376 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002377 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002378#if defined(__i386__) || defined(__x86_64__)
2379 return x86::pmulhuw(x, y);
2380#else
2381 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2382#endif
John Bauman89401822014-05-06 15:04:28 -04002383 }
2384
John Bauman19bac1e2014-05-06 15:23:49 -04002385 Type *UShort8::getType()
John Bauman89401822014-05-06 15:04:28 -04002386 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002387 return T(llvm::VectorType::get(T(UShort::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002388 }
2389
Nicolas Capens96d4e092016-11-18 14:22:38 -05002390 RValue<Int> operator++(Int &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002391 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002392 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002393 RValue<Int> res = val;
2394
Logan Chien191b3052018-08-31 16:57:15 +08002395 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002396 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002397
2398 return res;
2399 }
2400
Nicolas Capens96d4e092016-11-18 14:22:38 -05002401 const Int &operator++(Int &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002402 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002403 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002404 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002405 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002406
2407 return val;
2408 }
2409
Nicolas Capens96d4e092016-11-18 14:22:38 -05002410 RValue<Int> operator--(Int &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002411 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002412 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002413 RValue<Int> res = val;
2414
Logan Chien191b3052018-08-31 16:57:15 +08002415 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002416 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002417
2418 return res;
2419 }
2420
Nicolas Capens96d4e092016-11-18 14:22:38 -05002421 const Int &operator--(Int &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002422 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002423 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002424 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002425 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002426
2427 return val;
2428 }
2429
John Bauman19bac1e2014-05-06 15:23:49 -04002430 RValue<Int> RoundInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002431 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002432 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002433#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002434 return x86::cvtss2si(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002435#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002436 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002437#endif
John Bauman89401822014-05-06 15:04:28 -04002438 }
2439
John Bauman19bac1e2014-05-06 15:23:49 -04002440 Type *Int::getType()
John Bauman89401822014-05-06 15:04:28 -04002441 {
Nicolas Capensac230122016-09-20 14:30:06 -04002442 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002443 }
2444
John Bauman19bac1e2014-05-06 15:23:49 -04002445 Type *Long::getType()
John Bauman89401822014-05-06 15:04:28 -04002446 {
Nicolas Capensac230122016-09-20 14:30:06 -04002447 return T(llvm::Type::getInt64Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002448 }
2449
John Bauman19bac1e2014-05-06 15:23:49 -04002450 UInt::UInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002451 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002452 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002453 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2454 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
John Bauman89401822014-05-06 15:04:28 -04002455
Alexis Hetu764d1422016-09-28 08:44:22 -04002456 // Smallest positive value representable in UInt, but not in Int
2457 const unsigned int ustart = 0x80000000u;
2458 const float ustartf = float(ustart);
2459
2460 // If the value is negative, store 0, otherwise store the result of the conversion
2461 storeValue((~(As<Int>(cast) >> 31) &
2462 // Check if the value can be represented as an Int
2463 IfThenElse(cast >= ustartf,
2464 // If the value is too large, subtract ustart and re-add it after conversion.
2465 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2466 // Otherwise, just convert normally
2467 Int(cast))).value);
John Bauman89401822014-05-06 15:04:28 -04002468 }
2469
Nicolas Capens96d4e092016-11-18 14:22:38 -05002470 RValue<UInt> operator++(UInt &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002471 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002472 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002473 RValue<UInt> res = val;
2474
Logan Chien191b3052018-08-31 16:57:15 +08002475 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002476 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002477
2478 return res;
2479 }
2480
Nicolas Capens96d4e092016-11-18 14:22:38 -05002481 const UInt &operator++(UInt &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002482 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002483 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002484 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002485 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002486
2487 return val;
2488 }
2489
Nicolas Capens96d4e092016-11-18 14:22:38 -05002490 RValue<UInt> operator--(UInt &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002491 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002492 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002493 RValue<UInt> res = val;
2494
Logan Chien191b3052018-08-31 16:57:15 +08002495 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002496 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002497
2498 return res;
2499 }
2500
Nicolas Capens96d4e092016-11-18 14:22:38 -05002501 const UInt &operator--(UInt &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002502 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002503 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002504 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002505 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002506
2507 return val;
2508 }
2509
John Bauman19bac1e2014-05-06 15:23:49 -04002510// RValue<UInt> RoundUInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002511// {
Logan Chiene3191012018-08-24 22:01:50 +08002512//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002513// return x86::cvtss2si(val); // FIXME: Unsigned
Logan Chiene3191012018-08-24 22:01:50 +08002514//#else
2515// return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
2516//#endif
John Bauman89401822014-05-06 15:04:28 -04002517// }
2518
John Bauman19bac1e2014-05-06 15:23:49 -04002519 Type *UInt::getType()
John Bauman89401822014-05-06 15:04:28 -04002520 {
Nicolas Capensac230122016-09-20 14:30:06 -04002521 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002522 }
2523
John Bauman19bac1e2014-05-06 15:23:49 -04002524// Int2::Int2(RValue<Int> cast)
2525// {
John Bauman19bac1e2014-05-06 15:23:49 -04002526// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2527// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
John Bauman66b8ab22014-05-06 15:57:45 -04002528//
Nicolas Capense89cd582016-09-30 14:23:47 -04002529// int shuffle[2] = {0, 0};
2530// Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
John Bauman19bac1e2014-05-06 15:23:49 -04002531//
John Bauman66b8ab22014-05-06 15:57:45 -04002532// storeValue(replicate);
John Bauman19bac1e2014-05-06 15:23:49 -04002533// }
John Bauman89401822014-05-06 15:04:28 -04002534
John Bauman19bac1e2014-05-06 15:23:49 -04002535 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002536 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002537 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002538#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002539 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
2540
2541 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002542#else
2543 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
2544#endif
John Bauman89401822014-05-06 15:04:28 -04002545 }
2546
John Bauman19bac1e2014-05-06 15:23:49 -04002547 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002548 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002549 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002550#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002551 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
2552
2553 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002554#else
2555 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
2556#endif
John Bauman89401822014-05-06 15:04:28 -04002557 }
2558
John Bauman19bac1e2014-05-06 15:23:49 -04002559 Type *Int2::getType()
John Bauman89401822014-05-06 15:04:28 -04002560 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002561 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002562 }
2563
John Bauman19bac1e2014-05-06 15:23:49 -04002564 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002565 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002566 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002567#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002568 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
2569
2570 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002571#else
2572 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
2573#endif
John Bauman89401822014-05-06 15:04:28 -04002574 }
2575
John Bauman19bac1e2014-05-06 15:23:49 -04002576 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002577 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002578 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002579#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002580 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
2581
2582 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002583#else
2584 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
2585#endif
John Bauman89401822014-05-06 15:04:28 -04002586 }
2587
John Bauman19bac1e2014-05-06 15:23:49 -04002588 Type *UInt2::getType()
John Bauman89401822014-05-06 15:04:28 -04002589 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002590 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002591 }
2592
Nicolas Capenscb986762017-01-20 11:34:37 -05002593 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002594 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002595 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002596#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002597 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002598 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002599 *this = x86::pmovzxbd(As<Byte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002600 }
2601 else
Logan Chiene3191012018-08-24 22:01:50 +08002602#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002603 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002604 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Nicolas Capens01a97962017-07-28 17:30:51 -04002605 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002606 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002607
Nicolas Capense89cd582016-09-30 14:23:47 -04002608 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002609 Value *c = Nucleus::createBitCast(b, Short8::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002610 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002611
Nicolas Capens01a97962017-07-28 17:30:51 -04002612 *this = As<Int4>(d);
2613 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002614 }
2615
Nicolas Capenscb986762017-01-20 11:34:37 -05002616 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002617 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002618 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002619#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002620 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002621 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002622 *this = x86::pmovsxbd(As<SByte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002623 }
2624 else
Logan Chiene3191012018-08-24 22:01:50 +08002625#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002626 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002627 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2628 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
2629 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002630
Nicolas Capense89cd582016-09-30 14:23:47 -04002631 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002632 Value *c = Nucleus::createBitCast(b, Short8::getType());
2633 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002634
Nicolas Capens01a97962017-07-28 17:30:51 -04002635 *this = As<Int4>(d) >> 24;
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002636 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002637 }
2638
Nicolas Capenscb986762017-01-20 11:34:37 -05002639 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002640 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002641 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002642#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002643 if(CPUID::supportsSSE4_1())
2644 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002645 *this = x86::pmovsxwd(As<Short8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002646 }
2647 else
Logan Chiene3191012018-08-24 22:01:50 +08002648#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002649 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002650 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002651 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2652 *this = As<Int4>(c) >> 16;
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002653 }
2654 }
2655
Nicolas Capenscb986762017-01-20 11:34:37 -05002656 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002657 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002658 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002659#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002660 if(CPUID::supportsSSE4_1())
2661 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002662 *this = x86::pmovzxwd(As<UShort8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002663 }
2664 else
Logan Chiene3191012018-08-24 22:01:50 +08002665#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002666 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002667 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002668 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2669 *this = As<Int4>(c);
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002670 }
2671 }
2672
Nicolas Capenscb986762017-01-20 11:34:37 -05002673 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002674 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002675 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002676 Value *vector = loadValue();
2677 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2678
Nicolas Capense89cd582016-09-30 14:23:47 -04002679 int swizzle[4] = {0, 0, 0, 0};
2680 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002681
2682 storeValue(replicate);
2683 }
2684
John Bauman19bac1e2014-05-06 15:23:49 -04002685 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002686 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002687 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002688#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002689 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002690#else
2691 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
2692#endif
John Bauman89401822014-05-06 15:04:28 -04002693 }
2694
John Bauman19bac1e2014-05-06 15:23:49 -04002695 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002696 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002697 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002698#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002699 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002700#else
2701 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2702#endif
John Bauman89401822014-05-06 15:04:28 -04002703 }
2704
John Bauman19bac1e2014-05-06 15:23:49 -04002705 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2706 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002707 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002708 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002709 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2710 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2711 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002712 }
2713
2714 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2715 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002716 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002717 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2718 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2719 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
2720 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002721 }
2722
2723 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2724 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002725 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002726 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2727 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2728 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
2729 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002730 }
2731
2732 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2733 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002734 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002735 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2736 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2737 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2738 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002739 }
2740
2741 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2742 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002743 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002744 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2745 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2746 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
2747 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002748 }
2749
2750 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2751 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002752 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002753 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2754 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2755 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
2756 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002757 }
2758
2759 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2760 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002761 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002762#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002763 if(CPUID::supportsSSE4_1())
2764 {
2765 return x86::pmaxsd(x, y);
2766 }
2767 else
Logan Chiene3191012018-08-24 22:01:50 +08002768#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002769 {
2770 RValue<Int4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002771 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002772 }
2773 }
2774
2775 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2776 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002777 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002778#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002779 if(CPUID::supportsSSE4_1())
2780 {
2781 return x86::pminsd(x, y);
2782 }
2783 else
Logan Chiene3191012018-08-24 22:01:50 +08002784#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002785 {
2786 RValue<Int4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002787 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002788 }
2789 }
2790
2791 RValue<Int4> RoundInt(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002792 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002793 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002794#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002795 return x86::cvtps2dq(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002796#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002797 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002798#endif
John Bauman89401822014-05-06 15:04:28 -04002799 }
2800
Chris Forbese86b6dc2019-03-01 09:08:47 -08002801 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2802 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002803 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002804 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2805 return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2806 }
2807
2808 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2809 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002810 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002811 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2812 return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2813 }
2814
Nicolas Capens33438a62017-09-27 11:47:35 -04002815 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04002816 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002817 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002818#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002819 return x86::packssdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002820#else
2821 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
2822#endif
John Bauman89401822014-05-06 15:04:28 -04002823 }
2824
Nicolas Capens33438a62017-09-27 11:47:35 -04002825 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
2826 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002827 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002828#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002829 return x86::packusdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002830#else
2831 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
2832#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002833 }
2834
John Bauman19bac1e2014-05-06 15:23:49 -04002835 RValue<Int> SignMask(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04002836 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002837 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002838#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002839 return x86::movmskps(As<Float4>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002840#else
2841 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2842#endif
John Bauman89401822014-05-06 15:04:28 -04002843 }
2844
John Bauman19bac1e2014-05-06 15:23:49 -04002845 Type *Int4::getType()
John Bauman89401822014-05-06 15:04:28 -04002846 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002847 return T(llvm::VectorType::get(T(Int::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002848 }
2849
Nicolas Capenscb986762017-01-20 11:34:37 -05002850 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002851 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002852 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002853 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2854 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
John Bauman89401822014-05-06 15:04:28 -04002855
Alexis Hetu764d1422016-09-28 08:44:22 -04002856 // Smallest positive value representable in UInt, but not in Int
2857 const unsigned int ustart = 0x80000000u;
2858 const float ustartf = float(ustart);
2859
2860 // Check if the value can be represented as an Int
2861 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
2862 // If the value is too large, subtract ustart and re-add it after conversion.
2863 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
2864 // Otherwise, just convert normally
2865 (~uiValue & Int4(cast));
2866 // If the value is negative, store 0, otherwise store the result of the conversion
2867 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
John Bauman89401822014-05-06 15:04:28 -04002868 }
2869
John Bauman19bac1e2014-05-06 15:23:49 -04002870 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002871 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002872 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002873#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002874 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002875#else
2876 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
2877#endif
John Bauman89401822014-05-06 15:04:28 -04002878 }
2879
John Bauman19bac1e2014-05-06 15:23:49 -04002880 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002881 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002882 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002883#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002884 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002885#else
2886 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2887#endif
John Bauman89401822014-05-06 15:04:28 -04002888 }
2889
John Bauman19bac1e2014-05-06 15:23:49 -04002890 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
2891 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002892 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002893 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002894 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2895 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2896 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002897 }
2898
2899 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
2900 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002901 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04002902 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
2903 }
2904
2905 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
2906 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002907 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002908 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2909 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2910 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
2911 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002912 }
2913
2914 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
2915 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002916 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04002917 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2918 }
2919
2920 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
2921 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002922 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002923 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2924 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2925 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
2926 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002927 }
2928
2929 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
2930 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002931 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04002932 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
2933 }
2934
2935 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
2936 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002937 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002938#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002939 if(CPUID::supportsSSE4_1())
2940 {
2941 return x86::pmaxud(x, y);
2942 }
2943 else
Logan Chiene3191012018-08-24 22:01:50 +08002944#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002945 {
2946 RValue<UInt4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002947 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002948 }
2949 }
2950
2951 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
2952 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002953 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002954#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002955 if(CPUID::supportsSSE4_1())
2956 {
2957 return x86::pminud(x, y);
2958 }
2959 else
Logan Chiene3191012018-08-24 22:01:50 +08002960#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002961 {
2962 RValue<UInt4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002963 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002964 }
2965 }
2966
John Bauman19bac1e2014-05-06 15:23:49 -04002967 Type *UInt4::getType()
John Bauman89401822014-05-06 15:04:28 -04002968 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002969 return T(llvm::VectorType::get(T(UInt::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002970 }
2971
Alexis Hetu734e2572018-12-20 14:00:49 -05002972 Type *Half::getType()
2973 {
2974 return T(llvm::Type::getInt16Ty(*::context));
2975 }
2976
Nicolas Capens05b3d662016-02-25 23:58:33 -05002977 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04002978 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002979 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002980#if defined(__i386__) || defined(__x86_64__)
2981 if(exactAtPow2)
2982 {
2983 // rcpss uses a piecewise-linear approximation which minimizes the relative error
2984 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
2985 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
2986 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04002987 return x86::rcpss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002988#else
2989 return As<Float>(V(lowerRCP(V(x.value))));
2990#endif
John Bauman89401822014-05-06 15:04:28 -04002991 }
John Bauman66b8ab22014-05-06 15:57:45 -04002992
John Bauman19bac1e2014-05-06 15:23:49 -04002993 RValue<Float> RcpSqrt_pp(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002994 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002995 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002996#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002997 return x86::rsqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002998#else
2999 return As<Float>(V(lowerRSQRT(V(x.value))));
3000#endif
John Bauman89401822014-05-06 15:04:28 -04003001 }
3002
John Bauman19bac1e2014-05-06 15:23:49 -04003003 RValue<Float> Sqrt(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003004 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003005 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003006#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003007 return x86::sqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003008#else
3009 return As<Float>(V(lowerSQRT(V(x.value))));
3010#endif
John Bauman89401822014-05-06 15:04:28 -04003011 }
3012
John Bauman19bac1e2014-05-06 15:23:49 -04003013 RValue<Float> Round(RValue<Float> x)
3014 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003015 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003016#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003017 if(CPUID::supportsSSE4_1())
3018 {
3019 return x86::roundss(x, 0);
3020 }
3021 else
3022 {
3023 return Float4(Round(Float4(x))).x;
3024 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003025#else
3026 return RValue<Float>(V(lowerRound(V(x.value))));
3027#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003028 }
3029
3030 RValue<Float> Trunc(RValue<Float> x)
3031 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003032 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003033#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003034 if(CPUID::supportsSSE4_1())
3035 {
3036 return x86::roundss(x, 3);
3037 }
3038 else
3039 {
3040 return Float(Int(x)); // Rounded toward zero
3041 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003042#else
3043 return RValue<Float>(V(lowerTrunc(V(x.value))));
3044#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003045 }
3046
3047 RValue<Float> Frac(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003048 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003049 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003050#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003051 if(CPUID::supportsSSE4_1())
3052 {
3053 return x - x86::floorss(x);
3054 }
3055 else
3056 {
John Bauman19bac1e2014-05-06 15:23:49 -04003057 return Float4(Frac(Float4(x))).x;
John Bauman89401822014-05-06 15:04:28 -04003058 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003059#else
3060 // x - floor(x) can be 1.0 for very small negative x.
3061 // Clamp against the value just below 1.0.
3062 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
3063#endif
John Bauman89401822014-05-06 15:04:28 -04003064 }
3065
John Bauman19bac1e2014-05-06 15:23:49 -04003066 RValue<Float> Floor(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003067 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003068 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003069#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003070 if(CPUID::supportsSSE4_1())
3071 {
3072 return x86::floorss(x);
3073 }
3074 else
3075 {
3076 return Float4(Floor(Float4(x))).x;
3077 }
Logan Chien40a60052018-09-26 19:03:53 +08003078#else
3079 return RValue<Float>(V(lowerFloor(V(x.value))));
3080#endif
John Bauman89401822014-05-06 15:04:28 -04003081 }
3082
John Bauman19bac1e2014-05-06 15:23:49 -04003083 RValue<Float> Ceil(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003084 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003085 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003086#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003087 if(CPUID::supportsSSE4_1())
3088 {
3089 return x86::ceilss(x);
3090 }
3091 else
Logan Chiene3191012018-08-24 22:01:50 +08003092#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003093 {
3094 return Float4(Ceil(Float4(x))).x;
3095 }
John Bauman89401822014-05-06 15:04:28 -04003096 }
3097
John Bauman19bac1e2014-05-06 15:23:49 -04003098 Type *Float::getType()
John Bauman89401822014-05-06 15:04:28 -04003099 {
Nicolas Capensac230122016-09-20 14:30:06 -04003100 return T(llvm::Type::getFloatTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04003101 }
3102
John Bauman19bac1e2014-05-06 15:23:49 -04003103 Type *Float2::getType()
John Bauman89401822014-05-06 15:04:28 -04003104 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003105 return T(Type_v2f32);
John Bauman89401822014-05-06 15:04:28 -04003106 }
3107
Nicolas Capenscb986762017-01-20 11:34:37 -05003108 Float4::Float4(RValue<Float> rhs) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04003109 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003110 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04003111 Value *vector = loadValue();
John Bauman89401822014-05-06 15:04:28 -04003112 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3113
Nicolas Capense89cd582016-09-30 14:23:47 -04003114 int swizzle[4] = {0, 0, 0, 0};
3115 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
John Bauman89401822014-05-06 15:04:28 -04003116
John Bauman66b8ab22014-05-06 15:57:45 -04003117 storeValue(replicate);
John Bauman89401822014-05-06 15:04:28 -04003118 }
3119
John Bauman19bac1e2014-05-06 15:23:49 -04003120 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003121 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003122 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003123#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003124 return x86::maxps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003125#else
3126 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
3127#endif
John Bauman89401822014-05-06 15:04:28 -04003128 }
3129
John Bauman19bac1e2014-05-06 15:23:49 -04003130 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003131 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003132 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003133#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003134 return x86::minps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003135#else
3136 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
3137#endif
John Bauman89401822014-05-06 15:04:28 -04003138 }
3139
Nicolas Capens05b3d662016-02-25 23:58:33 -05003140 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003141 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003142 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003143#if defined(__i386__) || defined(__x86_64__)
3144 if(exactAtPow2)
3145 {
3146 // rcpps uses a piecewise-linear approximation which minimizes the relative error
3147 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3148 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3149 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003150 return x86::rcpps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003151#else
3152 return As<Float4>(V(lowerRCP(V(x.value))));
3153#endif
John Bauman89401822014-05-06 15:04:28 -04003154 }
John Bauman66b8ab22014-05-06 15:57:45 -04003155
John Bauman19bac1e2014-05-06 15:23:49 -04003156 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003157 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003158 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003159#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003160 return x86::rsqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003161#else
3162 return As<Float4>(V(lowerRSQRT(V(x.value))));
3163#endif
John Bauman89401822014-05-06 15:04:28 -04003164 }
3165
John Bauman19bac1e2014-05-06 15:23:49 -04003166 RValue<Float4> Sqrt(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003167 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003168 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003169#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003170 return x86::sqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003171#else
3172 return As<Float4>(V(lowerSQRT(V(x.value))));
3173#endif
John Bauman89401822014-05-06 15:04:28 -04003174 }
3175
John Bauman19bac1e2014-05-06 15:23:49 -04003176 RValue<Int> SignMask(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003177 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003178 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003179#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003180 return x86::movmskps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003181#else
3182 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
3183#endif
John Bauman89401822014-05-06 15:04:28 -04003184 }
3185
John Bauman19bac1e2014-05-06 15:23:49 -04003186 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003187 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003188 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003189 // return As<Int4>(x86::cmpeqps(x, y));
3190 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
3191 }
3192
John Bauman19bac1e2014-05-06 15:23:49 -04003193 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003194 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003195 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003196 // return As<Int4>(x86::cmpltps(x, y));
3197 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
3198 }
3199
John Bauman19bac1e2014-05-06 15:23:49 -04003200 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003201 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003202 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003203 // return As<Int4>(x86::cmpleps(x, y));
3204 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
3205 }
3206
John Bauman19bac1e2014-05-06 15:23:49 -04003207 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003208 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003209 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003210 // return As<Int4>(x86::cmpneqps(x, y));
3211 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
3212 }
3213
John Bauman19bac1e2014-05-06 15:23:49 -04003214 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003215 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003216 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003217 // return As<Int4>(x86::cmpnltps(x, y));
3218 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
3219 }
3220
John Bauman19bac1e2014-05-06 15:23:49 -04003221 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003222 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003223 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003224 // return As<Int4>(x86::cmpnleps(x, y));
3225 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
3226 }
3227
Ben Claytonec1aeb82019-03-04 19:33:27 +00003228 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3229 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003230 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003231 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUEQ(x.value, y.value), Int4::getType()));
3232 }
3233
3234 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3235 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003236 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003237 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULT(x.value, y.value), Int4::getType()));
3238 }
3239
3240 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3241 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003242 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003243 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULE(x.value, y.value), Int4::getType()));
3244 }
3245
3246 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3247 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003248 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003249 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUNE(x.value, y.value), Int4::getType()));
3250 }
3251
3252 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3253 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003254 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003255 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGE(x.value, y.value), Int4::getType()));
3256 }
3257
3258 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3259 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003260 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003261 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGT(x.value, y.value), Int4::getType()));
3262 }
3263
John Bauman19bac1e2014-05-06 15:23:49 -04003264 RValue<Float4> Round(RValue<Float4> x)
3265 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003266 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003267#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003268 if(CPUID::supportsSSE4_1())
3269 {
3270 return x86::roundps(x, 0);
3271 }
3272 else
3273 {
3274 return Float4(RoundInt(x));
3275 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003276#else
3277 return RValue<Float4>(V(lowerRound(V(x.value))));
3278#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003279 }
3280
3281 RValue<Float4> Trunc(RValue<Float4> x)
3282 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003283 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003284#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003285 if(CPUID::supportsSSE4_1())
3286 {
3287 return x86::roundps(x, 3);
3288 }
3289 else
3290 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003291 return Float4(Int4(x));
John Bauman19bac1e2014-05-06 15:23:49 -04003292 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003293#else
3294 return RValue<Float4>(V(lowerTrunc(V(x.value))));
3295#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003296 }
3297
3298 RValue<Float4> Frac(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003299 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb9230422017-07-17 10:27:33 -04003301 Float4 frc;
3302
Logan Chien40a60052018-09-26 19:03:53 +08003303#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003304 if(CPUID::supportsSSE4_1())
3305 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003306 frc = x - Floor(x);
John Bauman89401822014-05-06 15:04:28 -04003307 }
3308 else
3309 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003310 frc = x - Float4(Int4(x)); // Signed fractional part.
John Bauman89401822014-05-06 15:04:28 -04003311
Nicolas Capensb9230422017-07-17 10:27:33 -04003312 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
John Bauman89401822014-05-06 15:04:28 -04003313 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003314#else
3315 frc = x - Floor(x);
3316#endif
Nicolas Capensb9230422017-07-17 10:27:33 -04003317
3318 // x - floor(x) can be 1.0 for very small negative x.
3319 // Clamp against the value just below 1.0.
3320 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
John Bauman89401822014-05-06 15:04:28 -04003321 }
3322
John Bauman19bac1e2014-05-06 15:23:49 -04003323 RValue<Float4> Floor(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003324 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003325 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003326#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003327 if(CPUID::supportsSSE4_1())
3328 {
3329 return x86::floorps(x);
3330 }
3331 else
3332 {
John Bauman19bac1e2014-05-06 15:23:49 -04003333 return x - Frac(x);
John Bauman89401822014-05-06 15:04:28 -04003334 }
Logan Chien40a60052018-09-26 19:03:53 +08003335#else
3336 return RValue<Float4>(V(lowerFloor(V(x.value))));
3337#endif
John Bauman89401822014-05-06 15:04:28 -04003338 }
3339
John Bauman19bac1e2014-05-06 15:23:49 -04003340 RValue<Float4> Ceil(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003341 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003342 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003343#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003344 if(CPUID::supportsSSE4_1())
3345 {
3346 return x86::ceilps(x);
3347 }
3348 else
Logan Chiene3191012018-08-24 22:01:50 +08003349#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003350 {
3351 return -Floor(-x);
3352 }
John Bauman89401822014-05-06 15:04:28 -04003353 }
3354
Ben Claytona2c8b772019-04-09 13:42:36 -04003355 RValue<Float4> Sin(RValue<Float4> v)
3356 {
3357 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sin, { V(v.value)->getType() } );
3358 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3359 }
3360
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003361 RValue<Float4> Cos(RValue<Float4> v)
3362 {
3363 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cos, { V(v.value)->getType() } );
3364 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3365 }
3366
Ben Clayton14740062019-04-09 13:48:41 -04003367 RValue<Float4> Tan(RValue<Float4> v)
3368 {
3369 return Sin(v) / Cos(v);
3370 }
3371
Ben Claytoneafae472019-04-09 14:22:38 -04003372 static RValue<Float4> TransformFloat4PerElement(RValue<Float4> v, const char* name)
Ben Claytonf9350d72019-04-09 14:19:02 -04003373 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003374 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), ::llvm::ArrayRef<llvm::Type*>(T(Float::getType())), false);
Ben Claytoneafae472019-04-09 14:22:38 -04003375 auto func = ::module->getOrInsertFunction(name, funcTy);
Ben Claytonf9350d72019-04-09 14:19:02 -04003376 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3377 for (uint64_t i = 0; i < 4; i++)
3378 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003379 auto el = ::builder->CreateCall(func, V(Nucleus::createExtractElement(v.value, Float::getType(), i)));
3380 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytonf9350d72019-04-09 14:19:02 -04003381 }
3382 return RValue<Float4>(V(out));
3383 }
3384
Ben Claytoneafae472019-04-09 14:22:38 -04003385 RValue<Float4> Asin(RValue<Float4> v)
3386 {
3387 return TransformFloat4PerElement(v, "asinf");
3388 }
3389
3390 RValue<Float4> Acos(RValue<Float4> v)
3391 {
3392 return TransformFloat4PerElement(v, "acosf");
3393 }
3394
Ben Clayton749b4e02019-04-09 14:27:43 -04003395 RValue<Float4> Atan(RValue<Float4> v)
3396 {
3397 return TransformFloat4PerElement(v, "atanf");
3398 }
3399
Ben Claytond9636972019-04-09 15:09:54 -04003400 RValue<Float4> Sinh(RValue<Float4> v)
3401 {
3402 return TransformFloat4PerElement(v, "sinhf");
3403 }
3404
Ben Clayton900ea2c2019-04-09 15:25:36 -04003405 RValue<Float4> Cosh(RValue<Float4> v)
3406 {
3407 return TransformFloat4PerElement(v, "coshf");
3408 }
3409
Ben Clayton3928bd92019-04-09 15:27:41 -04003410 RValue<Float4> Tanh(RValue<Float4> v)
3411 {
3412 return TransformFloat4PerElement(v, "tanhf");
3413 }
3414
Ben Claytonf6d77ab2019-04-09 15:30:04 -04003415 RValue<Float4> Asinh(RValue<Float4> v)
3416 {
3417 return TransformFloat4PerElement(v, "asinhf");
3418 }
3419
Ben Clayton28ebcb02019-04-09 15:33:38 -04003420 RValue<Float4> Acosh(RValue<Float4> v)
3421 {
3422 return TransformFloat4PerElement(v, "acoshf");
3423 }
3424
Ben Claytonfa6a5392019-04-09 15:35:24 -04003425 RValue<Float4> Atanh(RValue<Float4> v)
3426 {
3427 return TransformFloat4PerElement(v, "atanhf");
3428 }
3429
Ben Claytona520c3e2019-04-09 15:43:45 -04003430 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3431 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003432 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3433 paramTys.push_back(T(Float::getType()));
3434 paramTys.push_back(T(Float::getType()));
3435 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), paramTys, false);
Ben Claytona520c3e2019-04-09 15:43:45 -04003436 auto func = ::module->getOrInsertFunction("atan2f", funcTy);
3437 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3438 for (uint64_t i = 0; i < 4; i++)
3439 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003440 auto el = ::builder->CreateCall2(func, ARGS(
3441 V(Nucleus::createExtractElement(x.value, Float::getType(), i)),
3442 V(Nucleus::createExtractElement(y.value, Float::getType(), i))
3443 ));
3444 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytona520c3e2019-04-09 15:43:45 -04003445 }
3446 return RValue<Float4>(V(out));
3447 }
3448
Ben Claytonbfe94f02019-04-09 15:52:12 -04003449 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3450 {
Ben Clayton7579db12019-05-02 08:37:12 +01003451 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::pow, { T(Float4::getType()) });
Ben Claytonc38fc122019-04-11 08:58:49 -04003452 return RValue<Float4>(V(::builder->CreateCall2(func, ARGS(V(x.value), V(y.value)))));
Ben Claytonbfe94f02019-04-09 15:52:12 -04003453 }
3454
Ben Clayton242f0022019-04-09 16:00:53 -04003455 RValue<Float4> Exp(RValue<Float4> v)
3456 {
3457 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003458 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton242f0022019-04-09 16:00:53 -04003459 }
3460
Ben Clayton2c1da722019-04-09 16:03:03 -04003461 RValue<Float4> Log(RValue<Float4> v)
3462 {
3463 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003464 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton2c1da722019-04-09 16:03:03 -04003465 }
3466
Ben Claytonf40b56c2019-04-09 16:06:55 -04003467 RValue<Float4> Exp2(RValue<Float4> v)
3468 {
3469 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003470 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytonf40b56c2019-04-09 16:06:55 -04003471 }
3472
Ben Claytone17acfe2019-04-09 16:09:13 -04003473 RValue<Float4> Log2(RValue<Float4> v)
3474 {
3475 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003476 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytone17acfe2019-04-09 16:09:13 -04003477 }
3478
Ben Clayton60958262019-04-10 14:53:30 -04003479 RValue<UInt4> Ctlz(RValue<UInt4> v, bool isZeroUndef)
3480 {
Ben Clayton7579db12019-05-02 08:37:12 +01003481 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::ctlz, { T(UInt4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003482 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton60958262019-04-10 14:53:30 -04003483 V(v.value),
3484 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003485 ))));
Ben Clayton60958262019-04-10 14:53:30 -04003486 }
3487
Ben Clayton3f007c42019-04-10 14:54:23 -04003488 RValue<UInt4> Cttz(RValue<UInt4> v, bool isZeroUndef)
3489 {
Ben Clayton7579db12019-05-02 08:37:12 +01003490 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cttz, { T(UInt4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003491 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton3f007c42019-04-10 14:54:23 -04003492 V(v.value),
3493 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003494 ))));
Ben Clayton3f007c42019-04-10 14:54:23 -04003495 }
3496
John Bauman19bac1e2014-05-06 15:23:49 -04003497 Type *Float4::getType()
John Bauman89401822014-05-06 15:04:28 -04003498 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003499 return T(llvm::VectorType::get(T(Float::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003500 }
3501
John Bauman89401822014-05-06 15:04:28 -04003502 RValue<Long> Ticks()
3503 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003504 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003505 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
John Bauman89401822014-05-06 15:04:28 -04003506
Nicolas Capens2ab69ee2016-09-26 11:45:17 -04003507 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
John Bauman89401822014-05-06 15:04:28 -04003508 }
Ben Claytond853c122019-04-16 17:51:49 -04003509
3510 RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3511 {
3512 // Note: this should work for 32-bit pointers as well because 'inttoptr'
3513 // is defined to truncate (and zero extend) if necessary.
3514 auto ptrAsInt = ::llvm::ConstantInt::get(::llvm::Type::getInt64Ty(*::context), reinterpret_cast<uintptr_t>(ptr));
3515 return RValue<Pointer<Byte>>(V(::builder->CreateIntToPtr(ptrAsInt, T(Pointer<Byte>::getType()))));
3516 }
3517
3518 Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
3519 {
3520 ::llvm::SmallVector<::llvm::Type*, 8> paramTys;
3521 for (auto ty : argTys) { paramTys.push_back(T(ty)); }
3522 auto funcTy = ::llvm::FunctionType::get(T(retTy), paramTys, false);
3523
3524 auto funcPtrTy = funcTy->getPointerTo();
3525 auto funcPtr = ::builder->CreatePointerCast(V(fptr.value), funcPtrTy);
3526
3527 ::llvm::SmallVector<::llvm::Value*, 8> arguments;
3528 for (auto arg : args) { arguments.push_back(V(arg)); }
3529 return V(::builder->CreateCall(funcPtr, arguments));
3530 }
John Bauman89401822014-05-06 15:04:28 -04003531}
3532
Nicolas Capens48461502018-08-06 14:20:45 -04003533namespace rr
John Bauman89401822014-05-06 15:04:28 -04003534{
Logan Chiene3191012018-08-24 22:01:50 +08003535#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003536 namespace x86
3537 {
John Bauman19bac1e2014-05-06 15:23:49 -04003538 RValue<Int> cvtss2si(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003539 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003540 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
John Bauman66b8ab22014-05-06 15:57:45 -04003541
John Bauman89401822014-05-06 15:04:28 -04003542 Float4 vector;
3543 vector.x = val;
3544
Logan Chien813d5032018-08-31 17:19:45 +08003545 return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
John Bauman89401822014-05-06 15:04:28 -04003546 }
3547
John Bauman19bac1e2014-05-06 15:23:49 -04003548 RValue<Int4> cvtps2dq(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003549 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003550 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
John Bauman89401822014-05-06 15:04:28 -04003551
Logan Chien813d5032018-08-31 17:19:45 +08003552 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003553 }
3554
John Bauman19bac1e2014-05-06 15:23:49 -04003555 RValue<Float> rcpss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003556 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003557 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
John Bauman89401822014-05-06 15:04:28 -04003558
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003559 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003560
Logan Chien813d5032018-08-31 17:19:45 +08003561 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003562 }
3563
John Bauman19bac1e2014-05-06 15:23:49 -04003564 RValue<Float> sqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003565 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003566 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3567 return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003568 }
3569
John Bauman19bac1e2014-05-06 15:23:49 -04003570 RValue<Float> rsqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003571 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003572 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
John Bauman66b8ab22014-05-06 15:57:45 -04003573
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003574 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman89401822014-05-06 15:04:28 -04003575
Logan Chien813d5032018-08-31 17:19:45 +08003576 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003577 }
3578
John Bauman19bac1e2014-05-06 15:23:49 -04003579 RValue<Float4> rcpps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003580 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003581 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003582
Logan Chien813d5032018-08-31 17:19:45 +08003583 return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003584 }
3585
John Bauman19bac1e2014-05-06 15:23:49 -04003586 RValue<Float4> sqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003587 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003588 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
John Bauman66b8ab22014-05-06 15:57:45 -04003589
Logan Chien813d5032018-08-31 17:19:45 +08003590 return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003591 }
3592
John Bauman19bac1e2014-05-06 15:23:49 -04003593 RValue<Float4> rsqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003594 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003595 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003596
Logan Chien813d5032018-08-31 17:19:45 +08003597 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003598 }
3599
John Bauman19bac1e2014-05-06 15:23:49 -04003600 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003601 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003602 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
John Bauman89401822014-05-06 15:04:28 -04003603
Logan Chien813d5032018-08-31 17:19:45 +08003604 return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003605 }
3606
John Bauman19bac1e2014-05-06 15:23:49 -04003607 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003608 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003609 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
John Bauman89401822014-05-06 15:04:28 -04003610
Logan Chien813d5032018-08-31 17:19:45 +08003611 return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003612 }
3613
John Bauman19bac1e2014-05-06 15:23:49 -04003614 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003615 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003616 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
John Bauman89401822014-05-06 15:04:28 -04003617
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003618 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
John Bauman89401822014-05-06 15:04:28 -04003619 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
3620
Logan Chien813d5032018-08-31 17:19:45 +08003621 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003622 }
3623
John Bauman19bac1e2014-05-06 15:23:49 -04003624 RValue<Float> floorss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003625 {
3626 return roundss(val, 1);
3627 }
3628
John Bauman19bac1e2014-05-06 15:23:49 -04003629 RValue<Float> ceilss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003630 {
3631 return roundss(val, 2);
3632 }
3633
John Bauman19bac1e2014-05-06 15:23:49 -04003634 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003635 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003636 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
John Bauman89401822014-05-06 15:04:28 -04003637
Logan Chien813d5032018-08-31 17:19:45 +08003638 return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
John Bauman89401822014-05-06 15:04:28 -04003639 }
3640
John Bauman19bac1e2014-05-06 15:23:49 -04003641 RValue<Float4> floorps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003642 {
3643 return roundps(val, 1);
3644 }
3645
John Bauman19bac1e2014-05-06 15:23:49 -04003646 RValue<Float4> ceilps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003647 {
3648 return roundps(val, 2);
3649 }
3650
Alexis Hetu0f448072016-03-18 10:56:08 -04003651 RValue<Int4> pabsd(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04003652 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003653 return RValue<Int4>(V(lowerPABS(V(x.value))));
John Bauman89401822014-05-06 15:04:28 -04003654 }
3655
John Bauman19bac1e2014-05-06 15:23:49 -04003656 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003657 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003658 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
John Bauman89401822014-05-06 15:04:28 -04003659
Logan Chien813d5032018-08-31 17:19:45 +08003660 return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003661 }
John Bauman66b8ab22014-05-06 15:57:45 -04003662
John Bauman19bac1e2014-05-06 15:23:49 -04003663 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003664 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003665 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
John Bauman89401822014-05-06 15:04:28 -04003666
Logan Chien813d5032018-08-31 17:19:45 +08003667 return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003668 }
3669
John Bauman19bac1e2014-05-06 15:23:49 -04003670 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003671 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003672 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
John Bauman89401822014-05-06 15:04:28 -04003673
Logan Chien813d5032018-08-31 17:19:45 +08003674 return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003675 }
John Bauman66b8ab22014-05-06 15:57:45 -04003676
John Bauman19bac1e2014-05-06 15:23:49 -04003677 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003678 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003679 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
John Bauman89401822014-05-06 15:04:28 -04003680
Logan Chien813d5032018-08-31 17:19:45 +08003681 return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003682 }
3683
John Bauman19bac1e2014-05-06 15:23:49 -04003684 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003685 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003686 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
John Bauman89401822014-05-06 15:04:28 -04003687
Logan Chien813d5032018-08-31 17:19:45 +08003688 return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003689 }
John Bauman66b8ab22014-05-06 15:57:45 -04003690
John Bauman19bac1e2014-05-06 15:23:49 -04003691 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003692 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003693 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
John Bauman89401822014-05-06 15:04:28 -04003694
Logan Chien813d5032018-08-31 17:19:45 +08003695 return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003696 }
John Bauman66b8ab22014-05-06 15:57:45 -04003697
John Bauman19bac1e2014-05-06 15:23:49 -04003698 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003699 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003700 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
John Bauman89401822014-05-06 15:04:28 -04003701
Logan Chien813d5032018-08-31 17:19:45 +08003702 return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003703 }
John Bauman66b8ab22014-05-06 15:57:45 -04003704
John Bauman19bac1e2014-05-06 15:23:49 -04003705 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003706 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003707 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
John Bauman89401822014-05-06 15:04:28 -04003708
Logan Chien813d5032018-08-31 17:19:45 +08003709 return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
John Bauman19bac1e2014-05-06 15:23:49 -04003710 }
3711
3712 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003713 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003714 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
John Bauman89401822014-05-06 15:04:28 -04003715 }
3716
John Bauman19bac1e2014-05-06 15:23:49 -04003717 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003718 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003719 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman89401822014-05-06 15:04:28 -04003720 }
3721
John Bauman19bac1e2014-05-06 15:23:49 -04003722 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003723 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003724 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman89401822014-05-06 15:04:28 -04003725 }
3726
John Bauman19bac1e2014-05-06 15:23:49 -04003727 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003728 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003729 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003730 }
3731
John Bauman19bac1e2014-05-06 15:23:49 -04003732 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003733 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003734 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003735 }
3736
John Bauman19bac1e2014-05-06 15:23:49 -04003737 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003738 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003739 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003740 }
3741
John Bauman19bac1e2014-05-06 15:23:49 -04003742 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003743 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003744 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003745 }
3746
John Bauman19bac1e2014-05-06 15:23:49 -04003747 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
John Bauman89401822014-05-06 15:04:28 -04003748 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003749 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003750
Logan Chien813d5032018-08-31 17:19:45 +08003751 return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003752 }
3753
John Bauman19bac1e2014-05-06 15:23:49 -04003754 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003755 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003756 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003757
Logan Chien813d5032018-08-31 17:19:45 +08003758 return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003759 }
3760
John Bauman19bac1e2014-05-06 15:23:49 -04003761 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003762 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003763 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
John Bauman89401822014-05-06 15:04:28 -04003764
Logan Chien813d5032018-08-31 17:19:45 +08003765 return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003766 }
3767
Nicolas Capens33438a62017-09-27 11:47:35 -04003768 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003769 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003770 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
John Bauman89401822014-05-06 15:04:28 -04003771
Logan Chien813d5032018-08-31 17:19:45 +08003772 return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003773 }
3774
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003775 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003776 {
3777 if(CPUID::supportsSSE4_1())
3778 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003779 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
John Bauman66b8ab22014-05-06 15:57:45 -04003780
Logan Chien813d5032018-08-31 17:19:45 +08003781 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003782 }
3783 else
3784 {
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003785 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
3786 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
3787
3788 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
John Bauman89401822014-05-06 15:04:28 -04003789 }
3790 }
3791
John Bauman19bac1e2014-05-06 15:23:49 -04003792 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003793 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003794 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003795
Logan Chien813d5032018-08-31 17:19:45 +08003796 return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003797 }
3798
John Bauman19bac1e2014-05-06 15:23:49 -04003799 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003800 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003801 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003802
Logan Chien813d5032018-08-31 17:19:45 +08003803 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003804 }
3805
John Bauman19bac1e2014-05-06 15:23:49 -04003806 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003807 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003808 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003809
Logan Chien813d5032018-08-31 17:19:45 +08003810 return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003811 }
3812
John Bauman19bac1e2014-05-06 15:23:49 -04003813 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003814 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003815 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003816
Logan Chien813d5032018-08-31 17:19:45 +08003817 return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003818 }
3819
John Bauman19bac1e2014-05-06 15:23:49 -04003820 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003821 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003822 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003823
Logan Chien813d5032018-08-31 17:19:45 +08003824 return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003825 }
3826
John Bauman19bac1e2014-05-06 15:23:49 -04003827 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003828 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003829 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003830
Logan Chien813d5032018-08-31 17:19:45 +08003831 return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003832 }
3833
John Bauman19bac1e2014-05-06 15:23:49 -04003834 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003835 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003836 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003837
Logan Chien813d5032018-08-31 17:19:45 +08003838 return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003839 }
3840
John Bauman19bac1e2014-05-06 15:23:49 -04003841 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003842 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003843 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003844
Logan Chien813d5032018-08-31 17:19:45 +08003845 return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003846 }
3847
John Bauman19bac1e2014-05-06 15:23:49 -04003848 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003849 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003850 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003851
Logan Chien813d5032018-08-31 17:19:45 +08003852 return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003853 }
3854
John Bauman19bac1e2014-05-06 15:23:49 -04003855 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003856 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003857 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003858
Logan Chien813d5032018-08-31 17:19:45 +08003859 return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003860 }
3861
John Bauman19bac1e2014-05-06 15:23:49 -04003862 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003863 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003864 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003865
Logan Chien813d5032018-08-31 17:19:45 +08003866 return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003867 }
3868
John Bauman19bac1e2014-05-06 15:23:49 -04003869 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003870 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003871 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003872
Logan Chien813d5032018-08-31 17:19:45 +08003873 return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003874 }
3875
John Bauman19bac1e2014-05-06 15:23:49 -04003876 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
3877 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003878 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003879 }
3880
3881 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
3882 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003883 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003884 }
3885
3886 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
3887 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003888 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003889 }
3890
3891 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
3892 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003893 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003894 }
3895
3896 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003897 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003898 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04003899
Logan Chien813d5032018-08-31 17:19:45 +08003900 return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003901 }
3902
John Bauman19bac1e2014-05-06 15:23:49 -04003903 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003904 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003905 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04003906
Logan Chien813d5032018-08-31 17:19:45 +08003907 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003908 }
3909
John Bauman19bac1e2014-05-06 15:23:49 -04003910 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003911 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003912 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04003913
Logan Chien813d5032018-08-31 17:19:45 +08003914 return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003915 }
3916
John Bauman19bac1e2014-05-06 15:23:49 -04003917 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04003918 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003919 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04003920
Logan Chien813d5032018-08-31 17:19:45 +08003921 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003922 }
3923
John Bauman19bac1e2014-05-06 15:23:49 -04003924 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04003925 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003926 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04003927
Logan Chien813d5032018-08-31 17:19:45 +08003928 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003929 }
3930
John Bauman19bac1e2014-05-06 15:23:49 -04003931 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04003932 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003933 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04003934
Logan Chien813d5032018-08-31 17:19:45 +08003935 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003936 }
3937
John Bauman19bac1e2014-05-06 15:23:49 -04003938 RValue<Int> movmskps(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003939 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003940 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
John Bauman89401822014-05-06 15:04:28 -04003941
Logan Chien813d5032018-08-31 17:19:45 +08003942 return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
John Bauman89401822014-05-06 15:04:28 -04003943 }
3944
John Bauman19bac1e2014-05-06 15:23:49 -04003945 RValue<Int> pmovmskb(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04003946 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003947 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
John Bauman89401822014-05-06 15:04:28 -04003948
Logan Chien813d5032018-08-31 17:19:45 +08003949 return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
John Bauman89401822014-05-06 15:04:28 -04003950 }
3951
Nicolas Capens01a97962017-07-28 17:30:51 -04003952 RValue<Int4> pmovzxbd(RValue<Byte16> x)
John Bauman89401822014-05-06 15:04:28 -04003953 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003954 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04003955 }
3956
Nicolas Capens01a97962017-07-28 17:30:51 -04003957 RValue<Int4> pmovsxbd(RValue<SByte16> x)
John Bauman89401822014-05-06 15:04:28 -04003958 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003959 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04003960 }
3961
Nicolas Capens01a97962017-07-28 17:30:51 -04003962 RValue<Int4> pmovzxwd(RValue<UShort8> x)
John Bauman89401822014-05-06 15:04:28 -04003963 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003964 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04003965 }
3966
Nicolas Capens01a97962017-07-28 17:30:51 -04003967 RValue<Int4> pmovsxwd(RValue<Short8> x)
John Bauman89401822014-05-06 15:04:28 -04003968 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003969 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04003970 }
3971 }
Logan Chiene3191012018-08-24 22:01:50 +08003972#endif // defined(__i386__) || defined(__x86_64__)
Ben Clayton1bc7ee92019-02-14 18:43:22 +00003973
Ben Clayton60a3d6f2019-02-26 17:24:46 +00003974#ifdef ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +00003975 // extractAll returns a vector containing the extracted n scalar value of
3976 // the vector vec.
3977 static std::vector<Value*> extractAll(Value* vec, int n)
3978 {
3979 std::vector<Value*> elements;
3980 elements.reserve(n);
3981 for (int i = 0; i < n; i++)
3982 {
3983 auto el = V(::builder->CreateExtractElement(V(vec), i));
3984 elements.push_back(el);
3985 }
3986 return elements;
3987 }
3988
3989 // toDouble returns all the float values in vals extended to doubles.
3990 static std::vector<Value*> toDouble(const std::vector<Value*>& vals)
3991 {
3992 auto doubleTy = ::llvm::Type::getDoubleTy(*::context);
3993 std::vector<Value*> elements;
3994 elements.reserve(vals.size());
3995 for (auto v : vals)
3996 {
3997 elements.push_back(V(::builder->CreateFPExt(V(v), doubleTy)));
3998 }
3999 return elements;
4000 }
4001
4002 std::vector<Value*> PrintValue::Ty<Byte4>::val(const RValue<Byte4>& v) { return extractAll(v.value, 4); }
4003 std::vector<Value*> PrintValue::Ty<Int4>::val(const RValue<Int4>& v) { return extractAll(v.value, 4); }
4004 std::vector<Value*> PrintValue::Ty<UInt4>::val(const RValue<UInt4>& v) { return extractAll(v.value, 4); }
4005 std::vector<Value*> PrintValue::Ty<Short4>::val(const RValue<Short4>& v) { return extractAll(v.value, 4); }
4006 std::vector<Value*> PrintValue::Ty<UShort4>::val(const RValue<UShort4>& v) { return extractAll(v.value, 4); }
4007 std::vector<Value*> PrintValue::Ty<Float>::val(const RValue<Float>& v) { return toDouble({v.value}); }
4008 std::vector<Value*> PrintValue::Ty<Float4>::val(const RValue<Float4>& v) { return toDouble(extractAll(v.value, 4)); }
4009
4010 void Printv(const char* function, const char* file, int line, const char* fmt, std::initializer_list<PrintValue> args)
4011 {
4012 // LLVM types used below.
4013 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
4014 auto intTy = ::llvm::Type::getInt64Ty(*::context); // TODO: Natural int width.
4015 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
4016 auto funcTy = ::llvm::FunctionType::get(i32Ty, {i8PtrTy}, true);
4017
4018 auto func = ::module->getOrInsertFunction("printf", funcTy);
4019
4020 // Build the printf format message string.
4021 std::string str;
4022 if (file != nullptr) { str += (line > 0) ? "%s:%d " : "%s "; }
4023 if (function != nullptr) { str += "%s "; }
4024 str += fmt;
4025
4026 // Perform subsitution on all '{n}' bracketed indices in the format
4027 // message.
4028 int i = 0;
4029 for (const PrintValue& arg : args)
4030 {
4031 str = replace(str, "{" + std::to_string(i++) + "}", arg.format);
4032 }
4033
4034 ::llvm::SmallVector<::llvm::Value*, 8> vals;
4035
4036 // The format message is always the first argument.
4037 vals.push_back(::builder->CreateGlobalStringPtr(str));
4038
4039 // Add optional file, line and function info if provided.
4040 if (file != nullptr)
4041 {
4042 vals.push_back(::builder->CreateGlobalStringPtr(file));
4043 if (line > 0)
4044 {
4045 vals.push_back(::llvm::ConstantInt::get(intTy, line));
4046 }
4047 }
4048 if (function != nullptr)
4049 {
4050 vals.push_back(::builder->CreateGlobalStringPtr(function));
4051 }
4052
4053 // Add all format arguments.
4054 for (const PrintValue& arg : args)
4055 {
4056 for (auto val : arg.values)
4057 {
4058 vals.push_back(V(val));
4059 }
4060 }
4061
4062 ::builder->CreateCall(func, vals);
4063 }
4064#endif // ENABLE_RR_PRINT
4065
Ben Claytonac07ed82019-03-26 14:17:41 +00004066 void Break()
4067 {
4068 auto trap = ::llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::trap);
4069 builder->CreateCall(trap);
4070 }
4071
4072 void Nop()
4073 {
4074 auto voidTy = ::llvm::Type::getVoidTy(*context);
4075 auto funcTy = ::llvm::FunctionType::get(voidTy, {}, false);
4076 auto func = ::module->getOrInsertFunction("nop", funcTy);
4077 builder->CreateCall(func);
4078 }
4079
// EmitDebugLocation forwards to DebugInfo::EmitLocation() when debug info
// is compiled in (ENABLE_RR_DEBUG_INFO) and a debugInfo object exists.
// Otherwise it does nothing.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (debugInfo == nullptr)
	{
		return;
	}

	debugInfo->EmitLocation();
#endif // ENABLE_RR_DEBUG_INFO
}
4089
4090 void EmitDebugVariable(Value* value)
4091 {
4092#ifdef ENABLE_RR_DEBUG_INFO
4093 if (debugInfo != nullptr)
4094 {
4095 debugInfo->EmitVariable(value);
4096 }
4097#endif // ENABLE_RR_DEBUG_INFO
4098 }
4099
// FlushDebug forwards to DebugInfo::Flush() when debug info is compiled in
// (ENABLE_RR_DEBUG_INFO) and a debugInfo object exists. Otherwise it does
// nothing.
void FlushDebug()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (debugInfo == nullptr)
	{
		return;
	}

	debugInfo->Flush();
#endif // ENABLE_RR_DEBUG_INFO
}
4109
John Bauman89401822014-05-06 15:04:28 -04004110}