blob: 272d8f072fe200c6d234175ff65bd3a6b134fee1 [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
Nicolas Capenscb986762017-01-20 11:34:37 -050015#include "Reactor.hpp"
Ben Claytoneb50d252019-04-15 13:50:01 -040016#include "Debug.hpp"
Ben Claytonac07ed82019-03-26 14:17:41 +000017#include "LLVMReactor.hpp"
18#include "LLVMReactorDebugInfo.hpp"
John Bauman89401822014-05-06 15:04:28 -040019
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040020#include "x86.hpp"
21#include "CPUID.hpp"
22#include "Thread.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040023#include "ExecutableMemory.hpp"
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040024#include "MutexLock.hpp"
25
26#undef min
27#undef max
28
Ben Clayton09a7f452019-04-25 15:22:43 +010029#if defined(__clang__)
// LLVM has occurrences of the extra-semi warning in its headers, which will be
31// treated as an error in SwiftShader targets.
32#pragma clang diagnostic push
33#pragma clang diagnostic ignored "-Wextra-semi"
34#endif // defined(__clang__)
35
Ben Clayton5875be52019-04-11 14:57:40 -040036#include "llvm/Analysis/LoopPass.h"
37#include "llvm/ExecutionEngine/ExecutionEngine.h"
38#include "llvm/ExecutionEngine/JITSymbol.h"
39#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
40#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
41#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
42#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
43#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
44#include "llvm/ExecutionEngine/SectionMemoryManager.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DataLayout.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalVariable.h"
Ben Clayton5875be52019-04-11 14:57:40 -040049#include "llvm/IR/Intrinsics.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010050#include "llvm/IR/IRBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040051#include "llvm/IR/LegacyPassManager.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010052#include "llvm/IR/LLVMContext.h"
Ben Clayton5875be52019-04-11 14:57:40 -040053#include "llvm/IR/Mangler.h"
54#include "llvm/IR/Module.h"
Ben Clayton4b944652019-05-02 10:56:19 +010055#include "llvm/IR/Verifier.h"
Ben Clayton5875be52019-04-11 14:57:40 -040056#include "llvm/Support/Error.h"
57#include "llvm/Support/TargetSelect.h"
58#include "llvm/Target/TargetOptions.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010059#include "llvm/Transforms/Coroutines.h"
Ben Clayton5875be52019-04-11 14:57:40 -040060#include "llvm/Transforms/InstCombine/InstCombine.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010061#include "llvm/Transforms/IPO.h"
62#include "llvm/Transforms/IPO/PassManagerBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040063#include "llvm/Transforms/Scalar.h"
64#include "llvm/Transforms/Scalar/GVN.h"
Ben Clayton20507fa2019-04-20 01:40:15 -040065
Ben Clayton09a7f452019-04-25 15:22:43 +010066#if defined(__clang__)
67#pragma clang diagnostic pop
68#endif // defined(__clang__)
69
Ben Clayton5875be52019-04-11 14:57:40 -040070#include "LLVMRoutine.hpp"
John Bauman89401822014-05-06 15:04:28 -040071
Ben Clayton5875be52019-04-11 14:57:40 -040072#define ARGS(...) {__VA_ARGS__}
73#define CreateCall2 CreateCall
74#define CreateCall3 CreateCall
Logan Chien0eedc8c2018-08-21 09:34:28 +080075
Ben Clayton5875be52019-04-11 14:57:40 -040076#include <unordered_map>
Logan Chien0eedc8c2018-08-21 09:34:28 +080077
John Bauman89401822014-05-06 15:04:28 -040078#include <fstream>
Ben Clayton1bc7ee92019-02-14 18:43:22 +000079#include <numeric>
80#include <thread>
Ben Clayton1c82c7b2019-04-30 12:49:27 +010081#include <iostream>
John Bauman89401822014-05-06 15:04:28 -040082
Nicolas Capens47dc8672017-04-25 12:54:39 -040083#if defined(__i386__) || defined(__x86_64__)
84#include <xmmintrin.h>
85#endif
86
Logan Chien40a60052018-09-26 19:03:53 +080087#include <math.h>
88
Nicolas Capenscb122582014-05-06 23:34:44 -040089#if defined(__x86_64__) && defined(_WIN32)
John Bauman66b8ab22014-05-06 15:57:45 -040090extern "C" void X86CompilationCallback()
91{
Ben Claytoneb50d252019-04-15 13:50:01 -040092 UNIMPLEMENTED("X86CompilationCallback");
John Bauman66b8ab22014-05-06 15:57:45 -040093}
94#endif
95
Nicolas Capens48461502018-08-06 14:20:45 -040096namespace rr
Logan Chien52cde602018-09-03 19:37:57 +080097{
98 class LLVMReactorJIT;
99}
100
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400101namespace
102{
Nicolas Capens48461502018-08-06 14:20:45 -0400103 rr::LLVMReactorJIT *reactorJIT = nullptr;
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400104 llvm::IRBuilder<> *builder = nullptr;
105 llvm::LLVMContext *context = nullptr;
106 llvm::Module *module = nullptr;
107 llvm::Function *function = nullptr;
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400108
Ben Claytonac07ed82019-03-26 14:17:41 +0000109#ifdef ENABLE_RR_DEBUG_INFO
110 std::unique_ptr<rr::DebugInfo> debugInfo;
111#endif
112
Nicolas Capensc07dc4b2018-08-06 14:20:45 -0400113 rr::MutexLock codegenMutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800114
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000115#ifdef ENABLE_RR_PRINT
// Returns a copy of str in which every non-overlapping occurrence of substr
// has been replaced with replacement. Text inserted by a replacement is never
// re-scanned, so replacing "%" with "%%" terminates.
//
// An empty substr matches at every position and would make the search loop
// spin forever (growing the string on each iteration), so in that case str is
// returned unchanged.
std::string replace(std::string str, const std::string& substr, const std::string& replacement)
{
    if(substr.empty())
    {
        return str;
    }

    std::string::size_type pos = 0;
    while((pos = str.find(substr, pos)) != std::string::npos)
    {
        str.replace(pos, substr.length(), replacement);
        // Skip past the inserted text so it is not matched again.
        pos += replacement.length();
    }
    return str;
}
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000125#endif // ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000126
Logan Chien0eedc8c2018-08-21 09:34:28 +0800127 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
128 {
129 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
130
131 llvm::VectorType *extTy =
132 llvm::VectorType::getExtendedElementVectorType(ty);
133 x = ::builder->CreateZExt(x, extTy);
134 y = ::builder->CreateZExt(y, extTy);
135
136 // (x + y + 1) >> 1
137 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
138 llvm::Value *res = ::builder->CreateAdd(x, y);
139 res = ::builder->CreateAdd(res, one);
140 res = ::builder->CreateLShr(res, one);
141 return ::builder->CreateTrunc(res, ty);
142 }
143
144 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800145 llvm::ICmpInst::Predicate pred)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800146 {
147 return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
148 }
149
150 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
Logan Chienb5ce5092018-09-27 18:45:58 +0800151 llvm::Value *y, llvm::Type *dstTy)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800152 {
153 return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
154 }
155
Logan Chiene3191012018-08-24 22:01:50 +0800156#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800157 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
158 {
159 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
160 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
161
162 llvm::Value *undef = llvm::UndefValue::get(srcTy);
163 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
164 std::iota(mask.begin(), mask.end(), 0);
165 llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
166
167 return sext ? ::builder->CreateSExt(v, dstTy)
Logan Chienb5ce5092018-09-27 18:45:58 +0800168 : ::builder->CreateZExt(v, dstTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800169 }
170
171 llvm::Value *lowerPABS(llvm::Value *v)
172 {
173 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
174 llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
175 llvm::Value *neg = ::builder->CreateNeg(v);
176 return ::builder->CreateSelect(cmp, v, neg);
177 }
178#endif // defined(__i386__) || defined(__x86_64__)
Logan Chiene3191012018-08-24 22:01:50 +0800179
180#if !defined(__i386__) && !defined(__x86_64__)
181 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800182 llvm::FCmpInst::Predicate pred)
Logan Chiene3191012018-08-24 22:01:50 +0800183 {
184 return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
185 }
186
Logan Chien83fc07a2018-09-26 22:14:00 +0800187 llvm::Value *lowerRound(llvm::Value *x)
188 {
189 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
190 ::module, llvm::Intrinsic::nearbyint, {x->getType()});
191 return ::builder->CreateCall(nearbyint, ARGS(x));
192 }
193
Logan Chien2faa24a2018-09-26 19:59:32 +0800194 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
195 {
196 return ::builder->CreateFPToSI(lowerRound(x), ty);
197 }
198
Logan Chien40a60052018-09-26 19:03:53 +0800199 llvm::Value *lowerFloor(llvm::Value *x)
200 {
201 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
202 ::module, llvm::Intrinsic::floor, {x->getType()});
203 return ::builder->CreateCall(floor, ARGS(x));
204 }
205
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800206 llvm::Value *lowerTrunc(llvm::Value *x)
207 {
208 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
209 ::module, llvm::Intrinsic::trunc, {x->getType()});
210 return ::builder->CreateCall(trunc, ARGS(x));
211 }
212
Logan Chiene3191012018-08-24 22:01:50 +0800213 // Packed add/sub saturatation
Logan Chien28794cf2018-09-26 18:58:03 +0800214 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
Logan Chiene3191012018-08-24 22:01:50 +0800215 {
Logan Chien28794cf2018-09-26 18:58:03 +0800216 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
217 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
218
219 unsigned numBits = ty->getScalarSizeInBits();
220
221 llvm::Value *max, *min, *extX, *extY;
222 if (isSigned)
223 {
224 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
225 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
226 extX = ::builder->CreateSExt(x, extTy);
227 extY = ::builder->CreateSExt(y, extTy);
228 }
229 else
230 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400231 ASSERT_MSG(numBits <= 64, "numBits: %d", int(numBits));
Logan Chien28794cf2018-09-26 18:58:03 +0800232 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
233 max = llvm::ConstantInt::get(extTy, maxVal, false);
234 min = llvm::ConstantInt::get(extTy, 0, false);
235 extX = ::builder->CreateZExt(x, extTy);
236 extY = ::builder->CreateZExt(y, extTy);
237 }
238
239 llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
240 : ::builder->CreateSub(extX, extY);
241
242 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
243 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
244
245 return ::builder->CreateTrunc(res, ty);
Logan Chiene3191012018-08-24 22:01:50 +0800246 }
247
248 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
249 {
Logan Chien28794cf2018-09-26 18:58:03 +0800250 return lowerPSAT(x, y, true, false);
Logan Chiene3191012018-08-24 22:01:50 +0800251 }
252
253 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
254 {
Logan Chien28794cf2018-09-26 18:58:03 +0800255 return lowerPSAT(x, y, true, true);
Logan Chiene3191012018-08-24 22:01:50 +0800256 }
257
258 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
259 {
Logan Chien28794cf2018-09-26 18:58:03 +0800260 return lowerPSAT(x, y, false, false);
Logan Chiene3191012018-08-24 22:01:50 +0800261 }
262
263 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
264 {
Logan Chien28794cf2018-09-26 18:58:03 +0800265 return lowerPSAT(x, y, false, true);
Logan Chiene3191012018-08-24 22:01:50 +0800266 }
267
268 llvm::Value *lowerSQRT(llvm::Value *x)
269 {
270 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
271 ::module, llvm::Intrinsic::sqrt, {x->getType()});
272 return ::builder->CreateCall(sqrt, ARGS(x));
273 }
274
275 llvm::Value *lowerRCP(llvm::Value *x)
276 {
277 llvm::Type *ty = x->getType();
278 llvm::Constant *one;
279 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
280 {
281 one = llvm::ConstantVector::getSplat(
282 vectorTy->getNumElements(),
283 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
284 }
285 else
286 {
287 one = llvm::ConstantFP::get(ty, 1);
288 }
289 return ::builder->CreateFDiv(one, x);
290 }
291
292 llvm::Value *lowerRSQRT(llvm::Value *x)
293 {
294 return lowerRCP(lowerSQRT(x));
295 }
296
297 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
298 {
299 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
300 llvm::Value *y = llvm::ConstantVector::getSplat(
301 ty->getNumElements(),
302 llvm::ConstantInt::get(ty->getElementType(), scalarY));
303 return ::builder->CreateShl(x, y);
304 }
305
306 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
307 {
308 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
309 llvm::Value *y = llvm::ConstantVector::getSplat(
310 ty->getNumElements(),
311 llvm::ConstantInt::get(ty->getElementType(), scalarY));
312 return ::builder->CreateAShr(x, y);
313 }
314
315 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
316 {
317 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
318 llvm::Value *y = llvm::ConstantVector::getSplat(
319 ty->getNumElements(),
320 llvm::ConstantInt::get(ty->getElementType(), scalarY));
321 return ::builder->CreateLShr(x, y);
322 }
323
324 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
325 {
326 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
327 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
328
329 llvm::Value *extX = ::builder->CreateSExt(x, extTy);
330 llvm::Value *extY = ::builder->CreateSExt(y, extTy);
331 llvm::Value *mult = ::builder->CreateMul(extX, extY);
332
333 llvm::Value *undef = llvm::UndefValue::get(extTy);
334
335 llvm::SmallVector<uint32_t, 16> evenIdx;
336 llvm::SmallVector<uint32_t, 16> oddIdx;
337 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
338 {
339 evenIdx.push_back(i);
340 oddIdx.push_back(i + 1);
341 }
342
343 llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
344 llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
345 return ::builder->CreateAdd(lhs, rhs);
346 }
347
Logan Chiene3191012018-08-24 22:01:50 +0800348 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
349 {
350 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
351 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
352
353 llvm::IntegerType *dstElemTy =
354 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
355
356 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
Ben Claytoneb50d252019-04-15 13:50:01 -0400357 ASSERT_MSG(truncNumBits < 64, "shift 64 must be handled separately. truncNumBits: %d", int(truncNumBits));
Logan Chiene3191012018-08-24 22:01:50 +0800358 llvm::Constant *max, *min;
359 if (isSigned)
360 {
361 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
362 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
363 }
364 else
365 {
366 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
367 min = llvm::ConstantInt::get(srcTy, 0, false);
368 }
369
370 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
371 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
372 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
373 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
374
375 x = ::builder->CreateTrunc(x, dstTy);
376 y = ::builder->CreateTrunc(y, dstTy);
377
378 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
379 std::iota(index.begin(), index.end(), 0);
380
381 return ::builder->CreateShuffleVector(x, y, index);
382 }
383
384 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
385 {
386 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
387 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
388 llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
389
390 llvm::Value *ret = ::builder->CreateZExt(
391 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
392 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
393 {
394 llvm::Value *elem = ::builder->CreateZExt(
395 ::builder->CreateExtractElement(cmp, i), retTy);
396 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
397 }
398 return ret;
399 }
400
401 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
402 {
403 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
404 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
405 llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
406
407 llvm::Value *ret = ::builder->CreateZExt(
408 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
409 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
410 {
411 llvm::Value *elem = ::builder->CreateZExt(
412 ::builder->CreateExtractElement(cmp, i), retTy);
413 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
414 }
415 return ret;
416 }
417#endif // !defined(__i386__) && !defined(__x86_64__)
Chris Forbese86b6dc2019-03-01 09:08:47 -0800418
419 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
420 {
421 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
422 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
423
424 llvm::Value *extX, *extY;
425 if (sext)
426 {
427 extX = ::builder->CreateSExt(x, extTy);
428 extY = ::builder->CreateSExt(y, extTy);
429 }
430 else
431 {
432 extX = ::builder->CreateZExt(x, extTy);
433 extY = ::builder->CreateZExt(y, extTy);
434 }
435
436 llvm::Value *mult = ::builder->CreateMul(extX, extY);
437
438 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
439 llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getBitWidth());
440 return ::builder->CreateTrunc(mulh, ty);
441 }
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400442}
443
Nicolas Capens48461502018-08-06 14:20:45 -0400444namespace rr
John Bauman89401822014-05-06 15:04:28 -0400445{
Ben Claytonc7904162019-04-17 17:35:48 -0400446 const Capabilities Caps =
447 {
448 true, // CallSupported
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100449 true, // CoroutinesSupported
Ben Claytonc7904162019-04-17 17:35:48 -0400450 };
451
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400452 static std::memory_order atomicOrdering(llvm::AtomicOrdering memoryOrder)
453 {
454 switch(memoryOrder)
455 {
456 case llvm::AtomicOrdering::Monotonic: return std::memory_order_relaxed; // https://llvm.org/docs/Atomics.html#monotonic
457 case llvm::AtomicOrdering::Acquire: return std::memory_order_acquire;
458 case llvm::AtomicOrdering::Release: return std::memory_order_release;
459 case llvm::AtomicOrdering::AcquireRelease: return std::memory_order_acq_rel;
460 case llvm::AtomicOrdering::SequentiallyConsistent: return std::memory_order_seq_cst;
461 default:
Ben Claytonfb280672019-04-25 11:16:15 +0100462 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400463 return std::memory_order_acq_rel;
464 }
465 }
466
467 static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
468 {
469 if(!atomic)
470 {
471 return llvm::AtomicOrdering::NotAtomic;
472 }
473
474 switch(memoryOrder)
475 {
476 case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic; // https://llvm.org/docs/Atomics.html#monotonic
477 case std::memory_order_consume: return llvm::AtomicOrdering::Acquire; // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
478 case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
479 case std::memory_order_release: return llvm::AtomicOrdering::Release;
480 case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
481 case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
482 default:
483 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
484 return llvm::AtomicOrdering::AcquireRelease;
485 }
486 }
487
488 template <typename T>
489 static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
490 {
491 *reinterpret_cast<T*>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), atomicOrdering(ordering));
492 }
493
494 template <typename T>
495 static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
496 {
497 std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), *reinterpret_cast<T*>(val), atomicOrdering(ordering));
498 }
499
Logan Chien40a60052018-09-26 19:03:53 +0800500 class ExternalFunctionSymbolResolver
501 {
502 private:
503 using FunctionMap = std::unordered_map<std::string, void *>;
504 FunctionMap func_;
505
506 public:
507 ExternalFunctionSymbolResolver()
508 {
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400509 struct Atomic
510 {
511 static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
512 {
513 switch (size)
514 {
515 case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
516 case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
517 case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
518 case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
519 default:
520 UNIMPLEMENTED("Atomic::load(size: %d)", int(size));
521 }
522 }
523 static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
524 {
525 switch (size)
526 {
527 case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
528 case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
529 case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
530 case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
531 default:
532 UNIMPLEMENTED("Atomic::store(size: %d)", int(size));
533 }
534 }
535 };
Ben Claytonac07ed82019-03-26 14:17:41 +0000536 struct F { static void nop() {} };
Ben Claytonac07ed82019-03-26 14:17:41 +0000537
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400538 func_.emplace("nop", reinterpret_cast<void*>(F::nop));
Logan Chien40a60052018-09-26 19:03:53 +0800539 func_.emplace("floorf", reinterpret_cast<void*>(floorf));
Logan Chien83fc07a2018-09-26 22:14:00 +0800540 func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800541 func_.emplace("truncf", reinterpret_cast<void*>(truncf));
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000542 func_.emplace("printf", reinterpret_cast<void*>(printf));
543 func_.emplace("puts", reinterpret_cast<void*>(puts));
Chris Forbes1a4c7122019-03-15 14:50:47 -0700544 func_.emplace("fmodf", reinterpret_cast<void*>(fmodf));
Ben Claytona2c8b772019-04-09 13:42:36 -0400545 func_.emplace("sinf", reinterpret_cast<void*>(sinf));
Ben Clayton1b6f8c72019-04-09 13:47:43 -0400546 func_.emplace("cosf", reinterpret_cast<void*>(cosf));
Ben Claytonf9350d72019-04-09 14:19:02 -0400547 func_.emplace("asinf", reinterpret_cast<void*>(asinf));
Ben Claytoneafae472019-04-09 14:22:38 -0400548 func_.emplace("acosf", reinterpret_cast<void*>(acosf));
Ben Clayton749b4e02019-04-09 14:27:43 -0400549 func_.emplace("atanf", reinterpret_cast<void*>(atanf));
Ben Claytond9636972019-04-09 15:09:54 -0400550 func_.emplace("sinhf", reinterpret_cast<void*>(sinhf));
Ben Clayton900ea2c2019-04-09 15:25:36 -0400551 func_.emplace("coshf", reinterpret_cast<void*>(coshf));
Ben Clayton3928bd92019-04-09 15:27:41 -0400552 func_.emplace("tanhf", reinterpret_cast<void*>(tanhf));
Ben Claytonf6d77ab2019-04-09 15:30:04 -0400553 func_.emplace("asinhf", reinterpret_cast<void*>(asinhf));
Ben Clayton28ebcb02019-04-09 15:33:38 -0400554 func_.emplace("acoshf", reinterpret_cast<void*>(acoshf));
Ben Claytonfa6a5392019-04-09 15:35:24 -0400555 func_.emplace("atanhf", reinterpret_cast<void*>(atanhf));
Ben Claytona520c3e2019-04-09 15:43:45 -0400556 func_.emplace("atan2f", reinterpret_cast<void*>(atan2f));
Ben Claytonbfe94f02019-04-09 15:52:12 -0400557 func_.emplace("powf", reinterpret_cast<void*>(powf));
Ben Clayton242f0022019-04-09 16:00:53 -0400558 func_.emplace("expf", reinterpret_cast<void*>(expf));
Ben Clayton2c1da722019-04-09 16:03:03 -0400559 func_.emplace("logf", reinterpret_cast<void*>(logf));
Ben Claytonf40b56c2019-04-09 16:06:55 -0400560 func_.emplace("exp2f", reinterpret_cast<void*>(exp2f));
Ben Claytone17acfe2019-04-09 16:09:13 -0400561 func_.emplace("log2f", reinterpret_cast<void*>(log2f));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400562 func_.emplace("atomic_load", reinterpret_cast<void*>(Atomic::load));
563 func_.emplace("atomic_store", reinterpret_cast<void*>(Atomic::store));
Ben Clayton14740062019-04-09 13:48:41 -0400564
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100565 // FIXME (b/119409619): use an allocator here so we can control all memory allocations
566 func_.emplace("coroutine_alloc_frame", reinterpret_cast<void*>(malloc));
567 func_.emplace("coroutine_free_frame", reinterpret_cast<void*>(free));
568
Ben Clayton14740062019-04-09 13:48:41 -0400569#ifdef __APPLE__
Ben Clayton14740062019-04-09 13:48:41 -0400570 func_.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));
571#elif defined(__linux__)
572 func_.emplace("sincosf", reinterpret_cast<void*>(sincosf));
573#endif // __APPLE__
Logan Chien40a60052018-09-26 19:03:53 +0800574 }
575
576 void *findSymbol(const std::string &name) const
577 {
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000578 // Trim off any underscores from the start of the symbol. LLVM likes
579 // to append these on macOS.
580 const char* trimmed = name.c_str();
581 while (trimmed[0] == '_') { trimmed++; }
582
583 FunctionMap::const_iterator it = func_.find(trimmed);
Ben Claytoneb50d252019-04-15 13:50:01 -0400584 // Missing functions will likely make the module fail in exciting non-obvious ways.
585 ASSERT_MSG(it != func_.end(), "Missing external function: '%s'", name.c_str());
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000586 return it->second;
Logan Chien40a60052018-09-26 19:03:53 +0800587 }
588 };
589
Logan Chien0eedc8c2018-08-21 09:34:28 +0800590 class LLVMReactorJIT
591 {
592 private:
593 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
594 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
595
596 llvm::orc::ExecutionSession session;
Logan Chien40a60052018-09-26 19:03:53 +0800597 ExternalFunctionSymbolResolver externalSymbolResolver;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800598 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
599 std::unique_ptr<llvm::TargetMachine> targetMachine;
600 const llvm::DataLayout dataLayout;
601 ObjLayer objLayer;
602 CompileLayer compileLayer;
603 size_t emittedFunctionsNum;
604
605 public:
606 LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
607 const llvm::TargetOptions &targetOpts):
608 resolver(createLegacyLookupResolver(
609 session,
610 [this](const std::string &name) {
Logan Chien40a60052018-09-26 19:03:53 +0800611 void *func = externalSymbolResolver.findSymbol(name);
612 if (func != nullptr)
613 {
614 return llvm::JITSymbol(
615 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
616 }
617
Logan Chien0eedc8c2018-08-21 09:34:28 +0800618 return objLayer.findSymbol(name, true);
619 },
620 [](llvm::Error err) {
621 if (err)
622 {
623 // TODO: Log the symbol resolution errors.
624 return;
625 }
626 })),
627 targetMachine(llvm::EngineBuilder()
Ben Claytonac07ed82019-03-26 14:17:41 +0000628#ifdef ENABLE_RR_DEBUG_INFO
629 .setOptLevel(llvm::CodeGenOpt::None)
630#endif // ENABLE_RR_DEBUG_INFO
Logan Chien0eedc8c2018-08-21 09:34:28 +0800631 .setMArch(arch)
632 .setMAttrs(mattrs)
633 .setTargetOptions(targetOpts)
634 .selectTarget()),
635 dataLayout(targetMachine->createDataLayout()),
636 objLayer(
637 session,
638 [this](llvm::orc::VModuleKey) {
639 return ObjLayer::Resources{
640 std::make_shared<llvm::SectionMemoryManager>(),
641 resolver};
Ben Claytonac07ed82019-03-26 14:17:41 +0000642 },
643 ObjLayer::NotifyLoadedFtor(),
644 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
645#ifdef ENABLE_RR_DEBUG_INFO
646 if (debugInfo != nullptr)
647 {
648 debugInfo->NotifyObjectEmitted(Obj, L);
649 }
650#endif // ENABLE_RR_DEBUG_INFO
651 },
652 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
653#ifdef ENABLE_RR_DEBUG_INFO
654 if (debugInfo != nullptr)
655 {
656 debugInfo->NotifyFreeingObject(Obj);
657 }
658#endif // ENABLE_RR_DEBUG_INFO
659 }
660 ),
Logan Chien0eedc8c2018-08-21 09:34:28 +0800661 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
662 emittedFunctionsNum(0)
663 {
664 }
665
666 void startSession()
667 {
668 ::module = new llvm::Module("", *::context);
669 }
670
671 void endSession()
672 {
673 ::function = nullptr;
674 ::module = nullptr;
675 }
676
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100677 LLVMRoutine *acquireRoutine(llvm::Function **funcs, size_t count)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800678 {
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100679 std::vector<std::string> mangledNames(count);
680 for (size_t i = 0; i < count; i++)
681 {
682 auto func = funcs[i];
683 std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
684 func->setName(name);
685 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
686 func->setDoesNotThrow();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800687
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100688 llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
689 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
690 }
691
692 // Compile the module - after this the llvm::Functions will have
693 // been freed.
Logan Chien0eedc8c2018-08-21 09:34:28 +0800694 std::unique_ptr<llvm::Module> mod(::module);
695 ::module = nullptr;
696 mod->setDataLayout(dataLayout);
697
698 auto moduleKey = session.allocateVModule();
699 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100700 funcs = nullptr; // Now points to released memory.
Logan Chien0eedc8c2018-08-21 09:34:28 +0800701
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100702 // Resolve the function addresses.
703 std::vector<void*> addresses(count);
704 for (size_t i = 0; i < count; i++)
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400705 {
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100706 llvm::JITSymbol symbol = compileLayer.findSymbolIn(moduleKey, mangledNames[i], false);
707
708 llvm::Expected<llvm::JITTargetAddress> expectAddr = symbol.getAddress();
709 if(!expectAddr)
710 {
711 return nullptr;
712 }
713
714 addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400715 }
716
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100717 return new LLVMRoutine(addresses.data(), count, releaseRoutineCallback, this, moduleKey);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800718 }
719
720 void optimize(llvm::Module *module)
721 {
Ben Claytonac07ed82019-03-26 14:17:41 +0000722#ifdef ENABLE_RR_DEBUG_INFO
723 if (debugInfo != nullptr)
724 {
725 return; // Don't optimize if we're generating debug info.
726 }
727#endif // ENABLE_RR_DEBUG_INFO
728
Logan Chien0eedc8c2018-08-21 09:34:28 +0800729 std::unique_ptr<llvm::legacy::PassManager> passManager(
730 new llvm::legacy::PassManager());
731
732 passManager->add(llvm::createSROAPass());
733
734 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
735 {
736 switch(optimization[pass])
737 {
738 case Disabled: break;
739 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
740 case LICM: passManager->add(llvm::createLICMPass()); break;
741 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
742 case GVN: passManager->add(llvm::createGVNPass()); break;
743 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
744 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
745 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
746 case SCCP: passManager->add(llvm::createSCCPPass()); break;
747 case ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
748 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400749 UNREACHABLE("optimization[pass]: %d, pass: %d", int(optimization[pass]), int(pass));
Logan Chien0eedc8c2018-08-21 09:34:28 +0800750 }
751 }
752
753 passManager->run(*::module);
754 }
755
756 private:
757 void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
758 {
759 llvm::cantFail(compileLayer.removeModule(moduleKey));
760 }
761
762 static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
763 {
764 jit->releaseRoutineModule(moduleKey);
765 }
766 };
Logan Chien52cde602018-09-03 19:37:57 +0800767
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400768 Optimization optimization[10] = {InstructionCombining, Disabled};
John Bauman89401822014-05-06 15:04:28 -0400769
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500770 // The abstract Type* types are implemented as LLVM types, except that
771 // 64-bit vectors are emulated using 128-bit ones to avoid use of MMX in x86
772 // and VFP in ARM, and eliminate the overhead of converting them to explicit
773 // 128-bit ones. LLVM types are pointers, so we can represent emulated types
774 // as abstract pointers with small enum values.
775 enum InternalType : uintptr_t
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400776 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500777 // Emulated types:
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400778 Type_v2i32,
779 Type_v4i16,
780 Type_v2i16,
781 Type_v8i8,
782 Type_v4i8,
783 Type_v2f32,
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500784 EmulatedTypeCount,
785 // Returned by asInternalType() to indicate that the abstract Type*
786 // should be interpreted as LLVM type pointer:
787 Type_LLVM
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400788 };
789
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500790 inline InternalType asInternalType(Type *type)
791 {
792 InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
793 return (t < EmulatedTypeCount) ? t : Type_LLVM;
794 }
795
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400796 llvm::Type *T(Type *t)
797 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500798 // Use 128-bit vectors to implement logically shorter ones.
799 switch(asInternalType(t))
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400800 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500801 case Type_v2i32: return T(Int4::getType());
802 case Type_v4i16: return T(Short8::getType());
803 case Type_v2i16: return T(Short8::getType());
804 case Type_v8i8: return T(Byte16::getType());
805 case Type_v4i8: return T(Byte16::getType());
806 case Type_v2f32: return T(Float4::getType());
807 case Type_LLVM: return reinterpret_cast<llvm::Type*>(t);
Ben Claytoneb50d252019-04-15 13:50:01 -0400808 default:
809 UNREACHABLE("asInternalType(t): %d", int(asInternalType(t)));
810 return nullptr;
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400811 }
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400812 }
813
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500814 Type *T(InternalType t)
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400815 {
816 return reinterpret_cast<Type*>(t);
817 }
818
Nicolas Capensac230122016-09-20 14:30:06 -0400819 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
820 {
821 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
822 }
823
Logan Chien191b3052018-08-31 16:57:15 +0800824 inline llvm::BasicBlock *B(BasicBlock *t)
825 {
826 return reinterpret_cast<llvm::BasicBlock*>(t);
827 }
828
Nicolas Capensc8b67a42016-09-25 15:02:52 -0400829 inline BasicBlock *B(llvm::BasicBlock *t)
830 {
831 return reinterpret_cast<BasicBlock*>(t);
832 }
833
Nicolas Capens01a97962017-07-28 17:30:51 -0400834 static size_t typeSize(Type *type)
835 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500836 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -0400837 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500838 case Type_v2i32: return 8;
839 case Type_v4i16: return 8;
840 case Type_v2i16: return 4;
841 case Type_v8i8: return 8;
842 case Type_v4i8: return 4;
843 case Type_v2f32: return 8;
844 case Type_LLVM:
Nicolas Capens01a97962017-07-28 17:30:51 -0400845 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500846 llvm::Type *t = T(type);
Nicolas Capens01a97962017-07-28 17:30:51 -0400847
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500848 if(t->isPointerTy())
849 {
850 return sizeof(void*);
851 }
852
853 // At this point we should only have LLVM 'primitive' types.
854 unsigned int bits = t->getPrimitiveSizeInBits();
Ben Claytoneb50d252019-04-15 13:50:01 -0400855 ASSERT_MSG(bits != 0, "bits: %d", int(bits));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500856
857 // TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
858 // but are typically stored as one byte. The DataLayout structure should
859 // be used here and many other places if this assumption fails.
860 return (bits + 7) / 8;
861 }
862 break;
863 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400864 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500865 return 0;
866 }
Nicolas Capens01a97962017-07-28 17:30:51 -0400867 }
868
Nicolas Capens69674fb2017-09-01 11:08:44 -0400869 static unsigned int elementCount(Type *type)
870 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500871 switch(asInternalType(type))
Nicolas Capens69674fb2017-09-01 11:08:44 -0400872 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500873 case Type_v2i32: return 2;
874 case Type_v4i16: return 4;
875 case Type_v2i16: return 2;
876 case Type_v8i8: return 8;
877 case Type_v4i8: return 4;
878 case Type_v2f32: return 2;
879 case Type_LLVM: return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
Ben Claytoneb50d252019-04-15 13:50:01 -0400880 default:
881 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
882 return 0;
Nicolas Capens69674fb2017-09-01 11:08:44 -0400883 }
Nicolas Capens69674fb2017-09-01 11:08:44 -0400884 }
885
John Bauman89401822014-05-06 15:04:28 -0400886 Nucleus::Nucleus()
887 {
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400888 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400889
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400890 llvm::InitializeNativeTarget();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800891 llvm::InitializeNativeTargetAsmPrinter();
892 llvm::InitializeNativeTargetAsmParser();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800893
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400894 if(!::context)
John Bauman89401822014-05-06 15:04:28 -0400895 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400896 ::context = new llvm::LLVMContext();
John Bauman89401822014-05-06 15:04:28 -0400897 }
898
John Bauman89401822014-05-06 15:04:28 -0400899 #if defined(__x86_64__)
Logan Chien52cde602018-09-03 19:37:57 +0800900 static const char arch[] = "x86-64";
Logan Chiene3191012018-08-24 22:01:50 +0800901 #elif defined(__i386__)
Logan Chien52cde602018-09-03 19:37:57 +0800902 static const char arch[] = "x86";
Logan Chiene3191012018-08-24 22:01:50 +0800903 #elif defined(__aarch64__)
904 static const char arch[] = "arm64";
905 #elif defined(__arm__)
906 static const char arch[] = "arm";
Gordana Cmiljanovic082dfec2018-10-19 11:36:15 +0200907 #elif defined(__mips__)
Gordana Cmiljanovic20622c02018-11-05 15:00:11 +0100908 #if defined(__mips64)
909 static const char arch[] = "mips64el";
910 #else
911 static const char arch[] = "mipsel";
912 #endif
Logan Chiene3191012018-08-24 22:01:50 +0800913 #else
914 #error "unknown architecture"
John Bauman89401822014-05-06 15:04:28 -0400915 #endif
916
Ben Clayton0fc611f2019-04-18 11:23:27 -0400917 llvm::SmallVector<std::string, 8> mattrs;
918
919 llvm::StringMap<bool> features;
920 bool ok = llvm::sys::getHostCPUFeatures(features);
921 ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
922 for (auto &feature : features)
923 {
924 if (feature.second) { mattrs.push_back(feature.first()); }
925 }
926
927#if 0
Logan Chiene3191012018-08-24 22:01:50 +0800928#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800929 mattrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
930 mattrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
931 mattrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
932 mattrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
933 mattrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
934 mattrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
Logan Chien0eedc8c2018-08-21 09:34:28 +0800935 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
Logan Chiene3191012018-08-24 22:01:50 +0800936#elif defined(__arm__)
937#if __ARM_ARCH >= 8
938 mattrs.push_back("+armv8-a");
939#else
940 // armv7-a requires compiler-rt routines; otherwise, compiled kernel
941 // might fail to link.
942#endif
943#endif
Ben Clayton0fc611f2019-04-18 11:23:27 -0400944#endif
John Bauman89401822014-05-06 15:04:28 -0400945
Logan Chien0eedc8c2018-08-21 09:34:28 +0800946 llvm::TargetOptions targetOpts;
Nicolas Capensa7643812018-09-13 14:20:06 -0400947 targetOpts.UnsafeFPMath = false;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800948 // targetOpts.NoInfsFPMath = true;
949 // targetOpts.NoNaNsFPMath = true;
Logan Chien52cde602018-09-03 19:37:57 +0800950
951 if(!::reactorJIT)
952 {
Logan Chien0eedc8c2018-08-21 09:34:28 +0800953 ::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
Logan Chien52cde602018-09-03 19:37:57 +0800954 }
955
956 ::reactorJIT->startSession();
John Bauman89401822014-05-06 15:04:28 -0400957
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400958 if(!::builder)
John Bauman89401822014-05-06 15:04:28 -0400959 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400960 ::builder = new llvm::IRBuilder<>(*::context);
John Bauman89401822014-05-06 15:04:28 -0400961 }
962 }
963
964 Nucleus::~Nucleus()
965 {
Logan Chien52cde602018-09-03 19:37:57 +0800966 ::reactorJIT->endSession();
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400967
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400968 ::codegenMutex.unlock();
John Bauman89401822014-05-06 15:04:28 -0400969 }
970
Chris Forbes878d4b02019-01-21 10:48:35 -0800971 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
John Bauman89401822014-05-06 15:04:28 -0400972 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400973 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
John Bauman19bac1e2014-05-06 15:23:49 -0400974 {
Nicolas Capensac230122016-09-20 14:30:06 -0400975 llvm::Type *type = ::function->getReturnType();
John Bauman19bac1e2014-05-06 15:23:49 -0400976
977 if(type->isVoidTy())
978 {
979 createRetVoid();
980 }
981 else
982 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400983 createRet(V(llvm::UndefValue::get(type)));
John Bauman19bac1e2014-05-06 15:23:49 -0400984 }
985 }
John Bauman89401822014-05-06 15:04:28 -0400986
Ben Clayton97c13ad2019-05-02 11:59:30 +0100987#ifdef ENABLE_RR_DEBUG_INFO
988 if (debugInfo != nullptr)
989 {
990 debugInfo->Finalize();
991 }
992#endif // ENABLE_RR_DEBUG_INFO
993
John Bauman89401822014-05-06 15:04:28 -0400994 if(false)
995 {
Ben Clayton5875be52019-04-11 14:57:40 -0400996 std::error_code error;
997 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400998 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -0400999 }
1000
Ben Clayton4b944652019-05-02 10:56:19 +01001001 // FIXME: Disable for release builds once heavy development is over.
1002 bool verifyIR = true;
1003 if(verifyIR)
1004 {
1005 llvm::legacy::PassManager pm;
1006 pm.add(llvm::createVerifierPass());
1007 pm.run(*::module);
1008 }
1009
John Bauman89401822014-05-06 15:04:28 -04001010 if(runOptimizations)
1011 {
1012 optimize();
1013 }
1014
1015 if(false)
1016 {
Ben Clayton5875be52019-04-11 14:57:40 -04001017 std::error_code error;
1018 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001019 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001020 }
1021
Ben Clayton1c82c7b2019-04-30 12:49:27 +01001022 LLVMRoutine *routine = ::reactorJIT->acquireRoutine(&::function, 1);
John Bauman89401822014-05-06 15:04:28 -04001023
John Bauman89401822014-05-06 15:04:28 -04001024 return routine;
1025 }
1026
1027 void Nucleus::optimize()
1028 {
Logan Chien52cde602018-09-03 19:37:57 +08001029 ::reactorJIT->optimize(::module);
John Bauman89401822014-05-06 15:04:28 -04001030 }
1031
John Bauman19bac1e2014-05-06 15:23:49 -04001032 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
John Bauman89401822014-05-06 15:04:28 -04001033 {
1034 // Need to allocate it in the entry block for mem2reg to work
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001035 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
John Bauman89401822014-05-06 15:04:28 -04001036
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001037 llvm::Instruction *declaration;
John Bauman89401822014-05-06 15:04:28 -04001038
1039 if(arraySize)
1040 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001041 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
John Bauman89401822014-05-06 15:04:28 -04001042 }
1043 else
1044 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001045 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
John Bauman89401822014-05-06 15:04:28 -04001046 }
1047
1048 entryBlock.getInstList().push_front(declaration);
1049
Nicolas Capens19336542016-09-26 10:32:29 -04001050 return V(declaration);
John Bauman89401822014-05-06 15:04:28 -04001051 }
1052
1053 BasicBlock *Nucleus::createBasicBlock()
1054 {
Logan Chien191b3052018-08-31 16:57:15 +08001055 return B(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001056 }
1057
1058 BasicBlock *Nucleus::getInsertBlock()
1059 {
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001060 return B(::builder->GetInsertBlock());
John Bauman89401822014-05-06 15:04:28 -04001061 }
1062
1063 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1064 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001065 // assert(::builder->GetInsertBlock()->back().isTerminator());
Nicolas Capens0192d152019-03-27 14:46:07 -04001066
1067 Variable::materializeAll();
1068
Logan Chien191b3052018-08-31 16:57:15 +08001069 ::builder->SetInsertPoint(B(basicBlock));
John Bauman89401822014-05-06 15:04:28 -04001070 }
1071
Nicolas Capensac230122016-09-20 14:30:06 -04001072 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
John Bauman89401822014-05-06 15:04:28 -04001073 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001074 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001075 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
1076 ::function->setCallingConv(llvm::CallingConv::C);
John Bauman89401822014-05-06 15:04:28 -04001077
Ben Clayton5875be52019-04-11 14:57:40 -04001078 #if defined(_WIN32)
Nicolas Capens52551d12018-09-13 14:30:56 -04001079 // FIXME(capn):
1080 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
1081 // having a trap which allows the OS to grow the stack. For functions with a stack frame
1082 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
1083 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
1084 // the stack and ensure all pages have been committed. This is currently broken in LLVM
1085 // JIT, but we can prevent emitting the stack probe call:
1086 ::function->addFnAttr("stack-probe-size", "1048576");
1087 #endif
1088
Ben Claytonac07ed82019-03-26 14:17:41 +00001089#ifdef ENABLE_RR_DEBUG_INFO
1090 ::debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(::builder, ::context, ::module, ::function));
1091#endif // ENABLE_RR_DEBUG_INFO
1092
Logan Chien191b3052018-08-31 16:57:15 +08001093 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001094 }
1095
Nicolas Capens19336542016-09-26 10:32:29 -04001096 Value *Nucleus::getArgument(unsigned int index)
John Bauman89401822014-05-06 15:04:28 -04001097 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001098 llvm::Function::arg_iterator args = ::function->arg_begin();
John Bauman89401822014-05-06 15:04:28 -04001099
1100 while(index)
1101 {
1102 args++;
1103 index--;
1104 }
1105
Nicolas Capens19336542016-09-26 10:32:29 -04001106 return V(&*args);
John Bauman89401822014-05-06 15:04:28 -04001107 }
1108
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001109 void Nucleus::createRetVoid()
John Bauman89401822014-05-06 15:04:28 -04001110 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001111 RR_DEBUG_INFO_UPDATE_LOC();
1112
Ben Claytonc958b172019-05-02 12:20:59 +01001113 ASSERT_MSG(::function->getReturnType() == T(Void::getType()), "Return type mismatch");
1114
Nicolas Capens0192d152019-03-27 14:46:07 -04001115 // Code generated after this point is unreachable, so any variables
1116 // being read can safely return an undefined value. We have to avoid
1117 // materializing variables after the terminator ret instruction.
1118 Variable::killUnmaterialized();
1119
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001120 ::builder->CreateRetVoid();
John Bauman89401822014-05-06 15:04:28 -04001121 }
1122
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001123 void Nucleus::createRet(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001124 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001125 RR_DEBUG_INFO_UPDATE_LOC();
1126
Ben Claytonc958b172019-05-02 12:20:59 +01001127 ASSERT_MSG(::function->getReturnType() == V(v)->getType(), "Return type mismatch");
1128
Nicolas Capens0192d152019-03-27 14:46:07 -04001129 // Code generated after this point is unreachable, so any variables
1130 // being read can safely return an undefined value. We have to avoid
1131 // materializing variables after the terminator ret instruction.
1132 Variable::killUnmaterialized();
1133
Logan Chien191b3052018-08-31 16:57:15 +08001134 ::builder->CreateRet(V(v));
John Bauman89401822014-05-06 15:04:28 -04001135 }
1136
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001137 void Nucleus::createBr(BasicBlock *dest)
John Bauman89401822014-05-06 15:04:28 -04001138 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001139 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001140 Variable::materializeAll();
1141
Logan Chien191b3052018-08-31 16:57:15 +08001142 ::builder->CreateBr(B(dest));
John Bauman89401822014-05-06 15:04:28 -04001143 }
1144
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001145 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001146 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001147 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001148 Variable::materializeAll();
Logan Chien191b3052018-08-31 16:57:15 +08001149 ::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
John Bauman89401822014-05-06 15:04:28 -04001150 }
1151
1152 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1153 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001154 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001155 return V(::builder->CreateAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001156 }
1157
1158 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1159 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001160 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001161 return V(::builder->CreateSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001162 }
1163
1164 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1165 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001166 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001167 return V(::builder->CreateMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001168 }
1169
1170 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1171 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001172 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001173 return V(::builder->CreateUDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001174 }
1175
1176 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1177 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001178 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001179 return V(::builder->CreateSDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001180 }
1181
1182 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1183 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001184 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001185 return V(::builder->CreateFAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001186 }
1187
1188 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1189 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001190 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001191 return V(::builder->CreateFSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001192 }
1193
1194 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1195 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001196 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001197 return V(::builder->CreateFMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001198 }
1199
1200 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1201 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001202 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001203 return V(::builder->CreateFDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001204 }
1205
1206 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1207 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001208 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001209 return V(::builder->CreateURem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001210 }
1211
1212 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1213 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001214 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001215 return V(::builder->CreateSRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001216 }
1217
1218 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1219 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001220 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001221 return V(::builder->CreateFRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001222 }
1223
1224 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1225 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001226 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001227 return V(::builder->CreateShl(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001228 }
1229
1230 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1231 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001232 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001233 return V(::builder->CreateLShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001234 }
1235
1236 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1237 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001238 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001239 return V(::builder->CreateAShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001240 }
1241
1242 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1243 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001244 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001245 return V(::builder->CreateAnd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001246 }
1247
1248 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1249 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001250 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001251 return V(::builder->CreateOr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001252 }
1253
1254 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1255 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001256 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001257 return V(::builder->CreateXor(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001258 }
1259
Nicolas Capens19336542016-09-26 10:32:29 -04001260 Value *Nucleus::createNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001261 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001262 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001263 return V(::builder->CreateNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001264 }
1265
Nicolas Capens19336542016-09-26 10:32:29 -04001266 Value *Nucleus::createFNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001267 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001268 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001269 return V(::builder->CreateFNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001270 }
1271
Nicolas Capens19336542016-09-26 10:32:29 -04001272 Value *Nucleus::createNot(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001273 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001274 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001275 return V(::builder->CreateNot(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001276 }
1277
Nicolas Capens86509d92019-03-21 13:23:50 -04001278 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001279 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001280 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001281 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001282 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001283 case Type_v2i32:
1284 case Type_v4i16:
1285 case Type_v8i8:
1286 case Type_v2f32:
1287 return createBitCast(
1288 createInsertElement(
1289 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
Nicolas Capens86509d92019-03-21 13:23:50 -04001290 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001291 0),
1292 type);
1293 case Type_v2i16:
1294 case Type_v4i8:
1295 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001296 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001297 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Nicolas Capens86509d92019-03-21 13:23:50 -04001298 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001299 i = createZExt(i, Long::getType());
1300 Value *v = createInsertElement(u, i, 0);
1301 return createBitCast(v, type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001302 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001303 // Fallthrough to non-emulated case.
1304 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001305 {
Ben Clayton99e57192019-05-03 13:25:08 +01001306 auto elTy = T(type);
1307 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
1308 if (atomic && !(elTy->isIntegerTy() || elTy->isPointerTy() || elTy->isFloatTy()))
1309 {
1310 // atomic load operand must have integer, pointer, or floating point type
1311 // Fall back to using:
1312 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1313 auto sizetTy = ::llvm::IntegerType::get(*::context, sizeof(size_t) * 8);
1314 auto intTy = ::llvm::IntegerType::get(*::context, sizeof(int) * 8);
1315 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1316 auto i8PtrTy = i8Ty->getPointerTo();
1317 auto voidTy = ::llvm::Type::getVoidTy(*::context);
1318 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
1319 auto func = ::module->getOrInsertFunction("__atomic_load", funcTy);
1320 auto size = ::module->getDataLayout().getTypeStoreSize(elTy);
1321 auto out = allocateStackVariable(type);
1322 ::builder->CreateCall(func, {
1323 ::llvm::ConstantInt::get(sizetTy, size),
1324 ::builder->CreatePointerCast(V(ptr), i8PtrTy),
1325 ::builder->CreatePointerCast(V(out), i8PtrTy),
1326 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1327 });
1328 return V(::builder->CreateLoad(V(out)));
1329 }
1330 else
1331 {
1332 auto load = new llvm::LoadInst(V(ptr), "", isVolatile, alignment);
1333 load->setAtomic(atomicOrdering(atomic, memoryOrder));
1334 return V(::builder->Insert(load));
1335 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001336 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001337 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001338 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1339 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001340 }
John Bauman89401822014-05-06 15:04:28 -04001341 }
1342
Nicolas Capens86509d92019-03-21 13:23:50 -04001343 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001344 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001345 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001346 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001347 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001348 case Type_v2i32:
1349 case Type_v4i16:
1350 case Type_v8i8:
1351 case Type_v2f32:
1352 createStore(
1353 createExtractElement(
1354 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1355 createBitCast(ptr, Pointer<Long>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001356 Long::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001357 return value;
1358 case Type_v2i16:
1359 case Type_v4i8:
1360 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001361 {
Logan Chien191b3052018-08-31 16:57:15 +08001362 createStore(
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001363 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1364 createBitCast(ptr, Pointer<Int>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001365 Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens01a97962017-07-28 17:30:51 -04001366 return value;
Nicolas Capens01a97962017-07-28 17:30:51 -04001367 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001368 // Fallthrough to non-emulated case.
1369 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001370 {
Ben Clayton99e57192019-05-03 13:25:08 +01001371 auto elTy = T(type);
1372 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
1373 if (atomic && !(elTy->isIntegerTy() || elTy->isPointerTy() || elTy->isFloatTy()))
1374 {
1375 // atomic store operand must have integer, pointer, or floating point type
1376 // Fall back to using:
1377 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1378 auto sizetTy = ::llvm::IntegerType::get(*::context, sizeof(size_t) * 8);
1379 auto intTy = ::llvm::IntegerType::get(*::context, sizeof(int) * 8);
1380 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1381 auto i8PtrTy = i8Ty->getPointerTo();
1382 auto voidTy = ::llvm::Type::getVoidTy(*::context);
1383 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
1384 auto func = ::module->getOrInsertFunction("__atomic_store", funcTy);
1385 auto size = ::module->getDataLayout().getTypeStoreSize(elTy);
1386 auto copy = allocateStackVariable(type);
1387 ::builder->CreateStore(V(value), V(copy));
1388 ::builder->CreateCall(func, {
1389 ::llvm::ConstantInt::get(sizetTy, size),
1390 ::builder->CreatePointerCast(V(ptr), i8PtrTy),
1391 ::builder->CreatePointerCast(V(copy), i8PtrTy),
1392 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1393 });
1394 }
1395 else
1396 {
1397 auto store = ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
1398 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1399 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001400
1401 return value;
1402 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001403 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001404 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1405 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001406 }
John Bauman89401822014-05-06 15:04:28 -04001407 }
1408
Ben Clayton0fc611f2019-04-18 11:23:27 -04001409 Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment)
1410 {
1411 ASSERT(V(base)->getType()->isPointerTy());
1412 ASSERT(V(offsets)->getType()->isVectorTy());
1413 ASSERT(V(mask)->getType()->isVectorTy());
1414
1415 auto numEls = V(mask)->getType()->getVectorNumElements();
1416 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
1417 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
1418 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1419 auto i8PtrTy = i8Ty->getPointerTo();
1420 auto elPtrTy = T(elTy)->getPointerTo();
1421 auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
1422 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
1423 auto i8Base = ::builder->CreatePointerCast(V(base), i8PtrTy);
1424 auto i8Ptrs = ::builder->CreateGEP(i8Base, V(offsets));
1425 auto elPtrs = ::builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1426 auto i8Mask = ::builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
1427 auto passthrough = ::llvm::Constant::getNullValue(elVecTy);
1428 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
1429 auto func = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
1430 return V(::builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough }));
1431 }
1432
1433 void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment)
1434 {
1435 ASSERT(V(base)->getType()->isPointerTy());
1436 ASSERT(V(val)->getType()->isVectorTy());
1437 ASSERT(V(offsets)->getType()->isVectorTy());
1438 ASSERT(V(mask)->getType()->isVectorTy());
1439
1440 auto numEls = V(mask)->getType()->getVectorNumElements();
1441 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
1442 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
1443 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1444 auto i8PtrTy = i8Ty->getPointerTo();
1445 auto elVecTy = V(val)->getType();
1446 auto elTy = elVecTy->getVectorElementType();
1447 auto elPtrTy = elTy->getPointerTo();
1448 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
1449 auto i8Base = ::builder->CreatePointerCast(V(base), i8PtrTy);
1450 auto i8Ptrs = ::builder->CreateGEP(i8Base, V(offsets));
1451 auto elPtrs = ::builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1452 auto i8Mask = ::builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
1453 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
1454 auto func = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_scatter, { elVecTy, elPtrVecTy } );
1455 ::builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
1456 }
1457
Ben Claytonb16c5862019-05-08 14:01:38 +01001458 void Nucleus::createFence(std::memory_order memoryOrder)
1459 {
1460 ::builder->CreateFence(atomicOrdering(true, memoryOrder));
1461 }
1462
Nicolas Capensd294def2017-01-26 17:44:37 -08001463 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
John Bauman89401822014-05-06 15:04:28 -04001464 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001465 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001466 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens01a97962017-07-28 17:30:51 -04001467 if(sizeof(void*) == 8)
Nicolas Capensd294def2017-01-26 17:44:37 -08001468 {
Ben Claytonb1243732019-02-27 23:56:18 +00001469 // LLVM manual: "When indexing into an array, pointer or vector,
1470 // integers of any width are allowed, and they are not required to
1471 // be constant. These integers are treated as signed values where
1472 // relevant."
1473 //
1474 // Thus if we want indexes to be treated as unsigned we have to
1475 // zero-extend them ourselves.
1476 //
1477 // Note that this is not because we want to address anywhere near
1478 // 4 GB of data. Instead this is important for performance because
1479 // x86 supports automatic zero-extending of 32-bit registers to
1480 // 64-bit. Thus when indexing into an array using a uint32 is
1481 // actually faster than an int32.
1482 index = unsignedIndex ?
1483 createZExt(index, Long::getType()) :
1484 createSExt(index, Long::getType());
Nicolas Capens01a97962017-07-28 17:30:51 -04001485 }
Ben Claytonb1243732019-02-27 23:56:18 +00001486
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001487 // For non-emulated types we can rely on LLVM's GEP to calculate the
1488 // effective address correctly.
1489 if(asInternalType(type) == Type_LLVM)
Nicolas Capens01a97962017-07-28 17:30:51 -04001490 {
Ben Claytonb1243732019-02-27 23:56:18 +00001491 return V(::builder->CreateGEP(V(ptr), V(index)));
Nicolas Capensd294def2017-01-26 17:44:37 -08001492 }
1493
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001494 // For emulated types we have to multiply the index by the intended
1495 // type size ourselves to obain the byte offset.
Ben Claytonb1243732019-02-27 23:56:18 +00001496 index = (sizeof(void*) == 8) ?
1497 createMul(index, createConstantLong((int64_t)typeSize(type))) :
1498 createMul(index, createConstantInt((int)typeSize(type)));
1499
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001500 // Cast to a byte pointer, apply the byte offset, and cast back to the
1501 // original pointer type.
Logan Chien191b3052018-08-31 16:57:15 +08001502 return createBitCast(
1503 V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
1504 T(llvm::PointerType::get(T(type), 0)));
John Bauman89401822014-05-06 15:04:28 -04001505 }
1506
Chris Forbes17813932019-04-18 11:45:54 -07001507 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
John Bauman19bac1e2014-05-06 15:23:49 -04001508 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001509 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbes17813932019-04-18 11:45:54 -07001510 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1511 }
1512
Chris Forbes707ed992019-04-18 18:17:35 -07001513 Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1514 {
1515 RR_DEBUG_INFO_UPDATE_LOC();
1516 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Sub, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1517 }
1518
Chris Forbes17813932019-04-18 11:45:54 -07001519 Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1520 {
1521 RR_DEBUG_INFO_UPDATE_LOC();
1522 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::And, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1523 }
1524
1525 Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1526 {
1527 RR_DEBUG_INFO_UPDATE_LOC();
1528 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Or, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1529 }
1530
1531 Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1532 {
1533 RR_DEBUG_INFO_UPDATE_LOC();
1534 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xor, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1535 }
1536
1537 Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1538 {
1539 RR_DEBUG_INFO_UPDATE_LOC();
1540 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Min, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1541 }
1542
1543 Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1544 {
1545 RR_DEBUG_INFO_UPDATE_LOC();
1546 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Max, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1547 }
1548
1549 Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1550 {
1551 RR_DEBUG_INFO_UPDATE_LOC();
1552 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
John Bauman19bac1e2014-05-06 15:23:49 -04001553 }
1554
Chris Forbesa16238d2019-04-18 16:31:54 -07001555 Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1556 {
1557 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbesc9ca99e2019-04-19 07:53:34 -07001558 // Note: AtomicCmpXchgInstruction returns a 2-member struct containing {result, success-flag}, not the result directly.
Chris Forbesa16238d2019-04-18 16:31:54 -07001559 return V(::builder->CreateExtractValue(
1560 ::builder->CreateAtomicCmpXchg(V(ptr), V(compare), V(value), atomicOrdering(true, memoryOrderEqual), atomicOrdering(true, memoryOrderUnequal)),
1561 llvm::ArrayRef<unsigned>(0u)));
1562 }
1563
Nicolas Capens19336542016-09-26 10:32:29 -04001564 Value *Nucleus::createTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001565 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001566 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001567 return V(::builder->CreateTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001568 }
1569
Nicolas Capens19336542016-09-26 10:32:29 -04001570 Value *Nucleus::createZExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001571 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001572 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001573 return V(::builder->CreateZExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001574 }
1575
Nicolas Capens19336542016-09-26 10:32:29 -04001576 Value *Nucleus::createSExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001577 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001578 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001579 return V(::builder->CreateSExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001580 }
1581
Nicolas Capens19336542016-09-26 10:32:29 -04001582 Value *Nucleus::createFPToSI(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001583 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001584 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001585 return V(::builder->CreateFPToSI(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001586 }
1587
Nicolas Capens19336542016-09-26 10:32:29 -04001588 Value *Nucleus::createSIToFP(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001589 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001590 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001591 return V(::builder->CreateSIToFP(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001592 }
1593
Nicolas Capens19336542016-09-26 10:32:29 -04001594 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001595 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001596 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001597 return V(::builder->CreateFPTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001598 }
1599
Nicolas Capens19336542016-09-26 10:32:29 -04001600 Value *Nucleus::createFPExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001601 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001602 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001603 return V(::builder->CreateFPExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001604 }
1605
Nicolas Capens19336542016-09-26 10:32:29 -04001606 Value *Nucleus::createBitCast(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001607 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001608 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04001609 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1610 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1611 // reading back as the destination type.
Logan Chien191b3052018-08-31 16:57:15 +08001612 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001613 {
1614 Value *readAddress = allocateStackVariable(destType);
Logan Chien191b3052018-08-31 16:57:15 +08001615 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1616 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001617 return createLoad(readAddress, destType);
1618 }
Logan Chien191b3052018-08-31 16:57:15 +08001619 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001620 {
Logan Chien191b3052018-08-31 16:57:15 +08001621 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1622 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001623 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1624 return createLoad(readAddress, destType);
1625 }
1626
Logan Chien191b3052018-08-31 16:57:15 +08001627 return V(::builder->CreateBitCast(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001628 }
1629
John Bauman89401822014-05-06 15:04:28 -04001630 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1631 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001632 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001633 return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001634 }
1635
1636 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1637 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001638 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001639 return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001640 }
1641
1642 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1643 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001644 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001645 return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001646 }
1647
1648 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1649 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001650 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001651 return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001652 }
1653
1654 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1655 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001656 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001657 return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001658 }
1659
1660 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1661 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001662 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001663 return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001664 }
1665
1666 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1667 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001668 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001669 return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001670 }
1671
1672 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1673 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001674 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001675 return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001676 }
1677
1678 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1679 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001680 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001681 return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001682 }
1683
1684 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1685 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001686 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001687 return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001688 }
1689
1690 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1691 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001692 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001693 return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001694 }
1695
1696 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1697 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001698 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001699 return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001700 }
1701
1702 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1703 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001704 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001705 return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001706 }
1707
1708 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1709 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001710 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001711 return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001712 }
1713
1714 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1715 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001716 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001717 return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001718 }
1719
1720 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1721 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001722 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001723 return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001724 }
1725
1726 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1727 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001728 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001729 return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001730 }
1731
1732 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1733 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001734 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001735 return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001736 }
1737
1738 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1739 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001740 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001741 return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001742 }
1743
1744 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1745 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001746 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001747 return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001748 }
1749
1750 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1751 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001752 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001753 return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001754 }
1755
1756 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1757 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001758 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001759 return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001760 }
1761
1762 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1763 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001764 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001765 return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001766 }
1767
1768 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1769 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001770 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton71008d82019-03-05 17:17:59 +00001771 return V(::builder->CreateFCmpUNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001772 }
1773
Nicolas Capense95d5342016-09-30 11:37:28 -04001774 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
John Bauman89401822014-05-06 15:04:28 -04001775 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001776 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001777 ASSERT(V(vector)->getType()->getContainedType(0) == T(type));
Logan Chien191b3052018-08-31 16:57:15 +08001778 return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001779 }
1780
1781 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1782 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001783 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001784 return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001785 }
1786
Logan Chien191b3052018-08-31 16:57:15 +08001787 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
John Bauman89401822014-05-06 15:04:28 -04001788 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001789 RR_DEBUG_INFO_UPDATE_LOC();
1790
Logan Chien191b3052018-08-31 16:57:15 +08001791 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
Nicolas Capense89cd582016-09-30 14:23:47 -04001792 const int maxSize = 16;
1793 llvm::Constant *swizzle[maxSize];
Ben Claytoneb50d252019-04-15 13:50:01 -04001794 ASSERT(size <= maxSize);
Nicolas Capense89cd582016-09-30 14:23:47 -04001795
1796 for(int i = 0; i < size; i++)
1797 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001798 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
Nicolas Capense89cd582016-09-30 14:23:47 -04001799 }
1800
1801 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
1802
Logan Chien191b3052018-08-31 16:57:15 +08001803 return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
John Bauman89401822014-05-06 15:04:28 -04001804 }
1805
Logan Chien191b3052018-08-31 16:57:15 +08001806 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001807 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001808 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001809 return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
John Bauman89401822014-05-06 15:04:28 -04001810 }
1811
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001812 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
John Bauman89401822014-05-06 15:04:28 -04001813 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001814 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001815 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
John Bauman89401822014-05-06 15:04:28 -04001816 }
1817
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001818 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
John Bauman89401822014-05-06 15:04:28 -04001819 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001820 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001821 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
1822 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
John Bauman89401822014-05-06 15:04:28 -04001823 }
1824
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001825 void Nucleus::createUnreachable()
John Bauman89401822014-05-06 15:04:28 -04001826 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001827 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001828 ::builder->CreateUnreachable();
John Bauman89401822014-05-06 15:04:28 -04001829 }
1830
Nicolas Capensac230122016-09-20 14:30:06 -04001831 Type *Nucleus::getPointerType(Type *ElementType)
John Bauman89401822014-05-06 15:04:28 -04001832 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001833 return T(llvm::PointerType::get(T(ElementType), 0));
John Bauman89401822014-05-06 15:04:28 -04001834 }
1835
Nicolas Capens13ac2322016-10-13 14:52:12 -04001836 Value *Nucleus::createNullValue(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001837 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001838 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001839 return V(llvm::Constant::getNullValue(T(Ty)));
John Bauman89401822014-05-06 15:04:28 -04001840 }
1841
Nicolas Capens13ac2322016-10-13 14:52:12 -04001842 Value *Nucleus::createConstantLong(int64_t i)
John Bauman89401822014-05-06 15:04:28 -04001843 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001844 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001845 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001846 }
1847
Nicolas Capens13ac2322016-10-13 14:52:12 -04001848 Value *Nucleus::createConstantInt(int i)
John Bauman89401822014-05-06 15:04:28 -04001849 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001850 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001851 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001852 }
1853
Nicolas Capens13ac2322016-10-13 14:52:12 -04001854 Value *Nucleus::createConstantInt(unsigned int i)
John Bauman89401822014-05-06 15:04:28 -04001855 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001856 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001857 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001858 }
1859
Nicolas Capens13ac2322016-10-13 14:52:12 -04001860 Value *Nucleus::createConstantBool(bool b)
John Bauman89401822014-05-06 15:04:28 -04001861 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001862 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001863 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
John Bauman89401822014-05-06 15:04:28 -04001864 }
1865
Nicolas Capens13ac2322016-10-13 14:52:12 -04001866 Value *Nucleus::createConstantByte(signed char i)
John Bauman89401822014-05-06 15:04:28 -04001867 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001868 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001869 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001870 }
1871
Nicolas Capens13ac2322016-10-13 14:52:12 -04001872 Value *Nucleus::createConstantByte(unsigned char i)
John Bauman89401822014-05-06 15:04:28 -04001873 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001874 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001875 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001876 }
1877
Nicolas Capens13ac2322016-10-13 14:52:12 -04001878 Value *Nucleus::createConstantShort(short i)
John Bauman89401822014-05-06 15:04:28 -04001879 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001880 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001881 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001882 }
1883
Nicolas Capens13ac2322016-10-13 14:52:12 -04001884 Value *Nucleus::createConstantShort(unsigned short i)
John Bauman89401822014-05-06 15:04:28 -04001885 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001886 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001887 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001888 }
1889
Nicolas Capens13ac2322016-10-13 14:52:12 -04001890 Value *Nucleus::createConstantFloat(float x)
John Bauman89401822014-05-06 15:04:28 -04001891 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001892 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001893 return V(llvm::ConstantFP::get(T(Float::getType()), x));
John Bauman89401822014-05-06 15:04:28 -04001894 }
1895
Nicolas Capens13ac2322016-10-13 14:52:12 -04001896 Value *Nucleus::createNullPointer(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001897 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001898 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001899 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
John Bauman89401822014-05-06 15:04:28 -04001900 }
1901
Nicolas Capens13ac2322016-10-13 14:52:12 -04001902 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
John Bauman89401822014-05-06 15:04:28 -04001903 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001904 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001905 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1906 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001907 ASSERT(numElements <= 16 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001908 llvm::Constant *constantVector[16];
1909
Nicolas Capens69674fb2017-09-01 11:08:44 -04001910 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001911 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001912 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001913 }
1914
Nicolas Capens69674fb2017-09-01 11:08:44 -04001915 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
Nicolas Capens13ac2322016-10-13 14:52:12 -04001916 }
1917
1918 Value *Nucleus::createConstantVector(const double *constants, Type *type)
1919 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001920 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001921 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1922 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001923 ASSERT(numElements <= 8 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001924 llvm::Constant *constantVector[8];
1925
Nicolas Capens69674fb2017-09-01 11:08:44 -04001926 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001927 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001928 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001929 }
1930
Nicolas Capens69674fb2017-09-01 11:08:44 -04001931 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
John Bauman89401822014-05-06 15:04:28 -04001932 }
1933
John Bauman19bac1e2014-05-06 15:23:49 -04001934 Type *Void::getType()
John Bauman89401822014-05-06 15:04:28 -04001935 {
Nicolas Capensac230122016-09-20 14:30:06 -04001936 return T(llvm::Type::getVoidTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04001937 }
1938
John Bauman19bac1e2014-05-06 15:23:49 -04001939 Type *Bool::getType()
John Bauman89401822014-05-06 15:04:28 -04001940 {
Nicolas Capensac230122016-09-20 14:30:06 -04001941 return T(llvm::Type::getInt1Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001942 }
1943
John Bauman19bac1e2014-05-06 15:23:49 -04001944 Type *Byte::getType()
John Bauman89401822014-05-06 15:04:28 -04001945 {
Nicolas Capensac230122016-09-20 14:30:06 -04001946 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001947 }
1948
John Bauman19bac1e2014-05-06 15:23:49 -04001949 Type *SByte::getType()
John Bauman89401822014-05-06 15:04:28 -04001950 {
Nicolas Capensac230122016-09-20 14:30:06 -04001951 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001952 }
1953
John Bauman19bac1e2014-05-06 15:23:49 -04001954 Type *Short::getType()
John Bauman89401822014-05-06 15:04:28 -04001955 {
Nicolas Capensac230122016-09-20 14:30:06 -04001956 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001957 }
1958
John Bauman19bac1e2014-05-06 15:23:49 -04001959 Type *UShort::getType()
John Bauman89401822014-05-06 15:04:28 -04001960 {
Nicolas Capensac230122016-09-20 14:30:06 -04001961 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001962 }
1963
John Bauman19bac1e2014-05-06 15:23:49 -04001964 Type *Byte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001965 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001966 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001967 }
1968
John Bauman19bac1e2014-05-06 15:23:49 -04001969 Type *SByte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001970 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001971 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001972 }
1973
John Bauman19bac1e2014-05-06 15:23:49 -04001974 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001975 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001976 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001977#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001978 return x86::paddusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001979#else
1980 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
1981#endif
John Bauman89401822014-05-06 15:04:28 -04001982 }
John Bauman66b8ab22014-05-06 15:57:45 -04001983
John Bauman19bac1e2014-05-06 15:23:49 -04001984 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001985 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001986 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001987#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001988 return x86::psubusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001989#else
1990 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
1991#endif
John Bauman89401822014-05-06 15:04:28 -04001992 }
1993
John Bauman19bac1e2014-05-06 15:23:49 -04001994 RValue<Int> SignMask(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04001995 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001996 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001997#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001998 return x86::pmovmskb(x);
Logan Chiene3191012018-08-24 22:01:50 +08001999#else
2000 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2001#endif
John Bauman89401822014-05-06 15:04:28 -04002002 }
2003
John Bauman19bac1e2014-05-06 15:23:49 -04002004// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002005// {
Logan Chiene3191012018-08-24 22:01:50 +08002006//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002007// return x86::pcmpgtb(x, y); // FIXME: Signedness
Logan Chiene3191012018-08-24 22:01:50 +08002008//#else
2009// return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2010//#endif
John Bauman89401822014-05-06 15:04:28 -04002011// }
John Bauman66b8ab22014-05-06 15:57:45 -04002012
John Bauman19bac1e2014-05-06 15:23:49 -04002013 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002014 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002015 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002016#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002017 return x86::pcmpeqb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002018#else
2019 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2020#endif
John Bauman89401822014-05-06 15:04:28 -04002021 }
2022
John Bauman19bac1e2014-05-06 15:23:49 -04002023 Type *Byte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002024 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002025 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002026 }
2027
John Bauman19bac1e2014-05-06 15:23:49 -04002028 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002029 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002030 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002031#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002032 return x86::paddsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002033#else
2034 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2035#endif
John Bauman89401822014-05-06 15:04:28 -04002036 }
John Bauman66b8ab22014-05-06 15:57:45 -04002037
John Bauman19bac1e2014-05-06 15:23:49 -04002038 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002039 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002040 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002041#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002042 return x86::psubsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002043#else
2044 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2045#endif
John Bauman89401822014-05-06 15:04:28 -04002046 }
2047
John Bauman19bac1e2014-05-06 15:23:49 -04002048 RValue<Int> SignMask(RValue<SByte8> x)
John Bauman89401822014-05-06 15:04:28 -04002049 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002050 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002051#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002052 return x86::pmovmskb(As<Byte8>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002053#else
2054 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2055#endif
John Bauman89401822014-05-06 15:04:28 -04002056 }
2057
John Bauman19bac1e2014-05-06 15:23:49 -04002058 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002059 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002060 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002061#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002062 return x86::pcmpgtb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002063#else
2064 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2065#endif
John Bauman89401822014-05-06 15:04:28 -04002066 }
John Bauman66b8ab22014-05-06 15:57:45 -04002067
John Bauman19bac1e2014-05-06 15:23:49 -04002068 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002069 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002070 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002071#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002072 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
Logan Chiene3191012018-08-24 22:01:50 +08002073#else
2074 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2075#endif
John Bauman89401822014-05-06 15:04:28 -04002076 }
2077
John Bauman19bac1e2014-05-06 15:23:49 -04002078 Type *SByte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002079 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002080 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002081 }
2082
John Bauman19bac1e2014-05-06 15:23:49 -04002083 Type *Byte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002084 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002085 return T(llvm::VectorType::get(T(Byte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002086 }
2087
John Bauman19bac1e2014-05-06 15:23:49 -04002088 Type *SByte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002089 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002090 return T(llvm::VectorType::get(T(SByte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002091 }
2092
Nicolas Capens16b5f152016-10-13 13:39:01 -04002093 Type *Short2::getType()
2094 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002095 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002096 }
2097
Nicolas Capens16b5f152016-10-13 13:39:01 -04002098 Type *UShort2::getType()
2099 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002100 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002101 }
2102
John Bauman19bac1e2014-05-06 15:23:49 -04002103 Short4::Short4(RValue<Int4> cast)
John Bauman89401822014-05-06 15:04:28 -04002104 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002105 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04002106 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
John Bauman89401822014-05-06 15:04:28 -04002107 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2108
Nicolas Capens01a97962017-07-28 17:30:51 -04002109 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2110 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
John Bauman89401822014-05-06 15:04:28 -04002111
John Bauman66b8ab22014-05-06 15:57:45 -04002112 storeValue(short4);
John Bauman89401822014-05-06 15:04:28 -04002113 }
2114
John Bauman19bac1e2014-05-06 15:23:49 -04002115// Short4::Short4(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002116// {
2117// }
2118
John Bauman19bac1e2014-05-06 15:23:49 -04002119 Short4::Short4(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002120 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002121 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002122 Int4 v4i32 = Int4(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002123#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002124 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
Logan Chiene3191012018-08-24 22:01:50 +08002125#else
2126 Value *v = v4i32.loadValue();
2127 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
2128#endif
John Bauman66b8ab22014-05-06 15:57:45 -04002129
2130 storeValue(As<Short4>(Int2(v4i32)).value);
John Bauman89401822014-05-06 15:04:28 -04002131 }
2132
John Bauman19bac1e2014-05-06 15:23:49 -04002133 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002134 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002135 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002136#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002137 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2138
2139 return x86::psllw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002140#else
2141 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
2142#endif
John Bauman89401822014-05-06 15:04:28 -04002143 }
2144
John Bauman19bac1e2014-05-06 15:23:49 -04002145 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002146 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002147 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002148#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002149 return x86::psraw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002150#else
2151 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2152#endif
John Bauman89401822014-05-06 15:04:28 -04002153 }
2154
John Bauman19bac1e2014-05-06 15:23:49 -04002155 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002156 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002157 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002158#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002159 return x86::pmaxsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002160#else
2161 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
2162#endif
John Bauman89401822014-05-06 15:04:28 -04002163 }
2164
John Bauman19bac1e2014-05-06 15:23:49 -04002165 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002166 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002167 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002168#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002169 return x86::pminsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002170#else
2171 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
2172#endif
John Bauman89401822014-05-06 15:04:28 -04002173 }
2174
John Bauman19bac1e2014-05-06 15:23:49 -04002175 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002176 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002177 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002178#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002179 return x86::paddsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002180#else
2181 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2182#endif
John Bauman89401822014-05-06 15:04:28 -04002183 }
2184
John Bauman19bac1e2014-05-06 15:23:49 -04002185 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002186 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002187 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002188#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002189 return x86::psubsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002190#else
2191 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2192#endif
John Bauman89401822014-05-06 15:04:28 -04002193 }
2194
John Bauman19bac1e2014-05-06 15:23:49 -04002195 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002196 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002197 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002198#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002199 return x86::pmulhw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002200#else
2201 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2202#endif
John Bauman89401822014-05-06 15:04:28 -04002203 }
2204
John Bauman19bac1e2014-05-06 15:23:49 -04002205 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002206 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002207 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002208#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002209 return x86::pmaddwd(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002210#else
2211 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
2212#endif
John Bauman89401822014-05-06 15:04:28 -04002213 }
2214
Nicolas Capens33438a62017-09-27 11:47:35 -04002215 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002216 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002217 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002218#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002219 auto result = x86::packsswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002220#else
2221 auto result = V(lowerPack(V(x.value), V(y.value), true));
2222#endif
Nicolas Capens01a97962017-07-28 17:30:51 -04002223 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
John Bauman89401822014-05-06 15:04:28 -04002224 }
2225
Nicolas Capens33438a62017-09-27 11:47:35 -04002226 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2227 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002228 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002229#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002230 auto result = x86::packuswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002231#else
2232 auto result = V(lowerPack(V(x.value), V(y.value), false));
2233#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002234 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
2235 }
2236
John Bauman19bac1e2014-05-06 15:23:49 -04002237 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002238 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002239 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002240#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002241 return x86::pcmpgtw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002242#else
2243 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
2244#endif
John Bauman89401822014-05-06 15:04:28 -04002245 }
2246
John Bauman19bac1e2014-05-06 15:23:49 -04002247 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002248 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002249 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002250#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002251 return x86::pcmpeqw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002252#else
2253 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
2254#endif
John Bauman89401822014-05-06 15:04:28 -04002255 }
2256
John Bauman19bac1e2014-05-06 15:23:49 -04002257 Type *Short4::getType()
John Bauman89401822014-05-06 15:04:28 -04002258 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002259 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002260 }
2261
John Bauman19bac1e2014-05-06 15:23:49 -04002262 UShort4::UShort4(RValue<Float4> cast, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04002263 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002264 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002265 if(saturate)
2266 {
Logan Chiena8385ed2018-09-26 19:22:54 +08002267#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002268 if(CPUID::supportsSSE4_1())
2269 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002270 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
Nicolas Capens33438a62017-09-27 11:47:35 -04002271 *this = As<Short4>(PackUnsigned(int4, int4));
John Bauman89401822014-05-06 15:04:28 -04002272 }
2273 else
Logan Chiena8385ed2018-09-26 19:22:54 +08002274#endif
John Bauman89401822014-05-06 15:04:28 -04002275 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002276 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
John Bauman89401822014-05-06 15:04:28 -04002277 }
2278 }
2279 else
2280 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002281 *this = Short4(Int4(cast));
John Bauman89401822014-05-06 15:04:28 -04002282 }
2283 }
2284
John Bauman19bac1e2014-05-06 15:23:49 -04002285 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002286 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002287 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002288#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002289 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2290
2291 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002292#else
2293 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
2294#endif
John Bauman89401822014-05-06 15:04:28 -04002295 }
2296
John Bauman19bac1e2014-05-06 15:23:49 -04002297 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002298 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002299 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002300#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002301 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
2302
2303 return x86::psrlw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002304#else
2305 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2306#endif
John Bauman89401822014-05-06 15:04:28 -04002307 }
2308
John Bauman19bac1e2014-05-06 15:23:49 -04002309 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002310 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002311 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002312 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002313 }
2314
John Bauman19bac1e2014-05-06 15:23:49 -04002315 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002316 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002317 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002318 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002319 }
2320
John Bauman19bac1e2014-05-06 15:23:49 -04002321 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002322 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002323 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002324#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002325 return x86::paddusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002326#else
2327 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2328#endif
John Bauman89401822014-05-06 15:04:28 -04002329 }
2330
John Bauman19bac1e2014-05-06 15:23:49 -04002331 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002332 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002333 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002334#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002335 return x86::psubusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002336#else
2337 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2338#endif
John Bauman89401822014-05-06 15:04:28 -04002339 }
2340
John Bauman19bac1e2014-05-06 15:23:49 -04002341 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002342 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002343 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002344#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002345 return x86::pmulhuw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002346#else
2347 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2348#endif
John Bauman89401822014-05-06 15:04:28 -04002349 }
2350
John Bauman19bac1e2014-05-06 15:23:49 -04002351 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002352 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002353 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002354#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002355 return x86::pavgw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002356#else
2357 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
2358#endif
John Bauman89401822014-05-06 15:04:28 -04002359 }
2360
John Bauman19bac1e2014-05-06 15:23:49 -04002361 Type *UShort4::getType()
John Bauman89401822014-05-06 15:04:28 -04002362 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002363 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002364 }
2365
John Bauman19bac1e2014-05-06 15:23:49 -04002366 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002367 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002368 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002369#if defined(__i386__) || defined(__x86_64__)
2370 return x86::psllw(lhs, rhs);
2371#else
2372 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
2373#endif
John Bauman89401822014-05-06 15:04:28 -04002374 }
2375
John Bauman19bac1e2014-05-06 15:23:49 -04002376 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002377 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002378 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002379#if defined(__i386__) || defined(__x86_64__)
2380 return x86::psraw(lhs, rhs);
2381#else
2382 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
2383#endif
John Bauman89401822014-05-06 15:04:28 -04002384 }
2385
John Bauman19bac1e2014-05-06 15:23:49 -04002386 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002387 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002388 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002389#if defined(__i386__) || defined(__x86_64__)
2390 return x86::pmaddwd(x, y);
2391#else
2392 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
2393#endif
John Bauman89401822014-05-06 15:04:28 -04002394 }
2395
John Bauman19bac1e2014-05-06 15:23:49 -04002396 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002397 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002398 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002399#if defined(__i386__) || defined(__x86_64__)
2400 return x86::pmulhw(x, y);
2401#else
2402 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2403#endif
John Bauman89401822014-05-06 15:04:28 -04002404 }
2405
John Bauman19bac1e2014-05-06 15:23:49 -04002406 Type *Short8::getType()
John Bauman89401822014-05-06 15:04:28 -04002407 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002408 return T(llvm::VectorType::get(T(Short::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002409 }
2410
John Bauman19bac1e2014-05-06 15:23:49 -04002411 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002412 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002413 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002414#if defined(__i386__) || defined(__x86_64__)
2415 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
2416#else
2417 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
2418#endif
John Bauman89401822014-05-06 15:04:28 -04002419 }
2420
John Bauman19bac1e2014-05-06 15:23:49 -04002421 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002422 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002423 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002424#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002425 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
Logan Chiene3191012018-08-24 22:01:50 +08002426#else
2427 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
2428#endif
John Bauman89401822014-05-06 15:04:28 -04002429 }
2430
John Bauman19bac1e2014-05-06 15:23:49 -04002431 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
John Bauman89401822014-05-06 15:04:28 -04002432 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002433 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense89cd582016-09-30 14:23:47 -04002434 int pshufb[16] =
2435 {
2436 select0 + 0,
2437 select0 + 1,
2438 select1 + 0,
2439 select1 + 1,
2440 select2 + 0,
2441 select2 + 1,
2442 select3 + 0,
2443 select3 + 1,
2444 select4 + 0,
2445 select4 + 1,
2446 select5 + 0,
2447 select5 + 1,
2448 select6 + 0,
2449 select6 + 1,
2450 select7 + 0,
2451 select7 + 1,
2452 };
John Bauman89401822014-05-06 15:04:28 -04002453
2454 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
Nicolas Capense89cd582016-09-30 14:23:47 -04002455 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
John Bauman89401822014-05-06 15:04:28 -04002456 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
2457
2458 return RValue<UShort8>(short8);
2459 }
2460
John Bauman19bac1e2014-05-06 15:23:49 -04002461 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04002462 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002463 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002464#if defined(__i386__) || defined(__x86_64__)
2465 return x86::pmulhuw(x, y);
2466#else
2467 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2468#endif
John Bauman89401822014-05-06 15:04:28 -04002469 }
2470
John Bauman19bac1e2014-05-06 15:23:49 -04002471 Type *UShort8::getType()
John Bauman89401822014-05-06 15:04:28 -04002472 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002473 return T(llvm::VectorType::get(T(UShort::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002474 }
2475
Nicolas Capens96d4e092016-11-18 14:22:38 -05002476 RValue<Int> operator++(Int &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002477 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002478 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002479 RValue<Int> res = val;
2480
Logan Chien191b3052018-08-31 16:57:15 +08002481 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002482 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002483
2484 return res;
2485 }
2486
Nicolas Capens96d4e092016-11-18 14:22:38 -05002487 const Int &operator++(Int &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002488 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002489 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002490 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002491 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002492
2493 return val;
2494 }
2495
Nicolas Capens96d4e092016-11-18 14:22:38 -05002496 RValue<Int> operator--(Int &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002497 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002498 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002499 RValue<Int> res = val;
2500
Logan Chien191b3052018-08-31 16:57:15 +08002501 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002502 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002503
2504 return res;
2505 }
2506
Nicolas Capens96d4e092016-11-18 14:22:38 -05002507 const Int &operator--(Int &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002508 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002509 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002510 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002511 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002512
2513 return val;
2514 }
2515
John Bauman19bac1e2014-05-06 15:23:49 -04002516 RValue<Int> RoundInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002517 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002518 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002519#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002520 return x86::cvtss2si(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002521#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002522 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002523#endif
John Bauman89401822014-05-06 15:04:28 -04002524 }
2525
John Bauman19bac1e2014-05-06 15:23:49 -04002526 Type *Int::getType()
John Bauman89401822014-05-06 15:04:28 -04002527 {
Nicolas Capensac230122016-09-20 14:30:06 -04002528 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002529 }
2530
John Bauman19bac1e2014-05-06 15:23:49 -04002531 Type *Long::getType()
John Bauman89401822014-05-06 15:04:28 -04002532 {
Nicolas Capensac230122016-09-20 14:30:06 -04002533 return T(llvm::Type::getInt64Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002534 }
2535
John Bauman19bac1e2014-05-06 15:23:49 -04002536 UInt::UInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002537 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002538 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002539 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2540 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
John Bauman89401822014-05-06 15:04:28 -04002541
Alexis Hetu764d1422016-09-28 08:44:22 -04002542 // Smallest positive value representable in UInt, but not in Int
2543 const unsigned int ustart = 0x80000000u;
2544 const float ustartf = float(ustart);
2545
2546 // If the value is negative, store 0, otherwise store the result of the conversion
2547 storeValue((~(As<Int>(cast) >> 31) &
2548 // Check if the value can be represented as an Int
2549 IfThenElse(cast >= ustartf,
2550 // If the value is too large, subtract ustart and re-add it after conversion.
2551 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2552 // Otherwise, just convert normally
2553 Int(cast))).value);
John Bauman89401822014-05-06 15:04:28 -04002554 }
2555
Nicolas Capens96d4e092016-11-18 14:22:38 -05002556 RValue<UInt> operator++(UInt &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002557 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002558 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002559 RValue<UInt> res = val;
2560
Logan Chien191b3052018-08-31 16:57:15 +08002561 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002562 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002563
2564 return res;
2565 }
2566
Nicolas Capens96d4e092016-11-18 14:22:38 -05002567 const UInt &operator++(UInt &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002568 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002569 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002570 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002571 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002572
2573 return val;
2574 }
2575
Nicolas Capens96d4e092016-11-18 14:22:38 -05002576 RValue<UInt> operator--(UInt &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002577 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002578 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002579 RValue<UInt> res = val;
2580
Logan Chien191b3052018-08-31 16:57:15 +08002581 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002582 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002583
2584 return res;
2585 }
2586
Nicolas Capens96d4e092016-11-18 14:22:38 -05002587 const UInt &operator--(UInt &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002588 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002589 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002590 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002591 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002592
2593 return val;
2594 }
2595
John Bauman19bac1e2014-05-06 15:23:49 -04002596// RValue<UInt> RoundUInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002597// {
Logan Chiene3191012018-08-24 22:01:50 +08002598//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002599// return x86::cvtss2si(val); // FIXME: Unsigned
Logan Chiene3191012018-08-24 22:01:50 +08002600//#else
2601// return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
2602//#endif
John Bauman89401822014-05-06 15:04:28 -04002603// }
2604
John Bauman19bac1e2014-05-06 15:23:49 -04002605 Type *UInt::getType()
John Bauman89401822014-05-06 15:04:28 -04002606 {
Nicolas Capensac230122016-09-20 14:30:06 -04002607 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002608 }
2609
John Bauman19bac1e2014-05-06 15:23:49 -04002610// Int2::Int2(RValue<Int> cast)
2611// {
John Bauman19bac1e2014-05-06 15:23:49 -04002612// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2613// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
John Bauman66b8ab22014-05-06 15:57:45 -04002614//
Nicolas Capense89cd582016-09-30 14:23:47 -04002615// int shuffle[2] = {0, 0};
2616// Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
John Bauman19bac1e2014-05-06 15:23:49 -04002617//
John Bauman66b8ab22014-05-06 15:57:45 -04002618// storeValue(replicate);
John Bauman19bac1e2014-05-06 15:23:49 -04002619// }
John Bauman89401822014-05-06 15:04:28 -04002620
John Bauman19bac1e2014-05-06 15:23:49 -04002621 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002622 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002623 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002624#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002625 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
2626
2627 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002628#else
2629 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
2630#endif
John Bauman89401822014-05-06 15:04:28 -04002631 }
2632
John Bauman19bac1e2014-05-06 15:23:49 -04002633 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002634 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002635 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002636#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002637 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
2638
2639 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002640#else
2641 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
2642#endif
John Bauman89401822014-05-06 15:04:28 -04002643 }
2644
John Bauman19bac1e2014-05-06 15:23:49 -04002645 Type *Int2::getType()
John Bauman89401822014-05-06 15:04:28 -04002646 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002647 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002648 }
2649
John Bauman19bac1e2014-05-06 15:23:49 -04002650 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002651 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002652 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002653#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002654 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
2655
2656 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002657#else
2658 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
2659#endif
John Bauman89401822014-05-06 15:04:28 -04002660 }
2661
John Bauman19bac1e2014-05-06 15:23:49 -04002662 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002663 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002664 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002665#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002666 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
2667
2668 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002669#else
2670 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
2671#endif
John Bauman89401822014-05-06 15:04:28 -04002672 }
2673
John Bauman19bac1e2014-05-06 15:23:49 -04002674 Type *UInt2::getType()
John Bauman89401822014-05-06 15:04:28 -04002675 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002676 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002677 }
2678
Nicolas Capenscb986762017-01-20 11:34:37 -05002679 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002680 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002681 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002682#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002683 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002684 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002685 *this = x86::pmovzxbd(As<Byte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002686 }
2687 else
Logan Chiene3191012018-08-24 22:01:50 +08002688#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002689 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002690 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Nicolas Capens01a97962017-07-28 17:30:51 -04002691 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002692 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002693
Nicolas Capense89cd582016-09-30 14:23:47 -04002694 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002695 Value *c = Nucleus::createBitCast(b, Short8::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002696 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002697
Nicolas Capens01a97962017-07-28 17:30:51 -04002698 *this = As<Int4>(d);
2699 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002700 }
2701
Nicolas Capenscb986762017-01-20 11:34:37 -05002702 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002703 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002704 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002705#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002706 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002707 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002708 *this = x86::pmovsxbd(As<SByte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002709 }
2710 else
Logan Chiene3191012018-08-24 22:01:50 +08002711#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002712 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002713 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2714 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
2715 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002716
Nicolas Capense89cd582016-09-30 14:23:47 -04002717 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002718 Value *c = Nucleus::createBitCast(b, Short8::getType());
2719 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002720
Nicolas Capens01a97962017-07-28 17:30:51 -04002721 *this = As<Int4>(d) >> 24;
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002722 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002723 }
2724
Nicolas Capenscb986762017-01-20 11:34:37 -05002725 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002726 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002727 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002728#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002729 if(CPUID::supportsSSE4_1())
2730 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002731 *this = x86::pmovsxwd(As<Short8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002732 }
2733 else
Logan Chiene3191012018-08-24 22:01:50 +08002734#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002735 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002736 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002737 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2738 *this = As<Int4>(c) >> 16;
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002739 }
2740 }
2741
Nicolas Capenscb986762017-01-20 11:34:37 -05002742 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002743 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002744 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002745#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002746 if(CPUID::supportsSSE4_1())
2747 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002748 *this = x86::pmovzxwd(As<UShort8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002749 }
2750 else
Logan Chiene3191012018-08-24 22:01:50 +08002751#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002752 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002753 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002754 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2755 *this = As<Int4>(c);
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002756 }
2757 }
2758
Nicolas Capenscb986762017-01-20 11:34:37 -05002759 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002760 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002761 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002762 Value *vector = loadValue();
2763 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2764
Nicolas Capense89cd582016-09-30 14:23:47 -04002765 int swizzle[4] = {0, 0, 0, 0};
2766 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002767
2768 storeValue(replicate);
2769 }
2770
John Bauman19bac1e2014-05-06 15:23:49 -04002771 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002772 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002773 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002774#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002775 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002776#else
2777 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
2778#endif
John Bauman89401822014-05-06 15:04:28 -04002779 }
2780
John Bauman19bac1e2014-05-06 15:23:49 -04002781 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002782 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002783 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002784#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002785 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002786#else
2787 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2788#endif
John Bauman89401822014-05-06 15:04:28 -04002789 }
2790
John Bauman19bac1e2014-05-06 15:23:49 -04002791 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2792 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002793 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002794 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002795 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2796 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2797 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002798 }
2799
2800 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2801 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002802 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002803 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2804 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2805 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
2806 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002807 }
2808
2809 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2810 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002811 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002812 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2813 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2814 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
2815 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002816 }
2817
2818 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2819 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002820 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002821 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2822 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2823 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2824 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002825 }
2826
2827 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2828 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002829 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002830 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2831 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2832 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
2833 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002834 }
2835
2836 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2837 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002838 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002839 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2840 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2841 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
2842 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002843 }
2844
2845 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2846 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002847 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002848#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002849 if(CPUID::supportsSSE4_1())
2850 {
2851 return x86::pmaxsd(x, y);
2852 }
2853 else
Logan Chiene3191012018-08-24 22:01:50 +08002854#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002855 {
2856 RValue<Int4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002857 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002858 }
2859 }
2860
2861 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2862 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002863 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002864#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002865 if(CPUID::supportsSSE4_1())
2866 {
2867 return x86::pminsd(x, y);
2868 }
2869 else
Logan Chiene3191012018-08-24 22:01:50 +08002870#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002871 {
2872 RValue<Int4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002873 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002874 }
2875 }
2876
2877 RValue<Int4> RoundInt(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002878 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002879 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002880#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002881 return x86::cvtps2dq(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002882#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002883 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002884#endif
John Bauman89401822014-05-06 15:04:28 -04002885 }
2886
Chris Forbese86b6dc2019-03-01 09:08:47 -08002887 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2888 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002889 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002890 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2891 return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2892 }
2893
2894 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2895 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002896 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002897 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2898 return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2899 }
2900
Nicolas Capens33438a62017-09-27 11:47:35 -04002901 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04002902 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002903 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002904#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002905 return x86::packssdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002906#else
2907 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
2908#endif
John Bauman89401822014-05-06 15:04:28 -04002909 }
2910
Nicolas Capens33438a62017-09-27 11:47:35 -04002911 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
2912 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002913 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002914#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002915 return x86::packusdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002916#else
2917 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
2918#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002919 }
2920
John Bauman19bac1e2014-05-06 15:23:49 -04002921 RValue<Int> SignMask(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04002922 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002923 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002924#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002925 return x86::movmskps(As<Float4>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002926#else
2927 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2928#endif
John Bauman89401822014-05-06 15:04:28 -04002929 }
2930
John Bauman19bac1e2014-05-06 15:23:49 -04002931 Type *Int4::getType()
John Bauman89401822014-05-06 15:04:28 -04002932 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002933 return T(llvm::VectorType::get(T(Int::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002934 }
2935
Nicolas Capenscb986762017-01-20 11:34:37 -05002936 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002937 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002938 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002939 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2940 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
John Bauman89401822014-05-06 15:04:28 -04002941
Alexis Hetu764d1422016-09-28 08:44:22 -04002942 // Smallest positive value representable in UInt, but not in Int
2943 const unsigned int ustart = 0x80000000u;
2944 const float ustartf = float(ustart);
2945
2946 // Check if the value can be represented as an Int
2947 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
2948 // If the value is too large, subtract ustart and re-add it after conversion.
2949 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
2950 // Otherwise, just convert normally
2951 (~uiValue & Int4(cast));
2952 // If the value is negative, store 0, otherwise store the result of the conversion
2953 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
John Bauman89401822014-05-06 15:04:28 -04002954 }
2955
John Bauman19bac1e2014-05-06 15:23:49 -04002956 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002957 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002958 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002959#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002960 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002961#else
2962 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
2963#endif
John Bauman89401822014-05-06 15:04:28 -04002964 }
2965
John Bauman19bac1e2014-05-06 15:23:49 -04002966 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002967 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002968 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002969#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002970 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002971#else
2972 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2973#endif
John Bauman89401822014-05-06 15:04:28 -04002974 }
2975
John Bauman19bac1e2014-05-06 15:23:49 -04002976 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
2977 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002978 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002979 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002980 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2981 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2982 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002983 }
2984
2985 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
2986 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002987 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04002988 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
2989 }
2990
2991 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
2992 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002993 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002994 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2995 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2996 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
2997 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002998 }
2999
3000 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3001 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003002 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003003 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
3004 }
3005
3006 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3007 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003008 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04003009 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
3010 // Restore the following line when LLVM is updated to a version where this issue is fixed.
3011 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
3012 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04003013 }
3014
3015 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3016 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003017 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003018 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
3019 }
3020
3021 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3022 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003023 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003024#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003025 if(CPUID::supportsSSE4_1())
3026 {
3027 return x86::pmaxud(x, y);
3028 }
3029 else
Logan Chiene3191012018-08-24 22:01:50 +08003030#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003031 {
3032 RValue<UInt4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003033 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04003034 }
3035 }
3036
3037 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3038 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003039 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003040#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003041 if(CPUID::supportsSSE4_1())
3042 {
3043 return x86::pminud(x, y);
3044 }
3045 else
Logan Chiene3191012018-08-24 22:01:50 +08003046#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003047 {
3048 RValue<UInt4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003049 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04003050 }
3051 }
3052
John Bauman19bac1e2014-05-06 15:23:49 -04003053 Type *UInt4::getType()
John Bauman89401822014-05-06 15:04:28 -04003054 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003055 return T(llvm::VectorType::get(T(UInt::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003056 }
3057
Alexis Hetu734e2572018-12-20 14:00:49 -05003058 Type *Half::getType()
3059 {
3060 return T(llvm::Type::getInt16Ty(*::context));
3061 }
3062
Nicolas Capens05b3d662016-02-25 23:58:33 -05003063 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003064 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003065 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003066#if defined(__i386__) || defined(__x86_64__)
3067 if(exactAtPow2)
3068 {
3069 // rcpss uses a piecewise-linear approximation which minimizes the relative error
3070 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3071 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3072 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003073 return x86::rcpss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003074#else
3075 return As<Float>(V(lowerRCP(V(x.value))));
3076#endif
John Bauman89401822014-05-06 15:04:28 -04003077 }
John Bauman66b8ab22014-05-06 15:57:45 -04003078
John Bauman19bac1e2014-05-06 15:23:49 -04003079 RValue<Float> RcpSqrt_pp(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003080 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003081 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003082#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003083 return x86::rsqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003084#else
3085 return As<Float>(V(lowerRSQRT(V(x.value))));
3086#endif
John Bauman89401822014-05-06 15:04:28 -04003087 }
3088
John Bauman19bac1e2014-05-06 15:23:49 -04003089 RValue<Float> Sqrt(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003090 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003091 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003092#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003093 return x86::sqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003094#else
3095 return As<Float>(V(lowerSQRT(V(x.value))));
3096#endif
John Bauman89401822014-05-06 15:04:28 -04003097 }
3098
John Bauman19bac1e2014-05-06 15:23:49 -04003099 RValue<Float> Round(RValue<Float> x)
3100 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003101 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003102#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003103 if(CPUID::supportsSSE4_1())
3104 {
3105 return x86::roundss(x, 0);
3106 }
3107 else
3108 {
3109 return Float4(Round(Float4(x))).x;
3110 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003111#else
3112 return RValue<Float>(V(lowerRound(V(x.value))));
3113#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003114 }
3115
3116 RValue<Float> Trunc(RValue<Float> x)
3117 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003118 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003119#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003120 if(CPUID::supportsSSE4_1())
3121 {
3122 return x86::roundss(x, 3);
3123 }
3124 else
3125 {
3126 return Float(Int(x)); // Rounded toward zero
3127 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003128#else
3129 return RValue<Float>(V(lowerTrunc(V(x.value))));
3130#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003131 }
3132
3133 RValue<Float> Frac(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003134 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003135 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003136#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003137 if(CPUID::supportsSSE4_1())
3138 {
3139 return x - x86::floorss(x);
3140 }
3141 else
3142 {
John Bauman19bac1e2014-05-06 15:23:49 -04003143 return Float4(Frac(Float4(x))).x;
John Bauman89401822014-05-06 15:04:28 -04003144 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003145#else
3146 // x - floor(x) can be 1.0 for very small negative x.
3147 // Clamp against the value just below 1.0.
3148 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
3149#endif
John Bauman89401822014-05-06 15:04:28 -04003150 }
3151
John Bauman19bac1e2014-05-06 15:23:49 -04003152 RValue<Float> Floor(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003153 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003154 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003155#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003156 if(CPUID::supportsSSE4_1())
3157 {
3158 return x86::floorss(x);
3159 }
3160 else
3161 {
3162 return Float4(Floor(Float4(x))).x;
3163 }
Logan Chien40a60052018-09-26 19:03:53 +08003164#else
3165 return RValue<Float>(V(lowerFloor(V(x.value))));
3166#endif
John Bauman89401822014-05-06 15:04:28 -04003167 }
3168
John Bauman19bac1e2014-05-06 15:23:49 -04003169 RValue<Float> Ceil(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003170 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003171 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003172#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003173 if(CPUID::supportsSSE4_1())
3174 {
3175 return x86::ceilss(x);
3176 }
3177 else
Logan Chiene3191012018-08-24 22:01:50 +08003178#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003179 {
3180 return Float4(Ceil(Float4(x))).x;
3181 }
John Bauman89401822014-05-06 15:04:28 -04003182 }
3183
John Bauman19bac1e2014-05-06 15:23:49 -04003184 Type *Float::getType()
John Bauman89401822014-05-06 15:04:28 -04003185 {
Nicolas Capensac230122016-09-20 14:30:06 -04003186 return T(llvm::Type::getFloatTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04003187 }
3188
John Bauman19bac1e2014-05-06 15:23:49 -04003189 Type *Float2::getType()
John Bauman89401822014-05-06 15:04:28 -04003190 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003191 return T(Type_v2f32);
John Bauman89401822014-05-06 15:04:28 -04003192 }
3193
Nicolas Capenscb986762017-01-20 11:34:37 -05003194 Float4::Float4(RValue<Float> rhs) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04003195 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003196 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04003197 Value *vector = loadValue();
John Bauman89401822014-05-06 15:04:28 -04003198 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3199
Nicolas Capense89cd582016-09-30 14:23:47 -04003200 int swizzle[4] = {0, 0, 0, 0};
3201 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
John Bauman89401822014-05-06 15:04:28 -04003202
John Bauman66b8ab22014-05-06 15:57:45 -04003203 storeValue(replicate);
John Bauman89401822014-05-06 15:04:28 -04003204 }
3205
John Bauman19bac1e2014-05-06 15:23:49 -04003206 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003207 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003208 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003209#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003210 return x86::maxps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003211#else
3212 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
3213#endif
John Bauman89401822014-05-06 15:04:28 -04003214 }
3215
John Bauman19bac1e2014-05-06 15:23:49 -04003216 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003217 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003218 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003219#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003220 return x86::minps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003221#else
3222 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
3223#endif
John Bauman89401822014-05-06 15:04:28 -04003224 }
3225
Nicolas Capens05b3d662016-02-25 23:58:33 -05003226 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003227 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003228 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003229#if defined(__i386__) || defined(__x86_64__)
3230 if(exactAtPow2)
3231 {
3232 // rcpps uses a piecewise-linear approximation which minimizes the relative error
3233 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3234 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3235 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003236 return x86::rcpps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003237#else
3238 return As<Float4>(V(lowerRCP(V(x.value))));
3239#endif
John Bauman89401822014-05-06 15:04:28 -04003240 }
John Bauman66b8ab22014-05-06 15:57:45 -04003241
John Bauman19bac1e2014-05-06 15:23:49 -04003242 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003243 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003244 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003245#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003246 return x86::rsqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003247#else
3248 return As<Float4>(V(lowerRSQRT(V(x.value))));
3249#endif
John Bauman89401822014-05-06 15:04:28 -04003250 }
3251
John Bauman19bac1e2014-05-06 15:23:49 -04003252 RValue<Float4> Sqrt(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003253 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003254 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003255#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003256 return x86::sqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003257#else
3258 return As<Float4>(V(lowerSQRT(V(x.value))));
3259#endif
John Bauman89401822014-05-06 15:04:28 -04003260 }
3261
John Bauman19bac1e2014-05-06 15:23:49 -04003262 RValue<Int> SignMask(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003263 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003264 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003265#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003266 return x86::movmskps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003267#else
3268 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
3269#endif
John Bauman89401822014-05-06 15:04:28 -04003270 }
3271
John Bauman19bac1e2014-05-06 15:23:49 -04003272 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003273 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003274 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003275 // return As<Int4>(x86::cmpeqps(x, y));
3276 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
3277 }
3278
John Bauman19bac1e2014-05-06 15:23:49 -04003279 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003280 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003281 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003282 // return As<Int4>(x86::cmpltps(x, y));
3283 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
3284 }
3285
John Bauman19bac1e2014-05-06 15:23:49 -04003286 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003287 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003288 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003289 // return As<Int4>(x86::cmpleps(x, y));
3290 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
3291 }
3292
John Bauman19bac1e2014-05-06 15:23:49 -04003293 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003294 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003295 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003296 // return As<Int4>(x86::cmpneqps(x, y));
3297 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
3298 }
3299
John Bauman19bac1e2014-05-06 15:23:49 -04003300 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003301 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003302 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003303 // return As<Int4>(x86::cmpnltps(x, y));
3304 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
3305 }
3306
John Bauman19bac1e2014-05-06 15:23:49 -04003307 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003308 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003309 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003310 // return As<Int4>(x86::cmpnleps(x, y));
3311 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
3312 }
3313
Ben Claytonec1aeb82019-03-04 19:33:27 +00003314 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3315 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003316 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003317 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUEQ(x.value, y.value), Int4::getType()));
3318 }
3319
3320 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3321 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003322 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003323 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULT(x.value, y.value), Int4::getType()));
3324 }
3325
3326 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3327 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003328 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003329 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULE(x.value, y.value), Int4::getType()));
3330 }
3331
3332 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3333 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003334 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003335 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUNE(x.value, y.value), Int4::getType()));
3336 }
3337
3338 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3339 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003340 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003341 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGE(x.value, y.value), Int4::getType()));
3342 }
3343
3344 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3345 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003346 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003347 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGT(x.value, y.value), Int4::getType()));
3348 }
3349
John Bauman19bac1e2014-05-06 15:23:49 -04003350 RValue<Float4> Round(RValue<Float4> x)
3351 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003352 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003353#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003354 if(CPUID::supportsSSE4_1())
3355 {
3356 return x86::roundps(x, 0);
3357 }
3358 else
3359 {
3360 return Float4(RoundInt(x));
3361 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003362#else
3363 return RValue<Float4>(V(lowerRound(V(x.value))));
3364#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003365 }
3366
3367 RValue<Float4> Trunc(RValue<Float4> x)
3368 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003369 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003370#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003371 if(CPUID::supportsSSE4_1())
3372 {
3373 return x86::roundps(x, 3);
3374 }
3375 else
3376 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003377 return Float4(Int4(x));
John Bauman19bac1e2014-05-06 15:23:49 -04003378 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003379#else
3380 return RValue<Float4>(V(lowerTrunc(V(x.value))));
3381#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003382 }
3383
3384 RValue<Float4> Frac(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003385 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003386 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb9230422017-07-17 10:27:33 -04003387 Float4 frc;
3388
Logan Chien40a60052018-09-26 19:03:53 +08003389#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003390 if(CPUID::supportsSSE4_1())
3391 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003392 frc = x - Floor(x);
John Bauman89401822014-05-06 15:04:28 -04003393 }
3394 else
3395 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003396 frc = x - Float4(Int4(x)); // Signed fractional part.
John Bauman89401822014-05-06 15:04:28 -04003397
Nicolas Capensb9230422017-07-17 10:27:33 -04003398 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
John Bauman89401822014-05-06 15:04:28 -04003399 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003400#else
3401 frc = x - Floor(x);
3402#endif
Nicolas Capensb9230422017-07-17 10:27:33 -04003403
3404 // x - floor(x) can be 1.0 for very small negative x.
3405 // Clamp against the value just below 1.0.
3406 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
John Bauman89401822014-05-06 15:04:28 -04003407 }
3408
John Bauman19bac1e2014-05-06 15:23:49 -04003409 RValue<Float4> Floor(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003410 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003411 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003412#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003413 if(CPUID::supportsSSE4_1())
3414 {
3415 return x86::floorps(x);
3416 }
3417 else
3418 {
John Bauman19bac1e2014-05-06 15:23:49 -04003419 return x - Frac(x);
John Bauman89401822014-05-06 15:04:28 -04003420 }
Logan Chien40a60052018-09-26 19:03:53 +08003421#else
3422 return RValue<Float4>(V(lowerFloor(V(x.value))));
3423#endif
John Bauman89401822014-05-06 15:04:28 -04003424 }
3425
John Bauman19bac1e2014-05-06 15:23:49 -04003426 RValue<Float4> Ceil(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003427 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003428 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003429#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003430 if(CPUID::supportsSSE4_1())
3431 {
3432 return x86::ceilps(x);
3433 }
3434 else
Logan Chiene3191012018-08-24 22:01:50 +08003435#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003436 {
3437 return -Floor(-x);
3438 }
John Bauman89401822014-05-06 15:04:28 -04003439 }
3440
Ben Claytona2c8b772019-04-09 13:42:36 -04003441 RValue<Float4> Sin(RValue<Float4> v)
3442 {
3443 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sin, { V(v.value)->getType() } );
3444 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3445 }
3446
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003447 RValue<Float4> Cos(RValue<Float4> v)
3448 {
3449 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cos, { V(v.value)->getType() } );
3450 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3451 }
3452
Ben Clayton14740062019-04-09 13:48:41 -04003453 RValue<Float4> Tan(RValue<Float4> v)
3454 {
3455 return Sin(v) / Cos(v);
3456 }
3457
Ben Claytoneafae472019-04-09 14:22:38 -04003458 static RValue<Float4> TransformFloat4PerElement(RValue<Float4> v, const char* name)
Ben Claytonf9350d72019-04-09 14:19:02 -04003459 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003460 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), ::llvm::ArrayRef<llvm::Type*>(T(Float::getType())), false);
Ben Claytoneafae472019-04-09 14:22:38 -04003461 auto func = ::module->getOrInsertFunction(name, funcTy);
Ben Claytonf9350d72019-04-09 14:19:02 -04003462 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3463 for (uint64_t i = 0; i < 4; i++)
3464 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003465 auto el = ::builder->CreateCall(func, V(Nucleus::createExtractElement(v.value, Float::getType(), i)));
3466 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytonf9350d72019-04-09 14:19:02 -04003467 }
3468 return RValue<Float4>(V(out));
3469 }
3470
Ben Claytoneafae472019-04-09 14:22:38 -04003471 RValue<Float4> Asin(RValue<Float4> v)
3472 {
3473 return TransformFloat4PerElement(v, "asinf");
3474 }
3475
3476 RValue<Float4> Acos(RValue<Float4> v)
3477 {
3478 return TransformFloat4PerElement(v, "acosf");
3479 }
3480
Ben Clayton749b4e02019-04-09 14:27:43 -04003481 RValue<Float4> Atan(RValue<Float4> v)
3482 {
3483 return TransformFloat4PerElement(v, "atanf");
3484 }
3485
Ben Claytond9636972019-04-09 15:09:54 -04003486 RValue<Float4> Sinh(RValue<Float4> v)
3487 {
3488 return TransformFloat4PerElement(v, "sinhf");
3489 }
3490
Ben Clayton900ea2c2019-04-09 15:25:36 -04003491 RValue<Float4> Cosh(RValue<Float4> v)
3492 {
3493 return TransformFloat4PerElement(v, "coshf");
3494 }
3495
Ben Clayton3928bd92019-04-09 15:27:41 -04003496 RValue<Float4> Tanh(RValue<Float4> v)
3497 {
3498 return TransformFloat4PerElement(v, "tanhf");
3499 }
3500
Ben Claytonf6d77ab2019-04-09 15:30:04 -04003501 RValue<Float4> Asinh(RValue<Float4> v)
3502 {
3503 return TransformFloat4PerElement(v, "asinhf");
3504 }
3505
Ben Clayton28ebcb02019-04-09 15:33:38 -04003506 RValue<Float4> Acosh(RValue<Float4> v)
3507 {
3508 return TransformFloat4PerElement(v, "acoshf");
3509 }
3510
Ben Claytonfa6a5392019-04-09 15:35:24 -04003511 RValue<Float4> Atanh(RValue<Float4> v)
3512 {
3513 return TransformFloat4PerElement(v, "atanhf");
3514 }
3515
Ben Claytona520c3e2019-04-09 15:43:45 -04003516 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3517 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003518 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3519 paramTys.push_back(T(Float::getType()));
3520 paramTys.push_back(T(Float::getType()));
3521 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), paramTys, false);
Ben Claytona520c3e2019-04-09 15:43:45 -04003522 auto func = ::module->getOrInsertFunction("atan2f", funcTy);
3523 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3524 for (uint64_t i = 0; i < 4; i++)
3525 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003526 auto el = ::builder->CreateCall2(func, ARGS(
3527 V(Nucleus::createExtractElement(x.value, Float::getType(), i)),
3528 V(Nucleus::createExtractElement(y.value, Float::getType(), i))
3529 ));
3530 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytona520c3e2019-04-09 15:43:45 -04003531 }
3532 return RValue<Float4>(V(out));
3533 }
3534
Ben Claytonbfe94f02019-04-09 15:52:12 -04003535 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3536 {
Ben Clayton7579db12019-05-02 08:37:12 +01003537 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::pow, { T(Float4::getType()) });
Ben Claytonc38fc122019-04-11 08:58:49 -04003538 return RValue<Float4>(V(::builder->CreateCall2(func, ARGS(V(x.value), V(y.value)))));
Ben Claytonbfe94f02019-04-09 15:52:12 -04003539 }
3540
Ben Clayton242f0022019-04-09 16:00:53 -04003541 RValue<Float4> Exp(RValue<Float4> v)
3542 {
3543 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003544 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton242f0022019-04-09 16:00:53 -04003545 }
3546
Ben Clayton2c1da722019-04-09 16:03:03 -04003547 RValue<Float4> Log(RValue<Float4> v)
3548 {
3549 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003550 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton2c1da722019-04-09 16:03:03 -04003551 }
3552
Ben Claytonf40b56c2019-04-09 16:06:55 -04003553 RValue<Float4> Exp2(RValue<Float4> v)
3554 {
3555 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003556 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytonf40b56c2019-04-09 16:06:55 -04003557 }
3558
Ben Claytone17acfe2019-04-09 16:09:13 -04003559 RValue<Float4> Log2(RValue<Float4> v)
3560 {
3561 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003562 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytone17acfe2019-04-09 16:09:13 -04003563 }
3564
Ben Clayton60958262019-04-10 14:53:30 -04003565 RValue<UInt4> Ctlz(RValue<UInt4> v, bool isZeroUndef)
3566 {
Ben Clayton7579db12019-05-02 08:37:12 +01003567 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::ctlz, { T(UInt4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003568 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton60958262019-04-10 14:53:30 -04003569 V(v.value),
3570 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003571 ))));
Ben Clayton60958262019-04-10 14:53:30 -04003572 }
3573
Ben Clayton3f007c42019-04-10 14:54:23 -04003574 RValue<UInt4> Cttz(RValue<UInt4> v, bool isZeroUndef)
3575 {
Ben Clayton7579db12019-05-02 08:37:12 +01003576 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cttz, { T(UInt4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003577 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton3f007c42019-04-10 14:54:23 -04003578 V(v.value),
3579 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003580 ))));
Ben Clayton3f007c42019-04-10 14:54:23 -04003581 }
3582
John Bauman19bac1e2014-05-06 15:23:49 -04003583 Type *Float4::getType()
John Bauman89401822014-05-06 15:04:28 -04003584 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003585 return T(llvm::VectorType::get(T(Float::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003586 }
3587
John Bauman89401822014-05-06 15:04:28 -04003588 RValue<Long> Ticks()
3589 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003590 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003591 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
John Bauman89401822014-05-06 15:04:28 -04003592
Nicolas Capens2ab69ee2016-09-26 11:45:17 -04003593 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
John Bauman89401822014-05-06 15:04:28 -04003594 }
Ben Claytond853c122019-04-16 17:51:49 -04003595
3596 RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3597 {
3598 // Note: this should work for 32-bit pointers as well because 'inttoptr'
3599 // is defined to truncate (and zero extend) if necessary.
3600 auto ptrAsInt = ::llvm::ConstantInt::get(::llvm::Type::getInt64Ty(*::context), reinterpret_cast<uintptr_t>(ptr));
3601 return RValue<Pointer<Byte>>(V(::builder->CreateIntToPtr(ptrAsInt, T(Pointer<Byte>::getType()))));
3602 }
3603
3604 Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
3605 {
3606 ::llvm::SmallVector<::llvm::Type*, 8> paramTys;
3607 for (auto ty : argTys) { paramTys.push_back(T(ty)); }
3608 auto funcTy = ::llvm::FunctionType::get(T(retTy), paramTys, false);
3609
3610 auto funcPtrTy = funcTy->getPointerTo();
3611 auto funcPtr = ::builder->CreatePointerCast(V(fptr.value), funcPtrTy);
3612
3613 ::llvm::SmallVector<::llvm::Value*, 8> arguments;
3614 for (auto arg : args) { arguments.push_back(V(arg)); }
3615 return V(::builder->CreateCall(funcPtr, arguments));
3616 }
John Bauman89401822014-05-06 15:04:28 -04003617}
3618
Nicolas Capens48461502018-08-06 14:20:45 -04003619namespace rr
John Bauman89401822014-05-06 15:04:28 -04003620{
Logan Chiene3191012018-08-24 22:01:50 +08003621#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003622 namespace x86
3623 {
John Bauman19bac1e2014-05-06 15:23:49 -04003624 RValue<Int> cvtss2si(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003625 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003626 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
John Bauman66b8ab22014-05-06 15:57:45 -04003627
John Bauman89401822014-05-06 15:04:28 -04003628 Float4 vector;
3629 vector.x = val;
3630
Logan Chien813d5032018-08-31 17:19:45 +08003631 return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
John Bauman89401822014-05-06 15:04:28 -04003632 }
3633
John Bauman19bac1e2014-05-06 15:23:49 -04003634 RValue<Int4> cvtps2dq(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003635 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003636 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
John Bauman89401822014-05-06 15:04:28 -04003637
Logan Chien813d5032018-08-31 17:19:45 +08003638 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003639 }
3640
John Bauman19bac1e2014-05-06 15:23:49 -04003641 RValue<Float> rcpss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003642 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003643 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
John Bauman89401822014-05-06 15:04:28 -04003644
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003645 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003646
Logan Chien813d5032018-08-31 17:19:45 +08003647 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003648 }
3649
John Bauman19bac1e2014-05-06 15:23:49 -04003650 RValue<Float> sqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003651 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003652 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3653 return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003654 }
3655
John Bauman19bac1e2014-05-06 15:23:49 -04003656 RValue<Float> rsqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003657 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003658 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
John Bauman66b8ab22014-05-06 15:57:45 -04003659
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003660 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman89401822014-05-06 15:04:28 -04003661
Logan Chien813d5032018-08-31 17:19:45 +08003662 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003663 }
3664
John Bauman19bac1e2014-05-06 15:23:49 -04003665 RValue<Float4> rcpps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003666 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003667 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003668
Logan Chien813d5032018-08-31 17:19:45 +08003669 return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003670 }
3671
John Bauman19bac1e2014-05-06 15:23:49 -04003672 RValue<Float4> sqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003673 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003674 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
John Bauman66b8ab22014-05-06 15:57:45 -04003675
Logan Chien813d5032018-08-31 17:19:45 +08003676 return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003677 }
3678
John Bauman19bac1e2014-05-06 15:23:49 -04003679 RValue<Float4> rsqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003680 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003681 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003682
Logan Chien813d5032018-08-31 17:19:45 +08003683 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003684 }
3685
John Bauman19bac1e2014-05-06 15:23:49 -04003686 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003687 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003688 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
John Bauman89401822014-05-06 15:04:28 -04003689
Logan Chien813d5032018-08-31 17:19:45 +08003690 return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003691 }
3692
John Bauman19bac1e2014-05-06 15:23:49 -04003693 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003694 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003695 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
John Bauman89401822014-05-06 15:04:28 -04003696
Logan Chien813d5032018-08-31 17:19:45 +08003697 return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003698 }
3699
John Bauman19bac1e2014-05-06 15:23:49 -04003700 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003701 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003702 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
John Bauman89401822014-05-06 15:04:28 -04003703
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003704 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
John Bauman89401822014-05-06 15:04:28 -04003705 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
3706
Logan Chien813d5032018-08-31 17:19:45 +08003707 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003708 }
3709
John Bauman19bac1e2014-05-06 15:23:49 -04003710 RValue<Float> floorss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003711 {
3712 return roundss(val, 1);
3713 }
3714
John Bauman19bac1e2014-05-06 15:23:49 -04003715 RValue<Float> ceilss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003716 {
3717 return roundss(val, 2);
3718 }
3719
John Bauman19bac1e2014-05-06 15:23:49 -04003720 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003721 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003722 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
John Bauman89401822014-05-06 15:04:28 -04003723
Logan Chien813d5032018-08-31 17:19:45 +08003724 return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
John Bauman89401822014-05-06 15:04:28 -04003725 }
3726
John Bauman19bac1e2014-05-06 15:23:49 -04003727 RValue<Float4> floorps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003728 {
3729 return roundps(val, 1);
3730 }
3731
John Bauman19bac1e2014-05-06 15:23:49 -04003732 RValue<Float4> ceilps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003733 {
3734 return roundps(val, 2);
3735 }
3736
Alexis Hetu0f448072016-03-18 10:56:08 -04003737 RValue<Int4> pabsd(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04003738 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003739 return RValue<Int4>(V(lowerPABS(V(x.value))));
John Bauman89401822014-05-06 15:04:28 -04003740 }
3741
John Bauman19bac1e2014-05-06 15:23:49 -04003742 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003743 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003744 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
John Bauman89401822014-05-06 15:04:28 -04003745
Logan Chien813d5032018-08-31 17:19:45 +08003746 return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003747 }
John Bauman66b8ab22014-05-06 15:57:45 -04003748
John Bauman19bac1e2014-05-06 15:23:49 -04003749 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003750 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003751 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
John Bauman89401822014-05-06 15:04:28 -04003752
Logan Chien813d5032018-08-31 17:19:45 +08003753 return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003754 }
3755
John Bauman19bac1e2014-05-06 15:23:49 -04003756 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003757 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003758 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
John Bauman89401822014-05-06 15:04:28 -04003759
Logan Chien813d5032018-08-31 17:19:45 +08003760 return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003761 }
John Bauman66b8ab22014-05-06 15:57:45 -04003762
John Bauman19bac1e2014-05-06 15:23:49 -04003763 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003764 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003765 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
John Bauman89401822014-05-06 15:04:28 -04003766
Logan Chien813d5032018-08-31 17:19:45 +08003767 return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003768 }
3769
John Bauman19bac1e2014-05-06 15:23:49 -04003770 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003771 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003772 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
John Bauman89401822014-05-06 15:04:28 -04003773
Logan Chien813d5032018-08-31 17:19:45 +08003774 return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003775 }
John Bauman66b8ab22014-05-06 15:57:45 -04003776
John Bauman19bac1e2014-05-06 15:23:49 -04003777 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003778 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003779 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
John Bauman89401822014-05-06 15:04:28 -04003780
Logan Chien813d5032018-08-31 17:19:45 +08003781 return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003782 }
John Bauman66b8ab22014-05-06 15:57:45 -04003783
John Bauman19bac1e2014-05-06 15:23:49 -04003784 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003785 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003786 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
John Bauman89401822014-05-06 15:04:28 -04003787
Logan Chien813d5032018-08-31 17:19:45 +08003788 return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003789 }
John Bauman66b8ab22014-05-06 15:57:45 -04003790
John Bauman19bac1e2014-05-06 15:23:49 -04003791 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003792 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003793 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
John Bauman89401822014-05-06 15:04:28 -04003794
Logan Chien813d5032018-08-31 17:19:45 +08003795 return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
John Bauman19bac1e2014-05-06 15:23:49 -04003796 }
3797
3798 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003799 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003800 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
John Bauman89401822014-05-06 15:04:28 -04003801 }
3802
John Bauman19bac1e2014-05-06 15:23:49 -04003803 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003804 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003805 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman89401822014-05-06 15:04:28 -04003806 }
3807
John Bauman19bac1e2014-05-06 15:23:49 -04003808 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003809 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003810 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman89401822014-05-06 15:04:28 -04003811 }
3812
John Bauman19bac1e2014-05-06 15:23:49 -04003813 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003814 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003815 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003816 }
3817
John Bauman19bac1e2014-05-06 15:23:49 -04003818 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003819 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003820 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003821 }
3822
John Bauman19bac1e2014-05-06 15:23:49 -04003823 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003824 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003825 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003826 }
3827
John Bauman19bac1e2014-05-06 15:23:49 -04003828 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003829 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003830 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003831 }
3832
John Bauman19bac1e2014-05-06 15:23:49 -04003833 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
John Bauman89401822014-05-06 15:04:28 -04003834 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003835 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003836
Logan Chien813d5032018-08-31 17:19:45 +08003837 return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003838 }
3839
John Bauman19bac1e2014-05-06 15:23:49 -04003840 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003841 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003842 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003843
Logan Chien813d5032018-08-31 17:19:45 +08003844 return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003845 }
3846
John Bauman19bac1e2014-05-06 15:23:49 -04003847 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003848 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003849 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
John Bauman89401822014-05-06 15:04:28 -04003850
Logan Chien813d5032018-08-31 17:19:45 +08003851 return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003852 }
3853
Nicolas Capens33438a62017-09-27 11:47:35 -04003854 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003855 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003856 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
John Bauman89401822014-05-06 15:04:28 -04003857
Logan Chien813d5032018-08-31 17:19:45 +08003858 return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003859 }
3860
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003861 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003862 {
3863 if(CPUID::supportsSSE4_1())
3864 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003865 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
John Bauman66b8ab22014-05-06 15:57:45 -04003866
Logan Chien813d5032018-08-31 17:19:45 +08003867 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003868 }
3869 else
3870 {
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003871 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
3872 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
3873
3874 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
John Bauman89401822014-05-06 15:04:28 -04003875 }
3876 }
3877
John Bauman19bac1e2014-05-06 15:23:49 -04003878 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003879 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003880 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003881
Logan Chien813d5032018-08-31 17:19:45 +08003882 return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003883 }
3884
John Bauman19bac1e2014-05-06 15:23:49 -04003885 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003886 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003887 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003888
Logan Chien813d5032018-08-31 17:19:45 +08003889 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003890 }
3891
John Bauman19bac1e2014-05-06 15:23:49 -04003892 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003893 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003894 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003895
Logan Chien813d5032018-08-31 17:19:45 +08003896 return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003897 }
3898
John Bauman19bac1e2014-05-06 15:23:49 -04003899 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003900 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003901 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003902
Logan Chien813d5032018-08-31 17:19:45 +08003903 return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003904 }
3905
John Bauman19bac1e2014-05-06 15:23:49 -04003906 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003907 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003908 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003909
Logan Chien813d5032018-08-31 17:19:45 +08003910 return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003911 }
3912
John Bauman19bac1e2014-05-06 15:23:49 -04003913 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003914 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003915 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003916
Logan Chien813d5032018-08-31 17:19:45 +08003917 return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003918 }
3919
John Bauman19bac1e2014-05-06 15:23:49 -04003920 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003921 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003922 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003923
Logan Chien813d5032018-08-31 17:19:45 +08003924 return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003925 }
3926
John Bauman19bac1e2014-05-06 15:23:49 -04003927 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003928 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003929 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003930
Logan Chien813d5032018-08-31 17:19:45 +08003931 return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003932 }
3933
John Bauman19bac1e2014-05-06 15:23:49 -04003934 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003935 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003936 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003937
Logan Chien813d5032018-08-31 17:19:45 +08003938 return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003939 }
3940
John Bauman19bac1e2014-05-06 15:23:49 -04003941 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003942 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003943 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003944
Logan Chien813d5032018-08-31 17:19:45 +08003945 return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003946 }
3947
John Bauman19bac1e2014-05-06 15:23:49 -04003948 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003949 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003950 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003951
Logan Chien813d5032018-08-31 17:19:45 +08003952 return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003953 }
3954
John Bauman19bac1e2014-05-06 15:23:49 -04003955 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003956 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003957 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003958
Logan Chien813d5032018-08-31 17:19:45 +08003959 return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003960 }
3961
John Bauman19bac1e2014-05-06 15:23:49 -04003962 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
3963 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003964 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003965 }
3966
3967 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
3968 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003969 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003970 }
3971
3972 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
3973 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003974 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003975 }
3976
3977 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
3978 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003979 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003980 }
3981
3982 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003983 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003984 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04003985
Logan Chien813d5032018-08-31 17:19:45 +08003986 return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003987 }
3988
John Bauman19bac1e2014-05-06 15:23:49 -04003989 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003990 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003991 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04003992
Logan Chien813d5032018-08-31 17:19:45 +08003993 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003994 }
3995
John Bauman19bac1e2014-05-06 15:23:49 -04003996 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003997 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003998 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04003999
Logan Chien813d5032018-08-31 17:19:45 +08004000 return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004001 }
4002
John Bauman19bac1e2014-05-06 15:23:49 -04004003 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004004 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004005 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04004006
Logan Chien813d5032018-08-31 17:19:45 +08004007 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004008 }
4009
John Bauman19bac1e2014-05-06 15:23:49 -04004010 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04004011 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004012 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004013
Logan Chien813d5032018-08-31 17:19:45 +08004014 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004015 }
4016
John Bauman19bac1e2014-05-06 15:23:49 -04004017 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004018 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004019 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004020
Logan Chien813d5032018-08-31 17:19:45 +08004021 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004022 }
4023
John Bauman19bac1e2014-05-06 15:23:49 -04004024 RValue<Int> movmskps(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04004025 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004026 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
John Bauman89401822014-05-06 15:04:28 -04004027
Logan Chien813d5032018-08-31 17:19:45 +08004028 return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
John Bauman89401822014-05-06 15:04:28 -04004029 }
4030
John Bauman19bac1e2014-05-06 15:23:49 -04004031 RValue<Int> pmovmskb(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04004032 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004033 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
John Bauman89401822014-05-06 15:04:28 -04004034
Logan Chien813d5032018-08-31 17:19:45 +08004035 return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
John Bauman89401822014-05-06 15:04:28 -04004036 }
4037
Nicolas Capens01a97962017-07-28 17:30:51 -04004038 RValue<Int4> pmovzxbd(RValue<Byte16> x)
John Bauman89401822014-05-06 15:04:28 -04004039 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004040 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004041 }
4042
Nicolas Capens01a97962017-07-28 17:30:51 -04004043 RValue<Int4> pmovsxbd(RValue<SByte16> x)
John Bauman89401822014-05-06 15:04:28 -04004044 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004045 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004046 }
4047
Nicolas Capens01a97962017-07-28 17:30:51 -04004048 RValue<Int4> pmovzxwd(RValue<UShort8> x)
John Bauman89401822014-05-06 15:04:28 -04004049 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004050 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004051 }
4052
Nicolas Capens01a97962017-07-28 17:30:51 -04004053 RValue<Int4> pmovsxwd(RValue<Short8> x)
John Bauman89401822014-05-06 15:04:28 -04004054 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004055 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004056 }
4057 }
Logan Chiene3191012018-08-24 22:01:50 +08004058#endif // defined(__i386__) || defined(__x86_64__)
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004059
Ben Clayton60a3d6f2019-02-26 17:24:46 +00004060#ifdef ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004061 // extractAll returns a vector containing the extracted n scalar value of
4062 // the vector vec.
4063 static std::vector<Value*> extractAll(Value* vec, int n)
4064 {
4065 std::vector<Value*> elements;
4066 elements.reserve(n);
4067 for (int i = 0; i < n; i++)
4068 {
4069 auto el = V(::builder->CreateExtractElement(V(vec), i));
4070 elements.push_back(el);
4071 }
4072 return elements;
4073 }
4074
4075 // toDouble returns all the float values in vals extended to doubles.
4076 static std::vector<Value*> toDouble(const std::vector<Value*>& vals)
4077 {
4078 auto doubleTy = ::llvm::Type::getDoubleTy(*::context);
4079 std::vector<Value*> elements;
4080 elements.reserve(vals.size());
4081 for (auto v : vals)
4082 {
4083 elements.push_back(V(::builder->CreateFPExt(V(v), doubleTy)));
4084 }
4085 return elements;
4086 }
4087
4088 std::vector<Value*> PrintValue::Ty<Byte4>::val(const RValue<Byte4>& v) { return extractAll(v.value, 4); }
4089 std::vector<Value*> PrintValue::Ty<Int4>::val(const RValue<Int4>& v) { return extractAll(v.value, 4); }
4090 std::vector<Value*> PrintValue::Ty<UInt4>::val(const RValue<UInt4>& v) { return extractAll(v.value, 4); }
4091 std::vector<Value*> PrintValue::Ty<Short4>::val(const RValue<Short4>& v) { return extractAll(v.value, 4); }
4092 std::vector<Value*> PrintValue::Ty<UShort4>::val(const RValue<UShort4>& v) { return extractAll(v.value, 4); }
4093 std::vector<Value*> PrintValue::Ty<Float>::val(const RValue<Float>& v) { return toDouble({v.value}); }
4094 std::vector<Value*> PrintValue::Ty<Float4>::val(const RValue<Float4>& v) { return toDouble(extractAll(v.value, 4)); }
Ben Claytonbc0cbb92019-05-15 17:12:57 +01004095 std::vector<Value*> PrintValue::Ty<const char*>::val(const char* v) { return {V(::builder->CreateGlobalStringPtr(v))}; }
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004096
4097 void Printv(const char* function, const char* file, int line, const char* fmt, std::initializer_list<PrintValue> args)
4098 {
4099 // LLVM types used below.
4100 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
4101 auto intTy = ::llvm::Type::getInt64Ty(*::context); // TODO: Natural int width.
4102 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
4103 auto funcTy = ::llvm::FunctionType::get(i32Ty, {i8PtrTy}, true);
4104
4105 auto func = ::module->getOrInsertFunction("printf", funcTy);
4106
4107 // Build the printf format message string.
4108 std::string str;
4109 if (file != nullptr) { str += (line > 0) ? "%s:%d " : "%s "; }
4110 if (function != nullptr) { str += "%s "; }
4111 str += fmt;
4112
4113 // Perform subsitution on all '{n}' bracketed indices in the format
4114 // message.
4115 int i = 0;
4116 for (const PrintValue& arg : args)
4117 {
4118 str = replace(str, "{" + std::to_string(i++) + "}", arg.format);
4119 }
4120
4121 ::llvm::SmallVector<::llvm::Value*, 8> vals;
4122
4123 // The format message is always the first argument.
4124 vals.push_back(::builder->CreateGlobalStringPtr(str));
4125
4126 // Add optional file, line and function info if provided.
4127 if (file != nullptr)
4128 {
4129 vals.push_back(::builder->CreateGlobalStringPtr(file));
4130 if (line > 0)
4131 {
4132 vals.push_back(::llvm::ConstantInt::get(intTy, line));
4133 }
4134 }
4135 if (function != nullptr)
4136 {
4137 vals.push_back(::builder->CreateGlobalStringPtr(function));
4138 }
4139
4140 // Add all format arguments.
4141 for (const PrintValue& arg : args)
4142 {
4143 for (auto val : arg.values)
4144 {
4145 vals.push_back(V(val));
4146 }
4147 }
4148
4149 ::builder->CreateCall(func, vals);
4150 }
4151#endif // ENABLE_RR_PRINT
4152
Ben Claytonac07ed82019-03-26 14:17:41 +00004153 void Break()
4154 {
4155 auto trap = ::llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::trap);
4156 builder->CreateCall(trap);
4157 }
4158
4159 void Nop()
4160 {
4161 auto voidTy = ::llvm::Type::getVoidTy(*context);
4162 auto funcTy = ::llvm::FunctionType::get(voidTy, {}, false);
4163 auto func = ::module->getOrInsertFunction("nop", funcTy);
4164 builder->CreateCall(func);
4165 }
4166
// EmitDebugLocation forwards to DebugInfo::EmitLocation() when debug info is
// compiled in (ENABLE_RR_DEBUG_INFO) and a DebugInfo instance exists.
// Otherwise it does nothing.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (debugInfo == nullptr)
	{
		return;
	}
	debugInfo->EmitLocation();
#endif // ENABLE_RR_DEBUG_INFO
}
4176
4177 void EmitDebugVariable(Value* value)
4178 {
4179#ifdef ENABLE_RR_DEBUG_INFO
4180 if (debugInfo != nullptr)
4181 {
4182 debugInfo->EmitVariable(value);
4183 }
4184#endif // ENABLE_RR_DEBUG_INFO
4185 }
4186
// FlushDebug forwards to DebugInfo::Flush() when debug info is compiled in
// (ENABLE_RR_DEBUG_INFO) and a DebugInfo instance exists. Otherwise it does
// nothing.
void FlushDebug()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (debugInfo == nullptr)
	{
		return;
	}
	debugInfo->Flush();
#endif // ENABLE_RR_DEBUG_INFO
}
4196
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004197} // namespace rr
4198
4199// ------------------------------ Coroutines ------------------------------
4200
4201namespace {
4202
4203 struct CoroutineState
4204 {
4205 llvm::Function *await = nullptr;
4206 llvm::Function *destroy = nullptr;
4207 llvm::Value *handle = nullptr;
4208 llvm::Value *id = nullptr;
4209 llvm::Value *promise = nullptr;
4210 llvm::BasicBlock *suspendBlock = nullptr;
4211 llvm::BasicBlock *endBlock = nullptr;
4212 llvm::BasicBlock *destroyBlock = nullptr;
4213 };
4214 CoroutineState coroutine;
4215
4216 // Magic values retuned by llvm.coro.suspend.
4217 // See: https://llvm.org/docs/Coroutines.html#llvm-coro-suspend-intrinsic
4218 enum SuspendAction
4219 {
4220 SuspendActionSuspend = -1,
4221 SuspendActionResume = 0,
4222 SuspendActionDestroy = 1
4223 };
4224
4225} // anonymous namespace
4226
4227namespace rr {
4228
4229void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params)
4230{
4231 // Types
4232 auto voidTy = ::llvm::Type::getVoidTy(*::context);
4233 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
4234 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
4235 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
4236 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
4237 auto promiseTy = T(YieldType);
4238 auto promisePtrTy = promiseTy->getPointerTo();
4239 auto handleTy = i8PtrTy;
4240 auto boolTy = i1Ty;
4241
4242 // LLVM intrinsics
4243 auto coro_id = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_id);
4244 auto coro_size = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_size, {i32Ty});
4245 auto coro_begin = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_begin);
4246 auto coro_resume = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_resume);
4247 auto coro_end = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_end);
4248 auto coro_free = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_free);
4249 auto coro_destroy = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_destroy);
4250 auto coro_promise = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_promise);
4251 auto coro_done = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_done);
4252 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_suspend);
4253
4254 auto allocFrameTy = ::llvm::FunctionType::get(i8PtrTy, {i32Ty}, false);
4255 auto allocFrame = ::module->getOrInsertFunction("coroutine_alloc_frame", allocFrameTy);
4256 auto freeFrameTy = ::llvm::FunctionType::get(voidTy, {i8PtrTy}, false);
4257 auto freeFrame = ::module->getOrInsertFunction("coroutine_free_frame", freeFrameTy);
4258
4259 // Build the coroutine_await() function:
4260 //
4261 // bool coroutine_await(CoroutineHandle* handle, YieldType* out)
4262 // {
4263 // if (llvm.coro.done(handle))
4264 // {
4265 // return false;
4266 // }
4267 // else
4268 // {
4269 // *value = (T*)llvm.coro.promise(handle);
4270 // llvm.coro.resume(handle);
4271 // return true;
4272 // }
4273 // }
4274 //
4275 llvm::FunctionType *coroutineAwaitTy = llvm::FunctionType::get(boolTy, {handleTy, promisePtrTy}, false);
4276 ::coroutine.await = llvm::Function::Create(coroutineAwaitTy, llvm::GlobalValue::InternalLinkage, "coroutine_await", ::module);
4277 ::coroutine.await->setCallingConv(llvm::CallingConv::C);
4278 {
4279 auto args = ::coroutine.await->arg_begin();
4280 auto handle = args++;
4281 auto outPtr = args++;
4282 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "co_await", ::coroutine.await));
4283 auto doneBlock = llvm::BasicBlock::Create(*::context, "done", ::coroutine.await);
4284 auto resumeBlock = llvm::BasicBlock::Create(*::context, "resume", ::coroutine.await);
4285
4286 auto done = ::builder->CreateCall(coro_done, {handle}, "done");
4287 ::builder->CreateCondBr(done, doneBlock, resumeBlock);
4288
4289 ::builder->SetInsertPoint(doneBlock);
4290 ::builder->CreateRet(::llvm::ConstantInt::getFalse(i1Ty));
4291
4292 ::builder->SetInsertPoint(resumeBlock);
4293 auto promiseAlignment = ::llvm::ConstantInt::get(i32Ty, 4); // TODO: Get correct alignment.
4294 auto promisePtr = ::builder->CreateCall(coro_promise, {handle, promiseAlignment, ::llvm::ConstantInt::get(i1Ty, 0)});
4295 auto promise = ::builder->CreateLoad(::builder->CreatePointerCast(promisePtr, promisePtrTy));
4296 ::builder->CreateStore(promise, outPtr);
4297 ::builder->CreateCall(coro_resume, {handle});
4298 ::builder->CreateRet(::llvm::ConstantInt::getTrue(i1Ty));
4299 }
4300
4301 // Build the coroutine_destroy() function:
4302 //
4303 // void coroutine_destroy(CoroutineHandle* handle)
4304 // {
4305 // llvm.coro.destroy(handle);
4306 // }
4307 //
4308 llvm::FunctionType *coroutineDestroyTy = llvm::FunctionType::get(voidTy, handleTy, false);
4309 ::coroutine.destroy = llvm::Function::Create(coroutineDestroyTy, llvm::GlobalValue::InternalLinkage, "coroutine_destroy", ::module);
4310 ::coroutine.destroy->setCallingConv(llvm::CallingConv::C);
4311 {
4312 auto handle = ::coroutine.destroy->arg_begin();
4313 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::coroutine.destroy));
4314 ::builder->CreateCall(coro_destroy, {handle});
4315 ::builder->CreateRetVoid();
4316 }
4317
4318 // Begin building the main coroutine_begin() function.
4319 //
4320 // CoroutineHandle* coroutine_begin(<Arguments>)
4321 // {
4322 // YieldType promise;
4323 // auto id = llvm.coro.id(0, &promise, nullptr, nullptr);
4324 // void* frame = coroutine_alloc_frame(llvm.coro.size.i32());
4325 // CoroutineHandle *handle = llvm.coro.begin(id, frame);
4326 //
4327 // ... <REACTOR CODE> ...
4328 //
4329 // end:
4330 // SuspendAction action = llvm.coro.suspend(none, true /* final */); // <-- RESUME POINT
4331 // switch (action)
4332 // {
4333 // case SuspendActionResume:
4334 // UNREACHABLE(); // Illegal to resume after final suspend.
4335 // case SuspendActionDestroy:
4336 // goto destroy;
4337 // default: // (SuspendActionSuspend)
4338 // goto suspend;
4339 // }
4340 //
4341 // destroy:
4342 // coroutine_free_frame(llvm.coro.free(id, handle));
4343 // goto suspend;
4344 //
4345 // suspend:
4346 // llvm.coro.end(handle, false);
4347 // return handle;
4348 // }
4349 //
4350 llvm::FunctionType *functionType = llvm::FunctionType::get(handleTy, T(Params), false);
4351 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "coroutine_begin", ::module);
4352 ::function->setCallingConv(llvm::CallingConv::C);
4353
4354#ifdef ENABLE_RR_DEBUG_INFO
4355 ::debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(::builder, ::context, ::module, ::function));
4356#endif // ENABLE_RR_DEBUG_INFO
4357
4358 auto entryBlock = llvm::BasicBlock::Create(*::context, "coroutine", ::function);
4359 ::coroutine.suspendBlock = llvm::BasicBlock::Create(*::context, "suspend", ::function);
4360 ::coroutine.endBlock = llvm::BasicBlock::Create(*::context, "end", ::function);
4361 ::coroutine.destroyBlock = llvm::BasicBlock::Create(*::context, "destroy", ::function);
4362
4363 ::builder->SetInsertPoint(entryBlock);
4364 Variable::materializeAll();
4365 ::coroutine.promise = ::builder->CreateAlloca(T(YieldType), nullptr, "promise");
4366 ::coroutine.id = ::builder->CreateCall(coro_id, {
4367 ::llvm::ConstantInt::get(i32Ty, 0),
4368 ::builder->CreatePointerCast(::coroutine.promise, i8PtrTy),
4369 ::llvm::ConstantPointerNull::get(i8PtrTy),
4370 ::llvm::ConstantPointerNull::get(i8PtrTy),
4371 });
4372 auto size = ::builder->CreateCall(coro_size, {});
4373 auto frame = ::builder->CreateCall(allocFrame, {size});
4374 ::coroutine.handle = ::builder->CreateCall(coro_begin, {::coroutine.id, frame});
4375
4376 // Build the suspend block
4377 ::builder->SetInsertPoint(::coroutine.suspendBlock);
4378 ::builder->CreateCall(coro_end, {::coroutine.handle, ::llvm::ConstantInt::get(i1Ty, 0)});
4379 ::builder->CreateRet(::coroutine.handle);
4380
4381 // Build the end block
4382 ::builder->SetInsertPoint(::coroutine.endBlock);
4383 auto action = ::builder->CreateCall(coro_suspend, {
4384 ::llvm::ConstantTokenNone::get(*::context),
4385 ::llvm::ConstantInt::get(i1Ty, 1), // final: true
4386 });
4387 auto switch_ = ::builder->CreateSwitch(action, ::coroutine.suspendBlock, 3);
4388 // switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), trapBlock); // TODO: Trap attempting to resume after final suspend
4389 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), ::coroutine.destroyBlock);
4390
4391 // Build the destroy block
4392 ::builder->SetInsertPoint(::coroutine.destroyBlock);
4393 auto memory = ::builder->CreateCall(coro_free, {::coroutine.id, ::coroutine.handle});
4394 ::builder->CreateCall(freeFrame, {memory});
4395 ::builder->CreateBr(::coroutine.suspendBlock);
4396
4397 // Switch back to the entry block for reactor codegen.
4398 ::builder->SetInsertPoint(entryBlock);
4399
4400 #if defined(_WIN32)
4401 // FIXME(capn):
4402 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
4403 // having a trap which allows the OS to grow the stack. For functions with a stack frame
4404 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
4405 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
4406 // the stack and ensure all pages have been committed. This is currently broken in LLVM
4407 // JIT, but we can prevent emitting the stack probe call:
4408 ::function->addFnAttr("stack-probe-size", "1048576");
4409 #endif
John Bauman89401822014-05-06 15:04:28 -04004410}
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004411
4412void Nucleus::yield(Value* val)
4413{
4414 ASSERT_MSG(::coroutine.id != nullptr, "yield() can only be called when building a Coroutine");
4415
4416 // promise = val;
4417 //
4418 // auto action = llvm.coro.suspend(none, false /* final */); // <-- RESUME POINT
4419 // switch (action)
4420 // {
4421 // case SuspendActionResume:
4422 // goto resume;
4423 // case SuspendActionDestroy:
4424 // goto destroy;
4425 // default: // (SuspendActionSuspend)
4426 // goto suspend;
4427 // }
4428 // resume:
4429 //
4430
4431 RR_DEBUG_INFO_UPDATE_LOC();
4432 Variable::materializeAll();
4433
4434 // Types
4435 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
4436 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
4437
4438 // Intrinsics
4439 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_suspend);
4440
4441 // Create a block to resume execution.
4442 auto resumeBlock = llvm::BasicBlock::Create(*::context, "resume", ::function);
4443
4444 // Store the promise (yield value)
4445 ::builder->CreateStore(V(val), ::coroutine.promise);
4446 auto action = ::builder->CreateCall(coro_suspend, {
4447 ::llvm::ConstantTokenNone::get(*::context),
4448 ::llvm::ConstantInt::get(i1Ty, 0), // final: true
4449 });
4450 auto switch_ = ::builder->CreateSwitch(action, ::coroutine.suspendBlock, 3);
4451 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), resumeBlock);
4452 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), ::coroutine.destroyBlock);
4453
4454 // Continue building in the resume block.
4455 ::builder->SetInsertPoint(resumeBlock);
4456}
4457
4458Routine* Nucleus::acquireCoroutine(const char *name, bool runOptimizations)
4459{
4460 ASSERT_MSG(::coroutine.id != nullptr, "acquireCoroutine() called without a call to createCoroutine()");
4461
4462 ::builder->CreateBr(::coroutine.endBlock);
4463
4464#ifdef ENABLE_RR_DEBUG_INFO
4465 if (debugInfo != nullptr)
4466 {
4467 debugInfo->Finalize();
4468 }
4469#endif // ENABLE_RR_DEBUG_INFO
4470
4471 if(false)
4472 {
4473 std::error_code error;
4474 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
4475 ::module->print(file, 0);
4476 }
4477
4478 // Run manadory coroutine transforms.
4479 llvm::legacy::PassManager pm;
4480 pm.add(llvm::createCoroEarlyPass());
4481 pm.add(llvm::createCoroSplitPass());
4482 pm.add(llvm::createCoroElidePass());
4483 pm.add(llvm::createBarrierNoopPass());
4484 pm.add(llvm::createCoroCleanupPass());
4485 pm.run(*::module);
4486
4487 if(runOptimizations)
4488 {
4489 optimize();
4490 }
4491
4492 if(false)
4493 {
4494 std::error_code error;
4495 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
4496 ::module->print(file, 0);
4497 }
4498
4499 llvm::Function *funcs[Nucleus::CoroutineEntryCount];
4500 funcs[Nucleus::CoroutineEntryBegin] = ::function;
4501 funcs[Nucleus::CoroutineEntryAwait] = ::coroutine.await;
4502 funcs[Nucleus::CoroutineEntryDestroy] = ::coroutine.destroy;
4503 Routine *routine = ::reactorJIT->acquireRoutine(funcs, Nucleus::CoroutineEntryCount);
4504
4505 ::coroutine = CoroutineState{};
4506
4507 return routine;
4508}
4509
4510} // namespace rr