blob: 171f9b17f726091d107a2d22d17d2d0732d764f3 [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
Nicolas Capenscb986762017-01-20 11:34:37 -050015#include "Reactor.hpp"
Ben Claytoneb50d252019-04-15 13:50:01 -040016#include "Debug.hpp"
Ben Claytonac07ed82019-03-26 14:17:41 +000017#include "LLVMReactor.hpp"
18#include "LLVMReactorDebugInfo.hpp"
John Bauman89401822014-05-06 15:04:28 -040019
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040020#include "x86.hpp"
21#include "CPUID.hpp"
22#include "Thread.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040023#include "ExecutableMemory.hpp"
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040024#include "MutexLock.hpp"
25
26#undef min
27#undef max
28
Ben Clayton09a7f452019-04-25 15:22:43 +010029#if defined(__clang__)
// LLVM has occurrences of the extra-semi warning in its headers, which will be
// treated as an error in SwiftShader targets.
32#pragma clang diagnostic push
33#pragma clang diagnostic ignored "-Wextra-semi"
34#endif // defined(__clang__)
35
Ben Clayton5875be52019-04-11 14:57:40 -040036#include "llvm/Analysis/LoopPass.h"
37#include "llvm/ExecutionEngine/ExecutionEngine.h"
38#include "llvm/ExecutionEngine/JITSymbol.h"
39#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
40#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
41#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
42#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
43#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
44#include "llvm/ExecutionEngine/SectionMemoryManager.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DataLayout.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalVariable.h"
Ben Clayton5875be52019-04-11 14:57:40 -040049#include "llvm/IR/Intrinsics.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010050#include "llvm/IR/IRBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040051#include "llvm/IR/LegacyPassManager.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010052#include "llvm/IR/LLVMContext.h"
Ben Clayton5875be52019-04-11 14:57:40 -040053#include "llvm/IR/Mangler.h"
54#include "llvm/IR/Module.h"
Ben Clayton4b944652019-05-02 10:56:19 +010055#include "llvm/IR/Verifier.h"
Ben Clayton5875be52019-04-11 14:57:40 -040056#include "llvm/Support/Error.h"
57#include "llvm/Support/TargetSelect.h"
58#include "llvm/Target/TargetOptions.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010059#include "llvm/Transforms/Coroutines.h"
Ben Clayton5875be52019-04-11 14:57:40 -040060#include "llvm/Transforms/InstCombine/InstCombine.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010061#include "llvm/Transforms/IPO.h"
62#include "llvm/Transforms/IPO/PassManagerBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040063#include "llvm/Transforms/Scalar.h"
64#include "llvm/Transforms/Scalar/GVN.h"
Ben Clayton20507fa2019-04-20 01:40:15 -040065
Ben Clayton09a7f452019-04-25 15:22:43 +010066#if defined(__clang__)
67#pragma clang diagnostic pop
68#endif // defined(__clang__)
69
Ben Clayton5875be52019-04-11 14:57:40 -040070#include "LLVMRoutine.hpp"
John Bauman89401822014-05-06 15:04:28 -040071
Ben Clayton5875be52019-04-11 14:57:40 -040072#define ARGS(...) {__VA_ARGS__}
73#define CreateCall2 CreateCall
74#define CreateCall3 CreateCall
Logan Chien0eedc8c2018-08-21 09:34:28 +080075
Ben Clayton5875be52019-04-11 14:57:40 -040076#include <unordered_map>
Logan Chien0eedc8c2018-08-21 09:34:28 +080077
John Bauman89401822014-05-06 15:04:28 -040078#include <fstream>
Ben Clayton1bc7ee92019-02-14 18:43:22 +000079#include <numeric>
80#include <thread>
Ben Clayton1c82c7b2019-04-30 12:49:27 +010081#include <iostream>
John Bauman89401822014-05-06 15:04:28 -040082
Nicolas Capens47dc8672017-04-25 12:54:39 -040083#if defined(__i386__) || defined(__x86_64__)
84#include <xmmintrin.h>
85#endif
86
Logan Chien40a60052018-09-26 19:03:53 +080087#include <math.h>
88
Nicolas Capenscb122582014-05-06 23:34:44 -040089#if defined(__x86_64__) && defined(_WIN32)
John Bauman66b8ab22014-05-06 15:57:45 -040090extern "C" void X86CompilationCallback()
91{
Ben Claytoneb50d252019-04-15 13:50:01 -040092 UNIMPLEMENTED("X86CompilationCallback");
John Bauman66b8ab22014-05-06 15:57:45 -040093}
94#endif
95
// Forward declaration so the file-local JIT pointer can be declared before
// the class itself is defined.
namespace rr { class LLVMReactorJIT; }
100
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400101namespace
102{
Nicolas Capens48461502018-08-06 14:20:45 -0400103 rr::LLVMReactorJIT *reactorJIT = nullptr;
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400104 llvm::IRBuilder<> *builder = nullptr;
105 llvm::LLVMContext *context = nullptr;
106 llvm::Module *module = nullptr;
107 llvm::Function *function = nullptr;
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400108
Ben Claytonac07ed82019-03-26 14:17:41 +0000109#ifdef ENABLE_RR_DEBUG_INFO
110 std::unique_ptr<rr::DebugInfo> debugInfo;
111#endif
112
Nicolas Capensc07dc4b2018-08-06 14:20:45 -0400113 rr::MutexLock codegenMutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800114
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000115#ifdef ENABLE_RR_PRINT
// Returns a copy of str in which every non-overlapping occurrence of substr
// has been substituted with replacement, scanning left to right. Text that was
// inserted by a substitution is never rescanned.
//
// An empty substr matches nothing and str is returned unchanged. Without this
// guard find() would report a match at every position and the loop would
// never terminate.
std::string replace(std::string str, const std::string& substr, const std::string& replacement)
{
	if(substr.empty())
	{
		return str;
	}

	size_t pos = 0;
	while((pos = str.find(substr, pos)) != std::string::npos)
	{
		str.replace(pos, substr.length(), replacement);
		pos += replacement.length();   // Resume after the inserted text.
	}
	return str;
}
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000125#endif // ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000126
Logan Chien0eedc8c2018-08-21 09:34:28 +0800127 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
128 {
129 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
130
131 llvm::VectorType *extTy =
132 llvm::VectorType::getExtendedElementVectorType(ty);
133 x = ::builder->CreateZExt(x, extTy);
134 y = ::builder->CreateZExt(y, extTy);
135
136 // (x + y + 1) >> 1
137 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
138 llvm::Value *res = ::builder->CreateAdd(x, y);
139 res = ::builder->CreateAdd(res, one);
140 res = ::builder->CreateLShr(res, one);
141 return ::builder->CreateTrunc(res, ty);
142 }
143
144 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800145 llvm::ICmpInst::Predicate pred)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800146 {
147 return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
148 }
149
150 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
Logan Chienb5ce5092018-09-27 18:45:58 +0800151 llvm::Value *y, llvm::Type *dstTy)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800152 {
153 return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
154 }
155
Logan Chiene3191012018-08-24 22:01:50 +0800156#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800157 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
158 {
159 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
160 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
161
162 llvm::Value *undef = llvm::UndefValue::get(srcTy);
163 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
164 std::iota(mask.begin(), mask.end(), 0);
165 llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
166
167 return sext ? ::builder->CreateSExt(v, dstTy)
Logan Chienb5ce5092018-09-27 18:45:58 +0800168 : ::builder->CreateZExt(v, dstTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800169 }
170
171 llvm::Value *lowerPABS(llvm::Value *v)
172 {
173 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
174 llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
175 llvm::Value *neg = ::builder->CreateNeg(v);
176 return ::builder->CreateSelect(cmp, v, neg);
177 }
178#endif // defined(__i386__) || defined(__x86_64__)
Logan Chiene3191012018-08-24 22:01:50 +0800179
180#if !defined(__i386__) && !defined(__x86_64__)
181 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800182 llvm::FCmpInst::Predicate pred)
Logan Chiene3191012018-08-24 22:01:50 +0800183 {
184 return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
185 }
186
Logan Chien83fc07a2018-09-26 22:14:00 +0800187 llvm::Value *lowerRound(llvm::Value *x)
188 {
189 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
190 ::module, llvm::Intrinsic::nearbyint, {x->getType()});
191 return ::builder->CreateCall(nearbyint, ARGS(x));
192 }
193
Logan Chien2faa24a2018-09-26 19:59:32 +0800194 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
195 {
196 return ::builder->CreateFPToSI(lowerRound(x), ty);
197 }
198
Logan Chien40a60052018-09-26 19:03:53 +0800199 llvm::Value *lowerFloor(llvm::Value *x)
200 {
201 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
202 ::module, llvm::Intrinsic::floor, {x->getType()});
203 return ::builder->CreateCall(floor, ARGS(x));
204 }
205
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800206 llvm::Value *lowerTrunc(llvm::Value *x)
207 {
208 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
209 ::module, llvm::Intrinsic::trunc, {x->getType()});
210 return ::builder->CreateCall(trunc, ARGS(x));
211 }
212
Logan Chiene3191012018-08-24 22:01:50 +0800213 // Packed add/sub saturatation
Logan Chien28794cf2018-09-26 18:58:03 +0800214 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
Logan Chiene3191012018-08-24 22:01:50 +0800215 {
Logan Chien28794cf2018-09-26 18:58:03 +0800216 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
217 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
218
219 unsigned numBits = ty->getScalarSizeInBits();
220
221 llvm::Value *max, *min, *extX, *extY;
222 if (isSigned)
223 {
224 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
225 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
226 extX = ::builder->CreateSExt(x, extTy);
227 extY = ::builder->CreateSExt(y, extTy);
228 }
229 else
230 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400231 ASSERT_MSG(numBits <= 64, "numBits: %d", int(numBits));
Logan Chien28794cf2018-09-26 18:58:03 +0800232 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
233 max = llvm::ConstantInt::get(extTy, maxVal, false);
234 min = llvm::ConstantInt::get(extTy, 0, false);
235 extX = ::builder->CreateZExt(x, extTy);
236 extY = ::builder->CreateZExt(y, extTy);
237 }
238
239 llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
240 : ::builder->CreateSub(extX, extY);
241
242 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
243 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
244
245 return ::builder->CreateTrunc(res, ty);
Logan Chiene3191012018-08-24 22:01:50 +0800246 }
247
248 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
249 {
Logan Chien28794cf2018-09-26 18:58:03 +0800250 return lowerPSAT(x, y, true, false);
Logan Chiene3191012018-08-24 22:01:50 +0800251 }
252
253 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
254 {
Logan Chien28794cf2018-09-26 18:58:03 +0800255 return lowerPSAT(x, y, true, true);
Logan Chiene3191012018-08-24 22:01:50 +0800256 }
257
258 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
259 {
Logan Chien28794cf2018-09-26 18:58:03 +0800260 return lowerPSAT(x, y, false, false);
Logan Chiene3191012018-08-24 22:01:50 +0800261 }
262
263 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
264 {
Logan Chien28794cf2018-09-26 18:58:03 +0800265 return lowerPSAT(x, y, false, true);
Logan Chiene3191012018-08-24 22:01:50 +0800266 }
267
268 llvm::Value *lowerSQRT(llvm::Value *x)
269 {
270 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
271 ::module, llvm::Intrinsic::sqrt, {x->getType()});
272 return ::builder->CreateCall(sqrt, ARGS(x));
273 }
274
275 llvm::Value *lowerRCP(llvm::Value *x)
276 {
277 llvm::Type *ty = x->getType();
278 llvm::Constant *one;
279 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
280 {
281 one = llvm::ConstantVector::getSplat(
282 vectorTy->getNumElements(),
283 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
284 }
285 else
286 {
287 one = llvm::ConstantFP::get(ty, 1);
288 }
289 return ::builder->CreateFDiv(one, x);
290 }
291
292 llvm::Value *lowerRSQRT(llvm::Value *x)
293 {
294 return lowerRCP(lowerSQRT(x));
295 }
296
297 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
298 {
299 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
300 llvm::Value *y = llvm::ConstantVector::getSplat(
301 ty->getNumElements(),
302 llvm::ConstantInt::get(ty->getElementType(), scalarY));
303 return ::builder->CreateShl(x, y);
304 }
305
306 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
307 {
308 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
309 llvm::Value *y = llvm::ConstantVector::getSplat(
310 ty->getNumElements(),
311 llvm::ConstantInt::get(ty->getElementType(), scalarY));
312 return ::builder->CreateAShr(x, y);
313 }
314
315 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
316 {
317 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
318 llvm::Value *y = llvm::ConstantVector::getSplat(
319 ty->getNumElements(),
320 llvm::ConstantInt::get(ty->getElementType(), scalarY));
321 return ::builder->CreateLShr(x, y);
322 }
323
324 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
325 {
326 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
327 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
328
329 llvm::Value *extX = ::builder->CreateSExt(x, extTy);
330 llvm::Value *extY = ::builder->CreateSExt(y, extTy);
331 llvm::Value *mult = ::builder->CreateMul(extX, extY);
332
333 llvm::Value *undef = llvm::UndefValue::get(extTy);
334
335 llvm::SmallVector<uint32_t, 16> evenIdx;
336 llvm::SmallVector<uint32_t, 16> oddIdx;
337 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
338 {
339 evenIdx.push_back(i);
340 oddIdx.push_back(i + 1);
341 }
342
343 llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
344 llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
345 return ::builder->CreateAdd(lhs, rhs);
346 }
347
Logan Chiene3191012018-08-24 22:01:50 +0800348 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
349 {
350 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
351 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
352
353 llvm::IntegerType *dstElemTy =
354 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
355
356 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
Ben Claytoneb50d252019-04-15 13:50:01 -0400357 ASSERT_MSG(truncNumBits < 64, "shift 64 must be handled separately. truncNumBits: %d", int(truncNumBits));
Logan Chiene3191012018-08-24 22:01:50 +0800358 llvm::Constant *max, *min;
359 if (isSigned)
360 {
361 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
362 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
363 }
364 else
365 {
366 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
367 min = llvm::ConstantInt::get(srcTy, 0, false);
368 }
369
370 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
371 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
372 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
373 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
374
375 x = ::builder->CreateTrunc(x, dstTy);
376 y = ::builder->CreateTrunc(y, dstTy);
377
378 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
379 std::iota(index.begin(), index.end(), 0);
380
381 return ::builder->CreateShuffleVector(x, y, index);
382 }
383
384 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
385 {
386 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
387 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
388 llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
389
390 llvm::Value *ret = ::builder->CreateZExt(
391 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
392 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
393 {
394 llvm::Value *elem = ::builder->CreateZExt(
395 ::builder->CreateExtractElement(cmp, i), retTy);
396 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
397 }
398 return ret;
399 }
400
401 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
402 {
403 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
404 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
405 llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
406
407 llvm::Value *ret = ::builder->CreateZExt(
408 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
409 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
410 {
411 llvm::Value *elem = ::builder->CreateZExt(
412 ::builder->CreateExtractElement(cmp, i), retTy);
413 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
414 }
415 return ret;
416 }
417#endif // !defined(__i386__) && !defined(__x86_64__)
Chris Forbese86b6dc2019-03-01 09:08:47 -0800418
419 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
420 {
421 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
422 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
423
424 llvm::Value *extX, *extY;
425 if (sext)
426 {
427 extX = ::builder->CreateSExt(x, extTy);
428 extY = ::builder->CreateSExt(y, extTy);
429 }
430 else
431 {
432 extX = ::builder->CreateZExt(x, extTy);
433 extY = ::builder->CreateZExt(y, extTy);
434 }
435
436 llvm::Value *mult = ::builder->CreateMul(extX, extY);
437
438 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
439 llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getBitWidth());
440 return ::builder->CreateTrunc(mulh, ty);
441 }
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400442}
443
Nicolas Capens48461502018-08-06 14:20:45 -0400444namespace rr
John Bauman89401822014-05-06 15:04:28 -0400445{
Ben Claytonc7904162019-04-17 17:35:48 -0400446 const Capabilities Caps =
447 {
448 true, // CallSupported
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100449 true, // CoroutinesSupported
Ben Claytonc7904162019-04-17 17:35:48 -0400450 };
451
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400452 static std::memory_order atomicOrdering(llvm::AtomicOrdering memoryOrder)
453 {
454 switch(memoryOrder)
455 {
456 case llvm::AtomicOrdering::Monotonic: return std::memory_order_relaxed; // https://llvm.org/docs/Atomics.html#monotonic
457 case llvm::AtomicOrdering::Acquire: return std::memory_order_acquire;
458 case llvm::AtomicOrdering::Release: return std::memory_order_release;
459 case llvm::AtomicOrdering::AcquireRelease: return std::memory_order_acq_rel;
460 case llvm::AtomicOrdering::SequentiallyConsistent: return std::memory_order_seq_cst;
461 default:
Ben Claytonfb280672019-04-25 11:16:15 +0100462 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400463 return std::memory_order_acq_rel;
464 }
465 }
466
467 static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
468 {
469 if(!atomic)
470 {
471 return llvm::AtomicOrdering::NotAtomic;
472 }
473
474 switch(memoryOrder)
475 {
476 case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic; // https://llvm.org/docs/Atomics.html#monotonic
477 case std::memory_order_consume: return llvm::AtomicOrdering::Acquire; // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
478 case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
479 case std::memory_order_release: return llvm::AtomicOrdering::Release;
480 case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
481 case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
482 default:
483 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
484 return llvm::AtomicOrdering::AcquireRelease;
485 }
486 }
487
488 template <typename T>
489 static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
490 {
491 *reinterpret_cast<T*>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), atomicOrdering(ordering));
492 }
493
494 template <typename T>
495 static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
496 {
497 std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), *reinterpret_cast<T*>(val), atomicOrdering(ordering));
498 }
499
Logan Chien40a60052018-09-26 19:03:53 +0800500 class ExternalFunctionSymbolResolver
501 {
502 private:
503 using FunctionMap = std::unordered_map<std::string, void *>;
504 FunctionMap func_;
505
506 public:
507 ExternalFunctionSymbolResolver()
508 {
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400509 struct Atomic
510 {
511 static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
512 {
513 switch (size)
514 {
515 case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
516 case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
517 case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
518 case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
519 default:
520 UNIMPLEMENTED("Atomic::load(size: %d)", int(size));
521 }
522 }
523 static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
524 {
525 switch (size)
526 {
527 case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
528 case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
529 case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
530 case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
531 default:
532 UNIMPLEMENTED("Atomic::store(size: %d)", int(size));
533 }
534 }
535 };
Ben Claytonac07ed82019-03-26 14:17:41 +0000536 struct F { static void nop() {} };
Ben Claytonac07ed82019-03-26 14:17:41 +0000537
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400538 func_.emplace("nop", reinterpret_cast<void*>(F::nop));
Logan Chien40a60052018-09-26 19:03:53 +0800539 func_.emplace("floorf", reinterpret_cast<void*>(floorf));
Logan Chien83fc07a2018-09-26 22:14:00 +0800540 func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800541 func_.emplace("truncf", reinterpret_cast<void*>(truncf));
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000542 func_.emplace("printf", reinterpret_cast<void*>(printf));
543 func_.emplace("puts", reinterpret_cast<void*>(puts));
Chris Forbes1a4c7122019-03-15 14:50:47 -0700544 func_.emplace("fmodf", reinterpret_cast<void*>(fmodf));
Ben Claytona2c8b772019-04-09 13:42:36 -0400545 func_.emplace("sinf", reinterpret_cast<void*>(sinf));
Ben Clayton1b6f8c72019-04-09 13:47:43 -0400546 func_.emplace("cosf", reinterpret_cast<void*>(cosf));
Ben Claytonf9350d72019-04-09 14:19:02 -0400547 func_.emplace("asinf", reinterpret_cast<void*>(asinf));
Ben Claytoneafae472019-04-09 14:22:38 -0400548 func_.emplace("acosf", reinterpret_cast<void*>(acosf));
Ben Clayton749b4e02019-04-09 14:27:43 -0400549 func_.emplace("atanf", reinterpret_cast<void*>(atanf));
Ben Claytond9636972019-04-09 15:09:54 -0400550 func_.emplace("sinhf", reinterpret_cast<void*>(sinhf));
Ben Clayton900ea2c2019-04-09 15:25:36 -0400551 func_.emplace("coshf", reinterpret_cast<void*>(coshf));
Ben Clayton3928bd92019-04-09 15:27:41 -0400552 func_.emplace("tanhf", reinterpret_cast<void*>(tanhf));
Ben Claytonf6d77ab2019-04-09 15:30:04 -0400553 func_.emplace("asinhf", reinterpret_cast<void*>(asinhf));
Ben Clayton28ebcb02019-04-09 15:33:38 -0400554 func_.emplace("acoshf", reinterpret_cast<void*>(acoshf));
Ben Claytonfa6a5392019-04-09 15:35:24 -0400555 func_.emplace("atanhf", reinterpret_cast<void*>(atanhf));
Ben Claytona520c3e2019-04-09 15:43:45 -0400556 func_.emplace("atan2f", reinterpret_cast<void*>(atan2f));
Ben Claytonbfe94f02019-04-09 15:52:12 -0400557 func_.emplace("powf", reinterpret_cast<void*>(powf));
Ben Clayton242f0022019-04-09 16:00:53 -0400558 func_.emplace("expf", reinterpret_cast<void*>(expf));
Ben Clayton2c1da722019-04-09 16:03:03 -0400559 func_.emplace("logf", reinterpret_cast<void*>(logf));
Ben Claytonf40b56c2019-04-09 16:06:55 -0400560 func_.emplace("exp2f", reinterpret_cast<void*>(exp2f));
Ben Claytone17acfe2019-04-09 16:09:13 -0400561 func_.emplace("log2f", reinterpret_cast<void*>(log2f));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400562 func_.emplace("atomic_load", reinterpret_cast<void*>(Atomic::load));
563 func_.emplace("atomic_store", reinterpret_cast<void*>(Atomic::store));
Ben Clayton14740062019-04-09 13:48:41 -0400564
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100565 // FIXME (b/119409619): use an allocator here so we can control all memory allocations
566 func_.emplace("coroutine_alloc_frame", reinterpret_cast<void*>(malloc));
567 func_.emplace("coroutine_free_frame", reinterpret_cast<void*>(free));
568
Ben Clayton14740062019-04-09 13:48:41 -0400569#ifdef __APPLE__
Ben Clayton14740062019-04-09 13:48:41 -0400570 func_.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));
571#elif defined(__linux__)
572 func_.emplace("sincosf", reinterpret_cast<void*>(sincosf));
573#endif // __APPLE__
Logan Chien40a60052018-09-26 19:03:53 +0800574 }
575
576 void *findSymbol(const std::string &name) const
577 {
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000578 // Trim off any underscores from the start of the symbol. LLVM likes
579 // to append these on macOS.
580 const char* trimmed = name.c_str();
581 while (trimmed[0] == '_') { trimmed++; }
582
583 FunctionMap::const_iterator it = func_.find(trimmed);
Ben Claytoneb50d252019-04-15 13:50:01 -0400584 // Missing functions will likely make the module fail in exciting non-obvious ways.
585 ASSERT_MSG(it != func_.end(), "Missing external function: '%s'", name.c_str());
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000586 return it->second;
Logan Chien40a60052018-09-26 19:03:53 +0800587 }
588 };
589
Logan Chien0eedc8c2018-08-21 09:34:28 +0800590 class LLVMReactorJIT
591 {
592 private:
593 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
594 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
595
596 llvm::orc::ExecutionSession session;
Logan Chien40a60052018-09-26 19:03:53 +0800597 ExternalFunctionSymbolResolver externalSymbolResolver;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800598 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
599 std::unique_ptr<llvm::TargetMachine> targetMachine;
600 const llvm::DataLayout dataLayout;
601 ObjLayer objLayer;
602 CompileLayer compileLayer;
603 size_t emittedFunctionsNum;
604
605 public:
606 LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
607 const llvm::TargetOptions &targetOpts):
608 resolver(createLegacyLookupResolver(
609 session,
610 [this](const std::string &name) {
Logan Chien40a60052018-09-26 19:03:53 +0800611 void *func = externalSymbolResolver.findSymbol(name);
612 if (func != nullptr)
613 {
614 return llvm::JITSymbol(
615 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
616 }
617
Logan Chien0eedc8c2018-08-21 09:34:28 +0800618 return objLayer.findSymbol(name, true);
619 },
620 [](llvm::Error err) {
621 if (err)
622 {
623 // TODO: Log the symbol resolution errors.
624 return;
625 }
626 })),
627 targetMachine(llvm::EngineBuilder()
Ben Claytonac07ed82019-03-26 14:17:41 +0000628#ifdef ENABLE_RR_DEBUG_INFO
629 .setOptLevel(llvm::CodeGenOpt::None)
630#endif // ENABLE_RR_DEBUG_INFO
Logan Chien0eedc8c2018-08-21 09:34:28 +0800631 .setMArch(arch)
632 .setMAttrs(mattrs)
633 .setTargetOptions(targetOpts)
634 .selectTarget()),
635 dataLayout(targetMachine->createDataLayout()),
636 objLayer(
637 session,
638 [this](llvm::orc::VModuleKey) {
639 return ObjLayer::Resources{
640 std::make_shared<llvm::SectionMemoryManager>(),
641 resolver};
Ben Claytonac07ed82019-03-26 14:17:41 +0000642 },
643 ObjLayer::NotifyLoadedFtor(),
644 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
645#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton90cb2602019-05-23 14:42:32 +0100646 DebugInfo::NotifyObjectEmitted(Obj, L);
Ben Claytonac07ed82019-03-26 14:17:41 +0000647#endif // ENABLE_RR_DEBUG_INFO
648 },
649 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
650#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton90cb2602019-05-23 14:42:32 +0100651 DebugInfo::NotifyFreeingObject(Obj);
Ben Claytonac07ed82019-03-26 14:17:41 +0000652#endif // ENABLE_RR_DEBUG_INFO
653 }
654 ),
Logan Chien0eedc8c2018-08-21 09:34:28 +0800655 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
656 emittedFunctionsNum(0)
657 {
658 }
659
660 void startSession()
661 {
662 ::module = new llvm::Module("", *::context);
663 }
664
665 void endSession()
666 {
667 ::function = nullptr;
668 ::module = nullptr;
669 }
670
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100671 LLVMRoutine *acquireRoutine(llvm::Function **funcs, size_t count)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800672 {
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100673 std::vector<std::string> mangledNames(count);
674 for (size_t i = 0; i < count; i++)
675 {
676 auto func = funcs[i];
677 std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
678 func->setName(name);
679 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
680 func->setDoesNotThrow();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800681
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100682 llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
683 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
684 }
685
686 // Compile the module - after this the llvm::Functions will have
687 // been freed.
Logan Chien0eedc8c2018-08-21 09:34:28 +0800688 std::unique_ptr<llvm::Module> mod(::module);
689 ::module = nullptr;
690 mod->setDataLayout(dataLayout);
691
692 auto moduleKey = session.allocateVModule();
693 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100694 funcs = nullptr; // Now points to released memory.
Logan Chien0eedc8c2018-08-21 09:34:28 +0800695
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100696 // Resolve the function addresses.
697 std::vector<void*> addresses(count);
698 for (size_t i = 0; i < count; i++)
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400699 {
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100700 llvm::JITSymbol symbol = compileLayer.findSymbolIn(moduleKey, mangledNames[i], false);
701
702 llvm::Expected<llvm::JITTargetAddress> expectAddr = symbol.getAddress();
703 if(!expectAddr)
704 {
705 return nullptr;
706 }
707
708 addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400709 }
710
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100711 return new LLVMRoutine(addresses.data(), count, releaseRoutineCallback, this, moduleKey);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800712 }
713
714 void optimize(llvm::Module *module)
715 {
Ben Claytonac07ed82019-03-26 14:17:41 +0000716#ifdef ENABLE_RR_DEBUG_INFO
717 if (debugInfo != nullptr)
718 {
719 return; // Don't optimize if we're generating debug info.
720 }
721#endif // ENABLE_RR_DEBUG_INFO
722
Logan Chien0eedc8c2018-08-21 09:34:28 +0800723 std::unique_ptr<llvm::legacy::PassManager> passManager(
724 new llvm::legacy::PassManager());
725
726 passManager->add(llvm::createSROAPass());
727
728 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
729 {
730 switch(optimization[pass])
731 {
732 case Disabled: break;
733 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
734 case LICM: passManager->add(llvm::createLICMPass()); break;
735 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
736 case GVN: passManager->add(llvm::createGVNPass()); break;
737 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
738 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
739 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
740 case SCCP: passManager->add(llvm::createSCCPPass()); break;
741 case ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
742 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400743 UNREACHABLE("optimization[pass]: %d, pass: %d", int(optimization[pass]), int(pass));
Logan Chien0eedc8c2018-08-21 09:34:28 +0800744 }
745 }
746
747 passManager->run(*::module);
748 }
749
750 private:
751 void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
752 {
753 llvm::cantFail(compileLayer.removeModule(moduleKey));
754 }
755
756 static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
757 {
758 jit->releaseRoutineModule(moduleKey);
759 }
760 };
Logan Chien52cde602018-09-03 19:37:57 +0800761
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400762 Optimization optimization[10] = {InstructionCombining, Disabled};
John Bauman89401822014-05-06 15:04:28 -0400763
// Abstract Type* values are backed by LLVM types, with one exception: 64-bit
// vectors are emulated with 128-bit ones. This avoids MMX on x86 and VFP on
// ARM, and removes the cost of converting them to explicit 128-bit vectors.
// Because real LLVM types are pointers, the emulated types can be encoded as
// abstract pointers holding one of the small enum values below.
enum InternalType : uintptr_t
{
	// Emulated types:
	Type_v2i32,
	Type_v4i16,
	Type_v2i16,
	Type_v8i8,
	Type_v4i8,
	Type_v2f32,

	// Number of emulated types; must directly follow the last emulated entry.
	EmulatedTypeCount,

	// Returned by asInternalType() to signal that the abstract Type* is to
	// be interpreted as an LLVM type pointer:
	Type_LLVM
};
783
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500784 inline InternalType asInternalType(Type *type)
785 {
786 InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
787 return (t < EmulatedTypeCount) ? t : Type_LLVM;
788 }
789
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400790 llvm::Type *T(Type *t)
791 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500792 // Use 128-bit vectors to implement logically shorter ones.
793 switch(asInternalType(t))
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400794 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500795 case Type_v2i32: return T(Int4::getType());
796 case Type_v4i16: return T(Short8::getType());
797 case Type_v2i16: return T(Short8::getType());
798 case Type_v8i8: return T(Byte16::getType());
799 case Type_v4i8: return T(Byte16::getType());
800 case Type_v2f32: return T(Float4::getType());
801 case Type_LLVM: return reinterpret_cast<llvm::Type*>(t);
Ben Claytoneb50d252019-04-15 13:50:01 -0400802 default:
803 UNREACHABLE("asInternalType(t): %d", int(asInternalType(t)));
804 return nullptr;
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400805 }
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400806 }
807
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500808 Type *T(InternalType t)
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400809 {
810 return reinterpret_cast<Type*>(t);
811 }
812
Nicolas Capensac230122016-09-20 14:30:06 -0400813 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
814 {
815 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
816 }
817
Logan Chien191b3052018-08-31 16:57:15 +0800818 inline llvm::BasicBlock *B(BasicBlock *t)
819 {
820 return reinterpret_cast<llvm::BasicBlock*>(t);
821 }
822
Nicolas Capensc8b67a42016-09-25 15:02:52 -0400823 inline BasicBlock *B(llvm::BasicBlock *t)
824 {
825 return reinterpret_cast<BasicBlock*>(t);
826 }
827
Nicolas Capens01a97962017-07-28 17:30:51 -0400828 static size_t typeSize(Type *type)
829 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500830 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -0400831 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500832 case Type_v2i32: return 8;
833 case Type_v4i16: return 8;
834 case Type_v2i16: return 4;
835 case Type_v8i8: return 8;
836 case Type_v4i8: return 4;
837 case Type_v2f32: return 8;
838 case Type_LLVM:
Nicolas Capens01a97962017-07-28 17:30:51 -0400839 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500840 llvm::Type *t = T(type);
Nicolas Capens01a97962017-07-28 17:30:51 -0400841
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500842 if(t->isPointerTy())
843 {
844 return sizeof(void*);
845 }
846
847 // At this point we should only have LLVM 'primitive' types.
848 unsigned int bits = t->getPrimitiveSizeInBits();
Ben Claytoneb50d252019-04-15 13:50:01 -0400849 ASSERT_MSG(bits != 0, "bits: %d", int(bits));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500850
851 // TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
852 // but are typically stored as one byte. The DataLayout structure should
853 // be used here and many other places if this assumption fails.
854 return (bits + 7) / 8;
855 }
856 break;
857 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400858 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500859 return 0;
860 }
Nicolas Capens01a97962017-07-28 17:30:51 -0400861 }
862
Nicolas Capens69674fb2017-09-01 11:08:44 -0400863 static unsigned int elementCount(Type *type)
864 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500865 switch(asInternalType(type))
Nicolas Capens69674fb2017-09-01 11:08:44 -0400866 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500867 case Type_v2i32: return 2;
868 case Type_v4i16: return 4;
869 case Type_v2i16: return 2;
870 case Type_v8i8: return 8;
871 case Type_v4i8: return 4;
872 case Type_v2f32: return 2;
873 case Type_LLVM: return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
Ben Claytoneb50d252019-04-15 13:50:01 -0400874 default:
875 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
876 return 0;
Nicolas Capens69674fb2017-09-01 11:08:44 -0400877 }
Nicolas Capens69674fb2017-09-01 11:08:44 -0400878 }
879
John Bauman89401822014-05-06 15:04:28 -0400880 Nucleus::Nucleus()
881 {
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400882 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400883
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400884 llvm::InitializeNativeTarget();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800885 llvm::InitializeNativeTargetAsmPrinter();
886 llvm::InitializeNativeTargetAsmParser();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800887
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400888 if(!::context)
John Bauman89401822014-05-06 15:04:28 -0400889 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400890 ::context = new llvm::LLVMContext();
John Bauman89401822014-05-06 15:04:28 -0400891 }
892
John Bauman89401822014-05-06 15:04:28 -0400893 #if defined(__x86_64__)
Logan Chien52cde602018-09-03 19:37:57 +0800894 static const char arch[] = "x86-64";
Logan Chiene3191012018-08-24 22:01:50 +0800895 #elif defined(__i386__)
Logan Chien52cde602018-09-03 19:37:57 +0800896 static const char arch[] = "x86";
Logan Chiene3191012018-08-24 22:01:50 +0800897 #elif defined(__aarch64__)
898 static const char arch[] = "arm64";
899 #elif defined(__arm__)
900 static const char arch[] = "arm";
Gordana Cmiljanovic082dfec2018-10-19 11:36:15 +0200901 #elif defined(__mips__)
Gordana Cmiljanovic20622c02018-11-05 15:00:11 +0100902 #if defined(__mips64)
903 static const char arch[] = "mips64el";
904 #else
905 static const char arch[] = "mipsel";
906 #endif
Logan Chiene3191012018-08-24 22:01:50 +0800907 #else
908 #error "unknown architecture"
John Bauman89401822014-05-06 15:04:28 -0400909 #endif
910
Ben Clayton0fc611f2019-04-18 11:23:27 -0400911 llvm::SmallVector<std::string, 8> mattrs;
912
913 llvm::StringMap<bool> features;
914 bool ok = llvm::sys::getHostCPUFeatures(features);
915 ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
916 for (auto &feature : features)
917 {
918 if (feature.second) { mattrs.push_back(feature.first()); }
919 }
920
921#if 0
Logan Chiene3191012018-08-24 22:01:50 +0800922#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800923 mattrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
924 mattrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
925 mattrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
926 mattrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
927 mattrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
928 mattrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
Logan Chien0eedc8c2018-08-21 09:34:28 +0800929 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
Logan Chiene3191012018-08-24 22:01:50 +0800930#elif defined(__arm__)
931#if __ARM_ARCH >= 8
932 mattrs.push_back("+armv8-a");
933#else
934 // armv7-a requires compiler-rt routines; otherwise, compiled kernel
935 // might fail to link.
936#endif
937#endif
Ben Clayton0fc611f2019-04-18 11:23:27 -0400938#endif
John Bauman89401822014-05-06 15:04:28 -0400939
Logan Chien0eedc8c2018-08-21 09:34:28 +0800940 llvm::TargetOptions targetOpts;
Nicolas Capensa7643812018-09-13 14:20:06 -0400941 targetOpts.UnsafeFPMath = false;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800942 // targetOpts.NoInfsFPMath = true;
943 // targetOpts.NoNaNsFPMath = true;
Logan Chien52cde602018-09-03 19:37:57 +0800944
945 if(!::reactorJIT)
946 {
Logan Chien0eedc8c2018-08-21 09:34:28 +0800947 ::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
Logan Chien52cde602018-09-03 19:37:57 +0800948 }
949
950 ::reactorJIT->startSession();
John Bauman89401822014-05-06 15:04:28 -0400951
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400952 if(!::builder)
John Bauman89401822014-05-06 15:04:28 -0400953 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400954 ::builder = new llvm::IRBuilder<>(*::context);
John Bauman89401822014-05-06 15:04:28 -0400955 }
956 }
957
958 Nucleus::~Nucleus()
959 {
Ben Clayton90cb2602019-05-23 14:42:32 +0100960#ifdef ENABLE_RR_DEBUG_INFO
961 debugInfo.reset(nullptr);
962#endif // ENABLE_RR_DEBUG_INFO
963
Logan Chien52cde602018-09-03 19:37:57 +0800964 ::reactorJIT->endSession();
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400965
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400966 ::codegenMutex.unlock();
John Bauman89401822014-05-06 15:04:28 -0400967 }
968
Chris Forbes878d4b02019-01-21 10:48:35 -0800969 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
John Bauman89401822014-05-06 15:04:28 -0400970 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400971 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
John Bauman19bac1e2014-05-06 15:23:49 -0400972 {
Nicolas Capensac230122016-09-20 14:30:06 -0400973 llvm::Type *type = ::function->getReturnType();
John Bauman19bac1e2014-05-06 15:23:49 -0400974
975 if(type->isVoidTy())
976 {
977 createRetVoid();
978 }
979 else
980 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400981 createRet(V(llvm::UndefValue::get(type)));
John Bauman19bac1e2014-05-06 15:23:49 -0400982 }
983 }
John Bauman89401822014-05-06 15:04:28 -0400984
Ben Clayton97c13ad2019-05-02 11:59:30 +0100985#ifdef ENABLE_RR_DEBUG_INFO
986 if (debugInfo != nullptr)
987 {
988 debugInfo->Finalize();
989 }
990#endif // ENABLE_RR_DEBUG_INFO
991
John Bauman89401822014-05-06 15:04:28 -0400992 if(false)
993 {
Ben Clayton5875be52019-04-11 14:57:40 -0400994 std::error_code error;
995 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400996 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -0400997 }
998
Ben Clayton4b944652019-05-02 10:56:19 +0100999 // FIXME: Disable for release builds once heavy development is over.
1000 bool verifyIR = true;
1001 if(verifyIR)
1002 {
1003 llvm::legacy::PassManager pm;
1004 pm.add(llvm::createVerifierPass());
1005 pm.run(*::module);
1006 }
1007
John Bauman89401822014-05-06 15:04:28 -04001008 if(runOptimizations)
1009 {
1010 optimize();
1011 }
1012
1013 if(false)
1014 {
Ben Clayton5875be52019-04-11 14:57:40 -04001015 std::error_code error;
1016 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001017 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001018 }
1019
Ben Clayton1c82c7b2019-04-30 12:49:27 +01001020 LLVMRoutine *routine = ::reactorJIT->acquireRoutine(&::function, 1);
John Bauman89401822014-05-06 15:04:28 -04001021
John Bauman89401822014-05-06 15:04:28 -04001022 return routine;
1023 }
1024
1025 void Nucleus::optimize()
1026 {
Logan Chien52cde602018-09-03 19:37:57 +08001027 ::reactorJIT->optimize(::module);
John Bauman89401822014-05-06 15:04:28 -04001028 }
1029
John Bauman19bac1e2014-05-06 15:23:49 -04001030 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
John Bauman89401822014-05-06 15:04:28 -04001031 {
1032 // Need to allocate it in the entry block for mem2reg to work
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001033 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
John Bauman89401822014-05-06 15:04:28 -04001034
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001035 llvm::Instruction *declaration;
John Bauman89401822014-05-06 15:04:28 -04001036
1037 if(arraySize)
1038 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001039 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
John Bauman89401822014-05-06 15:04:28 -04001040 }
1041 else
1042 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001043 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
John Bauman89401822014-05-06 15:04:28 -04001044 }
1045
1046 entryBlock.getInstList().push_front(declaration);
1047
Nicolas Capens19336542016-09-26 10:32:29 -04001048 return V(declaration);
John Bauman89401822014-05-06 15:04:28 -04001049 }
1050
1051 BasicBlock *Nucleus::createBasicBlock()
1052 {
Logan Chien191b3052018-08-31 16:57:15 +08001053 return B(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001054 }
1055
1056 BasicBlock *Nucleus::getInsertBlock()
1057 {
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001058 return B(::builder->GetInsertBlock());
John Bauman89401822014-05-06 15:04:28 -04001059 }
1060
1061 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1062 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001063 // assert(::builder->GetInsertBlock()->back().isTerminator());
Nicolas Capens0192d152019-03-27 14:46:07 -04001064
1065 Variable::materializeAll();
1066
Logan Chien191b3052018-08-31 16:57:15 +08001067 ::builder->SetInsertPoint(B(basicBlock));
John Bauman89401822014-05-06 15:04:28 -04001068 }
1069
Nicolas Capensac230122016-09-20 14:30:06 -04001070 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
John Bauman89401822014-05-06 15:04:28 -04001071 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001072 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001073 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
1074 ::function->setCallingConv(llvm::CallingConv::C);
John Bauman89401822014-05-06 15:04:28 -04001075
Ben Clayton5875be52019-04-11 14:57:40 -04001076 #if defined(_WIN32)
Nicolas Capens52551d12018-09-13 14:30:56 -04001077 // FIXME(capn):
1078 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
1079 // having a trap which allows the OS to grow the stack. For functions with a stack frame
1080 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
1081 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
1082 // the stack and ensure all pages have been committed. This is currently broken in LLVM
1083 // JIT, but we can prevent emitting the stack probe call:
1084 ::function->addFnAttr("stack-probe-size", "1048576");
1085 #endif
1086
Ben Claytonac07ed82019-03-26 14:17:41 +00001087#ifdef ENABLE_RR_DEBUG_INFO
1088 ::debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(::builder, ::context, ::module, ::function));
1089#endif // ENABLE_RR_DEBUG_INFO
1090
Logan Chien191b3052018-08-31 16:57:15 +08001091 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001092 }
1093
Nicolas Capens19336542016-09-26 10:32:29 -04001094 Value *Nucleus::getArgument(unsigned int index)
John Bauman89401822014-05-06 15:04:28 -04001095 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001096 llvm::Function::arg_iterator args = ::function->arg_begin();
John Bauman89401822014-05-06 15:04:28 -04001097
1098 while(index)
1099 {
1100 args++;
1101 index--;
1102 }
1103
Nicolas Capens19336542016-09-26 10:32:29 -04001104 return V(&*args);
John Bauman89401822014-05-06 15:04:28 -04001105 }
1106
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001107 void Nucleus::createRetVoid()
John Bauman89401822014-05-06 15:04:28 -04001108 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001109 RR_DEBUG_INFO_UPDATE_LOC();
1110
Ben Claytonc958b172019-05-02 12:20:59 +01001111 ASSERT_MSG(::function->getReturnType() == T(Void::getType()), "Return type mismatch");
1112
Nicolas Capens0192d152019-03-27 14:46:07 -04001113 // Code generated after this point is unreachable, so any variables
1114 // being read can safely return an undefined value. We have to avoid
1115 // materializing variables after the terminator ret instruction.
1116 Variable::killUnmaterialized();
1117
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001118 ::builder->CreateRetVoid();
John Bauman89401822014-05-06 15:04:28 -04001119 }
1120
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001121 void Nucleus::createRet(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001122 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001123 RR_DEBUG_INFO_UPDATE_LOC();
1124
Ben Claytonc958b172019-05-02 12:20:59 +01001125 ASSERT_MSG(::function->getReturnType() == V(v)->getType(), "Return type mismatch");
1126
Nicolas Capens0192d152019-03-27 14:46:07 -04001127 // Code generated after this point is unreachable, so any variables
1128 // being read can safely return an undefined value. We have to avoid
1129 // materializing variables after the terminator ret instruction.
1130 Variable::killUnmaterialized();
1131
Logan Chien191b3052018-08-31 16:57:15 +08001132 ::builder->CreateRet(V(v));
John Bauman89401822014-05-06 15:04:28 -04001133 }
1134
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001135 void Nucleus::createBr(BasicBlock *dest)
John Bauman89401822014-05-06 15:04:28 -04001136 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001137 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001138 Variable::materializeAll();
1139
Logan Chien191b3052018-08-31 16:57:15 +08001140 ::builder->CreateBr(B(dest));
John Bauman89401822014-05-06 15:04:28 -04001141 }
1142
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001143 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001144 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001145 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001146 Variable::materializeAll();
Logan Chien191b3052018-08-31 16:57:15 +08001147 ::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
John Bauman89401822014-05-06 15:04:28 -04001148 }
1149
1150 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1151 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001152 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001153 return V(::builder->CreateAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001154 }
1155
1156 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1157 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001158 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001159 return V(::builder->CreateSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001160 }
1161
1162 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1163 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001164 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001165 return V(::builder->CreateMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001166 }
1167
1168 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1169 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001170 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001171 return V(::builder->CreateUDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001172 }
1173
1174 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1175 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001176 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001177 return V(::builder->CreateSDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001178 }
1179
1180 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1181 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001182 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001183 return V(::builder->CreateFAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001184 }
1185
1186 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1187 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001188 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001189 return V(::builder->CreateFSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001190 }
1191
1192 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1193 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001194 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001195 return V(::builder->CreateFMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001196 }
1197
1198 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1199 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001200 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001201 return V(::builder->CreateFDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001202 }
1203
1204 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1205 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001206 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001207 return V(::builder->CreateURem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001208 }
1209
1210 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1211 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001212 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001213 return V(::builder->CreateSRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001214 }
1215
1216 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1217 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001218 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001219 return V(::builder->CreateFRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001220 }
1221
1222 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1223 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001224 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001225 return V(::builder->CreateShl(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001226 }
1227
1228 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1229 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001230 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001231 return V(::builder->CreateLShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001232 }
1233
1234 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1235 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001236 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001237 return V(::builder->CreateAShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001238 }
1239
1240 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1241 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001242 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001243 return V(::builder->CreateAnd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001244 }
1245
1246 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1247 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001248 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001249 return V(::builder->CreateOr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001250 }
1251
1252 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1253 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001254 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001255 return V(::builder->CreateXor(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001256 }
1257
Nicolas Capens19336542016-09-26 10:32:29 -04001258 Value *Nucleus::createNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001259 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001260 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001261 return V(::builder->CreateNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001262 }
1263
Nicolas Capens19336542016-09-26 10:32:29 -04001264 Value *Nucleus::createFNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001265 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001266 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001267 return V(::builder->CreateFNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001268 }
1269
Nicolas Capens19336542016-09-26 10:32:29 -04001270 Value *Nucleus::createNot(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001271 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001272 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001273 return V(::builder->CreateNot(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001274 }
1275
Nicolas Capens86509d92019-03-21 13:23:50 -04001276 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001277 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001278 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001279 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001280 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001281 case Type_v2i32:
1282 case Type_v4i16:
1283 case Type_v8i8:
1284 case Type_v2f32:
1285 return createBitCast(
1286 createInsertElement(
1287 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
Nicolas Capens86509d92019-03-21 13:23:50 -04001288 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001289 0),
1290 type);
1291 case Type_v2i16:
1292 case Type_v4i8:
1293 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001294 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001295 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Nicolas Capens86509d92019-03-21 13:23:50 -04001296 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001297 i = createZExt(i, Long::getType());
1298 Value *v = createInsertElement(u, i, 0);
1299 return createBitCast(v, type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001300 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001301 // Fallthrough to non-emulated case.
1302 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001303 {
Ben Clayton99e57192019-05-03 13:25:08 +01001304 auto elTy = T(type);
1305 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
1306 if (atomic && !(elTy->isIntegerTy() || elTy->isPointerTy() || elTy->isFloatTy()))
1307 {
1308 // atomic load operand must have integer, pointer, or floating point type
1309 // Fall back to using:
1310 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1311 auto sizetTy = ::llvm::IntegerType::get(*::context, sizeof(size_t) * 8);
1312 auto intTy = ::llvm::IntegerType::get(*::context, sizeof(int) * 8);
1313 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1314 auto i8PtrTy = i8Ty->getPointerTo();
1315 auto voidTy = ::llvm::Type::getVoidTy(*::context);
1316 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
1317 auto func = ::module->getOrInsertFunction("__atomic_load", funcTy);
1318 auto size = ::module->getDataLayout().getTypeStoreSize(elTy);
1319 auto out = allocateStackVariable(type);
1320 ::builder->CreateCall(func, {
1321 ::llvm::ConstantInt::get(sizetTy, size),
1322 ::builder->CreatePointerCast(V(ptr), i8PtrTy),
1323 ::builder->CreatePointerCast(V(out), i8PtrTy),
1324 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1325 });
1326 return V(::builder->CreateLoad(V(out)));
1327 }
1328 else
1329 {
1330 auto load = new llvm::LoadInst(V(ptr), "", isVolatile, alignment);
1331 load->setAtomic(atomicOrdering(atomic, memoryOrder));
1332 return V(::builder->Insert(load));
1333 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001334 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001335 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001336 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1337 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001338 }
John Bauman89401822014-05-06 15:04:28 -04001339 }
1340
Nicolas Capens86509d92019-03-21 13:23:50 -04001341 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001342 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001343 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001344 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001345 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001346 case Type_v2i32:
1347 case Type_v4i16:
1348 case Type_v8i8:
1349 case Type_v2f32:
1350 createStore(
1351 createExtractElement(
1352 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1353 createBitCast(ptr, Pointer<Long>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001354 Long::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001355 return value;
1356 case Type_v2i16:
1357 case Type_v4i8:
1358 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001359 {
Logan Chien191b3052018-08-31 16:57:15 +08001360 createStore(
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001361 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1362 createBitCast(ptr, Pointer<Int>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001363 Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens01a97962017-07-28 17:30:51 -04001364 return value;
Nicolas Capens01a97962017-07-28 17:30:51 -04001365 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001366 // Fallthrough to non-emulated case.
1367 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001368 {
Ben Clayton99e57192019-05-03 13:25:08 +01001369 auto elTy = T(type);
1370 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
1371 if (atomic && !(elTy->isIntegerTy() || elTy->isPointerTy() || elTy->isFloatTy()))
1372 {
1373 // atomic store operand must have integer, pointer, or floating point type
1374 // Fall back to using:
1375 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1376 auto sizetTy = ::llvm::IntegerType::get(*::context, sizeof(size_t) * 8);
1377 auto intTy = ::llvm::IntegerType::get(*::context, sizeof(int) * 8);
1378 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1379 auto i8PtrTy = i8Ty->getPointerTo();
1380 auto voidTy = ::llvm::Type::getVoidTy(*::context);
1381 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
1382 auto func = ::module->getOrInsertFunction("__atomic_store", funcTy);
1383 auto size = ::module->getDataLayout().getTypeStoreSize(elTy);
1384 auto copy = allocateStackVariable(type);
1385 ::builder->CreateStore(V(value), V(copy));
1386 ::builder->CreateCall(func, {
1387 ::llvm::ConstantInt::get(sizetTy, size),
1388 ::builder->CreatePointerCast(V(ptr), i8PtrTy),
1389 ::builder->CreatePointerCast(V(copy), i8PtrTy),
1390 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1391 });
1392 }
1393 else
1394 {
1395 auto store = ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
1396 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1397 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001398
1399 return value;
1400 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001401 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001402 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1403 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001404 }
John Bauman89401822014-05-06 15:04:28 -04001405 }
1406
Ben Clayton0fc611f2019-04-18 11:23:27 -04001407 Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment)
1408 {
1409 ASSERT(V(base)->getType()->isPointerTy());
1410 ASSERT(V(offsets)->getType()->isVectorTy());
1411 ASSERT(V(mask)->getType()->isVectorTy());
1412
1413 auto numEls = V(mask)->getType()->getVectorNumElements();
1414 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
1415 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
1416 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1417 auto i8PtrTy = i8Ty->getPointerTo();
1418 auto elPtrTy = T(elTy)->getPointerTo();
1419 auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
1420 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
1421 auto i8Base = ::builder->CreatePointerCast(V(base), i8PtrTy);
1422 auto i8Ptrs = ::builder->CreateGEP(i8Base, V(offsets));
1423 auto elPtrs = ::builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1424 auto i8Mask = ::builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
1425 auto passthrough = ::llvm::Constant::getNullValue(elVecTy);
1426 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
1427 auto func = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
1428 return V(::builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough }));
1429 }
1430
1431 void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment)
1432 {
1433 ASSERT(V(base)->getType()->isPointerTy());
1434 ASSERT(V(val)->getType()->isVectorTy());
1435 ASSERT(V(offsets)->getType()->isVectorTy());
1436 ASSERT(V(mask)->getType()->isVectorTy());
1437
1438 auto numEls = V(mask)->getType()->getVectorNumElements();
1439 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
1440 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
1441 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1442 auto i8PtrTy = i8Ty->getPointerTo();
1443 auto elVecTy = V(val)->getType();
1444 auto elTy = elVecTy->getVectorElementType();
1445 auto elPtrTy = elTy->getPointerTo();
1446 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
1447 auto i8Base = ::builder->CreatePointerCast(V(base), i8PtrTy);
1448 auto i8Ptrs = ::builder->CreateGEP(i8Base, V(offsets));
1449 auto elPtrs = ::builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1450 auto i8Mask = ::builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
1451 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
1452 auto func = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_scatter, { elVecTy, elPtrVecTy } );
1453 ::builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
1454 }
1455
Ben Claytonb16c5862019-05-08 14:01:38 +01001456 void Nucleus::createFence(std::memory_order memoryOrder)
1457 {
1458 ::builder->CreateFence(atomicOrdering(true, memoryOrder));
1459 }
1460
Nicolas Capensd294def2017-01-26 17:44:37 -08001461 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
John Bauman89401822014-05-06 15:04:28 -04001462 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001463 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001464 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens01a97962017-07-28 17:30:51 -04001465 if(sizeof(void*) == 8)
Nicolas Capensd294def2017-01-26 17:44:37 -08001466 {
Ben Claytonb1243732019-02-27 23:56:18 +00001467 // LLVM manual: "When indexing into an array, pointer or vector,
1468 // integers of any width are allowed, and they are not required to
1469 // be constant. These integers are treated as signed values where
1470 // relevant."
1471 //
1472 // Thus if we want indexes to be treated as unsigned we have to
1473 // zero-extend them ourselves.
1474 //
1475 // Note that this is not because we want to address anywhere near
1476 // 4 GB of data. Instead this is important for performance because
1477 // x86 supports automatic zero-extending of 32-bit registers to
1478 // 64-bit. Thus when indexing into an array using a uint32 is
1479 // actually faster than an int32.
1480 index = unsignedIndex ?
1481 createZExt(index, Long::getType()) :
1482 createSExt(index, Long::getType());
Nicolas Capens01a97962017-07-28 17:30:51 -04001483 }
Ben Claytonb1243732019-02-27 23:56:18 +00001484
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001485 // For non-emulated types we can rely on LLVM's GEP to calculate the
1486 // effective address correctly.
1487 if(asInternalType(type) == Type_LLVM)
Nicolas Capens01a97962017-07-28 17:30:51 -04001488 {
Ben Claytonb1243732019-02-27 23:56:18 +00001489 return V(::builder->CreateGEP(V(ptr), V(index)));
Nicolas Capensd294def2017-01-26 17:44:37 -08001490 }
1491
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001492 // For emulated types we have to multiply the index by the intended
1493 // type size ourselves to obain the byte offset.
Ben Claytonb1243732019-02-27 23:56:18 +00001494 index = (sizeof(void*) == 8) ?
1495 createMul(index, createConstantLong((int64_t)typeSize(type))) :
1496 createMul(index, createConstantInt((int)typeSize(type)));
1497
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001498 // Cast to a byte pointer, apply the byte offset, and cast back to the
1499 // original pointer type.
Logan Chien191b3052018-08-31 16:57:15 +08001500 return createBitCast(
1501 V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
1502 T(llvm::PointerType::get(T(type), 0)));
John Bauman89401822014-05-06 15:04:28 -04001503 }
1504
Chris Forbes17813932019-04-18 11:45:54 -07001505 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
John Bauman19bac1e2014-05-06 15:23:49 -04001506 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001507 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbes17813932019-04-18 11:45:54 -07001508 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1509 }
1510
Chris Forbes707ed992019-04-18 18:17:35 -07001511 Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1512 {
1513 RR_DEBUG_INFO_UPDATE_LOC();
1514 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Sub, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1515 }
1516
Chris Forbes17813932019-04-18 11:45:54 -07001517 Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1518 {
1519 RR_DEBUG_INFO_UPDATE_LOC();
1520 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::And, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1521 }
1522
1523 Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1524 {
1525 RR_DEBUG_INFO_UPDATE_LOC();
1526 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Or, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1527 }
1528
1529 Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1530 {
1531 RR_DEBUG_INFO_UPDATE_LOC();
1532 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xor, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1533 }
1534
1535 Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1536 {
1537 RR_DEBUG_INFO_UPDATE_LOC();
1538 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Min, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1539 }
1540
1541 Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1542 {
1543 RR_DEBUG_INFO_UPDATE_LOC();
1544 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Max, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1545 }
1546
Chris Forbesf31bdad2019-05-23 14:58:08 -07001547 Value *Nucleus::createAtomicUMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1548 {
1549 RR_DEBUG_INFO_UPDATE_LOC();
1550 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::UMin, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1551 }
1552
1553 Value *Nucleus::createAtomicUMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1554 {
1555 RR_DEBUG_INFO_UPDATE_LOC();
1556 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::UMax, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1557 }
1558
1559
Chris Forbes17813932019-04-18 11:45:54 -07001560 Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1561 {
1562 RR_DEBUG_INFO_UPDATE_LOC();
1563 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
John Bauman19bac1e2014-05-06 15:23:49 -04001564 }
1565
Chris Forbesa16238d2019-04-18 16:31:54 -07001566 Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1567 {
1568 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbesc9ca99e2019-04-19 07:53:34 -07001569 // Note: AtomicCmpXchgInstruction returns a 2-member struct containing {result, success-flag}, not the result directly.
Chris Forbesa16238d2019-04-18 16:31:54 -07001570 return V(::builder->CreateExtractValue(
1571 ::builder->CreateAtomicCmpXchg(V(ptr), V(compare), V(value), atomicOrdering(true, memoryOrderEqual), atomicOrdering(true, memoryOrderUnequal)),
1572 llvm::ArrayRef<unsigned>(0u)));
1573 }
1574
Nicolas Capens19336542016-09-26 10:32:29 -04001575 Value *Nucleus::createTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001576 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001577 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001578 return V(::builder->CreateTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001579 }
1580
Nicolas Capens19336542016-09-26 10:32:29 -04001581 Value *Nucleus::createZExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001582 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001583 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001584 return V(::builder->CreateZExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001585 }
1586
Nicolas Capens19336542016-09-26 10:32:29 -04001587 Value *Nucleus::createSExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001588 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001589 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001590 return V(::builder->CreateSExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001591 }
1592
Nicolas Capens19336542016-09-26 10:32:29 -04001593 Value *Nucleus::createFPToSI(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001594 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001595 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001596 return V(::builder->CreateFPToSI(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001597 }
1598
Nicolas Capens19336542016-09-26 10:32:29 -04001599 Value *Nucleus::createSIToFP(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001600 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001601 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001602 return V(::builder->CreateSIToFP(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001603 }
1604
Nicolas Capens19336542016-09-26 10:32:29 -04001605 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001606 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001607 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001608 return V(::builder->CreateFPTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001609 }
1610
Nicolas Capens19336542016-09-26 10:32:29 -04001611 Value *Nucleus::createFPExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001612 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001613 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001614 return V(::builder->CreateFPExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001615 }
1616
Nicolas Capens19336542016-09-26 10:32:29 -04001617 Value *Nucleus::createBitCast(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001618 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001619 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04001620 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1621 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1622 // reading back as the destination type.
Logan Chien191b3052018-08-31 16:57:15 +08001623 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001624 {
1625 Value *readAddress = allocateStackVariable(destType);
Logan Chien191b3052018-08-31 16:57:15 +08001626 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1627 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001628 return createLoad(readAddress, destType);
1629 }
Logan Chien191b3052018-08-31 16:57:15 +08001630 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001631 {
Logan Chien191b3052018-08-31 16:57:15 +08001632 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1633 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001634 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1635 return createLoad(readAddress, destType);
1636 }
1637
Logan Chien191b3052018-08-31 16:57:15 +08001638 return V(::builder->CreateBitCast(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001639 }
1640
John Bauman89401822014-05-06 15:04:28 -04001641 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1642 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001643 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001644 return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001645 }
1646
1647 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1648 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001649 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001650 return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001651 }
1652
1653 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1654 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001655 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001656 return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001657 }
1658
1659 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1660 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001661 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001662 return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001663 }
1664
1665 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1666 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001667 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001668 return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001669 }
1670
1671 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1672 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001673 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001674 return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001675 }
1676
1677 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1678 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001679 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001680 return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001681 }
1682
1683 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1684 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001685 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001686 return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001687 }
1688
1689 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1690 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001691 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001692 return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001693 }
1694
1695 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1696 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001697 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001698 return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001699 }
1700
1701 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1702 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001703 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001704 return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001705 }
1706
1707 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1708 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001709 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001710 return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001711 }
1712
1713 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1714 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001715 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001716 return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001717 }
1718
1719 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1720 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001721 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001722 return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001723 }
1724
1725 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1726 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001727 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001728 return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001729 }
1730
1731 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1732 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001733 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001734 return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001735 }
1736
1737 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1738 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001739 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001740 return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001741 }
1742
1743 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1744 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001745 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001746 return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001747 }
1748
1749 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1750 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001751 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001752 return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001753 }
1754
1755 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1756 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001757 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001758 return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001759 }
1760
1761 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1762 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001763 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001764 return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001765 }
1766
1767 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1768 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001769 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001770 return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001771 }
1772
1773 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1774 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001775 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001776 return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001777 }
1778
1779 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1780 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001781 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton71008d82019-03-05 17:17:59 +00001782 return V(::builder->CreateFCmpUNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001783 }
1784
Nicolas Capense95d5342016-09-30 11:37:28 -04001785 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
John Bauman89401822014-05-06 15:04:28 -04001786 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001787 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001788 ASSERT(V(vector)->getType()->getContainedType(0) == T(type));
Logan Chien191b3052018-08-31 16:57:15 +08001789 return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001790 }
1791
1792 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1793 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001794 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001795 return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001796 }
1797
Logan Chien191b3052018-08-31 16:57:15 +08001798 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
John Bauman89401822014-05-06 15:04:28 -04001799 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001800 RR_DEBUG_INFO_UPDATE_LOC();
1801
Logan Chien191b3052018-08-31 16:57:15 +08001802 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
Nicolas Capense89cd582016-09-30 14:23:47 -04001803 const int maxSize = 16;
1804 llvm::Constant *swizzle[maxSize];
Ben Claytoneb50d252019-04-15 13:50:01 -04001805 ASSERT(size <= maxSize);
Nicolas Capense89cd582016-09-30 14:23:47 -04001806
1807 for(int i = 0; i < size; i++)
1808 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001809 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
Nicolas Capense89cd582016-09-30 14:23:47 -04001810 }
1811
1812 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
1813
Logan Chien191b3052018-08-31 16:57:15 +08001814 return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
John Bauman89401822014-05-06 15:04:28 -04001815 }
1816
Logan Chien191b3052018-08-31 16:57:15 +08001817 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001818 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001819 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001820 return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
John Bauman89401822014-05-06 15:04:28 -04001821 }
1822
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001823 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
John Bauman89401822014-05-06 15:04:28 -04001824 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001825 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001826 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
John Bauman89401822014-05-06 15:04:28 -04001827 }
1828
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001829 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
John Bauman89401822014-05-06 15:04:28 -04001830 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001831 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001832 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
1833 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
John Bauman89401822014-05-06 15:04:28 -04001834 }
1835
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001836 void Nucleus::createUnreachable()
John Bauman89401822014-05-06 15:04:28 -04001837 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001838 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001839 ::builder->CreateUnreachable();
John Bauman89401822014-05-06 15:04:28 -04001840 }
1841
Nicolas Capensac230122016-09-20 14:30:06 -04001842 Type *Nucleus::getPointerType(Type *ElementType)
John Bauman89401822014-05-06 15:04:28 -04001843 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001844 return T(llvm::PointerType::get(T(ElementType), 0));
John Bauman89401822014-05-06 15:04:28 -04001845 }
1846
Nicolas Capens13ac2322016-10-13 14:52:12 -04001847 Value *Nucleus::createNullValue(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001848 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001849 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001850 return V(llvm::Constant::getNullValue(T(Ty)));
John Bauman89401822014-05-06 15:04:28 -04001851 }
1852
Nicolas Capens13ac2322016-10-13 14:52:12 -04001853 Value *Nucleus::createConstantLong(int64_t i)
John Bauman89401822014-05-06 15:04:28 -04001854 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001855 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001856 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001857 }
1858
Nicolas Capens13ac2322016-10-13 14:52:12 -04001859 Value *Nucleus::createConstantInt(int i)
John Bauman89401822014-05-06 15:04:28 -04001860 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001861 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001862 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001863 }
1864
Nicolas Capens13ac2322016-10-13 14:52:12 -04001865 Value *Nucleus::createConstantInt(unsigned int i)
John Bauman89401822014-05-06 15:04:28 -04001866 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001867 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001868 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001869 }
1870
Nicolas Capens13ac2322016-10-13 14:52:12 -04001871 Value *Nucleus::createConstantBool(bool b)
John Bauman89401822014-05-06 15:04:28 -04001872 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001873 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001874 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
John Bauman89401822014-05-06 15:04:28 -04001875 }
1876
Nicolas Capens13ac2322016-10-13 14:52:12 -04001877 Value *Nucleus::createConstantByte(signed char i)
John Bauman89401822014-05-06 15:04:28 -04001878 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001879 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001880 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001881 }
1882
Nicolas Capens13ac2322016-10-13 14:52:12 -04001883 Value *Nucleus::createConstantByte(unsigned char i)
John Bauman89401822014-05-06 15:04:28 -04001884 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001885 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001886 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001887 }
1888
Nicolas Capens13ac2322016-10-13 14:52:12 -04001889 Value *Nucleus::createConstantShort(short i)
John Bauman89401822014-05-06 15:04:28 -04001890 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001891 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001892 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001893 }
1894
Nicolas Capens13ac2322016-10-13 14:52:12 -04001895 Value *Nucleus::createConstantShort(unsigned short i)
John Bauman89401822014-05-06 15:04:28 -04001896 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001897 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001898 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001899 }
1900
Nicolas Capens13ac2322016-10-13 14:52:12 -04001901 Value *Nucleus::createConstantFloat(float x)
John Bauman89401822014-05-06 15:04:28 -04001902 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001903 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001904 return V(llvm::ConstantFP::get(T(Float::getType()), x));
John Bauman89401822014-05-06 15:04:28 -04001905 }
1906
Nicolas Capens13ac2322016-10-13 14:52:12 -04001907 Value *Nucleus::createNullPointer(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001908 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001909 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001910 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
John Bauman89401822014-05-06 15:04:28 -04001911 }
1912
Nicolas Capens13ac2322016-10-13 14:52:12 -04001913 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
John Bauman89401822014-05-06 15:04:28 -04001914 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001915 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001916 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1917 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001918 ASSERT(numElements <= 16 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001919 llvm::Constant *constantVector[16];
1920
Nicolas Capens69674fb2017-09-01 11:08:44 -04001921 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001922 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001923 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001924 }
1925
Nicolas Capens69674fb2017-09-01 11:08:44 -04001926 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
Nicolas Capens13ac2322016-10-13 14:52:12 -04001927 }
1928
1929 Value *Nucleus::createConstantVector(const double *constants, Type *type)
1930 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001931 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001932 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1933 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001934 ASSERT(numElements <= 8 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001935 llvm::Constant *constantVector[8];
1936
Nicolas Capens69674fb2017-09-01 11:08:44 -04001937 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001938 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001939 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001940 }
1941
Nicolas Capens69674fb2017-09-01 11:08:44 -04001942 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
John Bauman89401822014-05-06 15:04:28 -04001943 }
1944
John Bauman19bac1e2014-05-06 15:23:49 -04001945 Type *Void::getType()
John Bauman89401822014-05-06 15:04:28 -04001946 {
Nicolas Capensac230122016-09-20 14:30:06 -04001947 return T(llvm::Type::getVoidTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04001948 }
1949
John Bauman19bac1e2014-05-06 15:23:49 -04001950 Type *Bool::getType()
John Bauman89401822014-05-06 15:04:28 -04001951 {
Nicolas Capensac230122016-09-20 14:30:06 -04001952 return T(llvm::Type::getInt1Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001953 }
1954
John Bauman19bac1e2014-05-06 15:23:49 -04001955 Type *Byte::getType()
John Bauman89401822014-05-06 15:04:28 -04001956 {
Nicolas Capensac230122016-09-20 14:30:06 -04001957 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001958 }
1959
John Bauman19bac1e2014-05-06 15:23:49 -04001960 Type *SByte::getType()
John Bauman89401822014-05-06 15:04:28 -04001961 {
Nicolas Capensac230122016-09-20 14:30:06 -04001962 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001963 }
1964
John Bauman19bac1e2014-05-06 15:23:49 -04001965 Type *Short::getType()
John Bauman89401822014-05-06 15:04:28 -04001966 {
Nicolas Capensac230122016-09-20 14:30:06 -04001967 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001968 }
1969
John Bauman19bac1e2014-05-06 15:23:49 -04001970 Type *UShort::getType()
John Bauman89401822014-05-06 15:04:28 -04001971 {
Nicolas Capensac230122016-09-20 14:30:06 -04001972 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001973 }
1974
John Bauman19bac1e2014-05-06 15:23:49 -04001975 Type *Byte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001976 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001977 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001978 }
1979
John Bauman19bac1e2014-05-06 15:23:49 -04001980 Type *SByte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001981 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001982 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001983 }
1984
John Bauman19bac1e2014-05-06 15:23:49 -04001985 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001986 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001987 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001988#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001989 return x86::paddusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001990#else
1991 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
1992#endif
John Bauman89401822014-05-06 15:04:28 -04001993 }
John Bauman66b8ab22014-05-06 15:57:45 -04001994
John Bauman19bac1e2014-05-06 15:23:49 -04001995 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001996 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001997 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08001998#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001999 return x86::psubusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002000#else
2001 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2002#endif
John Bauman89401822014-05-06 15:04:28 -04002003 }
2004
John Bauman19bac1e2014-05-06 15:23:49 -04002005 RValue<Int> SignMask(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04002006 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002007 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002008#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002009 return x86::pmovmskb(x);
Logan Chiene3191012018-08-24 22:01:50 +08002010#else
2011 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2012#endif
John Bauman89401822014-05-06 15:04:28 -04002013 }
2014
John Bauman19bac1e2014-05-06 15:23:49 -04002015// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002016// {
Logan Chiene3191012018-08-24 22:01:50 +08002017//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002018// return x86::pcmpgtb(x, y); // FIXME: Signedness
Logan Chiene3191012018-08-24 22:01:50 +08002019//#else
2020// return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2021//#endif
John Bauman89401822014-05-06 15:04:28 -04002022// }
John Bauman66b8ab22014-05-06 15:57:45 -04002023
John Bauman19bac1e2014-05-06 15:23:49 -04002024 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002025 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002026 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002027#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002028 return x86::pcmpeqb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002029#else
2030 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2031#endif
John Bauman89401822014-05-06 15:04:28 -04002032 }
2033
John Bauman19bac1e2014-05-06 15:23:49 -04002034 Type *Byte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002035 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002036 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002037 }
2038
John Bauman19bac1e2014-05-06 15:23:49 -04002039 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002040 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002041 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002042#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002043 return x86::paddsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002044#else
2045 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2046#endif
John Bauman89401822014-05-06 15:04:28 -04002047 }
John Bauman66b8ab22014-05-06 15:57:45 -04002048
John Bauman19bac1e2014-05-06 15:23:49 -04002049 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002050 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002051 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002052#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002053 return x86::psubsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002054#else
2055 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2056#endif
John Bauman89401822014-05-06 15:04:28 -04002057 }
2058
John Bauman19bac1e2014-05-06 15:23:49 -04002059 RValue<Int> SignMask(RValue<SByte8> x)
John Bauman89401822014-05-06 15:04:28 -04002060 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002061 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002062#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002063 return x86::pmovmskb(As<Byte8>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002064#else
2065 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2066#endif
John Bauman89401822014-05-06 15:04:28 -04002067 }
2068
John Bauman19bac1e2014-05-06 15:23:49 -04002069 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002070 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002071 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002072#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002073 return x86::pcmpgtb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002074#else
2075 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2076#endif
John Bauman89401822014-05-06 15:04:28 -04002077 }
John Bauman66b8ab22014-05-06 15:57:45 -04002078
John Bauman19bac1e2014-05-06 15:23:49 -04002079 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002080 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002081 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002082#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002083 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
Logan Chiene3191012018-08-24 22:01:50 +08002084#else
2085 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2086#endif
John Bauman89401822014-05-06 15:04:28 -04002087 }
2088
John Bauman19bac1e2014-05-06 15:23:49 -04002089 Type *SByte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002090 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002091 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002092 }
2093
John Bauman19bac1e2014-05-06 15:23:49 -04002094 Type *Byte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002095 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002096 return T(llvm::VectorType::get(T(Byte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002097 }
2098
John Bauman19bac1e2014-05-06 15:23:49 -04002099 Type *SByte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002100 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002101 return T(llvm::VectorType::get(T(SByte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002102 }
2103
Nicolas Capens16b5f152016-10-13 13:39:01 -04002104 Type *Short2::getType()
2105 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002106 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002107 }
2108
Nicolas Capens16b5f152016-10-13 13:39:01 -04002109 Type *UShort2::getType()
2110 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002111 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002112 }
2113
John Bauman19bac1e2014-05-06 15:23:49 -04002114 Short4::Short4(RValue<Int4> cast)
John Bauman89401822014-05-06 15:04:28 -04002115 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002116 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04002117 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
John Bauman89401822014-05-06 15:04:28 -04002118 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2119
Nicolas Capens01a97962017-07-28 17:30:51 -04002120 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2121 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
John Bauman89401822014-05-06 15:04:28 -04002122
John Bauman66b8ab22014-05-06 15:57:45 -04002123 storeValue(short4);
John Bauman89401822014-05-06 15:04:28 -04002124 }
2125
John Bauman19bac1e2014-05-06 15:23:49 -04002126// Short4::Short4(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002127// {
2128// }
2129
John Bauman19bac1e2014-05-06 15:23:49 -04002130 Short4::Short4(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002131 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002132 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002133 Int4 v4i32 = Int4(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002134#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002135 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
Logan Chiene3191012018-08-24 22:01:50 +08002136#else
2137 Value *v = v4i32.loadValue();
2138 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
2139#endif
John Bauman66b8ab22014-05-06 15:57:45 -04002140
2141 storeValue(As<Short4>(Int2(v4i32)).value);
John Bauman89401822014-05-06 15:04:28 -04002142 }
2143
John Bauman19bac1e2014-05-06 15:23:49 -04002144 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002145 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002146 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002147#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002148 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2149
2150 return x86::psllw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002151#else
2152 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
2153#endif
John Bauman89401822014-05-06 15:04:28 -04002154 }
2155
John Bauman19bac1e2014-05-06 15:23:49 -04002156 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002157 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002158 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002159#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002160 return x86::psraw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002161#else
2162 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2163#endif
John Bauman89401822014-05-06 15:04:28 -04002164 }
2165
John Bauman19bac1e2014-05-06 15:23:49 -04002166 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002167 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002168 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002169#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002170 return x86::pmaxsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002171#else
2172 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
2173#endif
John Bauman89401822014-05-06 15:04:28 -04002174 }
2175
John Bauman19bac1e2014-05-06 15:23:49 -04002176 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002177 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002178 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002179#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002180 return x86::pminsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002181#else
2182 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
2183#endif
John Bauman89401822014-05-06 15:04:28 -04002184 }
2185
John Bauman19bac1e2014-05-06 15:23:49 -04002186 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002187 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002188 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002189#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002190 return x86::paddsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002191#else
2192 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2193#endif
John Bauman89401822014-05-06 15:04:28 -04002194 }
2195
John Bauman19bac1e2014-05-06 15:23:49 -04002196 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002197 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002198 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002199#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002200 return x86::psubsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002201#else
2202 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2203#endif
John Bauman89401822014-05-06 15:04:28 -04002204 }
2205
John Bauman19bac1e2014-05-06 15:23:49 -04002206 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002207 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002208 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002209#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002210 return x86::pmulhw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002211#else
2212 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2213#endif
John Bauman89401822014-05-06 15:04:28 -04002214 }
2215
John Bauman19bac1e2014-05-06 15:23:49 -04002216 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002217 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002218 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002219#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002220 return x86::pmaddwd(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002221#else
2222 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
2223#endif
John Bauman89401822014-05-06 15:04:28 -04002224 }
2225
Nicolas Capens33438a62017-09-27 11:47:35 -04002226 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002227 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002228 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002229#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002230 auto result = x86::packsswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002231#else
2232 auto result = V(lowerPack(V(x.value), V(y.value), true));
2233#endif
Nicolas Capens01a97962017-07-28 17:30:51 -04002234 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
John Bauman89401822014-05-06 15:04:28 -04002235 }
2236
Nicolas Capens33438a62017-09-27 11:47:35 -04002237 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2238 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002239 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002240#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002241 auto result = x86::packuswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002242#else
2243 auto result = V(lowerPack(V(x.value), V(y.value), false));
2244#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002245 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
2246 }
2247
John Bauman19bac1e2014-05-06 15:23:49 -04002248 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002249 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002250 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002251#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002252 return x86::pcmpgtw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002253#else
2254 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
2255#endif
John Bauman89401822014-05-06 15:04:28 -04002256 }
2257
John Bauman19bac1e2014-05-06 15:23:49 -04002258 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002259 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002260 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002261#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002262 return x86::pcmpeqw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002263#else
2264 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
2265#endif
John Bauman89401822014-05-06 15:04:28 -04002266 }
2267
John Bauman19bac1e2014-05-06 15:23:49 -04002268 Type *Short4::getType()
John Bauman89401822014-05-06 15:04:28 -04002269 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002270 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002271 }
2272
John Bauman19bac1e2014-05-06 15:23:49 -04002273 UShort4::UShort4(RValue<Float4> cast, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04002274 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002275 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002276 if(saturate)
2277 {
Logan Chiena8385ed2018-09-26 19:22:54 +08002278#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002279 if(CPUID::supportsSSE4_1())
2280 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002281 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
Nicolas Capens33438a62017-09-27 11:47:35 -04002282 *this = As<Short4>(PackUnsigned(int4, int4));
John Bauman89401822014-05-06 15:04:28 -04002283 }
2284 else
Logan Chiena8385ed2018-09-26 19:22:54 +08002285#endif
John Bauman89401822014-05-06 15:04:28 -04002286 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002287 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
John Bauman89401822014-05-06 15:04:28 -04002288 }
2289 }
2290 else
2291 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002292 *this = Short4(Int4(cast));
John Bauman89401822014-05-06 15:04:28 -04002293 }
2294 }
2295
John Bauman19bac1e2014-05-06 15:23:49 -04002296 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002297 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002298 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002299#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002300 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2301
2302 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002303#else
2304 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
2305#endif
John Bauman89401822014-05-06 15:04:28 -04002306 }
2307
John Bauman19bac1e2014-05-06 15:23:49 -04002308 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002309 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002310 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002311#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002312 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
2313
2314 return x86::psrlw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002315#else
2316 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2317#endif
John Bauman89401822014-05-06 15:04:28 -04002318 }
2319
John Bauman19bac1e2014-05-06 15:23:49 -04002320 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002321 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002322 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002323 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002324 }
2325
John Bauman19bac1e2014-05-06 15:23:49 -04002326 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002327 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002328 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002329 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002330 }
2331
John Bauman19bac1e2014-05-06 15:23:49 -04002332 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002333 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002334 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002335#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002336 return x86::paddusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002337#else
2338 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2339#endif
John Bauman89401822014-05-06 15:04:28 -04002340 }
2341
John Bauman19bac1e2014-05-06 15:23:49 -04002342 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002343 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002344 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002345#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002346 return x86::psubusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002347#else
2348 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2349#endif
John Bauman89401822014-05-06 15:04:28 -04002350 }
2351
John Bauman19bac1e2014-05-06 15:23:49 -04002352 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002353 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002354 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002355#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002356 return x86::pmulhuw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002357#else
2358 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2359#endif
John Bauman89401822014-05-06 15:04:28 -04002360 }
2361
John Bauman19bac1e2014-05-06 15:23:49 -04002362 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002363 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002364 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002365#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002366 return x86::pavgw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002367#else
2368 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
2369#endif
John Bauman89401822014-05-06 15:04:28 -04002370 }
2371
John Bauman19bac1e2014-05-06 15:23:49 -04002372 Type *UShort4::getType()
John Bauman89401822014-05-06 15:04:28 -04002373 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002374 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002375 }
2376
John Bauman19bac1e2014-05-06 15:23:49 -04002377 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002378 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002379 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002380#if defined(__i386__) || defined(__x86_64__)
2381 return x86::psllw(lhs, rhs);
2382#else
2383 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
2384#endif
John Bauman89401822014-05-06 15:04:28 -04002385 }
2386
John Bauman19bac1e2014-05-06 15:23:49 -04002387 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002388 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002389 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002390#if defined(__i386__) || defined(__x86_64__)
2391 return x86::psraw(lhs, rhs);
2392#else
2393 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
2394#endif
John Bauman89401822014-05-06 15:04:28 -04002395 }
2396
John Bauman19bac1e2014-05-06 15:23:49 -04002397 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002398 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002399 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002400#if defined(__i386__) || defined(__x86_64__)
2401 return x86::pmaddwd(x, y);
2402#else
2403 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
2404#endif
John Bauman89401822014-05-06 15:04:28 -04002405 }
2406
John Bauman19bac1e2014-05-06 15:23:49 -04002407 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002408 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002409 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002410#if defined(__i386__) || defined(__x86_64__)
2411 return x86::pmulhw(x, y);
2412#else
2413 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2414#endif
John Bauman89401822014-05-06 15:04:28 -04002415 }
2416
John Bauman19bac1e2014-05-06 15:23:49 -04002417 Type *Short8::getType()
John Bauman89401822014-05-06 15:04:28 -04002418 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002419 return T(llvm::VectorType::get(T(Short::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002420 }
2421
John Bauman19bac1e2014-05-06 15:23:49 -04002422 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002423 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002424 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002425#if defined(__i386__) || defined(__x86_64__)
2426 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
2427#else
2428 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
2429#endif
John Bauman89401822014-05-06 15:04:28 -04002430 }
2431
John Bauman19bac1e2014-05-06 15:23:49 -04002432 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002433 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002434 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002435#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002436 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
Logan Chiene3191012018-08-24 22:01:50 +08002437#else
2438 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
2439#endif
John Bauman89401822014-05-06 15:04:28 -04002440 }
2441
John Bauman19bac1e2014-05-06 15:23:49 -04002442 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
John Bauman89401822014-05-06 15:04:28 -04002443 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002444 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense89cd582016-09-30 14:23:47 -04002445 int pshufb[16] =
2446 {
2447 select0 + 0,
2448 select0 + 1,
2449 select1 + 0,
2450 select1 + 1,
2451 select2 + 0,
2452 select2 + 1,
2453 select3 + 0,
2454 select3 + 1,
2455 select4 + 0,
2456 select4 + 1,
2457 select5 + 0,
2458 select5 + 1,
2459 select6 + 0,
2460 select6 + 1,
2461 select7 + 0,
2462 select7 + 1,
2463 };
John Bauman89401822014-05-06 15:04:28 -04002464
2465 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
Nicolas Capense89cd582016-09-30 14:23:47 -04002466 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
John Bauman89401822014-05-06 15:04:28 -04002467 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
2468
2469 return RValue<UShort8>(short8);
2470 }
2471
John Bauman19bac1e2014-05-06 15:23:49 -04002472 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04002473 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002474 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002475#if defined(__i386__) || defined(__x86_64__)
2476 return x86::pmulhuw(x, y);
2477#else
2478 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2479#endif
John Bauman89401822014-05-06 15:04:28 -04002480 }
2481
John Bauman19bac1e2014-05-06 15:23:49 -04002482 Type *UShort8::getType()
John Bauman89401822014-05-06 15:04:28 -04002483 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002484 return T(llvm::VectorType::get(T(UShort::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002485 }
2486
Nicolas Capens96d4e092016-11-18 14:22:38 -05002487 RValue<Int> operator++(Int &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002488 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002489 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002490 RValue<Int> res = val;
2491
Logan Chien191b3052018-08-31 16:57:15 +08002492 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002493 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002494
2495 return res;
2496 }
2497
Nicolas Capens96d4e092016-11-18 14:22:38 -05002498 const Int &operator++(Int &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002499 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002500 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002501 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002502 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002503
2504 return val;
2505 }
2506
Nicolas Capens96d4e092016-11-18 14:22:38 -05002507 RValue<Int> operator--(Int &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002508 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002509 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002510 RValue<Int> res = val;
2511
Logan Chien191b3052018-08-31 16:57:15 +08002512 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002513 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002514
2515 return res;
2516 }
2517
Nicolas Capens96d4e092016-11-18 14:22:38 -05002518 const Int &operator--(Int &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002519 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002520 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002521 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002522 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002523
2524 return val;
2525 }
2526
John Bauman19bac1e2014-05-06 15:23:49 -04002527 RValue<Int> RoundInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002528 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002529 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002530#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002531 return x86::cvtss2si(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002532#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002533 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002534#endif
John Bauman89401822014-05-06 15:04:28 -04002535 }
2536
John Bauman19bac1e2014-05-06 15:23:49 -04002537 Type *Int::getType()
John Bauman89401822014-05-06 15:04:28 -04002538 {
Nicolas Capensac230122016-09-20 14:30:06 -04002539 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002540 }
2541
John Bauman19bac1e2014-05-06 15:23:49 -04002542 Type *Long::getType()
John Bauman89401822014-05-06 15:04:28 -04002543 {
Nicolas Capensac230122016-09-20 14:30:06 -04002544 return T(llvm::Type::getInt64Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002545 }
2546
John Bauman19bac1e2014-05-06 15:23:49 -04002547 UInt::UInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002548 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002549 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002550 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2551 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
John Bauman89401822014-05-06 15:04:28 -04002552
Alexis Hetu764d1422016-09-28 08:44:22 -04002553 // Smallest positive value representable in UInt, but not in Int
2554 const unsigned int ustart = 0x80000000u;
2555 const float ustartf = float(ustart);
2556
2557 // If the value is negative, store 0, otherwise store the result of the conversion
2558 storeValue((~(As<Int>(cast) >> 31) &
2559 // Check if the value can be represented as an Int
2560 IfThenElse(cast >= ustartf,
2561 // If the value is too large, subtract ustart and re-add it after conversion.
2562 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2563 // Otherwise, just convert normally
2564 Int(cast))).value);
John Bauman89401822014-05-06 15:04:28 -04002565 }
2566
Nicolas Capens96d4e092016-11-18 14:22:38 -05002567 RValue<UInt> operator++(UInt &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002568 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002569 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002570 RValue<UInt> res = val;
2571
Logan Chien191b3052018-08-31 16:57:15 +08002572 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002573 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002574
2575 return res;
2576 }
2577
Nicolas Capens96d4e092016-11-18 14:22:38 -05002578 const UInt &operator++(UInt &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002579 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002580 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002581 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002582 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002583
2584 return val;
2585 }
2586
Nicolas Capens96d4e092016-11-18 14:22:38 -05002587 RValue<UInt> operator--(UInt &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002588 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002589 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002590 RValue<UInt> res = val;
2591
Logan Chien191b3052018-08-31 16:57:15 +08002592 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002593 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002594
2595 return res;
2596 }
2597
Nicolas Capens96d4e092016-11-18 14:22:38 -05002598 const UInt &operator--(UInt &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002599 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002600 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002601 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002602 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002603
2604 return val;
2605 }
2606
John Bauman19bac1e2014-05-06 15:23:49 -04002607// RValue<UInt> RoundUInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002608// {
Logan Chiene3191012018-08-24 22:01:50 +08002609//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002610// return x86::cvtss2si(val); // FIXME: Unsigned
Logan Chiene3191012018-08-24 22:01:50 +08002611//#else
2612// return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
2613//#endif
John Bauman89401822014-05-06 15:04:28 -04002614// }
2615
John Bauman19bac1e2014-05-06 15:23:49 -04002616 Type *UInt::getType()
John Bauman89401822014-05-06 15:04:28 -04002617 {
Nicolas Capensac230122016-09-20 14:30:06 -04002618 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002619 }
2620
John Bauman19bac1e2014-05-06 15:23:49 -04002621// Int2::Int2(RValue<Int> cast)
2622// {
John Bauman19bac1e2014-05-06 15:23:49 -04002623// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2624// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
John Bauman66b8ab22014-05-06 15:57:45 -04002625//
Nicolas Capense89cd582016-09-30 14:23:47 -04002626// int shuffle[2] = {0, 0};
2627// Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
John Bauman19bac1e2014-05-06 15:23:49 -04002628//
John Bauman66b8ab22014-05-06 15:57:45 -04002629// storeValue(replicate);
John Bauman19bac1e2014-05-06 15:23:49 -04002630// }
John Bauman89401822014-05-06 15:04:28 -04002631
John Bauman19bac1e2014-05-06 15:23:49 -04002632 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002633 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002634 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002635#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002636 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
2637
2638 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002639#else
2640 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
2641#endif
John Bauman89401822014-05-06 15:04:28 -04002642 }
2643
John Bauman19bac1e2014-05-06 15:23:49 -04002644 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002645 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002646 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002647#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002648 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
2649
2650 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002651#else
2652 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
2653#endif
John Bauman89401822014-05-06 15:04:28 -04002654 }
2655
John Bauman19bac1e2014-05-06 15:23:49 -04002656 Type *Int2::getType()
John Bauman89401822014-05-06 15:04:28 -04002657 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002658 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002659 }
2660
John Bauman19bac1e2014-05-06 15:23:49 -04002661 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002662 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002663 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002664#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002665 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
2666
2667 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002668#else
2669 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
2670#endif
John Bauman89401822014-05-06 15:04:28 -04002671 }
2672
John Bauman19bac1e2014-05-06 15:23:49 -04002673 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002674 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002675 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002676#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002677 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
2678
2679 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002680#else
2681 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
2682#endif
John Bauman89401822014-05-06 15:04:28 -04002683 }
2684
John Bauman19bac1e2014-05-06 15:23:49 -04002685 Type *UInt2::getType()
John Bauman89401822014-05-06 15:04:28 -04002686 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002687 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002688 }
2689
Nicolas Capenscb986762017-01-20 11:34:37 -05002690 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002691 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002692 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002693#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002694 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002695 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002696 *this = x86::pmovzxbd(As<Byte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002697 }
2698 else
Logan Chiene3191012018-08-24 22:01:50 +08002699#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002700 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002701 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Nicolas Capens01a97962017-07-28 17:30:51 -04002702 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002703 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002704
Nicolas Capense89cd582016-09-30 14:23:47 -04002705 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002706 Value *c = Nucleus::createBitCast(b, Short8::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002707 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002708
Nicolas Capens01a97962017-07-28 17:30:51 -04002709 *this = As<Int4>(d);
2710 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002711 }
2712
Nicolas Capenscb986762017-01-20 11:34:37 -05002713 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002714 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002715 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002716#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002717 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002718 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002719 *this = x86::pmovsxbd(As<SByte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002720 }
2721 else
Logan Chiene3191012018-08-24 22:01:50 +08002722#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002723 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002724 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2725 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
2726 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002727
Nicolas Capense89cd582016-09-30 14:23:47 -04002728 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002729 Value *c = Nucleus::createBitCast(b, Short8::getType());
2730 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002731
Nicolas Capens01a97962017-07-28 17:30:51 -04002732 *this = As<Int4>(d) >> 24;
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002733 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002734 }
2735
Nicolas Capenscb986762017-01-20 11:34:37 -05002736 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002737 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002738 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002739#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002740 if(CPUID::supportsSSE4_1())
2741 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002742 *this = x86::pmovsxwd(As<Short8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002743 }
2744 else
Logan Chiene3191012018-08-24 22:01:50 +08002745#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002746 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002747 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002748 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2749 *this = As<Int4>(c) >> 16;
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002750 }
2751 }
2752
Nicolas Capenscb986762017-01-20 11:34:37 -05002753 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002754 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002755 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002756#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002757 if(CPUID::supportsSSE4_1())
2758 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002759 *this = x86::pmovzxwd(As<UShort8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002760 }
2761 else
Logan Chiene3191012018-08-24 22:01:50 +08002762#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002763 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002764 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002765 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2766 *this = As<Int4>(c);
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002767 }
2768 }
2769
Nicolas Capenscb986762017-01-20 11:34:37 -05002770 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002771 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002772 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002773 Value *vector = loadValue();
2774 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2775
Nicolas Capense89cd582016-09-30 14:23:47 -04002776 int swizzle[4] = {0, 0, 0, 0};
2777 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002778
2779 storeValue(replicate);
2780 }
2781
John Bauman19bac1e2014-05-06 15:23:49 -04002782 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002783 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002784 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002785#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002786 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002787#else
2788 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
2789#endif
John Bauman89401822014-05-06 15:04:28 -04002790 }
2791
John Bauman19bac1e2014-05-06 15:23:49 -04002792 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002793 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002794 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002795#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002796 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002797#else
2798 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2799#endif
John Bauman89401822014-05-06 15:04:28 -04002800 }
2801
John Bauman19bac1e2014-05-06 15:23:49 -04002802 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2803 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002804 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002805 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002806 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2807 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2808 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002809 }
2810
2811 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2812 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002813 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002814 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2815 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2816 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
2817 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002818 }
2819
2820 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2821 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002822 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002823 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2824 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2825 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
2826 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002827 }
2828
2829 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2830 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002831 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002832 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2833 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2834 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2835 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002836 }
2837
2838 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2839 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002840 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002841 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2842 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2843 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
2844 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002845 }
2846
2847 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2848 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002849 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002850 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2851 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2852 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
2853 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002854 }
2855
2856 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2857 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002858 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002859#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002860 if(CPUID::supportsSSE4_1())
2861 {
2862 return x86::pmaxsd(x, y);
2863 }
2864 else
Logan Chiene3191012018-08-24 22:01:50 +08002865#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002866 {
2867 RValue<Int4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002868 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002869 }
2870 }
2871
2872 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2873 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002874 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002875#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002876 if(CPUID::supportsSSE4_1())
2877 {
2878 return x86::pminsd(x, y);
2879 }
2880 else
Logan Chiene3191012018-08-24 22:01:50 +08002881#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002882 {
2883 RValue<Int4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002884 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002885 }
2886 }
2887
2888 RValue<Int4> RoundInt(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002889 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002890 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002891#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002892 return x86::cvtps2dq(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002893#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002894 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002895#endif
John Bauman89401822014-05-06 15:04:28 -04002896 }
2897
Chris Forbese86b6dc2019-03-01 09:08:47 -08002898 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2899 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002900 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002901 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2902 return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2903 }
2904
2905 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2906 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002907 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002908 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2909 return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2910 }
2911
Nicolas Capens33438a62017-09-27 11:47:35 -04002912 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04002913 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002914 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002915#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002916 return x86::packssdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002917#else
2918 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
2919#endif
John Bauman89401822014-05-06 15:04:28 -04002920 }
2921
Nicolas Capens33438a62017-09-27 11:47:35 -04002922 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
2923 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002924 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002925#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002926 return x86::packusdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002927#else
2928 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
2929#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002930 }
2931
John Bauman19bac1e2014-05-06 15:23:49 -04002932 RValue<Int> SignMask(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04002933 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002934 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002935#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002936 return x86::movmskps(As<Float4>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002937#else
2938 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2939#endif
John Bauman89401822014-05-06 15:04:28 -04002940 }
2941
John Bauman19bac1e2014-05-06 15:23:49 -04002942 Type *Int4::getType()
John Bauman89401822014-05-06 15:04:28 -04002943 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002944 return T(llvm::VectorType::get(T(Int::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002945 }
2946
Nicolas Capenscb986762017-01-20 11:34:37 -05002947 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002948 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002949 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002950 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2951 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
John Bauman89401822014-05-06 15:04:28 -04002952
Alexis Hetu764d1422016-09-28 08:44:22 -04002953 // Smallest positive value representable in UInt, but not in Int
2954 const unsigned int ustart = 0x80000000u;
2955 const float ustartf = float(ustart);
2956
2957 // Check if the value can be represented as an Int
2958 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
2959 // If the value is too large, subtract ustart and re-add it after conversion.
2960 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
2961 // Otherwise, just convert normally
2962 (~uiValue & Int4(cast));
2963 // If the value is negative, store 0, otherwise store the result of the conversion
2964 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
John Bauman89401822014-05-06 15:04:28 -04002965 }
2966
John Bauman19bac1e2014-05-06 15:23:49 -04002967 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002968 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002969 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002970#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002971 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002972#else
2973 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
2974#endif
John Bauman89401822014-05-06 15:04:28 -04002975 }
2976
John Bauman19bac1e2014-05-06 15:23:49 -04002977 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002978 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002979 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002980#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002981 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002982#else
2983 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2984#endif
John Bauman89401822014-05-06 15:04:28 -04002985 }
2986
John Bauman19bac1e2014-05-06 15:23:49 -04002987 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
2988 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002989 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002990 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002991 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2992 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2993 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002994 }
2995
2996 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
2997 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002998 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04002999 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
3000 }
3001
3002 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3003 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003004 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04003005 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
3006 // Restore the following line when LLVM is updated to a version where this issue is fixed.
3007 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
3008 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04003009 }
3010
3011 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3012 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003013 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003014 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
3015 }
3016
3017 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3018 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003019 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04003020 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
3021 // Restore the following line when LLVM is updated to a version where this issue is fixed.
3022 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
3023 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04003024 }
3025
3026 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3027 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003028 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003029 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
3030 }
3031
3032 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3033 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003034 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003035#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003036 if(CPUID::supportsSSE4_1())
3037 {
3038 return x86::pmaxud(x, y);
3039 }
3040 else
Logan Chiene3191012018-08-24 22:01:50 +08003041#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003042 {
3043 RValue<UInt4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003044 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04003045 }
3046 }
3047
3048 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3049 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003050 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003051#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003052 if(CPUID::supportsSSE4_1())
3053 {
3054 return x86::pminud(x, y);
3055 }
3056 else
Logan Chiene3191012018-08-24 22:01:50 +08003057#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003058 {
3059 RValue<UInt4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003060 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04003061 }
3062 }
3063
John Bauman19bac1e2014-05-06 15:23:49 -04003064 Type *UInt4::getType()
John Bauman89401822014-05-06 15:04:28 -04003065 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003066 return T(llvm::VectorType::get(T(UInt::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003067 }
3068
Alexis Hetu734e2572018-12-20 14:00:49 -05003069 Type *Half::getType()
3070 {
3071 return T(llvm::Type::getInt16Ty(*::context));
3072 }
3073
Nicolas Capens05b3d662016-02-25 23:58:33 -05003074 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003075 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003076 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003077#if defined(__i386__) || defined(__x86_64__)
3078 if(exactAtPow2)
3079 {
3080 // rcpss uses a piecewise-linear approximation which minimizes the relative error
3081 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3082 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3083 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003084 return x86::rcpss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003085#else
3086 return As<Float>(V(lowerRCP(V(x.value))));
3087#endif
John Bauman89401822014-05-06 15:04:28 -04003088 }
John Bauman66b8ab22014-05-06 15:57:45 -04003089
John Bauman19bac1e2014-05-06 15:23:49 -04003090 RValue<Float> RcpSqrt_pp(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003091 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003092 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003093#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003094 return x86::rsqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003095#else
3096 return As<Float>(V(lowerRSQRT(V(x.value))));
3097#endif
John Bauman89401822014-05-06 15:04:28 -04003098 }
3099
John Bauman19bac1e2014-05-06 15:23:49 -04003100 RValue<Float> Sqrt(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003101 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003102 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003103#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003104 return x86::sqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003105#else
3106 return As<Float>(V(lowerSQRT(V(x.value))));
3107#endif
John Bauman89401822014-05-06 15:04:28 -04003108 }
3109
John Bauman19bac1e2014-05-06 15:23:49 -04003110 RValue<Float> Round(RValue<Float> x)
3111 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003112 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003113#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003114 if(CPUID::supportsSSE4_1())
3115 {
3116 return x86::roundss(x, 0);
3117 }
3118 else
3119 {
3120 return Float4(Round(Float4(x))).x;
3121 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003122#else
3123 return RValue<Float>(V(lowerRound(V(x.value))));
3124#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003125 }
3126
3127 RValue<Float> Trunc(RValue<Float> x)
3128 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003129 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003130#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003131 if(CPUID::supportsSSE4_1())
3132 {
3133 return x86::roundss(x, 3);
3134 }
3135 else
3136 {
3137 return Float(Int(x)); // Rounded toward zero
3138 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003139#else
3140 return RValue<Float>(V(lowerTrunc(V(x.value))));
3141#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003142 }
3143
3144 RValue<Float> Frac(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003145 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003146 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003147#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003148 if(CPUID::supportsSSE4_1())
3149 {
3150 return x - x86::floorss(x);
3151 }
3152 else
3153 {
John Bauman19bac1e2014-05-06 15:23:49 -04003154 return Float4(Frac(Float4(x))).x;
John Bauman89401822014-05-06 15:04:28 -04003155 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003156#else
3157 // x - floor(x) can be 1.0 for very small negative x.
3158 // Clamp against the value just below 1.0.
3159 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
3160#endif
John Bauman89401822014-05-06 15:04:28 -04003161 }
3162
John Bauman19bac1e2014-05-06 15:23:49 -04003163 RValue<Float> Floor(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003164 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003165 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003166#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003167 if(CPUID::supportsSSE4_1())
3168 {
3169 return x86::floorss(x);
3170 }
3171 else
3172 {
3173 return Float4(Floor(Float4(x))).x;
3174 }
Logan Chien40a60052018-09-26 19:03:53 +08003175#else
3176 return RValue<Float>(V(lowerFloor(V(x.value))));
3177#endif
John Bauman89401822014-05-06 15:04:28 -04003178 }
3179
John Bauman19bac1e2014-05-06 15:23:49 -04003180 RValue<Float> Ceil(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003181 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003182 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003183#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003184 if(CPUID::supportsSSE4_1())
3185 {
3186 return x86::ceilss(x);
3187 }
3188 else
Logan Chiene3191012018-08-24 22:01:50 +08003189#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003190 {
3191 return Float4(Ceil(Float4(x))).x;
3192 }
John Bauman89401822014-05-06 15:04:28 -04003193 }
3194
John Bauman19bac1e2014-05-06 15:23:49 -04003195 Type *Float::getType()
John Bauman89401822014-05-06 15:04:28 -04003196 {
Nicolas Capensac230122016-09-20 14:30:06 -04003197 return T(llvm::Type::getFloatTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04003198 }
3199
John Bauman19bac1e2014-05-06 15:23:49 -04003200 Type *Float2::getType()
John Bauman89401822014-05-06 15:04:28 -04003201 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003202 return T(Type_v2f32);
John Bauman89401822014-05-06 15:04:28 -04003203 }
3204
Nicolas Capenscb986762017-01-20 11:34:37 -05003205 Float4::Float4(RValue<Float> rhs) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04003206 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003207 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04003208 Value *vector = loadValue();
John Bauman89401822014-05-06 15:04:28 -04003209 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3210
Nicolas Capense89cd582016-09-30 14:23:47 -04003211 int swizzle[4] = {0, 0, 0, 0};
3212 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
John Bauman89401822014-05-06 15:04:28 -04003213
John Bauman66b8ab22014-05-06 15:57:45 -04003214 storeValue(replicate);
John Bauman89401822014-05-06 15:04:28 -04003215 }
3216
John Bauman19bac1e2014-05-06 15:23:49 -04003217 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003218 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003219 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003220#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003221 return x86::maxps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003222#else
3223 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
3224#endif
John Bauman89401822014-05-06 15:04:28 -04003225 }
3226
John Bauman19bac1e2014-05-06 15:23:49 -04003227 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003228 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003229 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003230#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003231 return x86::minps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003232#else
3233 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
3234#endif
John Bauman89401822014-05-06 15:04:28 -04003235 }
3236
Nicolas Capens05b3d662016-02-25 23:58:33 -05003237 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003238 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003239 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003240#if defined(__i386__) || defined(__x86_64__)
3241 if(exactAtPow2)
3242 {
3243 // rcpps uses a piecewise-linear approximation which minimizes the relative error
3244 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3245 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3246 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003247 return x86::rcpps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003248#else
3249 return As<Float4>(V(lowerRCP(V(x.value))));
3250#endif
John Bauman89401822014-05-06 15:04:28 -04003251 }
John Bauman66b8ab22014-05-06 15:57:45 -04003252
John Bauman19bac1e2014-05-06 15:23:49 -04003253 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003254 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003255 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003256#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003257 return x86::rsqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003258#else
3259 return As<Float4>(V(lowerRSQRT(V(x.value))));
3260#endif
John Bauman89401822014-05-06 15:04:28 -04003261 }
3262
John Bauman19bac1e2014-05-06 15:23:49 -04003263 RValue<Float4> Sqrt(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003264 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003265 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003266#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003267 return x86::sqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003268#else
3269 return As<Float4>(V(lowerSQRT(V(x.value))));
3270#endif
John Bauman89401822014-05-06 15:04:28 -04003271 }
3272
John Bauman19bac1e2014-05-06 15:23:49 -04003273 RValue<Int> SignMask(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003274 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003275 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003276#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003277 return x86::movmskps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003278#else
3279 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
3280#endif
John Bauman89401822014-05-06 15:04:28 -04003281 }
3282
John Bauman19bac1e2014-05-06 15:23:49 -04003283 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003284 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003285 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003286 // return As<Int4>(x86::cmpeqps(x, y));
3287 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
3288 }
3289
John Bauman19bac1e2014-05-06 15:23:49 -04003290 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003291 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003292 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003293 // return As<Int4>(x86::cmpltps(x, y));
3294 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
3295 }
3296
John Bauman19bac1e2014-05-06 15:23:49 -04003297 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003298 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003299 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003300 // return As<Int4>(x86::cmpleps(x, y));
3301 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
3302 }
3303
John Bauman19bac1e2014-05-06 15:23:49 -04003304 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003305 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003306 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003307 // return As<Int4>(x86::cmpneqps(x, y));
3308 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
3309 }
3310
John Bauman19bac1e2014-05-06 15:23:49 -04003311 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003312 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003313 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003314 // return As<Int4>(x86::cmpnltps(x, y));
3315 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
3316 }
3317
John Bauman19bac1e2014-05-06 15:23:49 -04003318 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003319 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003320 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003321 // return As<Int4>(x86::cmpnleps(x, y));
3322 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
3323 }
3324
Ben Claytonec1aeb82019-03-04 19:33:27 +00003325 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3326 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003327 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003328 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUEQ(x.value, y.value), Int4::getType()));
3329 }
3330
3331 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3332 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003333 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003334 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULT(x.value, y.value), Int4::getType()));
3335 }
3336
3337 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3338 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003339 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003340 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULE(x.value, y.value), Int4::getType()));
3341 }
3342
3343 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3344 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003345 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003346 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUNE(x.value, y.value), Int4::getType()));
3347 }
3348
3349 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3350 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003351 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003352 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGE(x.value, y.value), Int4::getType()));
3353 }
3354
3355 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3356 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003357 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003358 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGT(x.value, y.value), Int4::getType()));
3359 }
3360
John Bauman19bac1e2014-05-06 15:23:49 -04003361 RValue<Float4> Round(RValue<Float4> x)
3362 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003363 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003364#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003365 if(CPUID::supportsSSE4_1())
3366 {
3367 return x86::roundps(x, 0);
3368 }
3369 else
3370 {
3371 return Float4(RoundInt(x));
3372 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003373#else
3374 return RValue<Float4>(V(lowerRound(V(x.value))));
3375#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003376 }
3377
3378 RValue<Float4> Trunc(RValue<Float4> x)
3379 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003380 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003381#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003382 if(CPUID::supportsSSE4_1())
3383 {
3384 return x86::roundps(x, 3);
3385 }
3386 else
3387 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003388 return Float4(Int4(x));
John Bauman19bac1e2014-05-06 15:23:49 -04003389 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003390#else
3391 return RValue<Float4>(V(lowerTrunc(V(x.value))));
3392#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003393 }
3394
3395 RValue<Float4> Frac(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003396 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003397 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb9230422017-07-17 10:27:33 -04003398 Float4 frc;
3399
Logan Chien40a60052018-09-26 19:03:53 +08003400#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003401 if(CPUID::supportsSSE4_1())
3402 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003403 frc = x - Floor(x);
John Bauman89401822014-05-06 15:04:28 -04003404 }
3405 else
3406 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003407 frc = x - Float4(Int4(x)); // Signed fractional part.
John Bauman89401822014-05-06 15:04:28 -04003408
Nicolas Capensb9230422017-07-17 10:27:33 -04003409 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
John Bauman89401822014-05-06 15:04:28 -04003410 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003411#else
3412 frc = x - Floor(x);
3413#endif
Nicolas Capensb9230422017-07-17 10:27:33 -04003414
3415 // x - floor(x) can be 1.0 for very small negative x.
3416 // Clamp against the value just below 1.0.
3417 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
John Bauman89401822014-05-06 15:04:28 -04003418 }
3419
John Bauman19bac1e2014-05-06 15:23:49 -04003420 RValue<Float4> Floor(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003421 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003422 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003423#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003424 if(CPUID::supportsSSE4_1())
3425 {
3426 return x86::floorps(x);
3427 }
3428 else
3429 {
John Bauman19bac1e2014-05-06 15:23:49 -04003430 return x - Frac(x);
John Bauman89401822014-05-06 15:04:28 -04003431 }
Logan Chien40a60052018-09-26 19:03:53 +08003432#else
3433 return RValue<Float4>(V(lowerFloor(V(x.value))));
3434#endif
John Bauman89401822014-05-06 15:04:28 -04003435 }
3436
John Bauman19bac1e2014-05-06 15:23:49 -04003437 RValue<Float4> Ceil(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003438 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003439 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003440#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003441 if(CPUID::supportsSSE4_1())
3442 {
3443 return x86::ceilps(x);
3444 }
3445 else
Logan Chiene3191012018-08-24 22:01:50 +08003446#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003447 {
3448 return -Floor(-x);
3449 }
John Bauman89401822014-05-06 15:04:28 -04003450 }
3451
Ben Claytona2c8b772019-04-09 13:42:36 -04003452 RValue<Float4> Sin(RValue<Float4> v)
3453 {
3454 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sin, { V(v.value)->getType() } );
3455 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3456 }
3457
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003458 RValue<Float4> Cos(RValue<Float4> v)
3459 {
3460 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cos, { V(v.value)->getType() } );
3461 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3462 }
3463
Ben Clayton14740062019-04-09 13:48:41 -04003464 RValue<Float4> Tan(RValue<Float4> v)
3465 {
3466 return Sin(v) / Cos(v);
3467 }
3468
Ben Claytoneafae472019-04-09 14:22:38 -04003469 static RValue<Float4> TransformFloat4PerElement(RValue<Float4> v, const char* name)
Ben Claytonf9350d72019-04-09 14:19:02 -04003470 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003471 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), ::llvm::ArrayRef<llvm::Type*>(T(Float::getType())), false);
Ben Claytoneafae472019-04-09 14:22:38 -04003472 auto func = ::module->getOrInsertFunction(name, funcTy);
Ben Claytonf9350d72019-04-09 14:19:02 -04003473 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3474 for (uint64_t i = 0; i < 4; i++)
3475 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003476 auto el = ::builder->CreateCall(func, V(Nucleus::createExtractElement(v.value, Float::getType(), i)));
3477 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytonf9350d72019-04-09 14:19:02 -04003478 }
3479 return RValue<Float4>(V(out));
3480 }
3481
Ben Claytoneafae472019-04-09 14:22:38 -04003482 RValue<Float4> Asin(RValue<Float4> v)
3483 {
3484 return TransformFloat4PerElement(v, "asinf");
3485 }
3486
3487 RValue<Float4> Acos(RValue<Float4> v)
3488 {
3489 return TransformFloat4PerElement(v, "acosf");
3490 }
3491
Ben Clayton749b4e02019-04-09 14:27:43 -04003492 RValue<Float4> Atan(RValue<Float4> v)
3493 {
3494 return TransformFloat4PerElement(v, "atanf");
3495 }
3496
Ben Claytond9636972019-04-09 15:09:54 -04003497 RValue<Float4> Sinh(RValue<Float4> v)
3498 {
3499 return TransformFloat4PerElement(v, "sinhf");
3500 }
3501
Ben Clayton900ea2c2019-04-09 15:25:36 -04003502 RValue<Float4> Cosh(RValue<Float4> v)
3503 {
3504 return TransformFloat4PerElement(v, "coshf");
3505 }
3506
Ben Clayton3928bd92019-04-09 15:27:41 -04003507 RValue<Float4> Tanh(RValue<Float4> v)
3508 {
3509 return TransformFloat4PerElement(v, "tanhf");
3510 }
3511
Ben Claytonf6d77ab2019-04-09 15:30:04 -04003512 RValue<Float4> Asinh(RValue<Float4> v)
3513 {
3514 return TransformFloat4PerElement(v, "asinhf");
3515 }
3516
Ben Clayton28ebcb02019-04-09 15:33:38 -04003517 RValue<Float4> Acosh(RValue<Float4> v)
3518 {
3519 return TransformFloat4PerElement(v, "acoshf");
3520 }
3521
Ben Claytonfa6a5392019-04-09 15:35:24 -04003522 RValue<Float4> Atanh(RValue<Float4> v)
3523 {
3524 return TransformFloat4PerElement(v, "atanhf");
3525 }
3526
Ben Claytona520c3e2019-04-09 15:43:45 -04003527 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3528 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003529 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3530 paramTys.push_back(T(Float::getType()));
3531 paramTys.push_back(T(Float::getType()));
3532 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), paramTys, false);
Ben Claytona520c3e2019-04-09 15:43:45 -04003533 auto func = ::module->getOrInsertFunction("atan2f", funcTy);
3534 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3535 for (uint64_t i = 0; i < 4; i++)
3536 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003537 auto el = ::builder->CreateCall2(func, ARGS(
3538 V(Nucleus::createExtractElement(x.value, Float::getType(), i)),
3539 V(Nucleus::createExtractElement(y.value, Float::getType(), i))
3540 ));
3541 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytona520c3e2019-04-09 15:43:45 -04003542 }
3543 return RValue<Float4>(V(out));
3544 }
3545
Ben Claytonbfe94f02019-04-09 15:52:12 -04003546 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3547 {
Ben Clayton7579db12019-05-02 08:37:12 +01003548 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::pow, { T(Float4::getType()) });
Ben Claytonc38fc122019-04-11 08:58:49 -04003549 return RValue<Float4>(V(::builder->CreateCall2(func, ARGS(V(x.value), V(y.value)))));
Ben Claytonbfe94f02019-04-09 15:52:12 -04003550 }
3551
Ben Clayton242f0022019-04-09 16:00:53 -04003552 RValue<Float4> Exp(RValue<Float4> v)
3553 {
3554 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003555 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton242f0022019-04-09 16:00:53 -04003556 }
3557
Ben Clayton2c1da722019-04-09 16:03:03 -04003558 RValue<Float4> Log(RValue<Float4> v)
3559 {
3560 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003561 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton2c1da722019-04-09 16:03:03 -04003562 }
3563
Ben Claytonf40b56c2019-04-09 16:06:55 -04003564 RValue<Float4> Exp2(RValue<Float4> v)
3565 {
3566 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003567 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytonf40b56c2019-04-09 16:06:55 -04003568 }
3569
Ben Claytone17acfe2019-04-09 16:09:13 -04003570 RValue<Float4> Log2(RValue<Float4> v)
3571 {
3572 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003573 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytone17acfe2019-04-09 16:09:13 -04003574 }
3575
Ben Clayton60958262019-04-10 14:53:30 -04003576 RValue<UInt4> Ctlz(RValue<UInt4> v, bool isZeroUndef)
3577 {
Ben Clayton7579db12019-05-02 08:37:12 +01003578 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::ctlz, { T(UInt4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003579 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton60958262019-04-10 14:53:30 -04003580 V(v.value),
3581 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003582 ))));
Ben Clayton60958262019-04-10 14:53:30 -04003583 }
3584
Ben Clayton3f007c42019-04-10 14:54:23 -04003585 RValue<UInt4> Cttz(RValue<UInt4> v, bool isZeroUndef)
3586 {
Ben Clayton7579db12019-05-02 08:37:12 +01003587 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cttz, { T(UInt4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003588 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton3f007c42019-04-10 14:54:23 -04003589 V(v.value),
3590 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003591 ))));
Ben Clayton3f007c42019-04-10 14:54:23 -04003592 }
3593
John Bauman19bac1e2014-05-06 15:23:49 -04003594 Type *Float4::getType()
John Bauman89401822014-05-06 15:04:28 -04003595 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003596 return T(llvm::VectorType::get(T(Float::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003597 }
3598
John Bauman89401822014-05-06 15:04:28 -04003599 RValue<Long> Ticks()
3600 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003601 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003602 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
John Bauman89401822014-05-06 15:04:28 -04003603
Nicolas Capens2ab69ee2016-09-26 11:45:17 -04003604 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
John Bauman89401822014-05-06 15:04:28 -04003605 }
Ben Claytond853c122019-04-16 17:51:49 -04003606
3607 RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3608 {
3609 // Note: this should work for 32-bit pointers as well because 'inttoptr'
3610 // is defined to truncate (and zero extend) if necessary.
3611 auto ptrAsInt = ::llvm::ConstantInt::get(::llvm::Type::getInt64Ty(*::context), reinterpret_cast<uintptr_t>(ptr));
3612 return RValue<Pointer<Byte>>(V(::builder->CreateIntToPtr(ptrAsInt, T(Pointer<Byte>::getType()))));
3613 }
3614
3615 Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
3616 {
3617 ::llvm::SmallVector<::llvm::Type*, 8> paramTys;
3618 for (auto ty : argTys) { paramTys.push_back(T(ty)); }
3619 auto funcTy = ::llvm::FunctionType::get(T(retTy), paramTys, false);
3620
3621 auto funcPtrTy = funcTy->getPointerTo();
3622 auto funcPtr = ::builder->CreatePointerCast(V(fptr.value), funcPtrTy);
3623
3624 ::llvm::SmallVector<::llvm::Value*, 8> arguments;
3625 for (auto arg : args) { arguments.push_back(V(arg)); }
3626 return V(::builder->CreateCall(funcPtr, arguments));
3627 }
John Bauman89401822014-05-06 15:04:28 -04003628}
3629
Nicolas Capens48461502018-08-06 14:20:45 -04003630namespace rr
John Bauman89401822014-05-06 15:04:28 -04003631{
Logan Chiene3191012018-08-24 22:01:50 +08003632#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003633 namespace x86
3634 {
John Bauman19bac1e2014-05-06 15:23:49 -04003635 RValue<Int> cvtss2si(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003636 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003637 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
John Bauman66b8ab22014-05-06 15:57:45 -04003638
John Bauman89401822014-05-06 15:04:28 -04003639 Float4 vector;
3640 vector.x = val;
3641
Logan Chien813d5032018-08-31 17:19:45 +08003642 return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
John Bauman89401822014-05-06 15:04:28 -04003643 }
3644
John Bauman19bac1e2014-05-06 15:23:49 -04003645 RValue<Int4> cvtps2dq(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003646 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003647 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
John Bauman89401822014-05-06 15:04:28 -04003648
Logan Chien813d5032018-08-31 17:19:45 +08003649 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003650 }
3651
John Bauman19bac1e2014-05-06 15:23:49 -04003652 RValue<Float> rcpss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003653 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003654 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
John Bauman89401822014-05-06 15:04:28 -04003655
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003656 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003657
Logan Chien813d5032018-08-31 17:19:45 +08003658 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003659 }
3660
John Bauman19bac1e2014-05-06 15:23:49 -04003661 RValue<Float> sqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003662 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003663 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3664 return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003665 }
3666
John Bauman19bac1e2014-05-06 15:23:49 -04003667 RValue<Float> rsqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003668 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003669 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
John Bauman66b8ab22014-05-06 15:57:45 -04003670
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003671 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman89401822014-05-06 15:04:28 -04003672
Logan Chien813d5032018-08-31 17:19:45 +08003673 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003674 }
3675
John Bauman19bac1e2014-05-06 15:23:49 -04003676 RValue<Float4> rcpps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003677 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003678 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003679
Logan Chien813d5032018-08-31 17:19:45 +08003680 return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003681 }
3682
John Bauman19bac1e2014-05-06 15:23:49 -04003683 RValue<Float4> sqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003684 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003685 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
John Bauman66b8ab22014-05-06 15:57:45 -04003686
Logan Chien813d5032018-08-31 17:19:45 +08003687 return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003688 }
3689
John Bauman19bac1e2014-05-06 15:23:49 -04003690 RValue<Float4> rsqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003691 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003692 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003693
Logan Chien813d5032018-08-31 17:19:45 +08003694 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003695 }
3696
John Bauman19bac1e2014-05-06 15:23:49 -04003697 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003698 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003699 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
John Bauman89401822014-05-06 15:04:28 -04003700
Logan Chien813d5032018-08-31 17:19:45 +08003701 return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003702 }
3703
John Bauman19bac1e2014-05-06 15:23:49 -04003704 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003705 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003706 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
John Bauman89401822014-05-06 15:04:28 -04003707
Logan Chien813d5032018-08-31 17:19:45 +08003708 return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003709 }
3710
John Bauman19bac1e2014-05-06 15:23:49 -04003711 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003712 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003713 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
John Bauman89401822014-05-06 15:04:28 -04003714
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003715 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
John Bauman89401822014-05-06 15:04:28 -04003716 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
3717
Logan Chien813d5032018-08-31 17:19:45 +08003718 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003719 }
3720
John Bauman19bac1e2014-05-06 15:23:49 -04003721 RValue<Float> floorss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003722 {
3723 return roundss(val, 1);
3724 }
3725
John Bauman19bac1e2014-05-06 15:23:49 -04003726 RValue<Float> ceilss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003727 {
3728 return roundss(val, 2);
3729 }
3730
John Bauman19bac1e2014-05-06 15:23:49 -04003731 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003732 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003733 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
John Bauman89401822014-05-06 15:04:28 -04003734
Logan Chien813d5032018-08-31 17:19:45 +08003735 return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
John Bauman89401822014-05-06 15:04:28 -04003736 }
3737
John Bauman19bac1e2014-05-06 15:23:49 -04003738 RValue<Float4> floorps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003739 {
3740 return roundps(val, 1);
3741 }
3742
John Bauman19bac1e2014-05-06 15:23:49 -04003743 RValue<Float4> ceilps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003744 {
3745 return roundps(val, 2);
3746 }
3747
Alexis Hetu0f448072016-03-18 10:56:08 -04003748 RValue<Int4> pabsd(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04003749 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003750 return RValue<Int4>(V(lowerPABS(V(x.value))));
John Bauman89401822014-05-06 15:04:28 -04003751 }
3752
John Bauman19bac1e2014-05-06 15:23:49 -04003753 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003754 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003755 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
John Bauman89401822014-05-06 15:04:28 -04003756
Logan Chien813d5032018-08-31 17:19:45 +08003757 return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003758 }
John Bauman66b8ab22014-05-06 15:57:45 -04003759
John Bauman19bac1e2014-05-06 15:23:49 -04003760 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003761 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003762 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
John Bauman89401822014-05-06 15:04:28 -04003763
Logan Chien813d5032018-08-31 17:19:45 +08003764 return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003765 }
3766
John Bauman19bac1e2014-05-06 15:23:49 -04003767 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003768 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003769 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
John Bauman89401822014-05-06 15:04:28 -04003770
Logan Chien813d5032018-08-31 17:19:45 +08003771 return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003772 }
John Bauman66b8ab22014-05-06 15:57:45 -04003773
John Bauman19bac1e2014-05-06 15:23:49 -04003774 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003775 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003776 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
John Bauman89401822014-05-06 15:04:28 -04003777
Logan Chien813d5032018-08-31 17:19:45 +08003778 return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003779 }
3780
John Bauman19bac1e2014-05-06 15:23:49 -04003781 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003782 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003783 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
John Bauman89401822014-05-06 15:04:28 -04003784
Logan Chien813d5032018-08-31 17:19:45 +08003785 return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003786 }
John Bauman66b8ab22014-05-06 15:57:45 -04003787
John Bauman19bac1e2014-05-06 15:23:49 -04003788 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003789 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003790 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
John Bauman89401822014-05-06 15:04:28 -04003791
Logan Chien813d5032018-08-31 17:19:45 +08003792 return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003793 }
John Bauman66b8ab22014-05-06 15:57:45 -04003794
John Bauman19bac1e2014-05-06 15:23:49 -04003795 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003796 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003797 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
John Bauman89401822014-05-06 15:04:28 -04003798
Logan Chien813d5032018-08-31 17:19:45 +08003799 return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003800 }
John Bauman66b8ab22014-05-06 15:57:45 -04003801
John Bauman19bac1e2014-05-06 15:23:49 -04003802 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003803 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003804 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
John Bauman89401822014-05-06 15:04:28 -04003805
Logan Chien813d5032018-08-31 17:19:45 +08003806 return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
John Bauman19bac1e2014-05-06 15:23:49 -04003807 }
3808
3809 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003810 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003811 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
John Bauman89401822014-05-06 15:04:28 -04003812 }
3813
John Bauman19bac1e2014-05-06 15:23:49 -04003814 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003815 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003816 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman89401822014-05-06 15:04:28 -04003817 }
3818
John Bauman19bac1e2014-05-06 15:23:49 -04003819 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003820 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003821 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman89401822014-05-06 15:04:28 -04003822 }
3823
John Bauman19bac1e2014-05-06 15:23:49 -04003824 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003825 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003826 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003827 }
3828
John Bauman19bac1e2014-05-06 15:23:49 -04003829 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003830 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003831 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003832 }
3833
John Bauman19bac1e2014-05-06 15:23:49 -04003834 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003835 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003836 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003837 }
3838
John Bauman19bac1e2014-05-06 15:23:49 -04003839 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003840 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003841 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003842 }
3843
John Bauman19bac1e2014-05-06 15:23:49 -04003844 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
John Bauman89401822014-05-06 15:04:28 -04003845 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003846 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003847
Logan Chien813d5032018-08-31 17:19:45 +08003848 return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003849 }
3850
John Bauman19bac1e2014-05-06 15:23:49 -04003851 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003852 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003853 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003854
Logan Chien813d5032018-08-31 17:19:45 +08003855 return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003856 }
3857
John Bauman19bac1e2014-05-06 15:23:49 -04003858 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003859 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003860 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
John Bauman89401822014-05-06 15:04:28 -04003861
Logan Chien813d5032018-08-31 17:19:45 +08003862 return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003863 }
3864
Nicolas Capens33438a62017-09-27 11:47:35 -04003865 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003866 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003867 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
John Bauman89401822014-05-06 15:04:28 -04003868
Logan Chien813d5032018-08-31 17:19:45 +08003869 return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003870 }
3871
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003872 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003873 {
3874 if(CPUID::supportsSSE4_1())
3875 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003876 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
John Bauman66b8ab22014-05-06 15:57:45 -04003877
Logan Chien813d5032018-08-31 17:19:45 +08003878 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003879 }
3880 else
3881 {
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003882 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
3883 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
3884
3885 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
John Bauman89401822014-05-06 15:04:28 -04003886 }
3887 }
3888
John Bauman19bac1e2014-05-06 15:23:49 -04003889 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003890 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003891 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003892
Logan Chien813d5032018-08-31 17:19:45 +08003893 return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003894 }
3895
John Bauman19bac1e2014-05-06 15:23:49 -04003896 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003897 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003898 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003899
Logan Chien813d5032018-08-31 17:19:45 +08003900 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003901 }
3902
John Bauman19bac1e2014-05-06 15:23:49 -04003903 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003904 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003905 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003906
Logan Chien813d5032018-08-31 17:19:45 +08003907 return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003908 }
3909
John Bauman19bac1e2014-05-06 15:23:49 -04003910 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003911 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003912 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003913
Logan Chien813d5032018-08-31 17:19:45 +08003914 return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003915 }
3916
John Bauman19bac1e2014-05-06 15:23:49 -04003917 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003918 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003919 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003920
Logan Chien813d5032018-08-31 17:19:45 +08003921 return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003922 }
3923
John Bauman19bac1e2014-05-06 15:23:49 -04003924 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003925 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003926 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003927
Logan Chien813d5032018-08-31 17:19:45 +08003928 return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003929 }
3930
John Bauman19bac1e2014-05-06 15:23:49 -04003931 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003932 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003933 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003934
Logan Chien813d5032018-08-31 17:19:45 +08003935 return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003936 }
3937
John Bauman19bac1e2014-05-06 15:23:49 -04003938 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003939 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003940 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003941
Logan Chien813d5032018-08-31 17:19:45 +08003942 return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003943 }
3944
John Bauman19bac1e2014-05-06 15:23:49 -04003945 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003946 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003947 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003948
Logan Chien813d5032018-08-31 17:19:45 +08003949 return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003950 }
3951
John Bauman19bac1e2014-05-06 15:23:49 -04003952 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003953 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003954 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003955
Logan Chien813d5032018-08-31 17:19:45 +08003956 return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003957 }
3958
John Bauman19bac1e2014-05-06 15:23:49 -04003959 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003960 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003961 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003962
Logan Chien813d5032018-08-31 17:19:45 +08003963 return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003964 }
3965
John Bauman19bac1e2014-05-06 15:23:49 -04003966 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003967 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003968 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003969
Logan Chien813d5032018-08-31 17:19:45 +08003970 return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003971 }
3972
John Bauman19bac1e2014-05-06 15:23:49 -04003973 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
3974 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003975 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003976 }
3977
3978 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
3979 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003980 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003981 }
3982
3983 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
3984 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003985 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003986 }
3987
3988 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
3989 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003990 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
John Bauman19bac1e2014-05-06 15:23:49 -04003991 }
3992
3993 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003994 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003995 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04003996
Logan Chien813d5032018-08-31 17:19:45 +08003997 return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003998 }
3999
John Bauman19bac1e2014-05-06 15:23:49 -04004000 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004001 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004002 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004003
Logan Chien813d5032018-08-31 17:19:45 +08004004 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004005 }
4006
John Bauman19bac1e2014-05-06 15:23:49 -04004007 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004008 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004009 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004010
Logan Chien813d5032018-08-31 17:19:45 +08004011 return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004012 }
4013
John Bauman19bac1e2014-05-06 15:23:49 -04004014 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004015 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004016 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04004017
Logan Chien813d5032018-08-31 17:19:45 +08004018 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004019 }
4020
John Bauman19bac1e2014-05-06 15:23:49 -04004021 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04004022 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004023 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004024
Logan Chien813d5032018-08-31 17:19:45 +08004025 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004026 }
4027
John Bauman19bac1e2014-05-06 15:23:49 -04004028 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004029 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004030 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004031
Logan Chien813d5032018-08-31 17:19:45 +08004032 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004033 }
4034
John Bauman19bac1e2014-05-06 15:23:49 -04004035 RValue<Int> movmskps(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04004036 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004037 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
John Bauman89401822014-05-06 15:04:28 -04004038
Logan Chien813d5032018-08-31 17:19:45 +08004039 return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
John Bauman89401822014-05-06 15:04:28 -04004040 }
4041
John Bauman19bac1e2014-05-06 15:23:49 -04004042 RValue<Int> pmovmskb(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04004043 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004044 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
John Bauman89401822014-05-06 15:04:28 -04004045
Logan Chien813d5032018-08-31 17:19:45 +08004046 return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
John Bauman89401822014-05-06 15:04:28 -04004047 }
4048
Nicolas Capens01a97962017-07-28 17:30:51 -04004049 RValue<Int4> pmovzxbd(RValue<Byte16> x)
John Bauman89401822014-05-06 15:04:28 -04004050 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004051 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004052 }
4053
Nicolas Capens01a97962017-07-28 17:30:51 -04004054 RValue<Int4> pmovsxbd(RValue<SByte16> x)
John Bauman89401822014-05-06 15:04:28 -04004055 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004056 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004057 }
4058
Nicolas Capens01a97962017-07-28 17:30:51 -04004059 RValue<Int4> pmovzxwd(RValue<UShort8> x)
John Bauman89401822014-05-06 15:04:28 -04004060 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004061 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004062 }
4063
Nicolas Capens01a97962017-07-28 17:30:51 -04004064 RValue<Int4> pmovsxwd(RValue<Short8> x)
John Bauman89401822014-05-06 15:04:28 -04004065 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004066 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004067 }
4068 }
Logan Chiene3191012018-08-24 22:01:50 +08004069#endif // defined(__i386__) || defined(__x86_64__)
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004070
Ben Clayton60a3d6f2019-02-26 17:24:46 +00004071#ifdef ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004072 // extractAll returns a vector containing the extracted n scalar value of
4073 // the vector vec.
4074 static std::vector<Value*> extractAll(Value* vec, int n)
4075 {
4076 std::vector<Value*> elements;
4077 elements.reserve(n);
4078 for (int i = 0; i < n; i++)
4079 {
4080 auto el = V(::builder->CreateExtractElement(V(vec), i));
4081 elements.push_back(el);
4082 }
4083 return elements;
4084 }
4085
4086 // toDouble returns all the float values in vals extended to doubles.
4087 static std::vector<Value*> toDouble(const std::vector<Value*>& vals)
4088 {
4089 auto doubleTy = ::llvm::Type::getDoubleTy(*::context);
4090 std::vector<Value*> elements;
4091 elements.reserve(vals.size());
4092 for (auto v : vals)
4093 {
4094 elements.push_back(V(::builder->CreateFPExt(V(v), doubleTy)));
4095 }
4096 return elements;
4097 }
4098
4099 std::vector<Value*> PrintValue::Ty<Byte4>::val(const RValue<Byte4>& v) { return extractAll(v.value, 4); }
4100 std::vector<Value*> PrintValue::Ty<Int4>::val(const RValue<Int4>& v) { return extractAll(v.value, 4); }
4101 std::vector<Value*> PrintValue::Ty<UInt4>::val(const RValue<UInt4>& v) { return extractAll(v.value, 4); }
4102 std::vector<Value*> PrintValue::Ty<Short4>::val(const RValue<Short4>& v) { return extractAll(v.value, 4); }
4103 std::vector<Value*> PrintValue::Ty<UShort4>::val(const RValue<UShort4>& v) { return extractAll(v.value, 4); }
4104 std::vector<Value*> PrintValue::Ty<Float>::val(const RValue<Float>& v) { return toDouble({v.value}); }
4105 std::vector<Value*> PrintValue::Ty<Float4>::val(const RValue<Float4>& v) { return toDouble(extractAll(v.value, 4)); }
Ben Claytonbc0cbb92019-05-15 17:12:57 +01004106 std::vector<Value*> PrintValue::Ty<const char*>::val(const char* v) { return {V(::builder->CreateGlobalStringPtr(v))}; }
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004107
4108 void Printv(const char* function, const char* file, int line, const char* fmt, std::initializer_list<PrintValue> args)
4109 {
4110 // LLVM types used below.
4111 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
4112 auto intTy = ::llvm::Type::getInt64Ty(*::context); // TODO: Natural int width.
4113 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
4114 auto funcTy = ::llvm::FunctionType::get(i32Ty, {i8PtrTy}, true);
4115
4116 auto func = ::module->getOrInsertFunction("printf", funcTy);
4117
4118 // Build the printf format message string.
4119 std::string str;
4120 if (file != nullptr) { str += (line > 0) ? "%s:%d " : "%s "; }
4121 if (function != nullptr) { str += "%s "; }
4122 str += fmt;
4123
4124 // Perform subsitution on all '{n}' bracketed indices in the format
4125 // message.
4126 int i = 0;
4127 for (const PrintValue& arg : args)
4128 {
4129 str = replace(str, "{" + std::to_string(i++) + "}", arg.format);
4130 }
4131
4132 ::llvm::SmallVector<::llvm::Value*, 8> vals;
4133
4134 // The format message is always the first argument.
4135 vals.push_back(::builder->CreateGlobalStringPtr(str));
4136
4137 // Add optional file, line and function info if provided.
4138 if (file != nullptr)
4139 {
4140 vals.push_back(::builder->CreateGlobalStringPtr(file));
4141 if (line > 0)
4142 {
4143 vals.push_back(::llvm::ConstantInt::get(intTy, line));
4144 }
4145 }
4146 if (function != nullptr)
4147 {
4148 vals.push_back(::builder->CreateGlobalStringPtr(function));
4149 }
4150
4151 // Add all format arguments.
4152 for (const PrintValue& arg : args)
4153 {
4154 for (auto val : arg.values)
4155 {
4156 vals.push_back(V(val));
4157 }
4158 }
4159
4160 ::builder->CreateCall(func, vals);
4161 }
4162#endif // ENABLE_RR_PRINT
4163
Ben Claytonac07ed82019-03-26 14:17:41 +00004164 void Break()
4165 {
4166 auto trap = ::llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::trap);
4167 builder->CreateCall(trap);
4168 }
4169
4170 void Nop()
4171 {
4172 auto voidTy = ::llvm::Type::getVoidTy(*context);
4173 auto funcTy = ::llvm::FunctionType::get(voidTy, {}, false);
4174 auto func = ::module->getOrInsertFunction("nop", funcTy);
4175 builder->CreateCall(func);
4176 }
4177
// Forwards to DebugInfo::EmitLocation() when debug info is being generated.
// Does nothing when ENABLE_RR_DEBUG_INFO is not defined or there is no active
// debugInfo instance.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (debugInfo == nullptr)
	{
		return;
	}

	debugInfo->EmitLocation();
#endif // ENABLE_RR_DEBUG_INFO
}
4187
4188 void EmitDebugVariable(Value* value)
4189 {
4190#ifdef ENABLE_RR_DEBUG_INFO
4191 if (debugInfo != nullptr)
4192 {
4193 debugInfo->EmitVariable(value);
4194 }
4195#endif // ENABLE_RR_DEBUG_INFO
4196 }
4197
// Forwards to DebugInfo::Flush() when debug info is being generated. Does
// nothing when ENABLE_RR_DEBUG_INFO is not defined or there is no active
// debugInfo instance.
void FlushDebug()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (debugInfo == nullptr)
	{
		return;
	}

	debugInfo->Flush();
#endif // ENABLE_RR_DEBUG_INFO
}
4207
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004208} // namespace rr
4209
4210// ------------------------------ Coroutines ------------------------------
4211
4212namespace {
4213
4214 struct CoroutineState
4215 {
4216 llvm::Function *await = nullptr;
4217 llvm::Function *destroy = nullptr;
4218 llvm::Value *handle = nullptr;
4219 llvm::Value *id = nullptr;
4220 llvm::Value *promise = nullptr;
4221 llvm::BasicBlock *suspendBlock = nullptr;
4222 llvm::BasicBlock *endBlock = nullptr;
4223 llvm::BasicBlock *destroyBlock = nullptr;
4224 };
4225 CoroutineState coroutine;
4226
4227 // Magic values retuned by llvm.coro.suspend.
4228 // See: https://llvm.org/docs/Coroutines.html#llvm-coro-suspend-intrinsic
4229 enum SuspendAction
4230 {
4231 SuspendActionSuspend = -1,
4232 SuspendActionResume = 0,
4233 SuspendActionDestroy = 1
4234 };
4235
4236} // anonymous namespace
4237
4238namespace rr {
4239
4240void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params)
4241{
4242 // Types
4243 auto voidTy = ::llvm::Type::getVoidTy(*::context);
4244 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
4245 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
4246 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
4247 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
4248 auto promiseTy = T(YieldType);
4249 auto promisePtrTy = promiseTy->getPointerTo();
4250 auto handleTy = i8PtrTy;
4251 auto boolTy = i1Ty;
4252
4253 // LLVM intrinsics
4254 auto coro_id = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_id);
4255 auto coro_size = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_size, {i32Ty});
4256 auto coro_begin = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_begin);
4257 auto coro_resume = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_resume);
4258 auto coro_end = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_end);
4259 auto coro_free = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_free);
4260 auto coro_destroy = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_destroy);
4261 auto coro_promise = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_promise);
4262 auto coro_done = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_done);
4263 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_suspend);
4264
4265 auto allocFrameTy = ::llvm::FunctionType::get(i8PtrTy, {i32Ty}, false);
4266 auto allocFrame = ::module->getOrInsertFunction("coroutine_alloc_frame", allocFrameTy);
4267 auto freeFrameTy = ::llvm::FunctionType::get(voidTy, {i8PtrTy}, false);
4268 auto freeFrame = ::module->getOrInsertFunction("coroutine_free_frame", freeFrameTy);
4269
4270 // Build the coroutine_await() function:
4271 //
4272 // bool coroutine_await(CoroutineHandle* handle, YieldType* out)
4273 // {
4274 // if (llvm.coro.done(handle))
4275 // {
4276 // return false;
4277 // }
4278 // else
4279 // {
4280 // *value = (T*)llvm.coro.promise(handle);
4281 // llvm.coro.resume(handle);
4282 // return true;
4283 // }
4284 // }
4285 //
4286 llvm::FunctionType *coroutineAwaitTy = llvm::FunctionType::get(boolTy, {handleTy, promisePtrTy}, false);
4287 ::coroutine.await = llvm::Function::Create(coroutineAwaitTy, llvm::GlobalValue::InternalLinkage, "coroutine_await", ::module);
4288 ::coroutine.await->setCallingConv(llvm::CallingConv::C);
4289 {
4290 auto args = ::coroutine.await->arg_begin();
4291 auto handle = args++;
4292 auto outPtr = args++;
4293 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "co_await", ::coroutine.await));
4294 auto doneBlock = llvm::BasicBlock::Create(*::context, "done", ::coroutine.await);
4295 auto resumeBlock = llvm::BasicBlock::Create(*::context, "resume", ::coroutine.await);
4296
4297 auto done = ::builder->CreateCall(coro_done, {handle}, "done");
4298 ::builder->CreateCondBr(done, doneBlock, resumeBlock);
4299
4300 ::builder->SetInsertPoint(doneBlock);
4301 ::builder->CreateRet(::llvm::ConstantInt::getFalse(i1Ty));
4302
4303 ::builder->SetInsertPoint(resumeBlock);
4304 auto promiseAlignment = ::llvm::ConstantInt::get(i32Ty, 4); // TODO: Get correct alignment.
4305 auto promisePtr = ::builder->CreateCall(coro_promise, {handle, promiseAlignment, ::llvm::ConstantInt::get(i1Ty, 0)});
4306 auto promise = ::builder->CreateLoad(::builder->CreatePointerCast(promisePtr, promisePtrTy));
4307 ::builder->CreateStore(promise, outPtr);
4308 ::builder->CreateCall(coro_resume, {handle});
4309 ::builder->CreateRet(::llvm::ConstantInt::getTrue(i1Ty));
4310 }
4311
4312 // Build the coroutine_destroy() function:
4313 //
4314 // void coroutine_destroy(CoroutineHandle* handle)
4315 // {
4316 // llvm.coro.destroy(handle);
4317 // }
4318 //
4319 llvm::FunctionType *coroutineDestroyTy = llvm::FunctionType::get(voidTy, handleTy, false);
4320 ::coroutine.destroy = llvm::Function::Create(coroutineDestroyTy, llvm::GlobalValue::InternalLinkage, "coroutine_destroy", ::module);
4321 ::coroutine.destroy->setCallingConv(llvm::CallingConv::C);
4322 {
4323 auto handle = ::coroutine.destroy->arg_begin();
4324 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::coroutine.destroy));
4325 ::builder->CreateCall(coro_destroy, {handle});
4326 ::builder->CreateRetVoid();
4327 }
4328
4329 // Begin building the main coroutine_begin() function.
4330 //
4331 // CoroutineHandle* coroutine_begin(<Arguments>)
4332 // {
4333 // YieldType promise;
4334 // auto id = llvm.coro.id(0, &promise, nullptr, nullptr);
4335 // void* frame = coroutine_alloc_frame(llvm.coro.size.i32());
4336 // CoroutineHandle *handle = llvm.coro.begin(id, frame);
4337 //
4338 // ... <REACTOR CODE> ...
4339 //
4340 // end:
4341 // SuspendAction action = llvm.coro.suspend(none, true /* final */); // <-- RESUME POINT
4342 // switch (action)
4343 // {
4344 // case SuspendActionResume:
4345 // UNREACHABLE(); // Illegal to resume after final suspend.
4346 // case SuspendActionDestroy:
4347 // goto destroy;
4348 // default: // (SuspendActionSuspend)
4349 // goto suspend;
4350 // }
4351 //
4352 // destroy:
4353 // coroutine_free_frame(llvm.coro.free(id, handle));
4354 // goto suspend;
4355 //
4356 // suspend:
4357 // llvm.coro.end(handle, false);
4358 // return handle;
4359 // }
4360 //
4361 llvm::FunctionType *functionType = llvm::FunctionType::get(handleTy, T(Params), false);
4362 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "coroutine_begin", ::module);
4363 ::function->setCallingConv(llvm::CallingConv::C);
4364
4365#ifdef ENABLE_RR_DEBUG_INFO
4366 ::debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(::builder, ::context, ::module, ::function));
4367#endif // ENABLE_RR_DEBUG_INFO
4368
4369 auto entryBlock = llvm::BasicBlock::Create(*::context, "coroutine", ::function);
4370 ::coroutine.suspendBlock = llvm::BasicBlock::Create(*::context, "suspend", ::function);
4371 ::coroutine.endBlock = llvm::BasicBlock::Create(*::context, "end", ::function);
4372 ::coroutine.destroyBlock = llvm::BasicBlock::Create(*::context, "destroy", ::function);
4373
4374 ::builder->SetInsertPoint(entryBlock);
4375 Variable::materializeAll();
4376 ::coroutine.promise = ::builder->CreateAlloca(T(YieldType), nullptr, "promise");
4377 ::coroutine.id = ::builder->CreateCall(coro_id, {
4378 ::llvm::ConstantInt::get(i32Ty, 0),
4379 ::builder->CreatePointerCast(::coroutine.promise, i8PtrTy),
4380 ::llvm::ConstantPointerNull::get(i8PtrTy),
4381 ::llvm::ConstantPointerNull::get(i8PtrTy),
4382 });
4383 auto size = ::builder->CreateCall(coro_size, {});
4384 auto frame = ::builder->CreateCall(allocFrame, {size});
4385 ::coroutine.handle = ::builder->CreateCall(coro_begin, {::coroutine.id, frame});
4386
4387 // Build the suspend block
4388 ::builder->SetInsertPoint(::coroutine.suspendBlock);
4389 ::builder->CreateCall(coro_end, {::coroutine.handle, ::llvm::ConstantInt::get(i1Ty, 0)});
4390 ::builder->CreateRet(::coroutine.handle);
4391
4392 // Build the end block
4393 ::builder->SetInsertPoint(::coroutine.endBlock);
4394 auto action = ::builder->CreateCall(coro_suspend, {
4395 ::llvm::ConstantTokenNone::get(*::context),
4396 ::llvm::ConstantInt::get(i1Ty, 1), // final: true
4397 });
4398 auto switch_ = ::builder->CreateSwitch(action, ::coroutine.suspendBlock, 3);
4399 // switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), trapBlock); // TODO: Trap attempting to resume after final suspend
4400 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), ::coroutine.destroyBlock);
4401
4402 // Build the destroy block
4403 ::builder->SetInsertPoint(::coroutine.destroyBlock);
4404 auto memory = ::builder->CreateCall(coro_free, {::coroutine.id, ::coroutine.handle});
4405 ::builder->CreateCall(freeFrame, {memory});
4406 ::builder->CreateBr(::coroutine.suspendBlock);
4407
4408 // Switch back to the entry block for reactor codegen.
4409 ::builder->SetInsertPoint(entryBlock);
4410
4411 #if defined(_WIN32)
4412 // FIXME(capn):
4413 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
4414 // having a trap which allows the OS to grow the stack. For functions with a stack frame
4415 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
4416 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
4417 // the stack and ensure all pages have been committed. This is currently broken in LLVM
4418 // JIT, but we can prevent emitting the stack probe call:
4419 ::function->addFnAttr("stack-probe-size", "1048576");
4420 #endif
John Bauman89401822014-05-06 15:04:28 -04004421}
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004422
4423void Nucleus::yield(Value* val)
4424{
4425 ASSERT_MSG(::coroutine.id != nullptr, "yield() can only be called when building a Coroutine");
4426
4427 // promise = val;
4428 //
4429 // auto action = llvm.coro.suspend(none, false /* final */); // <-- RESUME POINT
4430 // switch (action)
4431 // {
4432 // case SuspendActionResume:
4433 // goto resume;
4434 // case SuspendActionDestroy:
4435 // goto destroy;
4436 // default: // (SuspendActionSuspend)
4437 // goto suspend;
4438 // }
4439 // resume:
4440 //
4441
4442 RR_DEBUG_INFO_UPDATE_LOC();
4443 Variable::materializeAll();
4444
4445 // Types
4446 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
4447 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
4448
4449 // Intrinsics
4450 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_suspend);
4451
4452 // Create a block to resume execution.
4453 auto resumeBlock = llvm::BasicBlock::Create(*::context, "resume", ::function);
4454
4455 // Store the promise (yield value)
4456 ::builder->CreateStore(V(val), ::coroutine.promise);
4457 auto action = ::builder->CreateCall(coro_suspend, {
4458 ::llvm::ConstantTokenNone::get(*::context),
4459 ::llvm::ConstantInt::get(i1Ty, 0), // final: true
4460 });
4461 auto switch_ = ::builder->CreateSwitch(action, ::coroutine.suspendBlock, 3);
4462 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), resumeBlock);
4463 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), ::coroutine.destroyBlock);
4464
4465 // Continue building in the resume block.
4466 ::builder->SetInsertPoint(resumeBlock);
4467}
4468
4469Routine* Nucleus::acquireCoroutine(const char *name, bool runOptimizations)
4470{
4471 ASSERT_MSG(::coroutine.id != nullptr, "acquireCoroutine() called without a call to createCoroutine()");
4472
4473 ::builder->CreateBr(::coroutine.endBlock);
4474
4475#ifdef ENABLE_RR_DEBUG_INFO
4476 if (debugInfo != nullptr)
4477 {
4478 debugInfo->Finalize();
4479 }
4480#endif // ENABLE_RR_DEBUG_INFO
4481
4482 if(false)
4483 {
4484 std::error_code error;
4485 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
4486 ::module->print(file, 0);
4487 }
4488
4489 // Run manadory coroutine transforms.
4490 llvm::legacy::PassManager pm;
4491 pm.add(llvm::createCoroEarlyPass());
4492 pm.add(llvm::createCoroSplitPass());
4493 pm.add(llvm::createCoroElidePass());
4494 pm.add(llvm::createBarrierNoopPass());
4495 pm.add(llvm::createCoroCleanupPass());
4496 pm.run(*::module);
4497
4498 if(runOptimizations)
4499 {
4500 optimize();
4501 }
4502
4503 if(false)
4504 {
4505 std::error_code error;
4506 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
4507 ::module->print(file, 0);
4508 }
4509
4510 llvm::Function *funcs[Nucleus::CoroutineEntryCount];
4511 funcs[Nucleus::CoroutineEntryBegin] = ::function;
4512 funcs[Nucleus::CoroutineEntryAwait] = ::coroutine.await;
4513 funcs[Nucleus::CoroutineEntryDestroy] = ::coroutine.destroy;
4514 Routine *routine = ::reactorJIT->acquireRoutine(funcs, Nucleus::CoroutineEntryCount);
4515
4516 ::coroutine = CoroutineState{};
4517
4518 return routine;
4519}
4520
4521} // namespace rr