blob: c4e522a75e15348bfd71261b31b64aec671ff16f [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
Nicolas Capenscb986762017-01-20 11:34:37 -050015#include "Reactor.hpp"
Ben Claytoneb50d252019-04-15 13:50:01 -040016#include "Debug.hpp"
Ben Claytonac07ed82019-03-26 14:17:41 +000017#include "LLVMReactor.hpp"
18#include "LLVMReactorDebugInfo.hpp"
John Bauman89401822014-05-06 15:04:28 -040019
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040020#include "x86.hpp"
21#include "CPUID.hpp"
22#include "Thread.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040023#include "ExecutableMemory.hpp"
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040024#include "MutexLock.hpp"
25
26#undef min
27#undef max
28
Ben Clayton09a7f452019-04-25 15:22:43 +010029#if defined(__clang__)
// LLVM has occurrences of the extra-semi warning in its headers, which will be
// treated as an error in SwiftShader targets.
32#pragma clang diagnostic push
33#pragma clang diagnostic ignored "-Wextra-semi"
34#endif // defined(__clang__)
35
Ben Clayton5875be52019-04-11 14:57:40 -040036#include "llvm/Analysis/LoopPass.h"
37#include "llvm/ExecutionEngine/ExecutionEngine.h"
38#include "llvm/ExecutionEngine/JITSymbol.h"
39#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
40#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
41#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
42#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
43#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
44#include "llvm/ExecutionEngine/SectionMemoryManager.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DataLayout.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalVariable.h"
Ben Clayton5875be52019-04-11 14:57:40 -040049#include "llvm/IR/Intrinsics.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010050#include "llvm/IR/IRBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040051#include "llvm/IR/LegacyPassManager.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010052#include "llvm/IR/LLVMContext.h"
Ben Clayton5875be52019-04-11 14:57:40 -040053#include "llvm/IR/Mangler.h"
54#include "llvm/IR/Module.h"
Ben Clayton4b944652019-05-02 10:56:19 +010055#include "llvm/IR/Verifier.h"
Ben Clayton5875be52019-04-11 14:57:40 -040056#include "llvm/Support/Error.h"
57#include "llvm/Support/TargetSelect.h"
58#include "llvm/Target/TargetOptions.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010059#include "llvm/Transforms/Coroutines.h"
Ben Clayton5875be52019-04-11 14:57:40 -040060#include "llvm/Transforms/InstCombine/InstCombine.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010061#include "llvm/Transforms/IPO.h"
62#include "llvm/Transforms/IPO/PassManagerBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040063#include "llvm/Transforms/Scalar.h"
64#include "llvm/Transforms/Scalar/GVN.h"
Ben Clayton20507fa2019-04-20 01:40:15 -040065
Ben Clayton09a7f452019-04-25 15:22:43 +010066#if defined(__clang__)
67#pragma clang diagnostic pop
68#endif // defined(__clang__)
69
Ben Clayton5875be52019-04-11 14:57:40 -040070#include "LLVMRoutine.hpp"
John Bauman89401822014-05-06 15:04:28 -040071
Ben Clayton5875be52019-04-11 14:57:40 -040072#define ARGS(...) {__VA_ARGS__}
73#define CreateCall2 CreateCall
74#define CreateCall3 CreateCall
Logan Chien0eedc8c2018-08-21 09:34:28 +080075
Ben Clayton5875be52019-04-11 14:57:40 -040076#include <unordered_map>
Logan Chien0eedc8c2018-08-21 09:34:28 +080077
John Bauman89401822014-05-06 15:04:28 -040078#include <fstream>
Ben Claytoncee3dff2019-05-22 12:01:22 +010079#include <iostream>
80#include <mutex>
Ben Clayton1bc7ee92019-02-14 18:43:22 +000081#include <numeric>
82#include <thread>
John Bauman89401822014-05-06 15:04:28 -040083
Nicolas Capens47dc8672017-04-25 12:54:39 -040084#if defined(__i386__) || defined(__x86_64__)
85#include <xmmintrin.h>
86#endif
87
Logan Chien40a60052018-09-26 19:03:53 +080088#include <math.h>
89
Nicolas Capenscb122582014-05-06 23:34:44 -040090#if defined(__x86_64__) && defined(_WIN32)
John Bauman66b8ab22014-05-06 15:57:45 -040091extern "C" void X86CompilationCallback()
92{
Ben Claytoneb50d252019-04-15 13:50:01 -040093 UNIMPLEMENTED("X86CompilationCallback");
John Bauman66b8ab22014-05-06 15:57:45 -040094}
95#endif
96
// Forward declaration: lets code that appears before the class definition
// hold a pointer to the JIT.
namespace rr
{
    class LLVMReactorJIT;
}
101
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400102namespace
103{
Nicolas Capens48461502018-08-06 14:20:45 -0400104 rr::LLVMReactorJIT *reactorJIT = nullptr;
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400105 llvm::IRBuilder<> *builder = nullptr;
106 llvm::LLVMContext *context = nullptr;
107 llvm::Module *module = nullptr;
108 llvm::Function *function = nullptr;
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400109
Ben Claytonac07ed82019-03-26 14:17:41 +0000110#ifdef ENABLE_RR_DEBUG_INFO
111 std::unique_ptr<rr::DebugInfo> debugInfo;
112#endif
113
Nicolas Capensc07dc4b2018-08-06 14:20:45 -0400114 rr::MutexLock codegenMutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800115
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000116#ifdef ENABLE_RR_PRINT
// Returns a copy of `str` in which every non-overlapping occurrence of
// `substr` is replaced by `replacement`, scanning from left to right. Text
// inserted by a replacement is never rescanned, so a replacement that itself
// contains `substr` is safe. An empty `substr` matches nothing and `str` is
// returned unchanged.
std::string replace(std::string str, const std::string& substr, const std::string& replacement)
{
    if(substr.empty())
    {
        // find("") succeeds at every position, so the loop below would never
        // terminate for an empty search string.
        return str;
    }

    size_t pos = 0;
    while((pos = str.find(substr, pos)) != std::string::npos)
    {
        str.replace(pos, substr.length(), replacement);
        pos += replacement.length();   // Continue after the text just inserted.
    }
    return str;
}
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000126#endif // ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000127
Logan Chien0eedc8c2018-08-21 09:34:28 +0800128 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
129 {
130 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
131
132 llvm::VectorType *extTy =
133 llvm::VectorType::getExtendedElementVectorType(ty);
134 x = ::builder->CreateZExt(x, extTy);
135 y = ::builder->CreateZExt(y, extTy);
136
137 // (x + y + 1) >> 1
138 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
139 llvm::Value *res = ::builder->CreateAdd(x, y);
140 res = ::builder->CreateAdd(res, one);
141 res = ::builder->CreateLShr(res, one);
142 return ::builder->CreateTrunc(res, ty);
143 }
144
145 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800146 llvm::ICmpInst::Predicate pred)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800147 {
148 return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
149 }
150
151 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
Logan Chienb5ce5092018-09-27 18:45:58 +0800152 llvm::Value *y, llvm::Type *dstTy)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800153 {
154 return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
155 }
156
Logan Chiene3191012018-08-24 22:01:50 +0800157#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800158 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
159 {
160 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
161 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
162
163 llvm::Value *undef = llvm::UndefValue::get(srcTy);
164 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
165 std::iota(mask.begin(), mask.end(), 0);
166 llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
167
168 return sext ? ::builder->CreateSExt(v, dstTy)
Logan Chienb5ce5092018-09-27 18:45:58 +0800169 : ::builder->CreateZExt(v, dstTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800170 }
171
172 llvm::Value *lowerPABS(llvm::Value *v)
173 {
174 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
175 llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
176 llvm::Value *neg = ::builder->CreateNeg(v);
177 return ::builder->CreateSelect(cmp, v, neg);
178 }
179#endif // defined(__i386__) || defined(__x86_64__)
Logan Chiene3191012018-08-24 22:01:50 +0800180
181#if !defined(__i386__) && !defined(__x86_64__)
182 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800183 llvm::FCmpInst::Predicate pred)
Logan Chiene3191012018-08-24 22:01:50 +0800184 {
185 return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
186 }
187
Logan Chien83fc07a2018-09-26 22:14:00 +0800188 llvm::Value *lowerRound(llvm::Value *x)
189 {
190 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
191 ::module, llvm::Intrinsic::nearbyint, {x->getType()});
192 return ::builder->CreateCall(nearbyint, ARGS(x));
193 }
194
Logan Chien2faa24a2018-09-26 19:59:32 +0800195 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
196 {
197 return ::builder->CreateFPToSI(lowerRound(x), ty);
198 }
199
Logan Chien40a60052018-09-26 19:03:53 +0800200 llvm::Value *lowerFloor(llvm::Value *x)
201 {
202 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
203 ::module, llvm::Intrinsic::floor, {x->getType()});
204 return ::builder->CreateCall(floor, ARGS(x));
205 }
206
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800207 llvm::Value *lowerTrunc(llvm::Value *x)
208 {
209 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
210 ::module, llvm::Intrinsic::trunc, {x->getType()});
211 return ::builder->CreateCall(trunc, ARGS(x));
212 }
213
Logan Chiene3191012018-08-24 22:01:50 +0800214 // Packed add/sub saturatation
Logan Chien28794cf2018-09-26 18:58:03 +0800215 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
Logan Chiene3191012018-08-24 22:01:50 +0800216 {
Logan Chien28794cf2018-09-26 18:58:03 +0800217 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
218 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
219
220 unsigned numBits = ty->getScalarSizeInBits();
221
222 llvm::Value *max, *min, *extX, *extY;
223 if (isSigned)
224 {
225 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
226 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
227 extX = ::builder->CreateSExt(x, extTy);
228 extY = ::builder->CreateSExt(y, extTy);
229 }
230 else
231 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400232 ASSERT_MSG(numBits <= 64, "numBits: %d", int(numBits));
Logan Chien28794cf2018-09-26 18:58:03 +0800233 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
234 max = llvm::ConstantInt::get(extTy, maxVal, false);
235 min = llvm::ConstantInt::get(extTy, 0, false);
236 extX = ::builder->CreateZExt(x, extTy);
237 extY = ::builder->CreateZExt(y, extTy);
238 }
239
240 llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
241 : ::builder->CreateSub(extX, extY);
242
243 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
244 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
245
246 return ::builder->CreateTrunc(res, ty);
Logan Chiene3191012018-08-24 22:01:50 +0800247 }
248
249 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
250 {
Logan Chien28794cf2018-09-26 18:58:03 +0800251 return lowerPSAT(x, y, true, false);
Logan Chiene3191012018-08-24 22:01:50 +0800252 }
253
254 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
255 {
Logan Chien28794cf2018-09-26 18:58:03 +0800256 return lowerPSAT(x, y, true, true);
Logan Chiene3191012018-08-24 22:01:50 +0800257 }
258
259 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
260 {
Logan Chien28794cf2018-09-26 18:58:03 +0800261 return lowerPSAT(x, y, false, false);
Logan Chiene3191012018-08-24 22:01:50 +0800262 }
263
264 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
265 {
Logan Chien28794cf2018-09-26 18:58:03 +0800266 return lowerPSAT(x, y, false, true);
Logan Chiene3191012018-08-24 22:01:50 +0800267 }
268
269 llvm::Value *lowerSQRT(llvm::Value *x)
270 {
271 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
272 ::module, llvm::Intrinsic::sqrt, {x->getType()});
273 return ::builder->CreateCall(sqrt, ARGS(x));
274 }
275
276 llvm::Value *lowerRCP(llvm::Value *x)
277 {
278 llvm::Type *ty = x->getType();
279 llvm::Constant *one;
280 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
281 {
282 one = llvm::ConstantVector::getSplat(
283 vectorTy->getNumElements(),
284 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
285 }
286 else
287 {
288 one = llvm::ConstantFP::get(ty, 1);
289 }
290 return ::builder->CreateFDiv(one, x);
291 }
292
293 llvm::Value *lowerRSQRT(llvm::Value *x)
294 {
295 return lowerRCP(lowerSQRT(x));
296 }
297
298 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
299 {
300 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
301 llvm::Value *y = llvm::ConstantVector::getSplat(
302 ty->getNumElements(),
303 llvm::ConstantInt::get(ty->getElementType(), scalarY));
304 return ::builder->CreateShl(x, y);
305 }
306
307 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
308 {
309 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
310 llvm::Value *y = llvm::ConstantVector::getSplat(
311 ty->getNumElements(),
312 llvm::ConstantInt::get(ty->getElementType(), scalarY));
313 return ::builder->CreateAShr(x, y);
314 }
315
316 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
317 {
318 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
319 llvm::Value *y = llvm::ConstantVector::getSplat(
320 ty->getNumElements(),
321 llvm::ConstantInt::get(ty->getElementType(), scalarY));
322 return ::builder->CreateLShr(x, y);
323 }
324
325 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
326 {
327 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
328 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
329
330 llvm::Value *extX = ::builder->CreateSExt(x, extTy);
331 llvm::Value *extY = ::builder->CreateSExt(y, extTy);
332 llvm::Value *mult = ::builder->CreateMul(extX, extY);
333
334 llvm::Value *undef = llvm::UndefValue::get(extTy);
335
336 llvm::SmallVector<uint32_t, 16> evenIdx;
337 llvm::SmallVector<uint32_t, 16> oddIdx;
338 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
339 {
340 evenIdx.push_back(i);
341 oddIdx.push_back(i + 1);
342 }
343
344 llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
345 llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
346 return ::builder->CreateAdd(lhs, rhs);
347 }
348
Logan Chiene3191012018-08-24 22:01:50 +0800349 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
350 {
351 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
352 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
353
354 llvm::IntegerType *dstElemTy =
355 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
356
357 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
Ben Claytoneb50d252019-04-15 13:50:01 -0400358 ASSERT_MSG(truncNumBits < 64, "shift 64 must be handled separately. truncNumBits: %d", int(truncNumBits));
Logan Chiene3191012018-08-24 22:01:50 +0800359 llvm::Constant *max, *min;
360 if (isSigned)
361 {
362 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
363 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
364 }
365 else
366 {
367 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
368 min = llvm::ConstantInt::get(srcTy, 0, false);
369 }
370
371 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
372 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
373 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
374 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
375
376 x = ::builder->CreateTrunc(x, dstTy);
377 y = ::builder->CreateTrunc(y, dstTy);
378
379 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
380 std::iota(index.begin(), index.end(), 0);
381
382 return ::builder->CreateShuffleVector(x, y, index);
383 }
384
385 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
386 {
387 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
388 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
389 llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
390
391 llvm::Value *ret = ::builder->CreateZExt(
392 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
393 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
394 {
395 llvm::Value *elem = ::builder->CreateZExt(
396 ::builder->CreateExtractElement(cmp, i), retTy);
397 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
398 }
399 return ret;
400 }
401
402 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
403 {
404 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
405 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
406 llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
407
408 llvm::Value *ret = ::builder->CreateZExt(
409 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
410 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
411 {
412 llvm::Value *elem = ::builder->CreateZExt(
413 ::builder->CreateExtractElement(cmp, i), retTy);
414 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
415 }
416 return ret;
417 }
418#endif // !defined(__i386__) && !defined(__x86_64__)
Chris Forbese86b6dc2019-03-01 09:08:47 -0800419
420 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
421 {
422 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
423 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
424
425 llvm::Value *extX, *extY;
426 if (sext)
427 {
428 extX = ::builder->CreateSExt(x, extTy);
429 extY = ::builder->CreateSExt(y, extTy);
430 }
431 else
432 {
433 extX = ::builder->CreateZExt(x, extTy);
434 extY = ::builder->CreateZExt(y, extTy);
435 }
436
437 llvm::Value *mult = ::builder->CreateMul(extX, extY);
438
439 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
440 llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getBitWidth());
441 return ::builder->CreateTrunc(mulh, ty);
442 }
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400443}
444
Nicolas Capens48461502018-08-06 14:20:45 -0400445namespace rr
John Bauman89401822014-05-06 15:04:28 -0400446{
Ben Claytonc7904162019-04-17 17:35:48 -0400447 const Capabilities Caps =
448 {
449 true, // CallSupported
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100450 true, // CoroutinesSupported
Ben Claytonc7904162019-04-17 17:35:48 -0400451 };
452
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400453 static std::memory_order atomicOrdering(llvm::AtomicOrdering memoryOrder)
454 {
455 switch(memoryOrder)
456 {
457 case llvm::AtomicOrdering::Monotonic: return std::memory_order_relaxed; // https://llvm.org/docs/Atomics.html#monotonic
458 case llvm::AtomicOrdering::Acquire: return std::memory_order_acquire;
459 case llvm::AtomicOrdering::Release: return std::memory_order_release;
460 case llvm::AtomicOrdering::AcquireRelease: return std::memory_order_acq_rel;
461 case llvm::AtomicOrdering::SequentiallyConsistent: return std::memory_order_seq_cst;
462 default:
Ben Claytonfb280672019-04-25 11:16:15 +0100463 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400464 return std::memory_order_acq_rel;
465 }
466 }
467
468 static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
469 {
470 if(!atomic)
471 {
472 return llvm::AtomicOrdering::NotAtomic;
473 }
474
475 switch(memoryOrder)
476 {
477 case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic; // https://llvm.org/docs/Atomics.html#monotonic
478 case std::memory_order_consume: return llvm::AtomicOrdering::Acquire; // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
479 case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
480 case std::memory_order_release: return llvm::AtomicOrdering::Release;
481 case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
482 case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
483 default:
484 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
485 return llvm::AtomicOrdering::AcquireRelease;
486 }
487 }
488
489 template <typename T>
490 static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
491 {
492 *reinterpret_cast<T*>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), atomicOrdering(ordering));
493 }
494
495 template <typename T>
496 static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
497 {
498 std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), *reinterpret_cast<T*>(val), atomicOrdering(ordering));
499 }
500
Logan Chien40a60052018-09-26 19:03:53 +0800501 class ExternalFunctionSymbolResolver
502 {
503 private:
504 using FunctionMap = std::unordered_map<std::string, void *>;
505 FunctionMap func_;
506
507 public:
508 ExternalFunctionSymbolResolver()
509 {
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400510 struct Atomic
511 {
512 static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
513 {
514 switch (size)
515 {
516 case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
517 case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
518 case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
519 case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
520 default:
521 UNIMPLEMENTED("Atomic::load(size: %d)", int(size));
522 }
523 }
524 static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
525 {
526 switch (size)
527 {
528 case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
529 case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
530 case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
531 case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
532 default:
533 UNIMPLEMENTED("Atomic::store(size: %d)", int(size));
534 }
535 }
536 };
Ben Claytonac07ed82019-03-26 14:17:41 +0000537 struct F { static void nop() {} };
Ben Claytonac07ed82019-03-26 14:17:41 +0000538
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400539 func_.emplace("nop", reinterpret_cast<void*>(F::nop));
Logan Chien40a60052018-09-26 19:03:53 +0800540 func_.emplace("floorf", reinterpret_cast<void*>(floorf));
Logan Chien83fc07a2018-09-26 22:14:00 +0800541 func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800542 func_.emplace("truncf", reinterpret_cast<void*>(truncf));
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000543 func_.emplace("printf", reinterpret_cast<void*>(printf));
544 func_.emplace("puts", reinterpret_cast<void*>(puts));
Chris Forbes1a4c7122019-03-15 14:50:47 -0700545 func_.emplace("fmodf", reinterpret_cast<void*>(fmodf));
Ben Claytona2c8b772019-04-09 13:42:36 -0400546 func_.emplace("sinf", reinterpret_cast<void*>(sinf));
Ben Clayton1b6f8c72019-04-09 13:47:43 -0400547 func_.emplace("cosf", reinterpret_cast<void*>(cosf));
Ben Claytonf9350d72019-04-09 14:19:02 -0400548 func_.emplace("asinf", reinterpret_cast<void*>(asinf));
Ben Claytoneafae472019-04-09 14:22:38 -0400549 func_.emplace("acosf", reinterpret_cast<void*>(acosf));
Ben Clayton749b4e02019-04-09 14:27:43 -0400550 func_.emplace("atanf", reinterpret_cast<void*>(atanf));
Ben Claytond9636972019-04-09 15:09:54 -0400551 func_.emplace("sinhf", reinterpret_cast<void*>(sinhf));
Ben Clayton900ea2c2019-04-09 15:25:36 -0400552 func_.emplace("coshf", reinterpret_cast<void*>(coshf));
Ben Clayton3928bd92019-04-09 15:27:41 -0400553 func_.emplace("tanhf", reinterpret_cast<void*>(tanhf));
Ben Claytonf6d77ab2019-04-09 15:30:04 -0400554 func_.emplace("asinhf", reinterpret_cast<void*>(asinhf));
Ben Clayton28ebcb02019-04-09 15:33:38 -0400555 func_.emplace("acoshf", reinterpret_cast<void*>(acoshf));
Ben Claytonfa6a5392019-04-09 15:35:24 -0400556 func_.emplace("atanhf", reinterpret_cast<void*>(atanhf));
Ben Claytona520c3e2019-04-09 15:43:45 -0400557 func_.emplace("atan2f", reinterpret_cast<void*>(atan2f));
Ben Claytonbfe94f02019-04-09 15:52:12 -0400558 func_.emplace("powf", reinterpret_cast<void*>(powf));
Ben Clayton242f0022019-04-09 16:00:53 -0400559 func_.emplace("expf", reinterpret_cast<void*>(expf));
Ben Clayton2c1da722019-04-09 16:03:03 -0400560 func_.emplace("logf", reinterpret_cast<void*>(logf));
Ben Claytonf40b56c2019-04-09 16:06:55 -0400561 func_.emplace("exp2f", reinterpret_cast<void*>(exp2f));
Ben Claytone17acfe2019-04-09 16:09:13 -0400562 func_.emplace("log2f", reinterpret_cast<void*>(log2f));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400563 func_.emplace("atomic_load", reinterpret_cast<void*>(Atomic::load));
564 func_.emplace("atomic_store", reinterpret_cast<void*>(Atomic::store));
Ben Clayton14740062019-04-09 13:48:41 -0400565
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100566 // FIXME (b/119409619): use an allocator here so we can control all memory allocations
567 func_.emplace("coroutine_alloc_frame", reinterpret_cast<void*>(malloc));
568 func_.emplace("coroutine_free_frame", reinterpret_cast<void*>(free));
569
Ben Clayton14740062019-04-09 13:48:41 -0400570#ifdef __APPLE__
Ben Clayton14740062019-04-09 13:48:41 -0400571 func_.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));
572#elif defined(__linux__)
573 func_.emplace("sincosf", reinterpret_cast<void*>(sincosf));
574#endif // __APPLE__
Logan Chien40a60052018-09-26 19:03:53 +0800575 }
576
577 void *findSymbol(const std::string &name) const
578 {
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000579 // Trim off any underscores from the start of the symbol. LLVM likes
580 // to append these on macOS.
581 const char* trimmed = name.c_str();
582 while (trimmed[0] == '_') { trimmed++; }
583
584 FunctionMap::const_iterator it = func_.find(trimmed);
Ben Claytoneb50d252019-04-15 13:50:01 -0400585 // Missing functions will likely make the module fail in exciting non-obvious ways.
586 ASSERT_MSG(it != func_.end(), "Missing external function: '%s'", name.c_str());
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000587 return it->second;
Logan Chien40a60052018-09-26 19:03:53 +0800588 }
589 };
590
Logan Chien0eedc8c2018-08-21 09:34:28 +0800591 class LLVMReactorJIT
592 {
593 private:
594 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
595 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
596
597 llvm::orc::ExecutionSession session;
Logan Chien40a60052018-09-26 19:03:53 +0800598 ExternalFunctionSymbolResolver externalSymbolResolver;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800599 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
600 std::unique_ptr<llvm::TargetMachine> targetMachine;
601 const llvm::DataLayout dataLayout;
602 ObjLayer objLayer;
Ben Claytoncee3dff2019-05-22 12:01:22 +0100603 CompileLayer compileLayer; // guarded by mutex
604 std::mutex mutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800605 size_t emittedFunctionsNum;
606
607 public:
608 LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
609 const llvm::TargetOptions &targetOpts):
610 resolver(createLegacyLookupResolver(
611 session,
612 [this](const std::string &name) {
Logan Chien40a60052018-09-26 19:03:53 +0800613 void *func = externalSymbolResolver.findSymbol(name);
614 if (func != nullptr)
615 {
616 return llvm::JITSymbol(
617 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
618 }
619
Logan Chien0eedc8c2018-08-21 09:34:28 +0800620 return objLayer.findSymbol(name, true);
621 },
622 [](llvm::Error err) {
623 if (err)
624 {
625 // TODO: Log the symbol resolution errors.
626 return;
627 }
628 })),
629 targetMachine(llvm::EngineBuilder()
Ben Claytonac07ed82019-03-26 14:17:41 +0000630#ifdef ENABLE_RR_DEBUG_INFO
631 .setOptLevel(llvm::CodeGenOpt::None)
632#endif // ENABLE_RR_DEBUG_INFO
Logan Chien0eedc8c2018-08-21 09:34:28 +0800633 .setMArch(arch)
634 .setMAttrs(mattrs)
635 .setTargetOptions(targetOpts)
636 .selectTarget()),
637 dataLayout(targetMachine->createDataLayout()),
638 objLayer(
639 session,
640 [this](llvm::orc::VModuleKey) {
641 return ObjLayer::Resources{
642 std::make_shared<llvm::SectionMemoryManager>(),
643 resolver};
Ben Claytonac07ed82019-03-26 14:17:41 +0000644 },
645 ObjLayer::NotifyLoadedFtor(),
646 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
647#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton90cb2602019-05-23 14:42:32 +0100648 DebugInfo::NotifyObjectEmitted(Obj, L);
Ben Claytonac07ed82019-03-26 14:17:41 +0000649#endif // ENABLE_RR_DEBUG_INFO
650 },
651 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
652#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton90cb2602019-05-23 14:42:32 +0100653 DebugInfo::NotifyFreeingObject(Obj);
Ben Claytonac07ed82019-03-26 14:17:41 +0000654#endif // ENABLE_RR_DEBUG_INFO
655 }
656 ),
Logan Chien0eedc8c2018-08-21 09:34:28 +0800657 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
658 emittedFunctionsNum(0)
659 {
660 }
661
662 void startSession()
663 {
664 ::module = new llvm::Module("", *::context);
665 }
666
667 void endSession()
668 {
669 ::function = nullptr;
670 ::module = nullptr;
671 }
672
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100673 LLVMRoutine *acquireRoutine(llvm::Function **funcs, size_t count)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800674 {
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100675 std::vector<std::string> mangledNames(count);
676 for (size_t i = 0; i < count; i++)
677 {
678 auto func = funcs[i];
679 std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
680 func->setName(name);
681 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
682 func->setDoesNotThrow();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800683
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100684 llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
685 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
686 }
687
688 // Compile the module - after this the llvm::Functions will have
689 // been freed.
Logan Chien0eedc8c2018-08-21 09:34:28 +0800690 std::unique_ptr<llvm::Module> mod(::module);
691 ::module = nullptr;
692 mod->setDataLayout(dataLayout);
693
694 auto moduleKey = session.allocateVModule();
Ben Claytoncee3dff2019-05-22 12:01:22 +0100695
696 // Resolve the function symbols - needs to be performed under mutex lock.
697 std::vector<llvm::JITSymbol> symbols;
698 {
699 std::unique_lock<std::mutex> lock(mutex);
700 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
701 funcs = nullptr; // Now points to released memory.
702 for (size_t i = 0; i < count; i++)
703 {
704 symbols.push_back(compileLayer.findSymbolIn(moduleKey, mangledNames[i], false));
705 }
706 }
Logan Chien0eedc8c2018-08-21 09:34:28 +0800707
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100708 // Resolve the function addresses.
709 std::vector<void*> addresses(count);
710 for (size_t i = 0; i < count; i++)
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400711 {
Ben Claytoncee3dff2019-05-22 12:01:22 +0100712 if(auto expectAddr = symbols[i].getAddress())
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100713 {
Ben Claytoncee3dff2019-05-22 12:01:22 +0100714 addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100715 }
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400716 }
717
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100718 return new LLVMRoutine(addresses.data(), count, releaseRoutineCallback, this, moduleKey);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800719 }
720
721 void optimize(llvm::Module *module)
722 {
Ben Claytonac07ed82019-03-26 14:17:41 +0000723#ifdef ENABLE_RR_DEBUG_INFO
724 if (debugInfo != nullptr)
725 {
726 return; // Don't optimize if we're generating debug info.
727 }
728#endif // ENABLE_RR_DEBUG_INFO
729
Logan Chien0eedc8c2018-08-21 09:34:28 +0800730 std::unique_ptr<llvm::legacy::PassManager> passManager(
731 new llvm::legacy::PassManager());
732
733 passManager->add(llvm::createSROAPass());
734
735 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
736 {
737 switch(optimization[pass])
738 {
739 case Disabled: break;
740 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
741 case LICM: passManager->add(llvm::createLICMPass()); break;
742 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
743 case GVN: passManager->add(llvm::createGVNPass()); break;
744 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
745 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
746 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
747 case SCCP: passManager->add(llvm::createSCCPPass()); break;
748 case ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
749 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400750 UNREACHABLE("optimization[pass]: %d, pass: %d", int(optimization[pass]), int(pass));
Logan Chien0eedc8c2018-08-21 09:34:28 +0800751 }
752 }
753
754 passManager->run(*::module);
755 }
756
757 private:
758 void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
759 {
Ben Claytoncee3dff2019-05-22 12:01:22 +0100760 std::unique_lock<std::mutex> lock(mutex);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800761 llvm::cantFail(compileLayer.removeModule(moduleKey));
762 }
763
764 static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
765 {
766 jit->releaseRoutineModule(moduleKey);
767 }
768 };
Logan Chien52cde602018-09-03 19:37:57 +0800769
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400770 Optimization optimization[10] = {InstructionCombining, Disabled};
John Bauman89401822014-05-06 15:04:28 -0400771
// Abstract Type* values are implemented as LLVM type pointers, with one
// exception: 64-bit vectors are emulated with 128-bit ones. This avoids MMX
// on x86 and VFP on ARM, and removes the cost of converting them to explicit
// 128-bit vectors. Because real LLVM types are pointers, the emulated types
// can be encoded as abstract pointers holding small enum values.
enum InternalType : uintptr_t
{
    // Emulated types:
    Type_v2i32        = 0,
    Type_v4i16        = 1,
    Type_v2i16        = 2,
    Type_v8i8         = 3,
    Type_v4i8         = 4,
    Type_v2f32        = 5,
    EmulatedTypeCount = 6,
    // Returned by asInternalType() to signal that the abstract Type* is to be
    // interpreted as an LLVM type pointer:
    Type_LLVM         = 7
};
791
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500792 inline InternalType asInternalType(Type *type)
793 {
794 InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
795 return (t < EmulatedTypeCount) ? t : Type_LLVM;
796 }
797
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400798 llvm::Type *T(Type *t)
799 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500800 // Use 128-bit vectors to implement logically shorter ones.
801 switch(asInternalType(t))
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400802 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500803 case Type_v2i32: return T(Int4::getType());
804 case Type_v4i16: return T(Short8::getType());
805 case Type_v2i16: return T(Short8::getType());
806 case Type_v8i8: return T(Byte16::getType());
807 case Type_v4i8: return T(Byte16::getType());
808 case Type_v2f32: return T(Float4::getType());
809 case Type_LLVM: return reinterpret_cast<llvm::Type*>(t);
Ben Claytoneb50d252019-04-15 13:50:01 -0400810 default:
811 UNREACHABLE("asInternalType(t): %d", int(asInternalType(t)));
812 return nullptr;
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400813 }
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400814 }
815
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500816 Type *T(InternalType t)
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400817 {
818 return reinterpret_cast<Type*>(t);
819 }
820
Nicolas Capensac230122016-09-20 14:30:06 -0400821 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
822 {
823 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
824 }
825
Logan Chien191b3052018-08-31 16:57:15 +0800826 inline llvm::BasicBlock *B(BasicBlock *t)
827 {
828 return reinterpret_cast<llvm::BasicBlock*>(t);
829 }
830
Nicolas Capensc8b67a42016-09-25 15:02:52 -0400831 inline BasicBlock *B(llvm::BasicBlock *t)
832 {
833 return reinterpret_cast<BasicBlock*>(t);
834 }
835
Nicolas Capens01a97962017-07-28 17:30:51 -0400836 static size_t typeSize(Type *type)
837 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500838 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -0400839 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500840 case Type_v2i32: return 8;
841 case Type_v4i16: return 8;
842 case Type_v2i16: return 4;
843 case Type_v8i8: return 8;
844 case Type_v4i8: return 4;
845 case Type_v2f32: return 8;
846 case Type_LLVM:
Nicolas Capens01a97962017-07-28 17:30:51 -0400847 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500848 llvm::Type *t = T(type);
Nicolas Capens01a97962017-07-28 17:30:51 -0400849
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500850 if(t->isPointerTy())
851 {
852 return sizeof(void*);
853 }
854
855 // At this point we should only have LLVM 'primitive' types.
856 unsigned int bits = t->getPrimitiveSizeInBits();
Ben Claytoneb50d252019-04-15 13:50:01 -0400857 ASSERT_MSG(bits != 0, "bits: %d", int(bits));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500858
859 // TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
860 // but are typically stored as one byte. The DataLayout structure should
861 // be used here and many other places if this assumption fails.
862 return (bits + 7) / 8;
863 }
864 break;
865 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400866 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500867 return 0;
868 }
Nicolas Capens01a97962017-07-28 17:30:51 -0400869 }
870
Nicolas Capens69674fb2017-09-01 11:08:44 -0400871 static unsigned int elementCount(Type *type)
872 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500873 switch(asInternalType(type))
Nicolas Capens69674fb2017-09-01 11:08:44 -0400874 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500875 case Type_v2i32: return 2;
876 case Type_v4i16: return 4;
877 case Type_v2i16: return 2;
878 case Type_v8i8: return 8;
879 case Type_v4i8: return 4;
880 case Type_v2f32: return 2;
881 case Type_LLVM: return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
Ben Claytoneb50d252019-04-15 13:50:01 -0400882 default:
883 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
884 return 0;
Nicolas Capens69674fb2017-09-01 11:08:44 -0400885 }
Nicolas Capens69674fb2017-09-01 11:08:44 -0400886 }
887
John Bauman89401822014-05-06 15:04:28 -0400888 Nucleus::Nucleus()
889 {
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400890 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400891
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400892 llvm::InitializeNativeTarget();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800893 llvm::InitializeNativeTargetAsmPrinter();
894 llvm::InitializeNativeTargetAsmParser();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800895
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400896 if(!::context)
John Bauman89401822014-05-06 15:04:28 -0400897 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400898 ::context = new llvm::LLVMContext();
John Bauman89401822014-05-06 15:04:28 -0400899 }
900
John Bauman89401822014-05-06 15:04:28 -0400901 #if defined(__x86_64__)
Logan Chien52cde602018-09-03 19:37:57 +0800902 static const char arch[] = "x86-64";
Logan Chiene3191012018-08-24 22:01:50 +0800903 #elif defined(__i386__)
Logan Chien52cde602018-09-03 19:37:57 +0800904 static const char arch[] = "x86";
Logan Chiene3191012018-08-24 22:01:50 +0800905 #elif defined(__aarch64__)
906 static const char arch[] = "arm64";
907 #elif defined(__arm__)
908 static const char arch[] = "arm";
Gordana Cmiljanovic082dfec2018-10-19 11:36:15 +0200909 #elif defined(__mips__)
Gordana Cmiljanovic20622c02018-11-05 15:00:11 +0100910 #if defined(__mips64)
911 static const char arch[] = "mips64el";
912 #else
913 static const char arch[] = "mipsel";
914 #endif
Colin Samples8fd53302019-06-13 09:57:44 -0400915 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
916 static const char arch[] = "ppc64le";
Logan Chiene3191012018-08-24 22:01:50 +0800917 #else
918 #error "unknown architecture"
John Bauman89401822014-05-06 15:04:28 -0400919 #endif
920
Ben Clayton0fc611f2019-04-18 11:23:27 -0400921 llvm::SmallVector<std::string, 8> mattrs;
922
923 llvm::StringMap<bool> features;
Colin Samples8fd53302019-06-13 09:57:44 -0400924
Ben Clayton0fc611f2019-04-18 11:23:27 -0400925 bool ok = llvm::sys::getHostCPUFeatures(features);
Colin Samples8fd53302019-06-13 09:57:44 -0400926
927 #if defined(__i386__) || defined(__x86_64__) || \
928 (defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
Ben Clayton0fc611f2019-04-18 11:23:27 -0400929 ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
Colin Samples8fd53302019-06-13 09:57:44 -0400930 #else
931 (void) ok; // getHostCPUFeatures always returns false on other platforms
932 #endif
933
Ben Clayton0fc611f2019-04-18 11:23:27 -0400934 for (auto &feature : features)
935 {
936 if (feature.second) { mattrs.push_back(feature.first()); }
937 }
938
939#if 0
Logan Chiene3191012018-08-24 22:01:50 +0800940#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800941 mattrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
942 mattrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
943 mattrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
944 mattrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
945 mattrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
946 mattrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
Logan Chien0eedc8c2018-08-21 09:34:28 +0800947 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
Logan Chiene3191012018-08-24 22:01:50 +0800948#elif defined(__arm__)
949#if __ARM_ARCH >= 8
950 mattrs.push_back("+armv8-a");
951#else
952 // armv7-a requires compiler-rt routines; otherwise, compiled kernel
953 // might fail to link.
954#endif
955#endif
Ben Clayton0fc611f2019-04-18 11:23:27 -0400956#endif
John Bauman89401822014-05-06 15:04:28 -0400957
Logan Chien0eedc8c2018-08-21 09:34:28 +0800958 llvm::TargetOptions targetOpts;
Nicolas Capensa7643812018-09-13 14:20:06 -0400959 targetOpts.UnsafeFPMath = false;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800960 // targetOpts.NoInfsFPMath = true;
961 // targetOpts.NoNaNsFPMath = true;
Logan Chien52cde602018-09-03 19:37:57 +0800962
963 if(!::reactorJIT)
964 {
Logan Chien0eedc8c2018-08-21 09:34:28 +0800965 ::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
Logan Chien52cde602018-09-03 19:37:57 +0800966 }
967
968 ::reactorJIT->startSession();
John Bauman89401822014-05-06 15:04:28 -0400969
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400970 if(!::builder)
John Bauman89401822014-05-06 15:04:28 -0400971 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400972 ::builder = new llvm::IRBuilder<>(*::context);
John Bauman89401822014-05-06 15:04:28 -0400973 }
974 }
975
976 Nucleus::~Nucleus()
977 {
Ben Clayton90cb2602019-05-23 14:42:32 +0100978#ifdef ENABLE_RR_DEBUG_INFO
979 debugInfo.reset(nullptr);
980#endif // ENABLE_RR_DEBUG_INFO
981
Logan Chien52cde602018-09-03 19:37:57 +0800982 ::reactorJIT->endSession();
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400983
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400984 ::codegenMutex.unlock();
John Bauman89401822014-05-06 15:04:28 -0400985 }
986
Chris Forbes878d4b02019-01-21 10:48:35 -0800987 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
John Bauman89401822014-05-06 15:04:28 -0400988 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400989 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
John Bauman19bac1e2014-05-06 15:23:49 -0400990 {
Nicolas Capensac230122016-09-20 14:30:06 -0400991 llvm::Type *type = ::function->getReturnType();
John Bauman19bac1e2014-05-06 15:23:49 -0400992
993 if(type->isVoidTy())
994 {
995 createRetVoid();
996 }
997 else
998 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400999 createRet(V(llvm::UndefValue::get(type)));
John Bauman19bac1e2014-05-06 15:23:49 -04001000 }
1001 }
John Bauman89401822014-05-06 15:04:28 -04001002
Ben Clayton97c13ad2019-05-02 11:59:30 +01001003#ifdef ENABLE_RR_DEBUG_INFO
1004 if (debugInfo != nullptr)
1005 {
1006 debugInfo->Finalize();
1007 }
1008#endif // ENABLE_RR_DEBUG_INFO
1009
John Bauman89401822014-05-06 15:04:28 -04001010 if(false)
1011 {
Ben Clayton5875be52019-04-11 14:57:40 -04001012 std::error_code error;
1013 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001014 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001015 }
1016
Ben Clayton4b944652019-05-02 10:56:19 +01001017 // FIXME: Disable for release builds once heavy development is over.
1018 bool verifyIR = true;
1019 if(verifyIR)
1020 {
1021 llvm::legacy::PassManager pm;
1022 pm.add(llvm::createVerifierPass());
1023 pm.run(*::module);
1024 }
1025
John Bauman89401822014-05-06 15:04:28 -04001026 if(runOptimizations)
1027 {
1028 optimize();
1029 }
1030
1031 if(false)
1032 {
Ben Clayton5875be52019-04-11 14:57:40 -04001033 std::error_code error;
1034 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001035 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001036 }
1037
Ben Clayton1c82c7b2019-04-30 12:49:27 +01001038 LLVMRoutine *routine = ::reactorJIT->acquireRoutine(&::function, 1);
John Bauman89401822014-05-06 15:04:28 -04001039
John Bauman89401822014-05-06 15:04:28 -04001040 return routine;
1041 }
1042
1043 void Nucleus::optimize()
1044 {
Logan Chien52cde602018-09-03 19:37:57 +08001045 ::reactorJIT->optimize(::module);
John Bauman89401822014-05-06 15:04:28 -04001046 }
1047
John Bauman19bac1e2014-05-06 15:23:49 -04001048 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
John Bauman89401822014-05-06 15:04:28 -04001049 {
1050 // Need to allocate it in the entry block for mem2reg to work
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001051 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
John Bauman89401822014-05-06 15:04:28 -04001052
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001053 llvm::Instruction *declaration;
John Bauman89401822014-05-06 15:04:28 -04001054
1055 if(arraySize)
1056 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001057 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
John Bauman89401822014-05-06 15:04:28 -04001058 }
1059 else
1060 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001061 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
John Bauman89401822014-05-06 15:04:28 -04001062 }
1063
1064 entryBlock.getInstList().push_front(declaration);
1065
Nicolas Capens19336542016-09-26 10:32:29 -04001066 return V(declaration);
John Bauman89401822014-05-06 15:04:28 -04001067 }
1068
1069 BasicBlock *Nucleus::createBasicBlock()
1070 {
Logan Chien191b3052018-08-31 16:57:15 +08001071 return B(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001072 }
1073
1074 BasicBlock *Nucleus::getInsertBlock()
1075 {
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001076 return B(::builder->GetInsertBlock());
John Bauman89401822014-05-06 15:04:28 -04001077 }
1078
1079 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1080 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001081 // assert(::builder->GetInsertBlock()->back().isTerminator());
Nicolas Capens0192d152019-03-27 14:46:07 -04001082
1083 Variable::materializeAll();
1084
Logan Chien191b3052018-08-31 16:57:15 +08001085 ::builder->SetInsertPoint(B(basicBlock));
John Bauman89401822014-05-06 15:04:28 -04001086 }
1087
Nicolas Capensac230122016-09-20 14:30:06 -04001088 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
John Bauman89401822014-05-06 15:04:28 -04001089 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001090 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001091 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
1092 ::function->setCallingConv(llvm::CallingConv::C);
John Bauman89401822014-05-06 15:04:28 -04001093
Ben Clayton5875be52019-04-11 14:57:40 -04001094 #if defined(_WIN32)
Nicolas Capens52551d12018-09-13 14:30:56 -04001095 // FIXME(capn):
1096 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
1097 // having a trap which allows the OS to grow the stack. For functions with a stack frame
1098 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
1099 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
1100 // the stack and ensure all pages have been committed. This is currently broken in LLVM
1101 // JIT, but we can prevent emitting the stack probe call:
1102 ::function->addFnAttr("stack-probe-size", "1048576");
1103 #endif
1104
Ben Claytonac07ed82019-03-26 14:17:41 +00001105#ifdef ENABLE_RR_DEBUG_INFO
1106 ::debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(::builder, ::context, ::module, ::function));
1107#endif // ENABLE_RR_DEBUG_INFO
1108
Logan Chien191b3052018-08-31 16:57:15 +08001109 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001110 }
1111
Nicolas Capens19336542016-09-26 10:32:29 -04001112 Value *Nucleus::getArgument(unsigned int index)
John Bauman89401822014-05-06 15:04:28 -04001113 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001114 llvm::Function::arg_iterator args = ::function->arg_begin();
John Bauman89401822014-05-06 15:04:28 -04001115
1116 while(index)
1117 {
1118 args++;
1119 index--;
1120 }
1121
Nicolas Capens19336542016-09-26 10:32:29 -04001122 return V(&*args);
John Bauman89401822014-05-06 15:04:28 -04001123 }
1124
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001125 void Nucleus::createRetVoid()
John Bauman89401822014-05-06 15:04:28 -04001126 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001127 RR_DEBUG_INFO_UPDATE_LOC();
1128
Ben Claytonc958b172019-05-02 12:20:59 +01001129 ASSERT_MSG(::function->getReturnType() == T(Void::getType()), "Return type mismatch");
1130
Nicolas Capens0192d152019-03-27 14:46:07 -04001131 // Code generated after this point is unreachable, so any variables
1132 // being read can safely return an undefined value. We have to avoid
1133 // materializing variables after the terminator ret instruction.
1134 Variable::killUnmaterialized();
1135
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001136 ::builder->CreateRetVoid();
John Bauman89401822014-05-06 15:04:28 -04001137 }
1138
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001139 void Nucleus::createRet(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001140 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001141 RR_DEBUG_INFO_UPDATE_LOC();
1142
Ben Claytonc958b172019-05-02 12:20:59 +01001143 ASSERT_MSG(::function->getReturnType() == V(v)->getType(), "Return type mismatch");
1144
Nicolas Capens0192d152019-03-27 14:46:07 -04001145 // Code generated after this point is unreachable, so any variables
1146 // being read can safely return an undefined value. We have to avoid
1147 // materializing variables after the terminator ret instruction.
1148 Variable::killUnmaterialized();
1149
Logan Chien191b3052018-08-31 16:57:15 +08001150 ::builder->CreateRet(V(v));
John Bauman89401822014-05-06 15:04:28 -04001151 }
1152
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001153 void Nucleus::createBr(BasicBlock *dest)
John Bauman89401822014-05-06 15:04:28 -04001154 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001155 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001156 Variable::materializeAll();
1157
Logan Chien191b3052018-08-31 16:57:15 +08001158 ::builder->CreateBr(B(dest));
John Bauman89401822014-05-06 15:04:28 -04001159 }
1160
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001161 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001162 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001163 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001164 Variable::materializeAll();
Logan Chien191b3052018-08-31 16:57:15 +08001165 ::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
John Bauman89401822014-05-06 15:04:28 -04001166 }
1167
1168 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1169 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001170 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001171 return V(::builder->CreateAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001172 }
1173
1174 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1175 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001176 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001177 return V(::builder->CreateSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001178 }
1179
1180 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1181 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001182 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001183 return V(::builder->CreateMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001184 }
1185
1186 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1187 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001188 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001189 return V(::builder->CreateUDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001190 }
1191
1192 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1193 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001194 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001195 return V(::builder->CreateSDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001196 }
1197
1198 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1199 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001200 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001201 return V(::builder->CreateFAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001202 }
1203
1204 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1205 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001206 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001207 return V(::builder->CreateFSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001208 }
1209
1210 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1211 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001212 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001213 return V(::builder->CreateFMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001214 }
1215
1216 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1217 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001218 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001219 return V(::builder->CreateFDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001220 }
1221
1222 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1223 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001224 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001225 return V(::builder->CreateURem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001226 }
1227
1228 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1229 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001230 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001231 return V(::builder->CreateSRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001232 }
1233
1234 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1235 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001236 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001237 return V(::builder->CreateFRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001238 }
1239
1240 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1241 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001242 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001243 return V(::builder->CreateShl(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001244 }
1245
1246 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1247 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001248 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001249 return V(::builder->CreateLShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001250 }
1251
1252 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1253 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001254 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001255 return V(::builder->CreateAShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001256 }
1257
1258 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1259 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001260 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001261 return V(::builder->CreateAnd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001262 }
1263
1264 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1265 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001266 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001267 return V(::builder->CreateOr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001268 }
1269
1270 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1271 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001272 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001273 return V(::builder->CreateXor(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001274 }
1275
Nicolas Capens19336542016-09-26 10:32:29 -04001276 Value *Nucleus::createNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001277 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001278 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001279 return V(::builder->CreateNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001280 }
1281
Nicolas Capens19336542016-09-26 10:32:29 -04001282 Value *Nucleus::createFNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001283 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001284 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001285 return V(::builder->CreateFNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001286 }
1287
Nicolas Capens19336542016-09-26 10:32:29 -04001288 Value *Nucleus::createNot(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001289 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001290 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001291 return V(::builder->CreateNot(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001292 }
1293
Nicolas Capens86509d92019-03-21 13:23:50 -04001294 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001295 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001296 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001297 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001298 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001299 case Type_v2i32:
1300 case Type_v4i16:
1301 case Type_v8i8:
1302 case Type_v2f32:
1303 return createBitCast(
1304 createInsertElement(
1305 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
Nicolas Capens86509d92019-03-21 13:23:50 -04001306 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001307 0),
1308 type);
1309 case Type_v2i16:
1310 case Type_v4i8:
1311 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001312 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001313 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Nicolas Capens86509d92019-03-21 13:23:50 -04001314 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001315 i = createZExt(i, Long::getType());
1316 Value *v = createInsertElement(u, i, 0);
1317 return createBitCast(v, type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001318 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001319 // Fallthrough to non-emulated case.
1320 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001321 {
Ben Clayton99e57192019-05-03 13:25:08 +01001322 auto elTy = T(type);
1323 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
1324 if (atomic && !(elTy->isIntegerTy() || elTy->isPointerTy() || elTy->isFloatTy()))
1325 {
1326 // atomic load operand must have integer, pointer, or floating point type
1327 // Fall back to using:
1328 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1329 auto sizetTy = ::llvm::IntegerType::get(*::context, sizeof(size_t) * 8);
1330 auto intTy = ::llvm::IntegerType::get(*::context, sizeof(int) * 8);
1331 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1332 auto i8PtrTy = i8Ty->getPointerTo();
1333 auto voidTy = ::llvm::Type::getVoidTy(*::context);
1334 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
1335 auto func = ::module->getOrInsertFunction("__atomic_load", funcTy);
1336 auto size = ::module->getDataLayout().getTypeStoreSize(elTy);
1337 auto out = allocateStackVariable(type);
1338 ::builder->CreateCall(func, {
1339 ::llvm::ConstantInt::get(sizetTy, size),
1340 ::builder->CreatePointerCast(V(ptr), i8PtrTy),
1341 ::builder->CreatePointerCast(V(out), i8PtrTy),
1342 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1343 });
1344 return V(::builder->CreateLoad(V(out)));
1345 }
1346 else
1347 {
1348 auto load = new llvm::LoadInst(V(ptr), "", isVolatile, alignment);
1349 load->setAtomic(atomicOrdering(atomic, memoryOrder));
1350 return V(::builder->Insert(load));
1351 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001352 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001353 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001354 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1355 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001356 }
John Bauman89401822014-05-06 15:04:28 -04001357 }
1358
Nicolas Capens86509d92019-03-21 13:23:50 -04001359 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001360 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001361 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001362 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001363 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001364 case Type_v2i32:
1365 case Type_v4i16:
1366 case Type_v8i8:
1367 case Type_v2f32:
1368 createStore(
1369 createExtractElement(
1370 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1371 createBitCast(ptr, Pointer<Long>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001372 Long::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001373 return value;
1374 case Type_v2i16:
1375 case Type_v4i8:
1376 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001377 {
Logan Chien191b3052018-08-31 16:57:15 +08001378 createStore(
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001379 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1380 createBitCast(ptr, Pointer<Int>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001381 Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens01a97962017-07-28 17:30:51 -04001382 return value;
Nicolas Capens01a97962017-07-28 17:30:51 -04001383 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001384 // Fallthrough to non-emulated case.
1385 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001386 {
Ben Clayton99e57192019-05-03 13:25:08 +01001387 auto elTy = T(type);
1388 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
1389 if (atomic && !(elTy->isIntegerTy() || elTy->isPointerTy() || elTy->isFloatTy()))
1390 {
1391 // atomic store operand must have integer, pointer, or floating point type
1392 // Fall back to using:
1393 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1394 auto sizetTy = ::llvm::IntegerType::get(*::context, sizeof(size_t) * 8);
1395 auto intTy = ::llvm::IntegerType::get(*::context, sizeof(int) * 8);
1396 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1397 auto i8PtrTy = i8Ty->getPointerTo();
1398 auto voidTy = ::llvm::Type::getVoidTy(*::context);
1399 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
1400 auto func = ::module->getOrInsertFunction("__atomic_store", funcTy);
1401 auto size = ::module->getDataLayout().getTypeStoreSize(elTy);
1402 auto copy = allocateStackVariable(type);
1403 ::builder->CreateStore(V(value), V(copy));
1404 ::builder->CreateCall(func, {
1405 ::llvm::ConstantInt::get(sizetTy, size),
1406 ::builder->CreatePointerCast(V(ptr), i8PtrTy),
1407 ::builder->CreatePointerCast(V(copy), i8PtrTy),
1408 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1409 });
1410 }
1411 else
1412 {
1413 auto store = ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
1414 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1415 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001416
1417 return value;
1418 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001419 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001420 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1421 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001422 }
John Bauman89401822014-05-06 15:04:28 -04001423 }
1424
Ben Clayton0fc611f2019-04-18 11:23:27 -04001425 Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment)
1426 {
1427 ASSERT(V(base)->getType()->isPointerTy());
1428 ASSERT(V(offsets)->getType()->isVectorTy());
1429 ASSERT(V(mask)->getType()->isVectorTy());
1430
1431 auto numEls = V(mask)->getType()->getVectorNumElements();
1432 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
1433 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
1434 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1435 auto i8PtrTy = i8Ty->getPointerTo();
1436 auto elPtrTy = T(elTy)->getPointerTo();
1437 auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
1438 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
1439 auto i8Base = ::builder->CreatePointerCast(V(base), i8PtrTy);
1440 auto i8Ptrs = ::builder->CreateGEP(i8Base, V(offsets));
1441 auto elPtrs = ::builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1442 auto i8Mask = ::builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
1443 auto passthrough = ::llvm::Constant::getNullValue(elVecTy);
1444 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
1445 auto func = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
1446 return V(::builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough }));
1447 }
1448
1449 void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment)
1450 {
1451 ASSERT(V(base)->getType()->isPointerTy());
1452 ASSERT(V(val)->getType()->isVectorTy());
1453 ASSERT(V(offsets)->getType()->isVectorTy());
1454 ASSERT(V(mask)->getType()->isVectorTy());
1455
1456 auto numEls = V(mask)->getType()->getVectorNumElements();
1457 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
1458 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
1459 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1460 auto i8PtrTy = i8Ty->getPointerTo();
1461 auto elVecTy = V(val)->getType();
1462 auto elTy = elVecTy->getVectorElementType();
1463 auto elPtrTy = elTy->getPointerTo();
1464 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
1465 auto i8Base = ::builder->CreatePointerCast(V(base), i8PtrTy);
1466 auto i8Ptrs = ::builder->CreateGEP(i8Base, V(offsets));
1467 auto elPtrs = ::builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1468 auto i8Mask = ::builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
1469 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
1470 auto func = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_scatter, { elVecTy, elPtrVecTy } );
1471 ::builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
1472 }
1473
Ben Claytonb16c5862019-05-08 14:01:38 +01001474 void Nucleus::createFence(std::memory_order memoryOrder)
1475 {
1476 ::builder->CreateFence(atomicOrdering(true, memoryOrder));
1477 }
1478
Nicolas Capensd294def2017-01-26 17:44:37 -08001479 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
John Bauman89401822014-05-06 15:04:28 -04001480 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001481 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001482 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens01a97962017-07-28 17:30:51 -04001483 if(sizeof(void*) == 8)
Nicolas Capensd294def2017-01-26 17:44:37 -08001484 {
Ben Claytonb1243732019-02-27 23:56:18 +00001485 // LLVM manual: "When indexing into an array, pointer or vector,
1486 // integers of any width are allowed, and they are not required to
1487 // be constant. These integers are treated as signed values where
1488 // relevant."
1489 //
1490 // Thus if we want indexes to be treated as unsigned we have to
1491 // zero-extend them ourselves.
1492 //
1493 // Note that this is not because we want to address anywhere near
1494 // 4 GB of data. Instead this is important for performance because
1495 // x86 supports automatic zero-extending of 32-bit registers to
1496 // 64-bit. Thus when indexing into an array using a uint32 is
1497 // actually faster than an int32.
1498 index = unsignedIndex ?
1499 createZExt(index, Long::getType()) :
1500 createSExt(index, Long::getType());
Nicolas Capens01a97962017-07-28 17:30:51 -04001501 }
Ben Claytonb1243732019-02-27 23:56:18 +00001502
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001503 // For non-emulated types we can rely on LLVM's GEP to calculate the
1504 // effective address correctly.
1505 if(asInternalType(type) == Type_LLVM)
Nicolas Capens01a97962017-07-28 17:30:51 -04001506 {
Ben Claytonb1243732019-02-27 23:56:18 +00001507 return V(::builder->CreateGEP(V(ptr), V(index)));
Nicolas Capensd294def2017-01-26 17:44:37 -08001508 }
1509
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001510 // For emulated types we have to multiply the index by the intended
1511 // type size ourselves to obain the byte offset.
Ben Claytonb1243732019-02-27 23:56:18 +00001512 index = (sizeof(void*) == 8) ?
1513 createMul(index, createConstantLong((int64_t)typeSize(type))) :
1514 createMul(index, createConstantInt((int)typeSize(type)));
1515
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001516 // Cast to a byte pointer, apply the byte offset, and cast back to the
1517 // original pointer type.
Logan Chien191b3052018-08-31 16:57:15 +08001518 return createBitCast(
1519 V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
1520 T(llvm::PointerType::get(T(type), 0)));
John Bauman89401822014-05-06 15:04:28 -04001521 }
1522
Chris Forbes17813932019-04-18 11:45:54 -07001523 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
John Bauman19bac1e2014-05-06 15:23:49 -04001524 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001525 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbes17813932019-04-18 11:45:54 -07001526 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1527 }
1528
Chris Forbes707ed992019-04-18 18:17:35 -07001529 Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1530 {
1531 RR_DEBUG_INFO_UPDATE_LOC();
1532 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Sub, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1533 }
1534
Chris Forbes17813932019-04-18 11:45:54 -07001535 Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1536 {
1537 RR_DEBUG_INFO_UPDATE_LOC();
1538 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::And, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1539 }
1540
1541 Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1542 {
1543 RR_DEBUG_INFO_UPDATE_LOC();
1544 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Or, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1545 }
1546
1547 Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1548 {
1549 RR_DEBUG_INFO_UPDATE_LOC();
1550 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xor, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1551 }
1552
1553 Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1554 {
1555 RR_DEBUG_INFO_UPDATE_LOC();
1556 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Min, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1557 }
1558
1559 Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1560 {
1561 RR_DEBUG_INFO_UPDATE_LOC();
1562 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Max, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1563 }
1564
Chris Forbesf31bdad2019-05-23 14:58:08 -07001565 Value *Nucleus::createAtomicUMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1566 {
1567 RR_DEBUG_INFO_UPDATE_LOC();
1568 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::UMin, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1569 }
1570
1571 Value *Nucleus::createAtomicUMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1572 {
1573 RR_DEBUG_INFO_UPDATE_LOC();
1574 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::UMax, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1575 }
1576
1577
Chris Forbes17813932019-04-18 11:45:54 -07001578 Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1579 {
1580 RR_DEBUG_INFO_UPDATE_LOC();
1581 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
John Bauman19bac1e2014-05-06 15:23:49 -04001582 }
1583
Chris Forbesa16238d2019-04-18 16:31:54 -07001584 Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1585 {
1586 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbesc9ca99e2019-04-19 07:53:34 -07001587 // Note: AtomicCmpXchgInstruction returns a 2-member struct containing {result, success-flag}, not the result directly.
Chris Forbesa16238d2019-04-18 16:31:54 -07001588 return V(::builder->CreateExtractValue(
1589 ::builder->CreateAtomicCmpXchg(V(ptr), V(compare), V(value), atomicOrdering(true, memoryOrderEqual), atomicOrdering(true, memoryOrderUnequal)),
1590 llvm::ArrayRef<unsigned>(0u)));
1591 }
1592
Nicolas Capens19336542016-09-26 10:32:29 -04001593 Value *Nucleus::createTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001594 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001595 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001596 return V(::builder->CreateTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001597 }
1598
Nicolas Capens19336542016-09-26 10:32:29 -04001599 Value *Nucleus::createZExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001600 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001601 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001602 return V(::builder->CreateZExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001603 }
1604
Nicolas Capens19336542016-09-26 10:32:29 -04001605 Value *Nucleus::createSExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001606 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001607 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001608 return V(::builder->CreateSExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001609 }
1610
Nicolas Capens19336542016-09-26 10:32:29 -04001611 Value *Nucleus::createFPToSI(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001612 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001613 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001614 return V(::builder->CreateFPToSI(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001615 }
1616
Nicolas Capens19336542016-09-26 10:32:29 -04001617 Value *Nucleus::createSIToFP(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001618 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001619 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001620 return V(::builder->CreateSIToFP(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001621 }
1622
Nicolas Capens19336542016-09-26 10:32:29 -04001623 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001624 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001625 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001626 return V(::builder->CreateFPTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001627 }
1628
Nicolas Capens19336542016-09-26 10:32:29 -04001629 Value *Nucleus::createFPExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001630 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001631 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001632 return V(::builder->CreateFPExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001633 }
1634
Nicolas Capens19336542016-09-26 10:32:29 -04001635 Value *Nucleus::createBitCast(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001636 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001637 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04001638 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1639 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1640 // reading back as the destination type.
Logan Chien191b3052018-08-31 16:57:15 +08001641 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001642 {
1643 Value *readAddress = allocateStackVariable(destType);
Logan Chien191b3052018-08-31 16:57:15 +08001644 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1645 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001646 return createLoad(readAddress, destType);
1647 }
Logan Chien191b3052018-08-31 16:57:15 +08001648 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001649 {
Logan Chien191b3052018-08-31 16:57:15 +08001650 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1651 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001652 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1653 return createLoad(readAddress, destType);
1654 }
1655
Logan Chien191b3052018-08-31 16:57:15 +08001656 return V(::builder->CreateBitCast(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001657 }
1658
John Bauman89401822014-05-06 15:04:28 -04001659 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1660 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001661 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001662 return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001663 }
1664
1665 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1666 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001667 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001668 return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001669 }
1670
1671 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1672 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001673 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001674 return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001675 }
1676
1677 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1678 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001679 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001680 return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001681 }
1682
1683 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1684 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001685 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001686 return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001687 }
1688
1689 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1690 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001691 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001692 return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001693 }
1694
1695 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1696 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001697 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001698 return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001699 }
1700
1701 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1702 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001703 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001704 return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001705 }
1706
1707 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1708 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001709 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001710 return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001711 }
1712
1713 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1714 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001715 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001716 return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001717 }
1718
1719 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1720 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001721 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001722 return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001723 }
1724
1725 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1726 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001727 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001728 return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001729 }
1730
1731 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1732 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001733 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001734 return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001735 }
1736
1737 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1738 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001739 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001740 return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001741 }
1742
1743 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1744 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001745 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001746 return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001747 }
1748
1749 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1750 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001751 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001752 return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001753 }
1754
1755 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1756 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001757 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001758 return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001759 }
1760
1761 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1762 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001763 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001764 return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001765 }
1766
1767 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1768 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001769 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001770 return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001771 }
1772
1773 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1774 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001775 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001776 return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001777 }
1778
1779 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1780 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001781 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001782 return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001783 }
1784
1785 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1786 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001787 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001788 return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001789 }
1790
1791 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1792 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001793 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001794 return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001795 }
1796
1797 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1798 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001799 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton71008d82019-03-05 17:17:59 +00001800 return V(::builder->CreateFCmpUNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001801 }
1802
Nicolas Capense95d5342016-09-30 11:37:28 -04001803 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
John Bauman89401822014-05-06 15:04:28 -04001804 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001805 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001806 ASSERT(V(vector)->getType()->getContainedType(0) == T(type));
Logan Chien191b3052018-08-31 16:57:15 +08001807 return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001808 }
1809
1810 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1811 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001812 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001813 return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001814 }
1815
Logan Chien191b3052018-08-31 16:57:15 +08001816 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
John Bauman89401822014-05-06 15:04:28 -04001817 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001818 RR_DEBUG_INFO_UPDATE_LOC();
1819
Logan Chien191b3052018-08-31 16:57:15 +08001820 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
Nicolas Capense89cd582016-09-30 14:23:47 -04001821 const int maxSize = 16;
1822 llvm::Constant *swizzle[maxSize];
Ben Claytoneb50d252019-04-15 13:50:01 -04001823 ASSERT(size <= maxSize);
Nicolas Capense89cd582016-09-30 14:23:47 -04001824
1825 for(int i = 0; i < size; i++)
1826 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001827 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
Nicolas Capense89cd582016-09-30 14:23:47 -04001828 }
1829
1830 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
1831
Logan Chien191b3052018-08-31 16:57:15 +08001832 return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
John Bauman89401822014-05-06 15:04:28 -04001833 }
1834
Logan Chien191b3052018-08-31 16:57:15 +08001835 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001836 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001837 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001838 return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
John Bauman89401822014-05-06 15:04:28 -04001839 }
1840
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001841 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
John Bauman89401822014-05-06 15:04:28 -04001842 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001843 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001844 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
John Bauman89401822014-05-06 15:04:28 -04001845 }
1846
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001847 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
John Bauman89401822014-05-06 15:04:28 -04001848 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001849 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001850 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
1851 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
John Bauman89401822014-05-06 15:04:28 -04001852 }
1853
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001854 void Nucleus::createUnreachable()
John Bauman89401822014-05-06 15:04:28 -04001855 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001856 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001857 ::builder->CreateUnreachable();
John Bauman89401822014-05-06 15:04:28 -04001858 }
1859
Nicolas Capensac230122016-09-20 14:30:06 -04001860 Type *Nucleus::getPointerType(Type *ElementType)
John Bauman89401822014-05-06 15:04:28 -04001861 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001862 return T(llvm::PointerType::get(T(ElementType), 0));
John Bauman89401822014-05-06 15:04:28 -04001863 }
1864
Nicolas Capens13ac2322016-10-13 14:52:12 -04001865 Value *Nucleus::createNullValue(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001866 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001867 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001868 return V(llvm::Constant::getNullValue(T(Ty)));
John Bauman89401822014-05-06 15:04:28 -04001869 }
1870
Nicolas Capens13ac2322016-10-13 14:52:12 -04001871 Value *Nucleus::createConstantLong(int64_t i)
John Bauman89401822014-05-06 15:04:28 -04001872 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001873 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001874 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001875 }
1876
Nicolas Capens13ac2322016-10-13 14:52:12 -04001877 Value *Nucleus::createConstantInt(int i)
John Bauman89401822014-05-06 15:04:28 -04001878 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001879 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001880 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001881 }
1882
Nicolas Capens13ac2322016-10-13 14:52:12 -04001883 Value *Nucleus::createConstantInt(unsigned int i)
John Bauman89401822014-05-06 15:04:28 -04001884 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001885 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001886 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001887 }
1888
Nicolas Capens13ac2322016-10-13 14:52:12 -04001889 Value *Nucleus::createConstantBool(bool b)
John Bauman89401822014-05-06 15:04:28 -04001890 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001891 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001892 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
John Bauman89401822014-05-06 15:04:28 -04001893 }
1894
Nicolas Capens13ac2322016-10-13 14:52:12 -04001895 Value *Nucleus::createConstantByte(signed char i)
John Bauman89401822014-05-06 15:04:28 -04001896 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001897 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001898 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001899 }
1900
Nicolas Capens13ac2322016-10-13 14:52:12 -04001901 Value *Nucleus::createConstantByte(unsigned char i)
John Bauman89401822014-05-06 15:04:28 -04001902 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001903 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001904 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001905 }
1906
Nicolas Capens13ac2322016-10-13 14:52:12 -04001907 Value *Nucleus::createConstantShort(short i)
John Bauman89401822014-05-06 15:04:28 -04001908 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001909 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001910 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001911 }
1912
Nicolas Capens13ac2322016-10-13 14:52:12 -04001913 Value *Nucleus::createConstantShort(unsigned short i)
John Bauman89401822014-05-06 15:04:28 -04001914 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001915 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001916 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001917 }
1918
Nicolas Capens13ac2322016-10-13 14:52:12 -04001919 Value *Nucleus::createConstantFloat(float x)
John Bauman89401822014-05-06 15:04:28 -04001920 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001921 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001922 return V(llvm::ConstantFP::get(T(Float::getType()), x));
John Bauman89401822014-05-06 15:04:28 -04001923 }
1924
Nicolas Capens13ac2322016-10-13 14:52:12 -04001925 Value *Nucleus::createNullPointer(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001926 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001927 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001928 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
John Bauman89401822014-05-06 15:04:28 -04001929 }
1930
Nicolas Capens13ac2322016-10-13 14:52:12 -04001931 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
John Bauman89401822014-05-06 15:04:28 -04001932 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001933 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001934 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1935 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001936 ASSERT(numElements <= 16 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001937 llvm::Constant *constantVector[16];
1938
Nicolas Capens69674fb2017-09-01 11:08:44 -04001939 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001940 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001941 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001942 }
1943
Nicolas Capens69674fb2017-09-01 11:08:44 -04001944 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
Nicolas Capens13ac2322016-10-13 14:52:12 -04001945 }
1946
1947 Value *Nucleus::createConstantVector(const double *constants, Type *type)
1948 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001949 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001950 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1951 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001952 ASSERT(numElements <= 8 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001953 llvm::Constant *constantVector[8];
1954
Nicolas Capens69674fb2017-09-01 11:08:44 -04001955 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001956 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001957 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001958 }
1959
Nicolas Capens69674fb2017-09-01 11:08:44 -04001960 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
John Bauman89401822014-05-06 15:04:28 -04001961 }
1962
John Bauman19bac1e2014-05-06 15:23:49 -04001963 Type *Void::getType()
John Bauman89401822014-05-06 15:04:28 -04001964 {
Nicolas Capensac230122016-09-20 14:30:06 -04001965 return T(llvm::Type::getVoidTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04001966 }
1967
John Bauman19bac1e2014-05-06 15:23:49 -04001968 Type *Bool::getType()
John Bauman89401822014-05-06 15:04:28 -04001969 {
Nicolas Capensac230122016-09-20 14:30:06 -04001970 return T(llvm::Type::getInt1Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001971 }
1972
John Bauman19bac1e2014-05-06 15:23:49 -04001973 Type *Byte::getType()
John Bauman89401822014-05-06 15:04:28 -04001974 {
Nicolas Capensac230122016-09-20 14:30:06 -04001975 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001976 }
1977
John Bauman19bac1e2014-05-06 15:23:49 -04001978 Type *SByte::getType()
John Bauman89401822014-05-06 15:04:28 -04001979 {
Nicolas Capensac230122016-09-20 14:30:06 -04001980 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001981 }
1982
John Bauman19bac1e2014-05-06 15:23:49 -04001983 Type *Short::getType()
John Bauman89401822014-05-06 15:04:28 -04001984 {
Nicolas Capensac230122016-09-20 14:30:06 -04001985 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001986 }
1987
John Bauman19bac1e2014-05-06 15:23:49 -04001988 Type *UShort::getType()
John Bauman89401822014-05-06 15:04:28 -04001989 {
Nicolas Capensac230122016-09-20 14:30:06 -04001990 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001991 }
1992
John Bauman19bac1e2014-05-06 15:23:49 -04001993 Type *Byte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001994 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001995 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001996 }
1997
John Bauman19bac1e2014-05-06 15:23:49 -04001998 Type *SByte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001999 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002000 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04002001 }
2002
John Bauman19bac1e2014-05-06 15:23:49 -04002003 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002004 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002005 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002006#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002007 return x86::paddusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002008#else
2009 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2010#endif
John Bauman89401822014-05-06 15:04:28 -04002011 }
John Bauman66b8ab22014-05-06 15:57:45 -04002012
John Bauman19bac1e2014-05-06 15:23:49 -04002013 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002014 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002015 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002016#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002017 return x86::psubusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002018#else
2019 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2020#endif
John Bauman89401822014-05-06 15:04:28 -04002021 }
2022
John Bauman19bac1e2014-05-06 15:23:49 -04002023 RValue<Int> SignMask(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04002024 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002025 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002026#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002027 return x86::pmovmskb(x);
Logan Chiene3191012018-08-24 22:01:50 +08002028#else
2029 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2030#endif
John Bauman89401822014-05-06 15:04:28 -04002031 }
2032
John Bauman19bac1e2014-05-06 15:23:49 -04002033// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002034// {
Logan Chiene3191012018-08-24 22:01:50 +08002035//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002036// return x86::pcmpgtb(x, y); // FIXME: Signedness
Logan Chiene3191012018-08-24 22:01:50 +08002037//#else
2038// return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2039//#endif
John Bauman89401822014-05-06 15:04:28 -04002040// }
John Bauman66b8ab22014-05-06 15:57:45 -04002041
John Bauman19bac1e2014-05-06 15:23:49 -04002042 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002043 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002044 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002045#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002046 return x86::pcmpeqb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002047#else
2048 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2049#endif
John Bauman89401822014-05-06 15:04:28 -04002050 }
2051
John Bauman19bac1e2014-05-06 15:23:49 -04002052 Type *Byte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002053 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002054 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002055 }
2056
John Bauman19bac1e2014-05-06 15:23:49 -04002057 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002058 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002059 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002060#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002061 return x86::paddsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002062#else
2063 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2064#endif
John Bauman89401822014-05-06 15:04:28 -04002065 }
John Bauman66b8ab22014-05-06 15:57:45 -04002066
John Bauman19bac1e2014-05-06 15:23:49 -04002067 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002068 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002069 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002070#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002071 return x86::psubsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002072#else
2073 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2074#endif
John Bauman89401822014-05-06 15:04:28 -04002075 }
2076
John Bauman19bac1e2014-05-06 15:23:49 -04002077 RValue<Int> SignMask(RValue<SByte8> x)
John Bauman89401822014-05-06 15:04:28 -04002078 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002079 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002080#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002081 return x86::pmovmskb(As<Byte8>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002082#else
2083 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2084#endif
John Bauman89401822014-05-06 15:04:28 -04002085 }
2086
John Bauman19bac1e2014-05-06 15:23:49 -04002087 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002088 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002089 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002090#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002091 return x86::pcmpgtb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002092#else
2093 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2094#endif
John Bauman89401822014-05-06 15:04:28 -04002095 }
John Bauman66b8ab22014-05-06 15:57:45 -04002096
John Bauman19bac1e2014-05-06 15:23:49 -04002097 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002098 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002099 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002100#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002101 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
Logan Chiene3191012018-08-24 22:01:50 +08002102#else
2103 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2104#endif
John Bauman89401822014-05-06 15:04:28 -04002105 }
2106
John Bauman19bac1e2014-05-06 15:23:49 -04002107 Type *SByte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002108 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002109 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002110 }
2111
John Bauman19bac1e2014-05-06 15:23:49 -04002112 Type *Byte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002113 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002114 return T(llvm::VectorType::get(T(Byte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002115 }
2116
John Bauman19bac1e2014-05-06 15:23:49 -04002117 Type *SByte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002118 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002119 return T(llvm::VectorType::get(T(SByte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002120 }
2121
Nicolas Capens16b5f152016-10-13 13:39:01 -04002122 Type *Short2::getType()
2123 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002124 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002125 }
2126
Nicolas Capens16b5f152016-10-13 13:39:01 -04002127 Type *UShort2::getType()
2128 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002129 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002130 }
2131
John Bauman19bac1e2014-05-06 15:23:49 -04002132 Short4::Short4(RValue<Int4> cast)
John Bauman89401822014-05-06 15:04:28 -04002133 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002134 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04002135 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
John Bauman89401822014-05-06 15:04:28 -04002136 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2137
Nicolas Capens01a97962017-07-28 17:30:51 -04002138 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2139 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
John Bauman89401822014-05-06 15:04:28 -04002140
John Bauman66b8ab22014-05-06 15:57:45 -04002141 storeValue(short4);
John Bauman89401822014-05-06 15:04:28 -04002142 }
2143
John Bauman19bac1e2014-05-06 15:23:49 -04002144// Short4::Short4(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002145// {
2146// }
2147
John Bauman19bac1e2014-05-06 15:23:49 -04002148 Short4::Short4(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002149 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002150 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002151 Int4 v4i32 = Int4(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002152#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002153 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
Logan Chiene3191012018-08-24 22:01:50 +08002154#else
2155 Value *v = v4i32.loadValue();
2156 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
2157#endif
John Bauman66b8ab22014-05-06 15:57:45 -04002158
2159 storeValue(As<Short4>(Int2(v4i32)).value);
John Bauman89401822014-05-06 15:04:28 -04002160 }
2161
John Bauman19bac1e2014-05-06 15:23:49 -04002162 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002163 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002164 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002165#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002166 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2167
2168 return x86::psllw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002169#else
2170 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
2171#endif
John Bauman89401822014-05-06 15:04:28 -04002172 }
2173
John Bauman19bac1e2014-05-06 15:23:49 -04002174 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002175 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002176 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002177#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002178 return x86::psraw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002179#else
2180 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2181#endif
John Bauman89401822014-05-06 15:04:28 -04002182 }
2183
John Bauman19bac1e2014-05-06 15:23:49 -04002184 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002185 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002186 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002187#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002188 return x86::pmaxsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002189#else
2190 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
2191#endif
John Bauman89401822014-05-06 15:04:28 -04002192 }
2193
John Bauman19bac1e2014-05-06 15:23:49 -04002194 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002195 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002196 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002197#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002198 return x86::pminsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002199#else
2200 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
2201#endif
John Bauman89401822014-05-06 15:04:28 -04002202 }
2203
John Bauman19bac1e2014-05-06 15:23:49 -04002204 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002205 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002206 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002207#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002208 return x86::paddsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002209#else
2210 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2211#endif
John Bauman89401822014-05-06 15:04:28 -04002212 }
2213
John Bauman19bac1e2014-05-06 15:23:49 -04002214 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002215 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002216 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002217#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002218 return x86::psubsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002219#else
2220 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2221#endif
John Bauman89401822014-05-06 15:04:28 -04002222 }
2223
John Bauman19bac1e2014-05-06 15:23:49 -04002224 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002225 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002226 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002227#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002228 return x86::pmulhw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002229#else
2230 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2231#endif
John Bauman89401822014-05-06 15:04:28 -04002232 }
2233
John Bauman19bac1e2014-05-06 15:23:49 -04002234 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002235 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002236 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002237#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002238 return x86::pmaddwd(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002239#else
2240 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
2241#endif
John Bauman89401822014-05-06 15:04:28 -04002242 }
2243
Nicolas Capens33438a62017-09-27 11:47:35 -04002244 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002245 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002246 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002247#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002248 auto result = x86::packsswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002249#else
2250 auto result = V(lowerPack(V(x.value), V(y.value), true));
2251#endif
Nicolas Capens01a97962017-07-28 17:30:51 -04002252 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
John Bauman89401822014-05-06 15:04:28 -04002253 }
2254
Nicolas Capens33438a62017-09-27 11:47:35 -04002255 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2256 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002257 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002258#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002259 auto result = x86::packuswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002260#else
2261 auto result = V(lowerPack(V(x.value), V(y.value), false));
2262#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002263 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
2264 }
2265
John Bauman19bac1e2014-05-06 15:23:49 -04002266 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002267 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002268 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002269#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002270 return x86::pcmpgtw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002271#else
2272 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
2273#endif
John Bauman89401822014-05-06 15:04:28 -04002274 }
2275
John Bauman19bac1e2014-05-06 15:23:49 -04002276 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002277 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002278 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002279#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002280 return x86::pcmpeqw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002281#else
2282 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
2283#endif
John Bauman89401822014-05-06 15:04:28 -04002284 }
2285
John Bauman19bac1e2014-05-06 15:23:49 -04002286 Type *Short4::getType()
John Bauman89401822014-05-06 15:04:28 -04002287 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002288 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002289 }
2290
John Bauman19bac1e2014-05-06 15:23:49 -04002291 UShort4::UShort4(RValue<Float4> cast, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04002292 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002293 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002294 if(saturate)
2295 {
Logan Chiena8385ed2018-09-26 19:22:54 +08002296#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002297 if(CPUID::supportsSSE4_1())
2298 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002299 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
Nicolas Capens33438a62017-09-27 11:47:35 -04002300 *this = As<Short4>(PackUnsigned(int4, int4));
John Bauman89401822014-05-06 15:04:28 -04002301 }
2302 else
Logan Chiena8385ed2018-09-26 19:22:54 +08002303#endif
John Bauman89401822014-05-06 15:04:28 -04002304 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002305 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
John Bauman89401822014-05-06 15:04:28 -04002306 }
2307 }
2308 else
2309 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002310 *this = Short4(Int4(cast));
John Bauman89401822014-05-06 15:04:28 -04002311 }
2312 }
2313
John Bauman19bac1e2014-05-06 15:23:49 -04002314 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002315 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002316 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002317#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002318 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2319
2320 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002321#else
2322 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
2323#endif
John Bauman89401822014-05-06 15:04:28 -04002324 }
2325
John Bauman19bac1e2014-05-06 15:23:49 -04002326 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002327 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002328 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002329#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002330 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
2331
2332 return x86::psrlw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002333#else
2334 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2335#endif
John Bauman89401822014-05-06 15:04:28 -04002336 }
2337
John Bauman19bac1e2014-05-06 15:23:49 -04002338 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002339 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002340 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002341 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002342 }
2343
John Bauman19bac1e2014-05-06 15:23:49 -04002344 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002345 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002346 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002347 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002348 }
2349
John Bauman19bac1e2014-05-06 15:23:49 -04002350 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002351 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002352 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002353#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002354 return x86::paddusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002355#else
2356 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2357#endif
John Bauman89401822014-05-06 15:04:28 -04002358 }
2359
John Bauman19bac1e2014-05-06 15:23:49 -04002360 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002361 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002362 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002363#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002364 return x86::psubusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002365#else
2366 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2367#endif
John Bauman89401822014-05-06 15:04:28 -04002368 }
2369
John Bauman19bac1e2014-05-06 15:23:49 -04002370 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002371 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002372 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002373#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002374 return x86::pmulhuw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002375#else
2376 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2377#endif
John Bauman89401822014-05-06 15:04:28 -04002378 }
2379
John Bauman19bac1e2014-05-06 15:23:49 -04002380 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002381 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002382 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002383#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002384 return x86::pavgw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002385#else
2386 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
2387#endif
John Bauman89401822014-05-06 15:04:28 -04002388 }
2389
John Bauman19bac1e2014-05-06 15:23:49 -04002390 Type *UShort4::getType()
John Bauman89401822014-05-06 15:04:28 -04002391 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002392 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002393 }
2394
John Bauman19bac1e2014-05-06 15:23:49 -04002395 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002396 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002397 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002398#if defined(__i386__) || defined(__x86_64__)
2399 return x86::psllw(lhs, rhs);
2400#else
2401 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
2402#endif
John Bauman89401822014-05-06 15:04:28 -04002403 }
2404
John Bauman19bac1e2014-05-06 15:23:49 -04002405 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002406 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002407 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002408#if defined(__i386__) || defined(__x86_64__)
2409 return x86::psraw(lhs, rhs);
2410#else
2411 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
2412#endif
John Bauman89401822014-05-06 15:04:28 -04002413 }
2414
John Bauman19bac1e2014-05-06 15:23:49 -04002415 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002416 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002417 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002418#if defined(__i386__) || defined(__x86_64__)
2419 return x86::pmaddwd(x, y);
2420#else
2421 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
2422#endif
John Bauman89401822014-05-06 15:04:28 -04002423 }
2424
John Bauman19bac1e2014-05-06 15:23:49 -04002425 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002426 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002427 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002428#if defined(__i386__) || defined(__x86_64__)
2429 return x86::pmulhw(x, y);
2430#else
2431 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2432#endif
John Bauman89401822014-05-06 15:04:28 -04002433 }
2434
John Bauman19bac1e2014-05-06 15:23:49 -04002435 Type *Short8::getType()
John Bauman89401822014-05-06 15:04:28 -04002436 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002437 return T(llvm::VectorType::get(T(Short::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002438 }
2439
John Bauman19bac1e2014-05-06 15:23:49 -04002440 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002441 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002442 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002443#if defined(__i386__) || defined(__x86_64__)
2444 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
2445#else
2446 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
2447#endif
John Bauman89401822014-05-06 15:04:28 -04002448 }
2449
John Bauman19bac1e2014-05-06 15:23:49 -04002450 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002451 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002452 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002453#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002454 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
Logan Chiene3191012018-08-24 22:01:50 +08002455#else
2456 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
2457#endif
John Bauman89401822014-05-06 15:04:28 -04002458 }
2459
John Bauman19bac1e2014-05-06 15:23:49 -04002460 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
John Bauman89401822014-05-06 15:04:28 -04002461 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002462 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense89cd582016-09-30 14:23:47 -04002463 int pshufb[16] =
2464 {
2465 select0 + 0,
2466 select0 + 1,
2467 select1 + 0,
2468 select1 + 1,
2469 select2 + 0,
2470 select2 + 1,
2471 select3 + 0,
2472 select3 + 1,
2473 select4 + 0,
2474 select4 + 1,
2475 select5 + 0,
2476 select5 + 1,
2477 select6 + 0,
2478 select6 + 1,
2479 select7 + 0,
2480 select7 + 1,
2481 };
John Bauman89401822014-05-06 15:04:28 -04002482
2483 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
Nicolas Capense89cd582016-09-30 14:23:47 -04002484 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
John Bauman89401822014-05-06 15:04:28 -04002485 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
2486
2487 return RValue<UShort8>(short8);
2488 }
2489
John Bauman19bac1e2014-05-06 15:23:49 -04002490 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04002491 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002492 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002493#if defined(__i386__) || defined(__x86_64__)
2494 return x86::pmulhuw(x, y);
2495#else
2496 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2497#endif
John Bauman89401822014-05-06 15:04:28 -04002498 }
2499
John Bauman19bac1e2014-05-06 15:23:49 -04002500 Type *UShort8::getType()
John Bauman89401822014-05-06 15:04:28 -04002501 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002502 return T(llvm::VectorType::get(T(UShort::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002503 }
2504
Nicolas Capens96d4e092016-11-18 14:22:38 -05002505 RValue<Int> operator++(Int &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002506 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002507 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002508 RValue<Int> res = val;
2509
Logan Chien191b3052018-08-31 16:57:15 +08002510 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002511 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002512
2513 return res;
2514 }
2515
Nicolas Capens96d4e092016-11-18 14:22:38 -05002516 const Int &operator++(Int &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002517 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002518 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002519 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002520 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002521
2522 return val;
2523 }
2524
Nicolas Capens96d4e092016-11-18 14:22:38 -05002525 RValue<Int> operator--(Int &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002526 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002527 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002528 RValue<Int> res = val;
2529
Logan Chien191b3052018-08-31 16:57:15 +08002530 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002531 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002532
2533 return res;
2534 }
2535
Nicolas Capens96d4e092016-11-18 14:22:38 -05002536 const Int &operator--(Int &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002537 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002538 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002539 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002540 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002541
2542 return val;
2543 }
2544
John Bauman19bac1e2014-05-06 15:23:49 -04002545 RValue<Int> RoundInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002546 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002547 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002548#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002549 return x86::cvtss2si(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002550#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002551 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002552#endif
John Bauman89401822014-05-06 15:04:28 -04002553 }
2554
John Bauman19bac1e2014-05-06 15:23:49 -04002555 Type *Int::getType()
John Bauman89401822014-05-06 15:04:28 -04002556 {
Nicolas Capensac230122016-09-20 14:30:06 -04002557 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002558 }
2559
John Bauman19bac1e2014-05-06 15:23:49 -04002560 Type *Long::getType()
John Bauman89401822014-05-06 15:04:28 -04002561 {
Nicolas Capensac230122016-09-20 14:30:06 -04002562 return T(llvm::Type::getInt64Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002563 }
2564
John Bauman19bac1e2014-05-06 15:23:49 -04002565 UInt::UInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002566 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002567 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002568 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2569 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
John Bauman89401822014-05-06 15:04:28 -04002570
Alexis Hetu764d1422016-09-28 08:44:22 -04002571 // Smallest positive value representable in UInt, but not in Int
2572 const unsigned int ustart = 0x80000000u;
2573 const float ustartf = float(ustart);
2574
2575 // If the value is negative, store 0, otherwise store the result of the conversion
2576 storeValue((~(As<Int>(cast) >> 31) &
2577 // Check if the value can be represented as an Int
2578 IfThenElse(cast >= ustartf,
2579 // If the value is too large, subtract ustart and re-add it after conversion.
2580 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2581 // Otherwise, just convert normally
2582 Int(cast))).value);
John Bauman89401822014-05-06 15:04:28 -04002583 }
2584
Nicolas Capens96d4e092016-11-18 14:22:38 -05002585 RValue<UInt> operator++(UInt &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002586 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002587 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002588 RValue<UInt> res = val;
2589
Logan Chien191b3052018-08-31 16:57:15 +08002590 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002591 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002592
2593 return res;
2594 }
2595
Nicolas Capens96d4e092016-11-18 14:22:38 -05002596 const UInt &operator++(UInt &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002597 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002598 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002599 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002600 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002601
2602 return val;
2603 }
2604
Nicolas Capens96d4e092016-11-18 14:22:38 -05002605 RValue<UInt> operator--(UInt &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002606 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002607 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002608 RValue<UInt> res = val;
2609
Logan Chien191b3052018-08-31 16:57:15 +08002610 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002611 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002612
2613 return res;
2614 }
2615
Nicolas Capens96d4e092016-11-18 14:22:38 -05002616 const UInt &operator--(UInt &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002617 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002618 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002619 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002620 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002621
2622 return val;
2623 }
2624
John Bauman19bac1e2014-05-06 15:23:49 -04002625// RValue<UInt> RoundUInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002626// {
Logan Chiene3191012018-08-24 22:01:50 +08002627//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002628// return x86::cvtss2si(val); // FIXME: Unsigned
Logan Chiene3191012018-08-24 22:01:50 +08002629//#else
2630// return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
2631//#endif
John Bauman89401822014-05-06 15:04:28 -04002632// }
2633
John Bauman19bac1e2014-05-06 15:23:49 -04002634 Type *UInt::getType()
John Bauman89401822014-05-06 15:04:28 -04002635 {
Nicolas Capensac230122016-09-20 14:30:06 -04002636 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002637 }
2638
John Bauman19bac1e2014-05-06 15:23:49 -04002639// Int2::Int2(RValue<Int> cast)
2640// {
John Bauman19bac1e2014-05-06 15:23:49 -04002641// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2642// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
John Bauman66b8ab22014-05-06 15:57:45 -04002643//
Nicolas Capense89cd582016-09-30 14:23:47 -04002644// int shuffle[2] = {0, 0};
2645// Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
John Bauman19bac1e2014-05-06 15:23:49 -04002646//
John Bauman66b8ab22014-05-06 15:57:45 -04002647// storeValue(replicate);
John Bauman19bac1e2014-05-06 15:23:49 -04002648// }
John Bauman89401822014-05-06 15:04:28 -04002649
John Bauman19bac1e2014-05-06 15:23:49 -04002650 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002651 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002652 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002653#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002654 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
2655
2656 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002657#else
2658 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
2659#endif
John Bauman89401822014-05-06 15:04:28 -04002660 }
2661
John Bauman19bac1e2014-05-06 15:23:49 -04002662 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002663 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002664 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002665#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002666 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
2667
2668 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002669#else
2670 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
2671#endif
John Bauman89401822014-05-06 15:04:28 -04002672 }
2673
John Bauman19bac1e2014-05-06 15:23:49 -04002674 Type *Int2::getType()
John Bauman89401822014-05-06 15:04:28 -04002675 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002676 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002677 }
2678
John Bauman19bac1e2014-05-06 15:23:49 -04002679 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002680 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002681 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002682#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002683 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
2684
2685 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002686#else
2687 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
2688#endif
John Bauman89401822014-05-06 15:04:28 -04002689 }
2690
John Bauman19bac1e2014-05-06 15:23:49 -04002691 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002692 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002693 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002694#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002695 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
2696
2697 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002698#else
2699 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
2700#endif
John Bauman89401822014-05-06 15:04:28 -04002701 }
2702
John Bauman19bac1e2014-05-06 15:23:49 -04002703 Type *UInt2::getType()
John Bauman89401822014-05-06 15:04:28 -04002704 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002705 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002706 }
2707
Nicolas Capenscb986762017-01-20 11:34:37 -05002708 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002709 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002710 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002711#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002712 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002713 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002714 *this = x86::pmovzxbd(As<Byte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002715 }
2716 else
Logan Chiene3191012018-08-24 22:01:50 +08002717#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002718 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002719 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Nicolas Capens01a97962017-07-28 17:30:51 -04002720 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002721 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002722
Nicolas Capense89cd582016-09-30 14:23:47 -04002723 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002724 Value *c = Nucleus::createBitCast(b, Short8::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002725 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002726
Nicolas Capens01a97962017-07-28 17:30:51 -04002727 *this = As<Int4>(d);
2728 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002729 }
2730
Nicolas Capenscb986762017-01-20 11:34:37 -05002731 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002732 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002733 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002734#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002735 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002736 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002737 *this = x86::pmovsxbd(As<SByte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002738 }
2739 else
Logan Chiene3191012018-08-24 22:01:50 +08002740#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002741 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002742 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2743 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
2744 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002745
Nicolas Capense89cd582016-09-30 14:23:47 -04002746 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002747 Value *c = Nucleus::createBitCast(b, Short8::getType());
2748 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002749
Nicolas Capens01a97962017-07-28 17:30:51 -04002750 *this = As<Int4>(d) >> 24;
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002751 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002752 }
2753
Nicolas Capenscb986762017-01-20 11:34:37 -05002754 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002755 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002756 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002757#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002758 if(CPUID::supportsSSE4_1())
2759 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002760 *this = x86::pmovsxwd(As<Short8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002761 }
2762 else
Logan Chiene3191012018-08-24 22:01:50 +08002763#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002764 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002765 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002766 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2767 *this = As<Int4>(c) >> 16;
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002768 }
2769 }
2770
Nicolas Capenscb986762017-01-20 11:34:37 -05002771 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002772 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002773 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002774#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002775 if(CPUID::supportsSSE4_1())
2776 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002777 *this = x86::pmovzxwd(As<UShort8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002778 }
2779 else
Logan Chiene3191012018-08-24 22:01:50 +08002780#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002781 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002782 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002783 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2784 *this = As<Int4>(c);
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002785 }
2786 }
2787
Nicolas Capenscb986762017-01-20 11:34:37 -05002788 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002789 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002790 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002791 Value *vector = loadValue();
2792 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2793
Nicolas Capense89cd582016-09-30 14:23:47 -04002794 int swizzle[4] = {0, 0, 0, 0};
2795 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002796
2797 storeValue(replicate);
2798 }
2799
John Bauman19bac1e2014-05-06 15:23:49 -04002800 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002801 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002802 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002803#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002804 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002805#else
2806 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
2807#endif
John Bauman89401822014-05-06 15:04:28 -04002808 }
2809
John Bauman19bac1e2014-05-06 15:23:49 -04002810 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002811 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002812 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002813#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002814 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002815#else
2816 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2817#endif
John Bauman89401822014-05-06 15:04:28 -04002818 }
2819
John Bauman19bac1e2014-05-06 15:23:49 -04002820 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2821 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002822 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002823 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002824 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2825 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2826 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002827 }
2828
2829 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2830 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002831 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002832 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2833 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2834 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
2835 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002836 }
2837
2838 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2839 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002840 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002841 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2842 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2843 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
2844 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002845 }
2846
2847 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2848 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002849 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002850 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2851 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2852 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2853 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002854 }
2855
2856 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2857 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002858 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002859 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2860 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2861 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
2862 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002863 }
2864
2865 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2866 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002867 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002868 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2869 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2870 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
2871 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002872 }
2873
2874 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2875 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002876 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002877#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002878 if(CPUID::supportsSSE4_1())
2879 {
2880 return x86::pmaxsd(x, y);
2881 }
2882 else
Logan Chiene3191012018-08-24 22:01:50 +08002883#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002884 {
2885 RValue<Int4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002886 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002887 }
2888 }
2889
2890 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2891 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002892 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002893#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002894 if(CPUID::supportsSSE4_1())
2895 {
2896 return x86::pminsd(x, y);
2897 }
2898 else
Logan Chiene3191012018-08-24 22:01:50 +08002899#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002900 {
2901 RValue<Int4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002902 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002903 }
2904 }
2905
2906 RValue<Int4> RoundInt(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002907 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002908 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002909#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002910 return x86::cvtps2dq(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002911#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002912 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002913#endif
John Bauman89401822014-05-06 15:04:28 -04002914 }
2915
Chris Forbese86b6dc2019-03-01 09:08:47 -08002916 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2917 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002918 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002919 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2920 return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2921 }
2922
2923 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2924 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002925 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002926 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2927 return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2928 }
2929
Nicolas Capens33438a62017-09-27 11:47:35 -04002930 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04002931 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002932 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002933#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002934 return x86::packssdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002935#else
2936 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
2937#endif
John Bauman89401822014-05-06 15:04:28 -04002938 }
2939
Nicolas Capens33438a62017-09-27 11:47:35 -04002940 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
2941 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002942 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002943#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002944 return x86::packusdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002945#else
2946 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
2947#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002948 }
2949
John Bauman19bac1e2014-05-06 15:23:49 -04002950 RValue<Int> SignMask(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04002951 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002952 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002953#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002954 return x86::movmskps(As<Float4>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002955#else
2956 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2957#endif
John Bauman89401822014-05-06 15:04:28 -04002958 }
2959
John Bauman19bac1e2014-05-06 15:23:49 -04002960 Type *Int4::getType()
John Bauman89401822014-05-06 15:04:28 -04002961 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002962 return T(llvm::VectorType::get(T(Int::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002963 }
2964
Nicolas Capenscb986762017-01-20 11:34:37 -05002965 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002966 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002967 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002968 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2969 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
John Bauman89401822014-05-06 15:04:28 -04002970
Alexis Hetu764d1422016-09-28 08:44:22 -04002971 // Smallest positive value representable in UInt, but not in Int
2972 const unsigned int ustart = 0x80000000u;
2973 const float ustartf = float(ustart);
2974
2975 // Check if the value can be represented as an Int
2976 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
2977 // If the value is too large, subtract ustart and re-add it after conversion.
2978 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
2979 // Otherwise, just convert normally
2980 (~uiValue & Int4(cast));
2981 // If the value is negative, store 0, otherwise store the result of the conversion
2982 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
John Bauman89401822014-05-06 15:04:28 -04002983 }
2984
Ben Clayton88816fa2019-05-15 17:08:14 +01002985 UInt4::UInt4(RValue<UInt> rhs) : XYZW(this)
2986 {
2987 RR_DEBUG_INFO_UPDATE_LOC();
2988 Value *vector = loadValue();
2989 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2990
2991 int swizzle[4] = {0, 0, 0, 0};
2992 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
2993
2994 storeValue(replicate);
2995 }
2996
John Bauman19bac1e2014-05-06 15:23:49 -04002997 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002998 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002999 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003000#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003001 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08003002#else
3003 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
3004#endif
John Bauman89401822014-05-06 15:04:28 -04003005 }
3006
John Bauman19bac1e2014-05-06 15:23:49 -04003007 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003008 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003009 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003010#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003011 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08003012#else
3013 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
3014#endif
John Bauman89401822014-05-06 15:04:28 -04003015 }
3016
John Bauman19bac1e2014-05-06 15:23:49 -04003017 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3018 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003019 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04003020 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04003021 // Restore the following line when LLVM is updated to a version where this issue is fixed.
3022 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
3023 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04003024 }
3025
3026 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3027 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003028 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003029 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
3030 }
3031
3032 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3033 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003034 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04003035 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
3036 // Restore the following line when LLVM is updated to a version where this issue is fixed.
3037 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
3038 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04003039 }
3040
3041 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3042 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003043 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003044 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
3045 }
3046
3047 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3048 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003049 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04003050 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
3051 // Restore the following line when LLVM is updated to a version where this issue is fixed.
3052 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
3053 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04003054 }
3055
3056 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3057 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003058 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003059 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
3060 }
3061
3062 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3063 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003064 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003065#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003066 if(CPUID::supportsSSE4_1())
3067 {
3068 return x86::pmaxud(x, y);
3069 }
3070 else
Logan Chiene3191012018-08-24 22:01:50 +08003071#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003072 {
3073 RValue<UInt4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003074 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04003075 }
3076 }
3077
3078 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3079 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003080 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003081#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003082 if(CPUID::supportsSSE4_1())
3083 {
3084 return x86::pminud(x, y);
3085 }
3086 else
Logan Chiene3191012018-08-24 22:01:50 +08003087#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003088 {
3089 RValue<UInt4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003090 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04003091 }
3092 }
3093
John Bauman19bac1e2014-05-06 15:23:49 -04003094 Type *UInt4::getType()
John Bauman89401822014-05-06 15:04:28 -04003095 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003096 return T(llvm::VectorType::get(T(UInt::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003097 }
3098
Alexis Hetu734e2572018-12-20 14:00:49 -05003099 Type *Half::getType()
3100 {
3101 return T(llvm::Type::getInt16Ty(*::context));
3102 }
3103
Nicolas Capens05b3d662016-02-25 23:58:33 -05003104 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003105 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003106 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003107#if defined(__i386__) || defined(__x86_64__)
3108 if(exactAtPow2)
3109 {
3110 // rcpss uses a piecewise-linear approximation which minimizes the relative error
3111 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3112 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3113 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003114 return x86::rcpss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003115#else
3116 return As<Float>(V(lowerRCP(V(x.value))));
3117#endif
John Bauman89401822014-05-06 15:04:28 -04003118 }
John Bauman66b8ab22014-05-06 15:57:45 -04003119
John Bauman19bac1e2014-05-06 15:23:49 -04003120 RValue<Float> RcpSqrt_pp(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003121 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003122 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003123#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003124 return x86::rsqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003125#else
3126 return As<Float>(V(lowerRSQRT(V(x.value))));
3127#endif
John Bauman89401822014-05-06 15:04:28 -04003128 }
3129
John Bauman19bac1e2014-05-06 15:23:49 -04003130 RValue<Float> Sqrt(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003131 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003132 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003133#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003134 return x86::sqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003135#else
3136 return As<Float>(V(lowerSQRT(V(x.value))));
3137#endif
John Bauman89401822014-05-06 15:04:28 -04003138 }
3139
John Bauman19bac1e2014-05-06 15:23:49 -04003140 RValue<Float> Round(RValue<Float> x)
3141 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003142 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003143#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003144 if(CPUID::supportsSSE4_1())
3145 {
3146 return x86::roundss(x, 0);
3147 }
3148 else
3149 {
3150 return Float4(Round(Float4(x))).x;
3151 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003152#else
3153 return RValue<Float>(V(lowerRound(V(x.value))));
3154#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003155 }
3156
3157 RValue<Float> Trunc(RValue<Float> x)
3158 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003159 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003160#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003161 if(CPUID::supportsSSE4_1())
3162 {
3163 return x86::roundss(x, 3);
3164 }
3165 else
3166 {
3167 return Float(Int(x)); // Rounded toward zero
3168 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003169#else
3170 return RValue<Float>(V(lowerTrunc(V(x.value))));
3171#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003172 }
3173
3174 RValue<Float> Frac(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003175 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003176 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003177#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003178 if(CPUID::supportsSSE4_1())
3179 {
3180 return x - x86::floorss(x);
3181 }
3182 else
3183 {
John Bauman19bac1e2014-05-06 15:23:49 -04003184 return Float4(Frac(Float4(x))).x;
John Bauman89401822014-05-06 15:04:28 -04003185 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003186#else
3187 // x - floor(x) can be 1.0 for very small negative x.
3188 // Clamp against the value just below 1.0.
3189 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
3190#endif
John Bauman89401822014-05-06 15:04:28 -04003191 }
3192
John Bauman19bac1e2014-05-06 15:23:49 -04003193 RValue<Float> Floor(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003194 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003195 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003196#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003197 if(CPUID::supportsSSE4_1())
3198 {
3199 return x86::floorss(x);
3200 }
3201 else
3202 {
3203 return Float4(Floor(Float4(x))).x;
3204 }
Logan Chien40a60052018-09-26 19:03:53 +08003205#else
3206 return RValue<Float>(V(lowerFloor(V(x.value))));
3207#endif
John Bauman89401822014-05-06 15:04:28 -04003208 }
3209
John Bauman19bac1e2014-05-06 15:23:49 -04003210 RValue<Float> Ceil(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003211 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003212 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003213#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003214 if(CPUID::supportsSSE4_1())
3215 {
3216 return x86::ceilss(x);
3217 }
3218 else
Logan Chiene3191012018-08-24 22:01:50 +08003219#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003220 {
3221 return Float4(Ceil(Float4(x))).x;
3222 }
John Bauman89401822014-05-06 15:04:28 -04003223 }
3224
John Bauman19bac1e2014-05-06 15:23:49 -04003225 Type *Float::getType()
John Bauman89401822014-05-06 15:04:28 -04003226 {
Nicolas Capensac230122016-09-20 14:30:06 -04003227 return T(llvm::Type::getFloatTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04003228 }
3229
John Bauman19bac1e2014-05-06 15:23:49 -04003230 Type *Float2::getType()
John Bauman89401822014-05-06 15:04:28 -04003231 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003232 return T(Type_v2f32);
John Bauman89401822014-05-06 15:04:28 -04003233 }
3234
Nicolas Capenscb986762017-01-20 11:34:37 -05003235 Float4::Float4(RValue<Float> rhs) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04003236 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003237 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04003238 Value *vector = loadValue();
John Bauman89401822014-05-06 15:04:28 -04003239 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3240
Nicolas Capense89cd582016-09-30 14:23:47 -04003241 int swizzle[4] = {0, 0, 0, 0};
3242 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
John Bauman89401822014-05-06 15:04:28 -04003243
John Bauman66b8ab22014-05-06 15:57:45 -04003244 storeValue(replicate);
John Bauman89401822014-05-06 15:04:28 -04003245 }
3246
John Bauman19bac1e2014-05-06 15:23:49 -04003247 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003248 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003249 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003250#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003251 return x86::maxps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003252#else
3253 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
3254#endif
John Bauman89401822014-05-06 15:04:28 -04003255 }
3256
John Bauman19bac1e2014-05-06 15:23:49 -04003257 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003258 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003259 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003260#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003261 return x86::minps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003262#else
3263 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
3264#endif
John Bauman89401822014-05-06 15:04:28 -04003265 }
3266
Nicolas Capens05b3d662016-02-25 23:58:33 -05003267 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003268 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003269 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003270#if defined(__i386__) || defined(__x86_64__)
3271 if(exactAtPow2)
3272 {
3273 // rcpps uses a piecewise-linear approximation which minimizes the relative error
3274 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3275 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3276 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003277 return x86::rcpps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003278#else
3279 return As<Float4>(V(lowerRCP(V(x.value))));
3280#endif
John Bauman89401822014-05-06 15:04:28 -04003281 }
John Bauman66b8ab22014-05-06 15:57:45 -04003282
John Bauman19bac1e2014-05-06 15:23:49 -04003283 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003284 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003285 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003286#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003287 return x86::rsqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003288#else
3289 return As<Float4>(V(lowerRSQRT(V(x.value))));
3290#endif
John Bauman89401822014-05-06 15:04:28 -04003291 }
3292
John Bauman19bac1e2014-05-06 15:23:49 -04003293 RValue<Float4> Sqrt(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003294 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003295 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003296#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003297 return x86::sqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003298#else
3299 return As<Float4>(V(lowerSQRT(V(x.value))));
3300#endif
John Bauman89401822014-05-06 15:04:28 -04003301 }
3302
John Bauman19bac1e2014-05-06 15:23:49 -04003303 RValue<Int> SignMask(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003304 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003305 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003306#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003307 return x86::movmskps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003308#else
3309 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
3310#endif
John Bauman89401822014-05-06 15:04:28 -04003311 }
3312
John Bauman19bac1e2014-05-06 15:23:49 -04003313 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003314 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003315 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003316 // return As<Int4>(x86::cmpeqps(x, y));
3317 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
3318 }
3319
John Bauman19bac1e2014-05-06 15:23:49 -04003320 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003321 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003322 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003323 // return As<Int4>(x86::cmpltps(x, y));
3324 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
3325 }
3326
John Bauman19bac1e2014-05-06 15:23:49 -04003327 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003328 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003329 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003330 // return As<Int4>(x86::cmpleps(x, y));
3331 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
3332 }
3333
John Bauman19bac1e2014-05-06 15:23:49 -04003334 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003335 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003336 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003337 // return As<Int4>(x86::cmpneqps(x, y));
3338 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
3339 }
3340
John Bauman19bac1e2014-05-06 15:23:49 -04003341 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003342 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003343 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003344 // return As<Int4>(x86::cmpnltps(x, y));
3345 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
3346 }
3347
John Bauman19bac1e2014-05-06 15:23:49 -04003348 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003349 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003350 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003351 // return As<Int4>(x86::cmpnleps(x, y));
3352 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
3353 }
3354
Ben Claytonec1aeb82019-03-04 19:33:27 +00003355 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3356 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003357 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003358 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUEQ(x.value, y.value), Int4::getType()));
3359 }
3360
3361 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3362 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003363 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003364 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULT(x.value, y.value), Int4::getType()));
3365 }
3366
3367 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3368 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003369 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003370 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULE(x.value, y.value), Int4::getType()));
3371 }
3372
3373 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3374 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003375 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003376 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUNE(x.value, y.value), Int4::getType()));
3377 }
3378
3379 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3380 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003381 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003382 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGE(x.value, y.value), Int4::getType()));
3383 }
3384
3385 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3386 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003387 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003388 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGT(x.value, y.value), Int4::getType()));
3389 }
3390
John Bauman19bac1e2014-05-06 15:23:49 -04003391 RValue<Float4> Round(RValue<Float4> x)
3392 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003393 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003394#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003395 if(CPUID::supportsSSE4_1())
3396 {
3397 return x86::roundps(x, 0);
3398 }
3399 else
3400 {
3401 return Float4(RoundInt(x));
3402 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003403#else
3404 return RValue<Float4>(V(lowerRound(V(x.value))));
3405#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003406 }
3407
3408 RValue<Float4> Trunc(RValue<Float4> x)
3409 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003410 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003411#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003412 if(CPUID::supportsSSE4_1())
3413 {
3414 return x86::roundps(x, 3);
3415 }
3416 else
3417 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003418 return Float4(Int4(x));
John Bauman19bac1e2014-05-06 15:23:49 -04003419 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003420#else
3421 return RValue<Float4>(V(lowerTrunc(V(x.value))));
3422#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003423 }
3424
3425 RValue<Float4> Frac(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003426 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003427 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb9230422017-07-17 10:27:33 -04003428 Float4 frc;
3429
Logan Chien40a60052018-09-26 19:03:53 +08003430#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003431 if(CPUID::supportsSSE4_1())
3432 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003433 frc = x - Floor(x);
John Bauman89401822014-05-06 15:04:28 -04003434 }
3435 else
3436 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003437 frc = x - Float4(Int4(x)); // Signed fractional part.
John Bauman89401822014-05-06 15:04:28 -04003438
Nicolas Capensb9230422017-07-17 10:27:33 -04003439 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
John Bauman89401822014-05-06 15:04:28 -04003440 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003441#else
3442 frc = x - Floor(x);
3443#endif
Nicolas Capensb9230422017-07-17 10:27:33 -04003444
3445 // x - floor(x) can be 1.0 for very small negative x.
3446 // Clamp against the value just below 1.0.
3447 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
John Bauman89401822014-05-06 15:04:28 -04003448 }
3449
John Bauman19bac1e2014-05-06 15:23:49 -04003450 RValue<Float4> Floor(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003451 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003452 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003453#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003454 if(CPUID::supportsSSE4_1())
3455 {
3456 return x86::floorps(x);
3457 }
3458 else
3459 {
John Bauman19bac1e2014-05-06 15:23:49 -04003460 return x - Frac(x);
John Bauman89401822014-05-06 15:04:28 -04003461 }
Logan Chien40a60052018-09-26 19:03:53 +08003462#else
3463 return RValue<Float4>(V(lowerFloor(V(x.value))));
3464#endif
John Bauman89401822014-05-06 15:04:28 -04003465 }
3466
John Bauman19bac1e2014-05-06 15:23:49 -04003467 RValue<Float4> Ceil(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003468 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003469 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003470#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003471 if(CPUID::supportsSSE4_1())
3472 {
3473 return x86::ceilps(x);
3474 }
3475 else
Logan Chiene3191012018-08-24 22:01:50 +08003476#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003477 {
3478 return -Floor(-x);
3479 }
John Bauman89401822014-05-06 15:04:28 -04003480 }
3481
Ben Claytona2c8b772019-04-09 13:42:36 -04003482 RValue<Float4> Sin(RValue<Float4> v)
3483 {
3484 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sin, { V(v.value)->getType() } );
3485 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3486 }
3487
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003488 RValue<Float4> Cos(RValue<Float4> v)
3489 {
3490 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cos, { V(v.value)->getType() } );
3491 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3492 }
3493
Ben Clayton14740062019-04-09 13:48:41 -04003494 RValue<Float4> Tan(RValue<Float4> v)
3495 {
3496 return Sin(v) / Cos(v);
3497 }
3498
Ben Claytoneafae472019-04-09 14:22:38 -04003499 static RValue<Float4> TransformFloat4PerElement(RValue<Float4> v, const char* name)
Ben Claytonf9350d72019-04-09 14:19:02 -04003500 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003501 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), ::llvm::ArrayRef<llvm::Type*>(T(Float::getType())), false);
Ben Claytoneafae472019-04-09 14:22:38 -04003502 auto func = ::module->getOrInsertFunction(name, funcTy);
Ben Claytonf9350d72019-04-09 14:19:02 -04003503 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3504 for (uint64_t i = 0; i < 4; i++)
3505 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003506 auto el = ::builder->CreateCall(func, V(Nucleus::createExtractElement(v.value, Float::getType(), i)));
3507 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytonf9350d72019-04-09 14:19:02 -04003508 }
3509 return RValue<Float4>(V(out));
3510 }
3511
Ben Claytoneafae472019-04-09 14:22:38 -04003512 RValue<Float4> Asin(RValue<Float4> v)
3513 {
3514 return TransformFloat4PerElement(v, "asinf");
3515 }
3516
3517 RValue<Float4> Acos(RValue<Float4> v)
3518 {
3519 return TransformFloat4PerElement(v, "acosf");
3520 }
3521
Ben Clayton749b4e02019-04-09 14:27:43 -04003522 RValue<Float4> Atan(RValue<Float4> v)
3523 {
3524 return TransformFloat4PerElement(v, "atanf");
3525 }
3526
Ben Claytond9636972019-04-09 15:09:54 -04003527 RValue<Float4> Sinh(RValue<Float4> v)
3528 {
3529 return TransformFloat4PerElement(v, "sinhf");
3530 }
3531
Ben Clayton900ea2c2019-04-09 15:25:36 -04003532 RValue<Float4> Cosh(RValue<Float4> v)
3533 {
3534 return TransformFloat4PerElement(v, "coshf");
3535 }
3536
Ben Clayton3928bd92019-04-09 15:27:41 -04003537 RValue<Float4> Tanh(RValue<Float4> v)
3538 {
3539 return TransformFloat4PerElement(v, "tanhf");
3540 }
3541
Ben Claytonf6d77ab2019-04-09 15:30:04 -04003542 RValue<Float4> Asinh(RValue<Float4> v)
3543 {
3544 return TransformFloat4PerElement(v, "asinhf");
3545 }
3546
Ben Clayton28ebcb02019-04-09 15:33:38 -04003547 RValue<Float4> Acosh(RValue<Float4> v)
3548 {
3549 return TransformFloat4PerElement(v, "acoshf");
3550 }
3551
Ben Claytonfa6a5392019-04-09 15:35:24 -04003552 RValue<Float4> Atanh(RValue<Float4> v)
3553 {
3554 return TransformFloat4PerElement(v, "atanhf");
3555 }
3556
Ben Claytona520c3e2019-04-09 15:43:45 -04003557 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3558 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003559 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3560 paramTys.push_back(T(Float::getType()));
3561 paramTys.push_back(T(Float::getType()));
3562 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), paramTys, false);
Ben Claytona520c3e2019-04-09 15:43:45 -04003563 auto func = ::module->getOrInsertFunction("atan2f", funcTy);
3564 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3565 for (uint64_t i = 0; i < 4; i++)
3566 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003567 auto el = ::builder->CreateCall2(func, ARGS(
3568 V(Nucleus::createExtractElement(x.value, Float::getType(), i)),
3569 V(Nucleus::createExtractElement(y.value, Float::getType(), i))
3570 ));
3571 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytona520c3e2019-04-09 15:43:45 -04003572 }
3573 return RValue<Float4>(V(out));
3574 }
3575
Ben Claytonbfe94f02019-04-09 15:52:12 -04003576 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3577 {
Ben Clayton7579db12019-05-02 08:37:12 +01003578 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::pow, { T(Float4::getType()) });
Ben Claytonc38fc122019-04-11 08:58:49 -04003579 return RValue<Float4>(V(::builder->CreateCall2(func, ARGS(V(x.value), V(y.value)))));
Ben Claytonbfe94f02019-04-09 15:52:12 -04003580 }
3581
Ben Clayton242f0022019-04-09 16:00:53 -04003582 RValue<Float4> Exp(RValue<Float4> v)
3583 {
3584 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003585 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton242f0022019-04-09 16:00:53 -04003586 }
3587
Ben Clayton2c1da722019-04-09 16:03:03 -04003588 RValue<Float4> Log(RValue<Float4> v)
3589 {
3590 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003591 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton2c1da722019-04-09 16:03:03 -04003592 }
3593
Ben Claytonf40b56c2019-04-09 16:06:55 -04003594 RValue<Float4> Exp2(RValue<Float4> v)
3595 {
3596 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003597 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytonf40b56c2019-04-09 16:06:55 -04003598 }
3599
Ben Claytone17acfe2019-04-09 16:09:13 -04003600 RValue<Float4> Log2(RValue<Float4> v)
3601 {
3602 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003603 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytone17acfe2019-04-09 16:09:13 -04003604 }
3605
Ben Clayton60958262019-04-10 14:53:30 -04003606 RValue<UInt4> Ctlz(RValue<UInt4> v, bool isZeroUndef)
3607 {
Ben Clayton7579db12019-05-02 08:37:12 +01003608 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::ctlz, { T(UInt4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003609 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton60958262019-04-10 14:53:30 -04003610 V(v.value),
3611 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003612 ))));
Ben Clayton60958262019-04-10 14:53:30 -04003613 }
3614
Ben Clayton3f007c42019-04-10 14:54:23 -04003615 RValue<UInt4> Cttz(RValue<UInt4> v, bool isZeroUndef)
3616 {
Ben Clayton7579db12019-05-02 08:37:12 +01003617 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cttz, { T(UInt4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003618 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton3f007c42019-04-10 14:54:23 -04003619 V(v.value),
3620 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003621 ))));
Ben Clayton3f007c42019-04-10 14:54:23 -04003622 }
3623
John Bauman19bac1e2014-05-06 15:23:49 -04003624 Type *Float4::getType()
John Bauman89401822014-05-06 15:04:28 -04003625 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003626 return T(llvm::VectorType::get(T(Float::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003627 }
3628
John Bauman89401822014-05-06 15:04:28 -04003629 RValue<Long> Ticks()
3630 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003631 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003632 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
John Bauman89401822014-05-06 15:04:28 -04003633
Nicolas Capens2ab69ee2016-09-26 11:45:17 -04003634 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
John Bauman89401822014-05-06 15:04:28 -04003635 }
Ben Claytond853c122019-04-16 17:51:49 -04003636
3637 RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3638 {
3639 // Note: this should work for 32-bit pointers as well because 'inttoptr'
3640 // is defined to truncate (and zero extend) if necessary.
3641 auto ptrAsInt = ::llvm::ConstantInt::get(::llvm::Type::getInt64Ty(*::context), reinterpret_cast<uintptr_t>(ptr));
3642 return RValue<Pointer<Byte>>(V(::builder->CreateIntToPtr(ptrAsInt, T(Pointer<Byte>::getType()))));
3643 }
3644
3645 Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
3646 {
3647 ::llvm::SmallVector<::llvm::Type*, 8> paramTys;
3648 for (auto ty : argTys) { paramTys.push_back(T(ty)); }
3649 auto funcTy = ::llvm::FunctionType::get(T(retTy), paramTys, false);
3650
3651 auto funcPtrTy = funcTy->getPointerTo();
3652 auto funcPtr = ::builder->CreatePointerCast(V(fptr.value), funcPtrTy);
3653
3654 ::llvm::SmallVector<::llvm::Value*, 8> arguments;
3655 for (auto arg : args) { arguments.push_back(V(arg)); }
3656 return V(::builder->CreateCall(funcPtr, arguments));
3657 }
John Bauman89401822014-05-06 15:04:28 -04003658}
3659
Nicolas Capens48461502018-08-06 14:20:45 -04003660namespace rr
John Bauman89401822014-05-06 15:04:28 -04003661{
Logan Chiene3191012018-08-24 22:01:50 +08003662#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003663 namespace x86
3664 {
John Bauman19bac1e2014-05-06 15:23:49 -04003665 RValue<Int> cvtss2si(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003666 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003667 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
John Bauman66b8ab22014-05-06 15:57:45 -04003668
John Bauman89401822014-05-06 15:04:28 -04003669 Float4 vector;
3670 vector.x = val;
3671
Logan Chien813d5032018-08-31 17:19:45 +08003672 return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
John Bauman89401822014-05-06 15:04:28 -04003673 }
3674
John Bauman19bac1e2014-05-06 15:23:49 -04003675 RValue<Int4> cvtps2dq(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003676 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003677 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
John Bauman89401822014-05-06 15:04:28 -04003678
Logan Chien813d5032018-08-31 17:19:45 +08003679 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003680 }
3681
John Bauman19bac1e2014-05-06 15:23:49 -04003682 RValue<Float> rcpss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003683 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003684 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
John Bauman89401822014-05-06 15:04:28 -04003685
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003686 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003687
Logan Chien813d5032018-08-31 17:19:45 +08003688 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003689 }
3690
John Bauman19bac1e2014-05-06 15:23:49 -04003691 RValue<Float> sqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003692 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003693 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3694 return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003695 }
3696
John Bauman19bac1e2014-05-06 15:23:49 -04003697 RValue<Float> rsqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003698 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003699 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
John Bauman66b8ab22014-05-06 15:57:45 -04003700
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003701 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman89401822014-05-06 15:04:28 -04003702
Logan Chien813d5032018-08-31 17:19:45 +08003703 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003704 }
3705
John Bauman19bac1e2014-05-06 15:23:49 -04003706 RValue<Float4> rcpps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003707 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003708 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003709
Logan Chien813d5032018-08-31 17:19:45 +08003710 return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003711 }
3712
John Bauman19bac1e2014-05-06 15:23:49 -04003713 RValue<Float4> sqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003714 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003715 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
John Bauman66b8ab22014-05-06 15:57:45 -04003716
Logan Chien813d5032018-08-31 17:19:45 +08003717 return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003718 }
3719
John Bauman19bac1e2014-05-06 15:23:49 -04003720 RValue<Float4> rsqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003721 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003722 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003723
Logan Chien813d5032018-08-31 17:19:45 +08003724 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003725 }
3726
John Bauman19bac1e2014-05-06 15:23:49 -04003727 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003728 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003729 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
John Bauman89401822014-05-06 15:04:28 -04003730
Logan Chien813d5032018-08-31 17:19:45 +08003731 return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003732 }
3733
John Bauman19bac1e2014-05-06 15:23:49 -04003734 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003735 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003736 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
John Bauman89401822014-05-06 15:04:28 -04003737
Logan Chien813d5032018-08-31 17:19:45 +08003738 return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003739 }
3740
John Bauman19bac1e2014-05-06 15:23:49 -04003741 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003742 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003743 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
John Bauman89401822014-05-06 15:04:28 -04003744
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003745 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
John Bauman89401822014-05-06 15:04:28 -04003746 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
3747
Logan Chien813d5032018-08-31 17:19:45 +08003748 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003749 }
3750
John Bauman19bac1e2014-05-06 15:23:49 -04003751 RValue<Float> floorss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003752 {
3753 return roundss(val, 1);
3754 }
3755
John Bauman19bac1e2014-05-06 15:23:49 -04003756 RValue<Float> ceilss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003757 {
3758 return roundss(val, 2);
3759 }
3760
John Bauman19bac1e2014-05-06 15:23:49 -04003761 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003762 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003763 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
John Bauman89401822014-05-06 15:04:28 -04003764
Logan Chien813d5032018-08-31 17:19:45 +08003765 return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
John Bauman89401822014-05-06 15:04:28 -04003766 }
3767
John Bauman19bac1e2014-05-06 15:23:49 -04003768 RValue<Float4> floorps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003769 {
3770 return roundps(val, 1);
3771 }
3772
John Bauman19bac1e2014-05-06 15:23:49 -04003773 RValue<Float4> ceilps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003774 {
3775 return roundps(val, 2);
3776 }
3777
Alexis Hetu0f448072016-03-18 10:56:08 -04003778 RValue<Int4> pabsd(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04003779 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003780 return RValue<Int4>(V(lowerPABS(V(x.value))));
John Bauman89401822014-05-06 15:04:28 -04003781 }
3782
John Bauman19bac1e2014-05-06 15:23:49 -04003783 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003784 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003785 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
John Bauman89401822014-05-06 15:04:28 -04003786
Logan Chien813d5032018-08-31 17:19:45 +08003787 return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003788 }
John Bauman66b8ab22014-05-06 15:57:45 -04003789
John Bauman19bac1e2014-05-06 15:23:49 -04003790 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003791 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003792 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
John Bauman89401822014-05-06 15:04:28 -04003793
Logan Chien813d5032018-08-31 17:19:45 +08003794 return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003795 }
3796
John Bauman19bac1e2014-05-06 15:23:49 -04003797 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003798 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003799 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
John Bauman89401822014-05-06 15:04:28 -04003800
Logan Chien813d5032018-08-31 17:19:45 +08003801 return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003802 }
John Bauman66b8ab22014-05-06 15:57:45 -04003803
John Bauman19bac1e2014-05-06 15:23:49 -04003804 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003805 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003806 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
John Bauman89401822014-05-06 15:04:28 -04003807
Logan Chien813d5032018-08-31 17:19:45 +08003808 return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003809 }
3810
John Bauman19bac1e2014-05-06 15:23:49 -04003811 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003812 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003813 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
John Bauman89401822014-05-06 15:04:28 -04003814
Logan Chien813d5032018-08-31 17:19:45 +08003815 return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003816 }
John Bauman66b8ab22014-05-06 15:57:45 -04003817
John Bauman19bac1e2014-05-06 15:23:49 -04003818 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003819 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003820 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
John Bauman89401822014-05-06 15:04:28 -04003821
Logan Chien813d5032018-08-31 17:19:45 +08003822 return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003823 }
John Bauman66b8ab22014-05-06 15:57:45 -04003824
John Bauman19bac1e2014-05-06 15:23:49 -04003825 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003826 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003827 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
John Bauman89401822014-05-06 15:04:28 -04003828
Logan Chien813d5032018-08-31 17:19:45 +08003829 return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003830 }
John Bauman66b8ab22014-05-06 15:57:45 -04003831
John Bauman19bac1e2014-05-06 15:23:49 -04003832 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003833 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003834 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
John Bauman89401822014-05-06 15:04:28 -04003835
Logan Chien813d5032018-08-31 17:19:45 +08003836 return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
John Bauman19bac1e2014-05-06 15:23:49 -04003837 }
3838
3839 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003840 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003841 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
John Bauman89401822014-05-06 15:04:28 -04003842 }
3843
John Bauman19bac1e2014-05-06 15:23:49 -04003844 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003845 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003846 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman89401822014-05-06 15:04:28 -04003847 }
3848
John Bauman19bac1e2014-05-06 15:23:49 -04003849 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003850 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003851 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman89401822014-05-06 15:04:28 -04003852 }
3853
John Bauman19bac1e2014-05-06 15:23:49 -04003854 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003855 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003856 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003857 }
3858
John Bauman19bac1e2014-05-06 15:23:49 -04003859 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003860 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003861 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003862 }
3863
John Bauman19bac1e2014-05-06 15:23:49 -04003864 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003865 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003866 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003867 }
3868
John Bauman19bac1e2014-05-06 15:23:49 -04003869 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003870 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003871 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003872 }
3873
John Bauman19bac1e2014-05-06 15:23:49 -04003874 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
John Bauman89401822014-05-06 15:04:28 -04003875 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003876 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003877
Logan Chien813d5032018-08-31 17:19:45 +08003878 return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003879 }
3880
John Bauman19bac1e2014-05-06 15:23:49 -04003881 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003882 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003883 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003884
Logan Chien813d5032018-08-31 17:19:45 +08003885 return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003886 }
3887
John Bauman19bac1e2014-05-06 15:23:49 -04003888 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003889 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003890 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
John Bauman89401822014-05-06 15:04:28 -04003891
Logan Chien813d5032018-08-31 17:19:45 +08003892 return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003893 }
3894
Nicolas Capens33438a62017-09-27 11:47:35 -04003895 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003896 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003897 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
John Bauman89401822014-05-06 15:04:28 -04003898
Logan Chien813d5032018-08-31 17:19:45 +08003899 return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003900 }
3901
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003902 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003903 {
3904 if(CPUID::supportsSSE4_1())
3905 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003906 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
John Bauman66b8ab22014-05-06 15:57:45 -04003907
Logan Chien813d5032018-08-31 17:19:45 +08003908 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003909 }
3910 else
3911 {
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003912 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
3913 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
3914
3915 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
John Bauman89401822014-05-06 15:04:28 -04003916 }
3917 }
3918
John Bauman19bac1e2014-05-06 15:23:49 -04003919 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003920 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003921 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003922
Logan Chien813d5032018-08-31 17:19:45 +08003923 return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003924 }
3925
John Bauman19bac1e2014-05-06 15:23:49 -04003926 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003927 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003928 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003929
Logan Chien813d5032018-08-31 17:19:45 +08003930 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003931 }
3932
John Bauman19bac1e2014-05-06 15:23:49 -04003933 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003934 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003935 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003936
Logan Chien813d5032018-08-31 17:19:45 +08003937 return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003938 }
3939
John Bauman19bac1e2014-05-06 15:23:49 -04003940 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003941 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003942 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003943
Logan Chien813d5032018-08-31 17:19:45 +08003944 return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003945 }
3946
John Bauman19bac1e2014-05-06 15:23:49 -04003947 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003948 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003949 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003950
Logan Chien813d5032018-08-31 17:19:45 +08003951 return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003952 }
3953
John Bauman19bac1e2014-05-06 15:23:49 -04003954 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003955 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003956 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003957
Logan Chien813d5032018-08-31 17:19:45 +08003958 return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003959 }
3960
John Bauman19bac1e2014-05-06 15:23:49 -04003961 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003962 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003963 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003964
Logan Chien813d5032018-08-31 17:19:45 +08003965 return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003966 }
3967
John Bauman19bac1e2014-05-06 15:23:49 -04003968 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003969 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003970 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003971
Logan Chien813d5032018-08-31 17:19:45 +08003972 return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003973 }
3974
John Bauman19bac1e2014-05-06 15:23:49 -04003975 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003976 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003977 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003978
Logan Chien813d5032018-08-31 17:19:45 +08003979 return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003980 }
3981
John Bauman19bac1e2014-05-06 15:23:49 -04003982 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003983 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003984 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003985
Logan Chien813d5032018-08-31 17:19:45 +08003986 return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003987 }
3988
John Bauman19bac1e2014-05-06 15:23:49 -04003989 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003990 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003991 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003992
Logan Chien813d5032018-08-31 17:19:45 +08003993 return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003994 }
3995
John Bauman19bac1e2014-05-06 15:23:49 -04003996 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003997 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003998 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003999
Logan Chien813d5032018-08-31 17:19:45 +08004000 return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004001 }
4002
John Bauman19bac1e2014-05-06 15:23:49 -04004003 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
4004 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004005 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004006 }
4007
4008 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
4009 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004010 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004011 }
4012
4013 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
4014 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004015 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004016 }
4017
4018 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
4019 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004020 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004021 }
4022
4023 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004024 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004025 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04004026
Logan Chien813d5032018-08-31 17:19:45 +08004027 return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004028 }
4029
John Bauman19bac1e2014-05-06 15:23:49 -04004030 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004031 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004032 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004033
Logan Chien813d5032018-08-31 17:19:45 +08004034 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004035 }
4036
John Bauman19bac1e2014-05-06 15:23:49 -04004037 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004038 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004039 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004040
Logan Chien813d5032018-08-31 17:19:45 +08004041 return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004042 }
4043
John Bauman19bac1e2014-05-06 15:23:49 -04004044 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004045 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004046 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04004047
Logan Chien813d5032018-08-31 17:19:45 +08004048 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004049 }
4050
John Bauman19bac1e2014-05-06 15:23:49 -04004051 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04004052 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004053 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004054
Logan Chien813d5032018-08-31 17:19:45 +08004055 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004056 }
4057
John Bauman19bac1e2014-05-06 15:23:49 -04004058 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004059 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004060 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004061
Logan Chien813d5032018-08-31 17:19:45 +08004062 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004063 }
4064
John Bauman19bac1e2014-05-06 15:23:49 -04004065 RValue<Int> movmskps(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04004066 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004067 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
John Bauman89401822014-05-06 15:04:28 -04004068
Logan Chien813d5032018-08-31 17:19:45 +08004069 return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
John Bauman89401822014-05-06 15:04:28 -04004070 }
4071
John Bauman19bac1e2014-05-06 15:23:49 -04004072 RValue<Int> pmovmskb(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04004073 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004074 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
John Bauman89401822014-05-06 15:04:28 -04004075
Logan Chien813d5032018-08-31 17:19:45 +08004076 return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
John Bauman89401822014-05-06 15:04:28 -04004077 }
4078
Nicolas Capens01a97962017-07-28 17:30:51 -04004079 RValue<Int4> pmovzxbd(RValue<Byte16> x)
John Bauman89401822014-05-06 15:04:28 -04004080 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004081 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004082 }
4083
Nicolas Capens01a97962017-07-28 17:30:51 -04004084 RValue<Int4> pmovsxbd(RValue<SByte16> x)
John Bauman89401822014-05-06 15:04:28 -04004085 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004086 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004087 }
4088
Nicolas Capens01a97962017-07-28 17:30:51 -04004089 RValue<Int4> pmovzxwd(RValue<UShort8> x)
John Bauman89401822014-05-06 15:04:28 -04004090 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004091 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004092 }
4093
Nicolas Capens01a97962017-07-28 17:30:51 -04004094 RValue<Int4> pmovsxwd(RValue<Short8> x)
John Bauman89401822014-05-06 15:04:28 -04004095 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004096 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004097 }
4098 }
Logan Chiene3191012018-08-24 22:01:50 +08004099#endif // defined(__i386__) || defined(__x86_64__)
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004100
Ben Clayton60a3d6f2019-02-26 17:24:46 +00004101#ifdef ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004102 // extractAll returns a vector containing the extracted n scalar value of
4103 // the vector vec.
4104 static std::vector<Value*> extractAll(Value* vec, int n)
4105 {
4106 std::vector<Value*> elements;
4107 elements.reserve(n);
4108 for (int i = 0; i < n; i++)
4109 {
4110 auto el = V(::builder->CreateExtractElement(V(vec), i));
4111 elements.push_back(el);
4112 }
4113 return elements;
4114 }
4115
Ben Claytonca8e3d72019-05-14 16:51:05 +01004116 // toInt returns all the integer values in vals extended to a native width
4117 // integer.
4118 static std::vector<Value*> toInt(const std::vector<Value*>& vals, bool isSigned)
4119 {
4120 auto intTy = ::llvm::Type::getIntNTy(*::context, sizeof(int) * 8); // Natural integer width.
4121 std::vector<Value*> elements;
4122 elements.reserve(vals.size());
4123 for (auto v : vals)
4124 {
4125 if (isSigned)
4126 {
4127 elements.push_back(V(::builder->CreateSExt(V(v), intTy)));
4128 }
4129 else
4130 {
4131 elements.push_back(V(::builder->CreateZExt(V(v), intTy)));
4132 }
4133 }
4134 return elements;
4135 }
4136
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004137 // toDouble returns all the float values in vals extended to doubles.
4138 static std::vector<Value*> toDouble(const std::vector<Value*>& vals)
4139 {
4140 auto doubleTy = ::llvm::Type::getDoubleTy(*::context);
4141 std::vector<Value*> elements;
4142 elements.reserve(vals.size());
4143 for (auto v : vals)
4144 {
4145 elements.push_back(V(::builder->CreateFPExt(V(v), doubleTy)));
4146 }
4147 return elements;
4148 }
4149
Ben Claytonca8e3d72019-05-14 16:51:05 +01004150 std::vector<Value*> PrintValue::Ty<Byte4>::val(const RValue<Byte4>& v) { return toInt(extractAll(v.value, 4), false); }
4151 std::vector<Value*> PrintValue::Ty<Int>::val(const RValue<Int>& v) { return toInt({v.value}, true); }
4152 std::vector<Value*> PrintValue::Ty<Int2>::val(const RValue<Int2>& v) { return toInt(extractAll(v.value, 2), true); }
4153 std::vector<Value*> PrintValue::Ty<Int4>::val(const RValue<Int4>& v) { return toInt(extractAll(v.value, 4), true); }
4154 std::vector<Value*> PrintValue::Ty<UInt>::val(const RValue<UInt>& v) { return toInt({v.value}, false); }
4155 std::vector<Value*> PrintValue::Ty<UInt2>::val(const RValue<UInt2>& v) { return toInt(extractAll(v.value, 2), false); }
4156 std::vector<Value*> PrintValue::Ty<UInt4>::val(const RValue<UInt4>& v) { return toInt(extractAll(v.value, 4), false); }
4157 std::vector<Value*> PrintValue::Ty<Short4>::val(const RValue<Short4>& v) { return toInt(extractAll(v.value, 4), true); }
4158 std::vector<Value*> PrintValue::Ty<UShort4>::val(const RValue<UShort4>& v) { return toInt(extractAll(v.value, 4), false); }
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004159 std::vector<Value*> PrintValue::Ty<Float>::val(const RValue<Float>& v) { return toDouble({v.value}); }
4160 std::vector<Value*> PrintValue::Ty<Float4>::val(const RValue<Float4>& v) { return toDouble(extractAll(v.value, 4)); }
Ben Claytonbc0cbb92019-05-15 17:12:57 +01004161 std::vector<Value*> PrintValue::Ty<const char*>::val(const char* v) { return {V(::builder->CreateGlobalStringPtr(v))}; }
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004162
4163 void Printv(const char* function, const char* file, int line, const char* fmt, std::initializer_list<PrintValue> args)
4164 {
4165 // LLVM types used below.
4166 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
Ben Claytonca8e3d72019-05-14 16:51:05 +01004167 auto intTy = ::llvm::Type::getIntNTy(*::context, sizeof(int) * 8); // Natural integer width.
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004168 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
4169 auto funcTy = ::llvm::FunctionType::get(i32Ty, {i8PtrTy}, true);
4170
4171 auto func = ::module->getOrInsertFunction("printf", funcTy);
4172
4173 // Build the printf format message string.
4174 std::string str;
4175 if (file != nullptr) { str += (line > 0) ? "%s:%d " : "%s "; }
4176 if (function != nullptr) { str += "%s "; }
4177 str += fmt;
4178
4179 // Perform subsitution on all '{n}' bracketed indices in the format
4180 // message.
4181 int i = 0;
4182 for (const PrintValue& arg : args)
4183 {
4184 str = replace(str, "{" + std::to_string(i++) + "}", arg.format);
4185 }
4186
4187 ::llvm::SmallVector<::llvm::Value*, 8> vals;
4188
4189 // The format message is always the first argument.
4190 vals.push_back(::builder->CreateGlobalStringPtr(str));
4191
4192 // Add optional file, line and function info if provided.
4193 if (file != nullptr)
4194 {
4195 vals.push_back(::builder->CreateGlobalStringPtr(file));
4196 if (line > 0)
4197 {
4198 vals.push_back(::llvm::ConstantInt::get(intTy, line));
4199 }
4200 }
4201 if (function != nullptr)
4202 {
4203 vals.push_back(::builder->CreateGlobalStringPtr(function));
4204 }
4205
4206 // Add all format arguments.
4207 for (const PrintValue& arg : args)
4208 {
4209 for (auto val : arg.values)
4210 {
4211 vals.push_back(V(val));
4212 }
4213 }
4214
4215 ::builder->CreateCall(func, vals);
4216 }
4217#endif // ENABLE_RR_PRINT
4218
Ben Claytonac07ed82019-03-26 14:17:41 +00004219 void Break()
4220 {
4221 auto trap = ::llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::trap);
4222 builder->CreateCall(trap);
4223 }
4224
4225 void Nop()
4226 {
4227 auto voidTy = ::llvm::Type::getVoidTy(*context);
4228 auto funcTy = ::llvm::FunctionType::get(voidTy, {}, false);
4229 auto func = ::module->getOrInsertFunction("nop", funcTy);
4230 builder->CreateCall(func);
4231 }
4232
4233 void EmitDebugLocation()
4234 {
4235#ifdef ENABLE_RR_DEBUG_INFO
4236 if (debugInfo != nullptr)
4237 {
4238 debugInfo->EmitLocation();
4239 }
4240#endif // ENABLE_RR_DEBUG_INFO
4241 }
4242
4243 void EmitDebugVariable(Value* value)
4244 {
4245#ifdef ENABLE_RR_DEBUG_INFO
4246 if (debugInfo != nullptr)
4247 {
4248 debugInfo->EmitVariable(value);
4249 }
4250#endif // ENABLE_RR_DEBUG_INFO
4251 }
4252
4253 void FlushDebug()
4254 {
4255#ifdef ENABLE_RR_DEBUG_INFO
4256 if (debugInfo != nullptr)
4257 {
4258 debugInfo->Flush();
4259 }
4260#endif // ENABLE_RR_DEBUG_INFO
4261 }
4262
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004263} // namespace rr
4264
4265// ------------------------------ Coroutines ------------------------------
4266
namespace {

	// CoroutineState holds the LLVM objects shared between
	// Nucleus::createCoroutine(), Nucleus::yield() and
	// Nucleus::acquireCoroutine() while a coroutine is being built.
	struct CoroutineState
	{
		llvm::Function *await = nullptr;           // The generated coroutine_await() function.
		llvm::Function *destroy = nullptr;         // The generated coroutine_destroy() function.
		llvm::Value *handle = nullptr;             // Result of llvm.coro.begin.
		llvm::Value *id = nullptr;                 // Result of llvm.coro.id. Non-null only while a coroutine is being built.
		llvm::Value *promise = nullptr;            // Stack slot holding the most recently yielded value.
		llvm::BasicBlock *suspendBlock = nullptr;  // Calls llvm.coro.end and returns the handle.
		llvm::BasicBlock *endBlock = nullptr;      // Performs the final suspend.
		llvm::BasicBlock *destroyBlock = nullptr;  // Frees the coroutine frame, then branches to suspendBlock.
	};
	// State of the coroutine currently being built. Reset to its defaults by
	// Nucleus::acquireCoroutine().
	CoroutineState coroutine;

	// Magic values returned by llvm.coro.suspend.
	// See: https://llvm.org/docs/Coroutines.html#llvm-coro-suspend-intrinsic
	enum SuspendAction
	{
		SuspendActionSuspend = -1,
		SuspendActionResume = 0,
		SuspendActionDestroy = 1
	};

} // anonymous namespace
4292
4293namespace rr {
4294
4295void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params)
4296{
4297 // Types
4298 auto voidTy = ::llvm::Type::getVoidTy(*::context);
4299 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
4300 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
4301 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
4302 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
4303 auto promiseTy = T(YieldType);
4304 auto promisePtrTy = promiseTy->getPointerTo();
4305 auto handleTy = i8PtrTy;
4306 auto boolTy = i1Ty;
4307
4308 // LLVM intrinsics
4309 auto coro_id = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_id);
4310 auto coro_size = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_size, {i32Ty});
4311 auto coro_begin = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_begin);
4312 auto coro_resume = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_resume);
4313 auto coro_end = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_end);
4314 auto coro_free = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_free);
4315 auto coro_destroy = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_destroy);
4316 auto coro_promise = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_promise);
4317 auto coro_done = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_done);
4318 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_suspend);
4319
4320 auto allocFrameTy = ::llvm::FunctionType::get(i8PtrTy, {i32Ty}, false);
4321 auto allocFrame = ::module->getOrInsertFunction("coroutine_alloc_frame", allocFrameTy);
4322 auto freeFrameTy = ::llvm::FunctionType::get(voidTy, {i8PtrTy}, false);
4323 auto freeFrame = ::module->getOrInsertFunction("coroutine_free_frame", freeFrameTy);
4324
4325 // Build the coroutine_await() function:
4326 //
4327 // bool coroutine_await(CoroutineHandle* handle, YieldType* out)
4328 // {
4329 // if (llvm.coro.done(handle))
4330 // {
4331 // return false;
4332 // }
4333 // else
4334 // {
4335 // *value = (T*)llvm.coro.promise(handle);
4336 // llvm.coro.resume(handle);
4337 // return true;
4338 // }
4339 // }
4340 //
4341 llvm::FunctionType *coroutineAwaitTy = llvm::FunctionType::get(boolTy, {handleTy, promisePtrTy}, false);
4342 ::coroutine.await = llvm::Function::Create(coroutineAwaitTy, llvm::GlobalValue::InternalLinkage, "coroutine_await", ::module);
4343 ::coroutine.await->setCallingConv(llvm::CallingConv::C);
4344 {
4345 auto args = ::coroutine.await->arg_begin();
4346 auto handle = args++;
4347 auto outPtr = args++;
4348 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "co_await", ::coroutine.await));
4349 auto doneBlock = llvm::BasicBlock::Create(*::context, "done", ::coroutine.await);
4350 auto resumeBlock = llvm::BasicBlock::Create(*::context, "resume", ::coroutine.await);
4351
4352 auto done = ::builder->CreateCall(coro_done, {handle}, "done");
4353 ::builder->CreateCondBr(done, doneBlock, resumeBlock);
4354
4355 ::builder->SetInsertPoint(doneBlock);
4356 ::builder->CreateRet(::llvm::ConstantInt::getFalse(i1Ty));
4357
4358 ::builder->SetInsertPoint(resumeBlock);
4359 auto promiseAlignment = ::llvm::ConstantInt::get(i32Ty, 4); // TODO: Get correct alignment.
4360 auto promisePtr = ::builder->CreateCall(coro_promise, {handle, promiseAlignment, ::llvm::ConstantInt::get(i1Ty, 0)});
4361 auto promise = ::builder->CreateLoad(::builder->CreatePointerCast(promisePtr, promisePtrTy));
4362 ::builder->CreateStore(promise, outPtr);
4363 ::builder->CreateCall(coro_resume, {handle});
4364 ::builder->CreateRet(::llvm::ConstantInt::getTrue(i1Ty));
4365 }
4366
4367 // Build the coroutine_destroy() function:
4368 //
4369 // void coroutine_destroy(CoroutineHandle* handle)
4370 // {
4371 // llvm.coro.destroy(handle);
4372 // }
4373 //
4374 llvm::FunctionType *coroutineDestroyTy = llvm::FunctionType::get(voidTy, handleTy, false);
4375 ::coroutine.destroy = llvm::Function::Create(coroutineDestroyTy, llvm::GlobalValue::InternalLinkage, "coroutine_destroy", ::module);
4376 ::coroutine.destroy->setCallingConv(llvm::CallingConv::C);
4377 {
4378 auto handle = ::coroutine.destroy->arg_begin();
4379 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::coroutine.destroy));
4380 ::builder->CreateCall(coro_destroy, {handle});
4381 ::builder->CreateRetVoid();
4382 }
4383
4384 // Begin building the main coroutine_begin() function.
4385 //
4386 // CoroutineHandle* coroutine_begin(<Arguments>)
4387 // {
4388 // YieldType promise;
4389 // auto id = llvm.coro.id(0, &promise, nullptr, nullptr);
4390 // void* frame = coroutine_alloc_frame(llvm.coro.size.i32());
4391 // CoroutineHandle *handle = llvm.coro.begin(id, frame);
4392 //
4393 // ... <REACTOR CODE> ...
4394 //
4395 // end:
4396 // SuspendAction action = llvm.coro.suspend(none, true /* final */); // <-- RESUME POINT
4397 // switch (action)
4398 // {
4399 // case SuspendActionResume:
4400 // UNREACHABLE(); // Illegal to resume after final suspend.
4401 // case SuspendActionDestroy:
4402 // goto destroy;
4403 // default: // (SuspendActionSuspend)
4404 // goto suspend;
4405 // }
4406 //
4407 // destroy:
4408 // coroutine_free_frame(llvm.coro.free(id, handle));
4409 // goto suspend;
4410 //
4411 // suspend:
4412 // llvm.coro.end(handle, false);
4413 // return handle;
4414 // }
4415 //
4416 llvm::FunctionType *functionType = llvm::FunctionType::get(handleTy, T(Params), false);
4417 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "coroutine_begin", ::module);
4418 ::function->setCallingConv(llvm::CallingConv::C);
4419
4420#ifdef ENABLE_RR_DEBUG_INFO
4421 ::debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(::builder, ::context, ::module, ::function));
4422#endif // ENABLE_RR_DEBUG_INFO
4423
4424 auto entryBlock = llvm::BasicBlock::Create(*::context, "coroutine", ::function);
4425 ::coroutine.suspendBlock = llvm::BasicBlock::Create(*::context, "suspend", ::function);
4426 ::coroutine.endBlock = llvm::BasicBlock::Create(*::context, "end", ::function);
4427 ::coroutine.destroyBlock = llvm::BasicBlock::Create(*::context, "destroy", ::function);
4428
4429 ::builder->SetInsertPoint(entryBlock);
4430 Variable::materializeAll();
4431 ::coroutine.promise = ::builder->CreateAlloca(T(YieldType), nullptr, "promise");
4432 ::coroutine.id = ::builder->CreateCall(coro_id, {
4433 ::llvm::ConstantInt::get(i32Ty, 0),
4434 ::builder->CreatePointerCast(::coroutine.promise, i8PtrTy),
4435 ::llvm::ConstantPointerNull::get(i8PtrTy),
4436 ::llvm::ConstantPointerNull::get(i8PtrTy),
4437 });
4438 auto size = ::builder->CreateCall(coro_size, {});
4439 auto frame = ::builder->CreateCall(allocFrame, {size});
4440 ::coroutine.handle = ::builder->CreateCall(coro_begin, {::coroutine.id, frame});
4441
4442 // Build the suspend block
4443 ::builder->SetInsertPoint(::coroutine.suspendBlock);
4444 ::builder->CreateCall(coro_end, {::coroutine.handle, ::llvm::ConstantInt::get(i1Ty, 0)});
4445 ::builder->CreateRet(::coroutine.handle);
4446
4447 // Build the end block
4448 ::builder->SetInsertPoint(::coroutine.endBlock);
4449 auto action = ::builder->CreateCall(coro_suspend, {
4450 ::llvm::ConstantTokenNone::get(*::context),
4451 ::llvm::ConstantInt::get(i1Ty, 1), // final: true
4452 });
4453 auto switch_ = ::builder->CreateSwitch(action, ::coroutine.suspendBlock, 3);
4454 // switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), trapBlock); // TODO: Trap attempting to resume after final suspend
4455 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), ::coroutine.destroyBlock);
4456
4457 // Build the destroy block
4458 ::builder->SetInsertPoint(::coroutine.destroyBlock);
4459 auto memory = ::builder->CreateCall(coro_free, {::coroutine.id, ::coroutine.handle});
4460 ::builder->CreateCall(freeFrame, {memory});
4461 ::builder->CreateBr(::coroutine.suspendBlock);
4462
4463 // Switch back to the entry block for reactor codegen.
4464 ::builder->SetInsertPoint(entryBlock);
4465
4466 #if defined(_WIN32)
4467 // FIXME(capn):
4468 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
4469 // having a trap which allows the OS to grow the stack. For functions with a stack frame
4470 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
4471 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
4472 // the stack and ensure all pages have been committed. This is currently broken in LLVM
4473 // JIT, but we can prevent emitting the stack probe call:
4474 ::function->addFnAttr("stack-probe-size", "1048576");
4475 #endif
John Bauman89401822014-05-06 15:04:28 -04004476}
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004477
4478void Nucleus::yield(Value* val)
4479{
4480 ASSERT_MSG(::coroutine.id != nullptr, "yield() can only be called when building a Coroutine");
4481
4482 // promise = val;
4483 //
4484 // auto action = llvm.coro.suspend(none, false /* final */); // <-- RESUME POINT
4485 // switch (action)
4486 // {
4487 // case SuspendActionResume:
4488 // goto resume;
4489 // case SuspendActionDestroy:
4490 // goto destroy;
4491 // default: // (SuspendActionSuspend)
4492 // goto suspend;
4493 // }
4494 // resume:
4495 //
4496
4497 RR_DEBUG_INFO_UPDATE_LOC();
4498 Variable::materializeAll();
4499
4500 // Types
4501 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
4502 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
4503
4504 // Intrinsics
4505 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_suspend);
4506
4507 // Create a block to resume execution.
4508 auto resumeBlock = llvm::BasicBlock::Create(*::context, "resume", ::function);
4509
4510 // Store the promise (yield value)
4511 ::builder->CreateStore(V(val), ::coroutine.promise);
4512 auto action = ::builder->CreateCall(coro_suspend, {
4513 ::llvm::ConstantTokenNone::get(*::context),
4514 ::llvm::ConstantInt::get(i1Ty, 0), // final: true
4515 });
4516 auto switch_ = ::builder->CreateSwitch(action, ::coroutine.suspendBlock, 3);
4517 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), resumeBlock);
4518 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), ::coroutine.destroyBlock);
4519
4520 // Continue building in the resume block.
4521 ::builder->SetInsertPoint(resumeBlock);
4522}
4523
4524Routine* Nucleus::acquireCoroutine(const char *name, bool runOptimizations)
4525{
4526 ASSERT_MSG(::coroutine.id != nullptr, "acquireCoroutine() called without a call to createCoroutine()");
4527
4528 ::builder->CreateBr(::coroutine.endBlock);
4529
4530#ifdef ENABLE_RR_DEBUG_INFO
4531 if (debugInfo != nullptr)
4532 {
4533 debugInfo->Finalize();
4534 }
4535#endif // ENABLE_RR_DEBUG_INFO
4536
4537 if(false)
4538 {
4539 std::error_code error;
4540 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
4541 ::module->print(file, 0);
4542 }
4543
4544 // Run manadory coroutine transforms.
4545 llvm::legacy::PassManager pm;
4546 pm.add(llvm::createCoroEarlyPass());
4547 pm.add(llvm::createCoroSplitPass());
4548 pm.add(llvm::createCoroElidePass());
4549 pm.add(llvm::createBarrierNoopPass());
4550 pm.add(llvm::createCoroCleanupPass());
4551 pm.run(*::module);
4552
4553 if(runOptimizations)
4554 {
4555 optimize();
4556 }
4557
4558 if(false)
4559 {
4560 std::error_code error;
4561 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
4562 ::module->print(file, 0);
4563 }
4564
4565 llvm::Function *funcs[Nucleus::CoroutineEntryCount];
4566 funcs[Nucleus::CoroutineEntryBegin] = ::function;
4567 funcs[Nucleus::CoroutineEntryAwait] = ::coroutine.await;
4568 funcs[Nucleus::CoroutineEntryDestroy] = ::coroutine.destroy;
4569 Routine *routine = ::reactorJIT->acquireRoutine(funcs, Nucleus::CoroutineEntryCount);
4570
4571 ::coroutine = CoroutineState{};
4572
4573 return routine;
4574}
4575
4576} // namespace rr