blob: 781bc153ced4ead4927750751ec05e53643b9f0a [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
Nicolas Capenscb986762017-01-20 11:34:37 -050015#include "Reactor.hpp"
Ben Claytoneb50d252019-04-15 13:50:01 -040016#include "Debug.hpp"
Ben Claytonac07ed82019-03-26 14:17:41 +000017#include "LLVMReactor.hpp"
18#include "LLVMReactorDebugInfo.hpp"
John Bauman89401822014-05-06 15:04:28 -040019
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040020#include "x86.hpp"
21#include "CPUID.hpp"
22#include "Thread.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040023#include "ExecutableMemory.hpp"
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040024#include "MutexLock.hpp"
25
26#undef min
27#undef max
28
Ben Clayton09a7f452019-04-25 15:22:43 +010029#if defined(__clang__)
// LLVM has occurrences of the extra-semi warning in its headers, which would be
// treated as an error in SwiftShader targets.
32#pragma clang diagnostic push
33#pragma clang diagnostic ignored "-Wextra-semi"
34#endif // defined(__clang__)
35
Ben Clayton5875be52019-04-11 14:57:40 -040036#include "llvm/Analysis/LoopPass.h"
37#include "llvm/ExecutionEngine/ExecutionEngine.h"
38#include "llvm/ExecutionEngine/JITSymbol.h"
39#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
40#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
41#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
42#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
43#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
44#include "llvm/ExecutionEngine/SectionMemoryManager.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DataLayout.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalVariable.h"
Ben Clayton5875be52019-04-11 14:57:40 -040049#include "llvm/IR/Intrinsics.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010050#include "llvm/IR/IRBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040051#include "llvm/IR/LegacyPassManager.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010052#include "llvm/IR/LLVMContext.h"
Ben Clayton5875be52019-04-11 14:57:40 -040053#include "llvm/IR/Mangler.h"
54#include "llvm/IR/Module.h"
Ben Clayton4b944652019-05-02 10:56:19 +010055#include "llvm/IR/Verifier.h"
Ben Clayton5875be52019-04-11 14:57:40 -040056#include "llvm/Support/Error.h"
57#include "llvm/Support/TargetSelect.h"
58#include "llvm/Target/TargetOptions.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010059#include "llvm/Transforms/Coroutines.h"
Ben Clayton5875be52019-04-11 14:57:40 -040060#include "llvm/Transforms/InstCombine/InstCombine.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010061#include "llvm/Transforms/IPO.h"
62#include "llvm/Transforms/IPO/PassManagerBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040063#include "llvm/Transforms/Scalar.h"
64#include "llvm/Transforms/Scalar/GVN.h"
Ben Clayton20507fa2019-04-20 01:40:15 -040065
Ben Clayton09a7f452019-04-25 15:22:43 +010066#if defined(__clang__)
67#pragma clang diagnostic pop
68#endif // defined(__clang__)
69
Ben Clayton5875be52019-04-11 14:57:40 -040070#include "LLVMRoutine.hpp"
John Bauman89401822014-05-06 15:04:28 -040071
Ben Clayton5875be52019-04-11 14:57:40 -040072#define ARGS(...) {__VA_ARGS__}
73#define CreateCall2 CreateCall
74#define CreateCall3 CreateCall
Logan Chien0eedc8c2018-08-21 09:34:28 +080075
Ben Clayton5875be52019-04-11 14:57:40 -040076#include <unordered_map>
Logan Chien0eedc8c2018-08-21 09:34:28 +080077
John Bauman89401822014-05-06 15:04:28 -040078#include <fstream>
Ben Claytoncee3dff2019-05-22 12:01:22 +010079#include <iostream>
80#include <mutex>
Ben Clayton1bc7ee92019-02-14 18:43:22 +000081#include <numeric>
82#include <thread>
John Bauman89401822014-05-06 15:04:28 -040083
Nicolas Capens47dc8672017-04-25 12:54:39 -040084#if defined(__i386__) || defined(__x86_64__)
85#include <xmmintrin.h>
86#endif
87
Logan Chien40a60052018-09-26 19:03:53 +080088#include <math.h>
89
Nicolas Capenscb122582014-05-06 23:34:44 -040090#if defined(__x86_64__) && defined(_WIN32)
John Bauman66b8ab22014-05-06 15:57:45 -040091extern "C" void X86CompilationCallback()
92{
Ben Claytoneb50d252019-04-15 13:50:01 -040093 UNIMPLEMENTED("X86CompilationCallback");
John Bauman66b8ab22014-05-06 15:57:45 -040094}
95#endif
96
// Forward declaration, so the file-local state below can hold a pointer to the
// JIT before the class itself is defined.
namespace rr { class LLVMReactorJIT; }
101
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400102namespace
103{
Nicolas Capens48461502018-08-06 14:20:45 -0400104 rr::LLVMReactorJIT *reactorJIT = nullptr;
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400105 llvm::IRBuilder<> *builder = nullptr;
106 llvm::LLVMContext *context = nullptr;
107 llvm::Module *module = nullptr;
108 llvm::Function *function = nullptr;
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400109
Ben Claytonac07ed82019-03-26 14:17:41 +0000110#ifdef ENABLE_RR_DEBUG_INFO
111 std::unique_ptr<rr::DebugInfo> debugInfo;
112#endif
113
Nicolas Capensc07dc4b2018-08-06 14:20:45 -0400114 rr::MutexLock codegenMutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800115
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000116#ifdef ENABLE_RR_PRINT
// Returns a copy of str in which every non-overlapping occurrence of substr
// has been replaced by replacement.
//
// Scanning resumes just past each inserted replacement, so text introduced by
// a replacement is never matched again (replacing "a" with "aa" terminates).
// An empty substr matches nothing: str is returned unchanged.
std::string replace(std::string str, const std::string& substr, const std::string& replacement)
{
	if(substr.empty())
	{
		// find("") succeeds at every position, so without this guard the
		// loop below would never terminate.
		return str;
	}

	size_t pos = 0;
	while((pos = str.find(substr, pos)) != std::string::npos)
	{
		str.replace(pos, substr.length(), replacement);
		pos += replacement.length();
	}
	return str;
}
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000126#endif // ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000127
Logan Chien0eedc8c2018-08-21 09:34:28 +0800128 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
129 {
130 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
131
132 llvm::VectorType *extTy =
133 llvm::VectorType::getExtendedElementVectorType(ty);
134 x = ::builder->CreateZExt(x, extTy);
135 y = ::builder->CreateZExt(y, extTy);
136
137 // (x + y + 1) >> 1
138 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
139 llvm::Value *res = ::builder->CreateAdd(x, y);
140 res = ::builder->CreateAdd(res, one);
141 res = ::builder->CreateLShr(res, one);
142 return ::builder->CreateTrunc(res, ty);
143 }
144
145 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800146 llvm::ICmpInst::Predicate pred)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800147 {
148 return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
149 }
150
151 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
Logan Chienb5ce5092018-09-27 18:45:58 +0800152 llvm::Value *y, llvm::Type *dstTy)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800153 {
154 return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
155 }
156
Logan Chiene3191012018-08-24 22:01:50 +0800157#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800158 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
159 {
160 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
161 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
162
163 llvm::Value *undef = llvm::UndefValue::get(srcTy);
164 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
165 std::iota(mask.begin(), mask.end(), 0);
166 llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
167
168 return sext ? ::builder->CreateSExt(v, dstTy)
Logan Chienb5ce5092018-09-27 18:45:58 +0800169 : ::builder->CreateZExt(v, dstTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800170 }
171
172 llvm::Value *lowerPABS(llvm::Value *v)
173 {
174 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
175 llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
176 llvm::Value *neg = ::builder->CreateNeg(v);
177 return ::builder->CreateSelect(cmp, v, neg);
178 }
179#endif // defined(__i386__) || defined(__x86_64__)
Logan Chiene3191012018-08-24 22:01:50 +0800180
181#if !defined(__i386__) && !defined(__x86_64__)
182 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800183 llvm::FCmpInst::Predicate pred)
Logan Chiene3191012018-08-24 22:01:50 +0800184 {
185 return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
186 }
187
Logan Chien83fc07a2018-09-26 22:14:00 +0800188 llvm::Value *lowerRound(llvm::Value *x)
189 {
190 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
191 ::module, llvm::Intrinsic::nearbyint, {x->getType()});
192 return ::builder->CreateCall(nearbyint, ARGS(x));
193 }
194
Logan Chien2faa24a2018-09-26 19:59:32 +0800195 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
196 {
197 return ::builder->CreateFPToSI(lowerRound(x), ty);
198 }
199
Logan Chien40a60052018-09-26 19:03:53 +0800200 llvm::Value *lowerFloor(llvm::Value *x)
201 {
202 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
203 ::module, llvm::Intrinsic::floor, {x->getType()});
204 return ::builder->CreateCall(floor, ARGS(x));
205 }
206
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800207 llvm::Value *lowerTrunc(llvm::Value *x)
208 {
209 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
210 ::module, llvm::Intrinsic::trunc, {x->getType()});
211 return ::builder->CreateCall(trunc, ARGS(x));
212 }
213
Logan Chiene3191012018-08-24 22:01:50 +0800214 // Packed add/sub saturatation
Logan Chien28794cf2018-09-26 18:58:03 +0800215 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
Logan Chiene3191012018-08-24 22:01:50 +0800216 {
Logan Chien28794cf2018-09-26 18:58:03 +0800217 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
218 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
219
220 unsigned numBits = ty->getScalarSizeInBits();
221
222 llvm::Value *max, *min, *extX, *extY;
223 if (isSigned)
224 {
225 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
226 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
227 extX = ::builder->CreateSExt(x, extTy);
228 extY = ::builder->CreateSExt(y, extTy);
229 }
230 else
231 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400232 ASSERT_MSG(numBits <= 64, "numBits: %d", int(numBits));
Logan Chien28794cf2018-09-26 18:58:03 +0800233 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
234 max = llvm::ConstantInt::get(extTy, maxVal, false);
235 min = llvm::ConstantInt::get(extTy, 0, false);
236 extX = ::builder->CreateZExt(x, extTy);
237 extY = ::builder->CreateZExt(y, extTy);
238 }
239
240 llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
241 : ::builder->CreateSub(extX, extY);
242
243 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
244 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
245
246 return ::builder->CreateTrunc(res, ty);
Logan Chiene3191012018-08-24 22:01:50 +0800247 }
248
249 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
250 {
Logan Chien28794cf2018-09-26 18:58:03 +0800251 return lowerPSAT(x, y, true, false);
Logan Chiene3191012018-08-24 22:01:50 +0800252 }
253
254 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
255 {
Logan Chien28794cf2018-09-26 18:58:03 +0800256 return lowerPSAT(x, y, true, true);
Logan Chiene3191012018-08-24 22:01:50 +0800257 }
258
259 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
260 {
Logan Chien28794cf2018-09-26 18:58:03 +0800261 return lowerPSAT(x, y, false, false);
Logan Chiene3191012018-08-24 22:01:50 +0800262 }
263
264 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
265 {
Logan Chien28794cf2018-09-26 18:58:03 +0800266 return lowerPSAT(x, y, false, true);
Logan Chiene3191012018-08-24 22:01:50 +0800267 }
268
269 llvm::Value *lowerSQRT(llvm::Value *x)
270 {
271 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
272 ::module, llvm::Intrinsic::sqrt, {x->getType()});
273 return ::builder->CreateCall(sqrt, ARGS(x));
274 }
275
276 llvm::Value *lowerRCP(llvm::Value *x)
277 {
278 llvm::Type *ty = x->getType();
279 llvm::Constant *one;
280 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
281 {
282 one = llvm::ConstantVector::getSplat(
283 vectorTy->getNumElements(),
284 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
285 }
286 else
287 {
288 one = llvm::ConstantFP::get(ty, 1);
289 }
290 return ::builder->CreateFDiv(one, x);
291 }
292
293 llvm::Value *lowerRSQRT(llvm::Value *x)
294 {
295 return lowerRCP(lowerSQRT(x));
296 }
297
298 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
299 {
300 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
301 llvm::Value *y = llvm::ConstantVector::getSplat(
302 ty->getNumElements(),
303 llvm::ConstantInt::get(ty->getElementType(), scalarY));
304 return ::builder->CreateShl(x, y);
305 }
306
307 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
308 {
309 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
310 llvm::Value *y = llvm::ConstantVector::getSplat(
311 ty->getNumElements(),
312 llvm::ConstantInt::get(ty->getElementType(), scalarY));
313 return ::builder->CreateAShr(x, y);
314 }
315
316 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
317 {
318 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
319 llvm::Value *y = llvm::ConstantVector::getSplat(
320 ty->getNumElements(),
321 llvm::ConstantInt::get(ty->getElementType(), scalarY));
322 return ::builder->CreateLShr(x, y);
323 }
324
325 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
326 {
327 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
328 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
329
330 llvm::Value *extX = ::builder->CreateSExt(x, extTy);
331 llvm::Value *extY = ::builder->CreateSExt(y, extTy);
332 llvm::Value *mult = ::builder->CreateMul(extX, extY);
333
334 llvm::Value *undef = llvm::UndefValue::get(extTy);
335
336 llvm::SmallVector<uint32_t, 16> evenIdx;
337 llvm::SmallVector<uint32_t, 16> oddIdx;
338 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
339 {
340 evenIdx.push_back(i);
341 oddIdx.push_back(i + 1);
342 }
343
344 llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
345 llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
346 return ::builder->CreateAdd(lhs, rhs);
347 }
348
Logan Chiene3191012018-08-24 22:01:50 +0800349 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
350 {
351 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
352 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
353
354 llvm::IntegerType *dstElemTy =
355 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
356
357 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
Ben Claytoneb50d252019-04-15 13:50:01 -0400358 ASSERT_MSG(truncNumBits < 64, "shift 64 must be handled separately. truncNumBits: %d", int(truncNumBits));
Logan Chiene3191012018-08-24 22:01:50 +0800359 llvm::Constant *max, *min;
360 if (isSigned)
361 {
362 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
363 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
364 }
365 else
366 {
367 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
368 min = llvm::ConstantInt::get(srcTy, 0, false);
369 }
370
371 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
372 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
373 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
374 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
375
376 x = ::builder->CreateTrunc(x, dstTy);
377 y = ::builder->CreateTrunc(y, dstTy);
378
379 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
380 std::iota(index.begin(), index.end(), 0);
381
382 return ::builder->CreateShuffleVector(x, y, index);
383 }
384
385 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
386 {
387 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
388 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
389 llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
390
391 llvm::Value *ret = ::builder->CreateZExt(
392 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
393 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
394 {
395 llvm::Value *elem = ::builder->CreateZExt(
396 ::builder->CreateExtractElement(cmp, i), retTy);
397 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
398 }
399 return ret;
400 }
401
402 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
403 {
404 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
405 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
406 llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
407
408 llvm::Value *ret = ::builder->CreateZExt(
409 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
410 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
411 {
412 llvm::Value *elem = ::builder->CreateZExt(
413 ::builder->CreateExtractElement(cmp, i), retTy);
414 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
415 }
416 return ret;
417 }
418#endif // !defined(__i386__) && !defined(__x86_64__)
Chris Forbese86b6dc2019-03-01 09:08:47 -0800419
420 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
421 {
422 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
423 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
424
425 llvm::Value *extX, *extY;
426 if (sext)
427 {
428 extX = ::builder->CreateSExt(x, extTy);
429 extY = ::builder->CreateSExt(y, extTy);
430 }
431 else
432 {
433 extX = ::builder->CreateZExt(x, extTy);
434 extY = ::builder->CreateZExt(y, extTy);
435 }
436
437 llvm::Value *mult = ::builder->CreateMul(extX, extY);
438
439 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
440 llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getBitWidth());
441 return ::builder->CreateTrunc(mulh, ty);
442 }
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400443}
444
Nicolas Capens48461502018-08-06 14:20:45 -0400445namespace rr
John Bauman89401822014-05-06 15:04:28 -0400446{
Ben Claytonc7904162019-04-17 17:35:48 -0400447 const Capabilities Caps =
448 {
449 true, // CallSupported
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100450 true, // CoroutinesSupported
Ben Claytonc7904162019-04-17 17:35:48 -0400451 };
452
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400453 static std::memory_order atomicOrdering(llvm::AtomicOrdering memoryOrder)
454 {
455 switch(memoryOrder)
456 {
457 case llvm::AtomicOrdering::Monotonic: return std::memory_order_relaxed; // https://llvm.org/docs/Atomics.html#monotonic
458 case llvm::AtomicOrdering::Acquire: return std::memory_order_acquire;
459 case llvm::AtomicOrdering::Release: return std::memory_order_release;
460 case llvm::AtomicOrdering::AcquireRelease: return std::memory_order_acq_rel;
461 case llvm::AtomicOrdering::SequentiallyConsistent: return std::memory_order_seq_cst;
462 default:
Ben Claytonfb280672019-04-25 11:16:15 +0100463 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400464 return std::memory_order_acq_rel;
465 }
466 }
467
468 static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
469 {
470 if(!atomic)
471 {
472 return llvm::AtomicOrdering::NotAtomic;
473 }
474
475 switch(memoryOrder)
476 {
477 case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic; // https://llvm.org/docs/Atomics.html#monotonic
478 case std::memory_order_consume: return llvm::AtomicOrdering::Acquire; // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
479 case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
480 case std::memory_order_release: return llvm::AtomicOrdering::Release;
481 case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
482 case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
483 default:
484 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
485 return llvm::AtomicOrdering::AcquireRelease;
486 }
487 }
488
489 template <typename T>
490 static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
491 {
492 *reinterpret_cast<T*>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), atomicOrdering(ordering));
493 }
494
495 template <typename T>
496 static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
497 {
498 std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), *reinterpret_cast<T*>(val), atomicOrdering(ordering));
499 }
500
Logan Chien40a60052018-09-26 19:03:53 +0800501 class ExternalFunctionSymbolResolver
502 {
503 private:
504 using FunctionMap = std::unordered_map<std::string, void *>;
505 FunctionMap func_;
506
507 public:
508 ExternalFunctionSymbolResolver()
509 {
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400510 struct Atomic
511 {
512 static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
513 {
514 switch (size)
515 {
516 case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
517 case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
518 case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
519 case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
520 default:
521 UNIMPLEMENTED("Atomic::load(size: %d)", int(size));
522 }
523 }
524 static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
525 {
526 switch (size)
527 {
528 case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
529 case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
530 case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
531 case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
532 default:
533 UNIMPLEMENTED("Atomic::store(size: %d)", int(size));
534 }
535 }
536 };
Chris Forbes9283b252019-06-17 09:44:28 -0700537 struct F
538 {
539 static void nop() {}
540 static void neverCalled() { UNREACHABLE("Should never be called"); }
541
542#ifdef __ANDROID__
543 // forwarders since we can't take address of builtins
544 static void sync_synchronize() { __sync_synchronize(); }
545 static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
546#endif
547 };
Ben Claytonac07ed82019-03-26 14:17:41 +0000548
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400549 func_.emplace("nop", reinterpret_cast<void*>(F::nop));
Logan Chien40a60052018-09-26 19:03:53 +0800550 func_.emplace("floorf", reinterpret_cast<void*>(floorf));
Logan Chien83fc07a2018-09-26 22:14:00 +0800551 func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800552 func_.emplace("truncf", reinterpret_cast<void*>(truncf));
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000553 func_.emplace("printf", reinterpret_cast<void*>(printf));
554 func_.emplace("puts", reinterpret_cast<void*>(puts));
Chris Forbes1a4c7122019-03-15 14:50:47 -0700555 func_.emplace("fmodf", reinterpret_cast<void*>(fmodf));
Ben Claytona2c8b772019-04-09 13:42:36 -0400556 func_.emplace("sinf", reinterpret_cast<void*>(sinf));
Ben Clayton1b6f8c72019-04-09 13:47:43 -0400557 func_.emplace("cosf", reinterpret_cast<void*>(cosf));
Ben Claytonf9350d72019-04-09 14:19:02 -0400558 func_.emplace("asinf", reinterpret_cast<void*>(asinf));
Ben Claytoneafae472019-04-09 14:22:38 -0400559 func_.emplace("acosf", reinterpret_cast<void*>(acosf));
Ben Clayton749b4e02019-04-09 14:27:43 -0400560 func_.emplace("atanf", reinterpret_cast<void*>(atanf));
Ben Claytond9636972019-04-09 15:09:54 -0400561 func_.emplace("sinhf", reinterpret_cast<void*>(sinhf));
Ben Clayton900ea2c2019-04-09 15:25:36 -0400562 func_.emplace("coshf", reinterpret_cast<void*>(coshf));
Ben Clayton3928bd92019-04-09 15:27:41 -0400563 func_.emplace("tanhf", reinterpret_cast<void*>(tanhf));
Ben Claytonf6d77ab2019-04-09 15:30:04 -0400564 func_.emplace("asinhf", reinterpret_cast<void*>(asinhf));
Ben Clayton28ebcb02019-04-09 15:33:38 -0400565 func_.emplace("acoshf", reinterpret_cast<void*>(acoshf));
Ben Claytonfa6a5392019-04-09 15:35:24 -0400566 func_.emplace("atanhf", reinterpret_cast<void*>(atanhf));
Ben Claytona520c3e2019-04-09 15:43:45 -0400567 func_.emplace("atan2f", reinterpret_cast<void*>(atan2f));
Ben Claytonbfe94f02019-04-09 15:52:12 -0400568 func_.emplace("powf", reinterpret_cast<void*>(powf));
Ben Clayton242f0022019-04-09 16:00:53 -0400569 func_.emplace("expf", reinterpret_cast<void*>(expf));
Ben Clayton2c1da722019-04-09 16:03:03 -0400570 func_.emplace("logf", reinterpret_cast<void*>(logf));
Ben Claytonf40b56c2019-04-09 16:06:55 -0400571 func_.emplace("exp2f", reinterpret_cast<void*>(exp2f));
Ben Claytone17acfe2019-04-09 16:09:13 -0400572 func_.emplace("log2f", reinterpret_cast<void*>(log2f));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400573 func_.emplace("atomic_load", reinterpret_cast<void*>(Atomic::load));
574 func_.emplace("atomic_store", reinterpret_cast<void*>(Atomic::store));
Ben Clayton14740062019-04-09 13:48:41 -0400575
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100576 // FIXME (b/119409619): use an allocator here so we can control all memory allocations
577 func_.emplace("coroutine_alloc_frame", reinterpret_cast<void*>(malloc));
578 func_.emplace("coroutine_free_frame", reinterpret_cast<void*>(free));
579
Ben Clayton14740062019-04-09 13:48:41 -0400580#ifdef __APPLE__
Ben Clayton14740062019-04-09 13:48:41 -0400581 func_.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));
582#elif defined(__linux__)
583 func_.emplace("sincosf", reinterpret_cast<void*>(sincosf));
584#endif // __APPLE__
Chris Forbes9283b252019-06-17 09:44:28 -0700585
586#ifdef __ANDROID__
587 func_.emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void*>(F::neverCalled));
588 func_.emplace("sync_synchronize", reinterpret_cast<void*>(F::sync_synchronize));
589 func_.emplace("sync_fetch_and_add_4", reinterpret_cast<void*>(F::sync_fetch_and_add_4));
590#endif
Logan Chien40a60052018-09-26 19:03:53 +0800591 }
592
593 void *findSymbol(const std::string &name) const
594 {
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000595 // Trim off any underscores from the start of the symbol. LLVM likes
596 // to append these on macOS.
597 const char* trimmed = name.c_str();
598 while (trimmed[0] == '_') { trimmed++; }
599
600 FunctionMap::const_iterator it = func_.find(trimmed);
Ben Claytoneb50d252019-04-15 13:50:01 -0400601 // Missing functions will likely make the module fail in exciting non-obvious ways.
602 ASSERT_MSG(it != func_.end(), "Missing external function: '%s'", name.c_str());
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000603 return it->second;
Logan Chien40a60052018-09-26 19:03:53 +0800604 }
605 };
606
Logan Chien0eedc8c2018-08-21 09:34:28 +0800607 class LLVMReactorJIT
608 {
609 private:
610 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
611 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
612
613 llvm::orc::ExecutionSession session;
Logan Chien40a60052018-09-26 19:03:53 +0800614 ExternalFunctionSymbolResolver externalSymbolResolver;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800615 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
616 std::unique_ptr<llvm::TargetMachine> targetMachine;
617 const llvm::DataLayout dataLayout;
618 ObjLayer objLayer;
Ben Claytoncee3dff2019-05-22 12:01:22 +0100619 CompileLayer compileLayer; // guarded by mutex
620 std::mutex mutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800621 size_t emittedFunctionsNum;
622
623 public:
624 LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
625 const llvm::TargetOptions &targetOpts):
626 resolver(createLegacyLookupResolver(
627 session,
628 [this](const std::string &name) {
Logan Chien40a60052018-09-26 19:03:53 +0800629 void *func = externalSymbolResolver.findSymbol(name);
630 if (func != nullptr)
631 {
632 return llvm::JITSymbol(
633 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
634 }
635
Logan Chien0eedc8c2018-08-21 09:34:28 +0800636 return objLayer.findSymbol(name, true);
637 },
638 [](llvm::Error err) {
639 if (err)
640 {
641 // TODO: Log the symbol resolution errors.
642 return;
643 }
644 })),
645 targetMachine(llvm::EngineBuilder()
Ben Claytonac07ed82019-03-26 14:17:41 +0000646#ifdef ENABLE_RR_DEBUG_INFO
647 .setOptLevel(llvm::CodeGenOpt::None)
648#endif // ENABLE_RR_DEBUG_INFO
Logan Chien0eedc8c2018-08-21 09:34:28 +0800649 .setMArch(arch)
650 .setMAttrs(mattrs)
651 .setTargetOptions(targetOpts)
652 .selectTarget()),
653 dataLayout(targetMachine->createDataLayout()),
654 objLayer(
655 session,
656 [this](llvm::orc::VModuleKey) {
657 return ObjLayer::Resources{
658 std::make_shared<llvm::SectionMemoryManager>(),
659 resolver};
Ben Claytonac07ed82019-03-26 14:17:41 +0000660 },
661 ObjLayer::NotifyLoadedFtor(),
662 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
663#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton90cb2602019-05-23 14:42:32 +0100664 DebugInfo::NotifyObjectEmitted(Obj, L);
Ben Claytonac07ed82019-03-26 14:17:41 +0000665#endif // ENABLE_RR_DEBUG_INFO
666 },
667 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
668#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton90cb2602019-05-23 14:42:32 +0100669 DebugInfo::NotifyFreeingObject(Obj);
Ben Claytonac07ed82019-03-26 14:17:41 +0000670#endif // ENABLE_RR_DEBUG_INFO
671 }
672 ),
Logan Chien0eedc8c2018-08-21 09:34:28 +0800673 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
674 emittedFunctionsNum(0)
675 {
676 }
677
678 void startSession()
679 {
680 ::module = new llvm::Module("", *::context);
681 }
682
683 void endSession()
684 {
685 ::function = nullptr;
686 ::module = nullptr;
687 }
688
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100689 LLVMRoutine *acquireRoutine(llvm::Function **funcs, size_t count)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800690 {
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100691 std::vector<std::string> mangledNames(count);
692 for (size_t i = 0; i < count; i++)
693 {
694 auto func = funcs[i];
695 std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
696 func->setName(name);
697 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
698 func->setDoesNotThrow();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800699
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100700 llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
701 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
702 }
703
704 // Compile the module - after this the llvm::Functions will have
705 // been freed.
Logan Chien0eedc8c2018-08-21 09:34:28 +0800706 std::unique_ptr<llvm::Module> mod(::module);
707 ::module = nullptr;
708 mod->setDataLayout(dataLayout);
709
710 auto moduleKey = session.allocateVModule();
Ben Claytoncee3dff2019-05-22 12:01:22 +0100711
712 // Resolve the function symbols - needs to be performed under mutex lock.
713 std::vector<llvm::JITSymbol> symbols;
714 {
715 std::unique_lock<std::mutex> lock(mutex);
716 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
717 funcs = nullptr; // Now points to released memory.
718 for (size_t i = 0; i < count; i++)
719 {
720 symbols.push_back(compileLayer.findSymbolIn(moduleKey, mangledNames[i], false));
721 }
722 }
Logan Chien0eedc8c2018-08-21 09:34:28 +0800723
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100724 // Resolve the function addresses.
725 std::vector<void*> addresses(count);
726 for (size_t i = 0; i < count; i++)
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400727 {
Ben Claytoncee3dff2019-05-22 12:01:22 +0100728 if(auto expectAddr = symbols[i].getAddress())
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100729 {
Ben Claytoncee3dff2019-05-22 12:01:22 +0100730 addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100731 }
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400732 }
733
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100734 return new LLVMRoutine(addresses.data(), count, releaseRoutineCallback, this, moduleKey);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800735 }
736
737 void optimize(llvm::Module *module)
738 {
Ben Claytonac07ed82019-03-26 14:17:41 +0000739#ifdef ENABLE_RR_DEBUG_INFO
740 if (debugInfo != nullptr)
741 {
742 return; // Don't optimize if we're generating debug info.
743 }
744#endif // ENABLE_RR_DEBUG_INFO
745
Logan Chien0eedc8c2018-08-21 09:34:28 +0800746 std::unique_ptr<llvm::legacy::PassManager> passManager(
747 new llvm::legacy::PassManager());
748
749 passManager->add(llvm::createSROAPass());
750
751 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
752 {
753 switch(optimization[pass])
754 {
755 case Disabled: break;
756 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
757 case LICM: passManager->add(llvm::createLICMPass()); break;
758 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
759 case GVN: passManager->add(llvm::createGVNPass()); break;
760 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
761 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
762 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
763 case SCCP: passManager->add(llvm::createSCCPPass()); break;
764 case ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
765 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400766 UNREACHABLE("optimization[pass]: %d, pass: %d", int(optimization[pass]), int(pass));
Logan Chien0eedc8c2018-08-21 09:34:28 +0800767 }
768 }
769
770 passManager->run(*::module);
771 }
772
773 private:
774 void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
775 {
Ben Claytoncee3dff2019-05-22 12:01:22 +0100776 std::unique_lock<std::mutex> lock(mutex);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800777 llvm::cantFail(compileLayer.removeModule(moduleKey));
778 }
779
780 static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
781 {
782 jit->releaseRoutineModule(moduleKey);
783 }
784 };
Logan Chien52cde602018-09-03 19:37:57 +0800785
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400786 Optimization optimization[10] = {InstructionCombining, Disabled};
John Bauman89401822014-05-06 15:04:28 -0400787
// Abstract Type* values are backed by LLVM types, with one exception:
// 64-bit vectors are emulated with 128-bit ones. This avoids the use of MMX
// on x86 and VFP on ARM, and removes the overhead of converting them to
// explicit 128-bit vectors. Because LLVM types are pointers, the emulated
// types can be encoded as abstract pointers holding small enum values.
enum InternalType : uintptr_t
{
	// Emulated types (must stay below EmulatedTypeCount):
	Type_v2i32,
	Type_v4i16,
	Type_v2i16,
	Type_v8i8,
	Type_v4i8,
	Type_v2f32,

	// Number of emulated types; also the first value that is not one.
	EmulatedTypeCount,

	// Result of asInternalType() when the abstract Type* is to be
	// interpreted as a genuine LLVM type pointer:
	Type_LLVM
};
807
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500808 inline InternalType asInternalType(Type *type)
809 {
810 InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
811 return (t < EmulatedTypeCount) ? t : Type_LLVM;
812 }
813
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400814 llvm::Type *T(Type *t)
815 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500816 // Use 128-bit vectors to implement logically shorter ones.
817 switch(asInternalType(t))
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400818 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500819 case Type_v2i32: return T(Int4::getType());
820 case Type_v4i16: return T(Short8::getType());
821 case Type_v2i16: return T(Short8::getType());
822 case Type_v8i8: return T(Byte16::getType());
823 case Type_v4i8: return T(Byte16::getType());
824 case Type_v2f32: return T(Float4::getType());
825 case Type_LLVM: return reinterpret_cast<llvm::Type*>(t);
Ben Claytoneb50d252019-04-15 13:50:01 -0400826 default:
827 UNREACHABLE("asInternalType(t): %d", int(asInternalType(t)));
828 return nullptr;
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400829 }
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400830 }
831
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500832 Type *T(InternalType t)
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400833 {
834 return reinterpret_cast<Type*>(t);
835 }
836
Nicolas Capensac230122016-09-20 14:30:06 -0400837 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
838 {
839 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
840 }
841
Logan Chien191b3052018-08-31 16:57:15 +0800842 inline llvm::BasicBlock *B(BasicBlock *t)
843 {
844 return reinterpret_cast<llvm::BasicBlock*>(t);
845 }
846
Nicolas Capensc8b67a42016-09-25 15:02:52 -0400847 inline BasicBlock *B(llvm::BasicBlock *t)
848 {
849 return reinterpret_cast<BasicBlock*>(t);
850 }
851
Nicolas Capens01a97962017-07-28 17:30:51 -0400852 static size_t typeSize(Type *type)
853 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500854 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -0400855 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500856 case Type_v2i32: return 8;
857 case Type_v4i16: return 8;
858 case Type_v2i16: return 4;
859 case Type_v8i8: return 8;
860 case Type_v4i8: return 4;
861 case Type_v2f32: return 8;
862 case Type_LLVM:
Nicolas Capens01a97962017-07-28 17:30:51 -0400863 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500864 llvm::Type *t = T(type);
Nicolas Capens01a97962017-07-28 17:30:51 -0400865
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500866 if(t->isPointerTy())
867 {
868 return sizeof(void*);
869 }
870
871 // At this point we should only have LLVM 'primitive' types.
872 unsigned int bits = t->getPrimitiveSizeInBits();
Ben Claytoneb50d252019-04-15 13:50:01 -0400873 ASSERT_MSG(bits != 0, "bits: %d", int(bits));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500874
875 // TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
876 // but are typically stored as one byte. The DataLayout structure should
877 // be used here and many other places if this assumption fails.
878 return (bits + 7) / 8;
879 }
880 break;
881 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400882 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500883 return 0;
884 }
Nicolas Capens01a97962017-07-28 17:30:51 -0400885 }
886
Nicolas Capens69674fb2017-09-01 11:08:44 -0400887 static unsigned int elementCount(Type *type)
888 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500889 switch(asInternalType(type))
Nicolas Capens69674fb2017-09-01 11:08:44 -0400890 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500891 case Type_v2i32: return 2;
892 case Type_v4i16: return 4;
893 case Type_v2i16: return 2;
894 case Type_v8i8: return 8;
895 case Type_v4i8: return 4;
896 case Type_v2f32: return 2;
897 case Type_LLVM: return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
Ben Claytoneb50d252019-04-15 13:50:01 -0400898 default:
899 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
900 return 0;
Nicolas Capens69674fb2017-09-01 11:08:44 -0400901 }
Nicolas Capens69674fb2017-09-01 11:08:44 -0400902 }
903
Ben Claytonea38f952019-06-17 13:56:56 +0100904 static ::llvm::Function* createFunction(const char *name, ::llvm::Type *retTy, const std::vector<::llvm::Type*> &params)
905 {
906 llvm::FunctionType *functionType = llvm::FunctionType::get(retTy, params, false);
907 auto func = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, name, ::module);
908 func->setDoesNotThrow();
909 func->setCallingConv(llvm::CallingConv::C);
910
911 #if defined(_WIN32)
912 // FIXME(capn):
913 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
914 // having a trap which allows the OS to grow the stack. For functions with a stack frame
915 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
916 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
917 // the stack and ensure all pages have been committed. This is currently broken in LLVM
918 // JIT, but we can prevent emitting the stack probe call:
919 func->addFnAttr("stack-probe-size", "1048576");
920 #endif
921
922 return func;
923 }
924
John Bauman89401822014-05-06 15:04:28 -0400925 Nucleus::Nucleus()
926 {
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400927 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400928
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400929 llvm::InitializeNativeTarget();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800930 llvm::InitializeNativeTargetAsmPrinter();
931 llvm::InitializeNativeTargetAsmParser();
Logan Chien0eedc8c2018-08-21 09:34:28 +0800932
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400933 if(!::context)
John Bauman89401822014-05-06 15:04:28 -0400934 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400935 ::context = new llvm::LLVMContext();
John Bauman89401822014-05-06 15:04:28 -0400936 }
937
John Bauman89401822014-05-06 15:04:28 -0400938 #if defined(__x86_64__)
Logan Chien52cde602018-09-03 19:37:57 +0800939 static const char arch[] = "x86-64";
Logan Chiene3191012018-08-24 22:01:50 +0800940 #elif defined(__i386__)
Logan Chien52cde602018-09-03 19:37:57 +0800941 static const char arch[] = "x86";
Logan Chiene3191012018-08-24 22:01:50 +0800942 #elif defined(__aarch64__)
943 static const char arch[] = "arm64";
944 #elif defined(__arm__)
945 static const char arch[] = "arm";
Gordana Cmiljanovic082dfec2018-10-19 11:36:15 +0200946 #elif defined(__mips__)
Gordana Cmiljanovic20622c02018-11-05 15:00:11 +0100947 #if defined(__mips64)
948 static const char arch[] = "mips64el";
949 #else
950 static const char arch[] = "mipsel";
951 #endif
Colin Samples8fd53302019-06-13 09:57:44 -0400952 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
953 static const char arch[] = "ppc64le";
Logan Chiene3191012018-08-24 22:01:50 +0800954 #else
955 #error "unknown architecture"
John Bauman89401822014-05-06 15:04:28 -0400956 #endif
957
Ben Clayton0fc611f2019-04-18 11:23:27 -0400958 llvm::SmallVector<std::string, 8> mattrs;
959
960 llvm::StringMap<bool> features;
Colin Samples8fd53302019-06-13 09:57:44 -0400961
Ben Clayton0fc611f2019-04-18 11:23:27 -0400962 bool ok = llvm::sys::getHostCPUFeatures(features);
Colin Samples8fd53302019-06-13 09:57:44 -0400963
964 #if defined(__i386__) || defined(__x86_64__) || \
965 (defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
Ben Clayton0fc611f2019-04-18 11:23:27 -0400966 ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
Colin Samples8fd53302019-06-13 09:57:44 -0400967 #else
968 (void) ok; // getHostCPUFeatures always returns false on other platforms
969 #endif
970
Ben Clayton0fc611f2019-04-18 11:23:27 -0400971 for (auto &feature : features)
972 {
973 if (feature.second) { mattrs.push_back(feature.first()); }
974 }
975
976#if 0
Logan Chiene3191012018-08-24 22:01:50 +0800977#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800978 mattrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
979 mattrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
980 mattrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
981 mattrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
982 mattrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
983 mattrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
Logan Chien0eedc8c2018-08-21 09:34:28 +0800984 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
Logan Chiene3191012018-08-24 22:01:50 +0800985#elif defined(__arm__)
986#if __ARM_ARCH >= 8
987 mattrs.push_back("+armv8-a");
988#else
989 // armv7-a requires compiler-rt routines; otherwise, compiled kernel
990 // might fail to link.
991#endif
992#endif
Ben Clayton0fc611f2019-04-18 11:23:27 -0400993#endif
John Bauman89401822014-05-06 15:04:28 -0400994
Logan Chien0eedc8c2018-08-21 09:34:28 +0800995 llvm::TargetOptions targetOpts;
Nicolas Capensa7643812018-09-13 14:20:06 -0400996 targetOpts.UnsafeFPMath = false;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800997 // targetOpts.NoInfsFPMath = true;
998 // targetOpts.NoNaNsFPMath = true;
Logan Chien52cde602018-09-03 19:37:57 +0800999
1000 if(!::reactorJIT)
1001 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001002 ::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
Logan Chien52cde602018-09-03 19:37:57 +08001003 }
1004
1005 ::reactorJIT->startSession();
John Bauman89401822014-05-06 15:04:28 -04001006
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001007 if(!::builder)
John Bauman89401822014-05-06 15:04:28 -04001008 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001009 ::builder = new llvm::IRBuilder<>(*::context);
John Bauman89401822014-05-06 15:04:28 -04001010 }
1011 }
1012
1013 Nucleus::~Nucleus()
1014 {
Ben Clayton90cb2602019-05-23 14:42:32 +01001015#ifdef ENABLE_RR_DEBUG_INFO
1016 debugInfo.reset(nullptr);
1017#endif // ENABLE_RR_DEBUG_INFO
1018
Logan Chien52cde602018-09-03 19:37:57 +08001019 ::reactorJIT->endSession();
Nicolas Capensb7ea9842015-04-01 10:54:59 -04001020
Nicolas Capens3bbc5e12016-09-27 10:49:52 -04001021 ::codegenMutex.unlock();
John Bauman89401822014-05-06 15:04:28 -04001022 }
1023
Chris Forbes878d4b02019-01-21 10:48:35 -08001024 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
John Bauman89401822014-05-06 15:04:28 -04001025 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001026 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
John Bauman19bac1e2014-05-06 15:23:49 -04001027 {
Nicolas Capensac230122016-09-20 14:30:06 -04001028 llvm::Type *type = ::function->getReturnType();
John Bauman19bac1e2014-05-06 15:23:49 -04001029
1030 if(type->isVoidTy())
1031 {
1032 createRetVoid();
1033 }
1034 else
1035 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001036 createRet(V(llvm::UndefValue::get(type)));
John Bauman19bac1e2014-05-06 15:23:49 -04001037 }
1038 }
John Bauman89401822014-05-06 15:04:28 -04001039
Ben Clayton97c13ad2019-05-02 11:59:30 +01001040#ifdef ENABLE_RR_DEBUG_INFO
1041 if (debugInfo != nullptr)
1042 {
1043 debugInfo->Finalize();
1044 }
1045#endif // ENABLE_RR_DEBUG_INFO
1046
John Bauman89401822014-05-06 15:04:28 -04001047 if(false)
1048 {
Ben Clayton5875be52019-04-11 14:57:40 -04001049 std::error_code error;
1050 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001051 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001052 }
1053
Ben Clayton4b944652019-05-02 10:56:19 +01001054 // FIXME: Disable for release builds once heavy development is over.
1055 bool verifyIR = true;
1056 if(verifyIR)
1057 {
1058 llvm::legacy::PassManager pm;
1059 pm.add(llvm::createVerifierPass());
1060 pm.run(*::module);
1061 }
1062
John Bauman89401822014-05-06 15:04:28 -04001063 if(runOptimizations)
1064 {
1065 optimize();
1066 }
1067
1068 if(false)
1069 {
Ben Clayton5875be52019-04-11 14:57:40 -04001070 std::error_code error;
1071 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001072 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001073 }
1074
Ben Clayton1c82c7b2019-04-30 12:49:27 +01001075 LLVMRoutine *routine = ::reactorJIT->acquireRoutine(&::function, 1);
John Bauman89401822014-05-06 15:04:28 -04001076
John Bauman89401822014-05-06 15:04:28 -04001077 return routine;
1078 }
1079
1080 void Nucleus::optimize()
1081 {
Logan Chien52cde602018-09-03 19:37:57 +08001082 ::reactorJIT->optimize(::module);
John Bauman89401822014-05-06 15:04:28 -04001083 }
1084
John Bauman19bac1e2014-05-06 15:23:49 -04001085 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
John Bauman89401822014-05-06 15:04:28 -04001086 {
1087 // Need to allocate it in the entry block for mem2reg to work
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001088 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
John Bauman89401822014-05-06 15:04:28 -04001089
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001090 llvm::Instruction *declaration;
John Bauman89401822014-05-06 15:04:28 -04001091
1092 if(arraySize)
1093 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001094 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
John Bauman89401822014-05-06 15:04:28 -04001095 }
1096 else
1097 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001098 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
John Bauman89401822014-05-06 15:04:28 -04001099 }
1100
1101 entryBlock.getInstList().push_front(declaration);
1102
Nicolas Capens19336542016-09-26 10:32:29 -04001103 return V(declaration);
John Bauman89401822014-05-06 15:04:28 -04001104 }
1105
1106 BasicBlock *Nucleus::createBasicBlock()
1107 {
Logan Chien191b3052018-08-31 16:57:15 +08001108 return B(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001109 }
1110
1111 BasicBlock *Nucleus::getInsertBlock()
1112 {
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001113 return B(::builder->GetInsertBlock());
John Bauman89401822014-05-06 15:04:28 -04001114 }
1115
1116 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1117 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001118 // assert(::builder->GetInsertBlock()->back().isTerminator());
Nicolas Capens0192d152019-03-27 14:46:07 -04001119
1120 Variable::materializeAll();
1121
Logan Chien191b3052018-08-31 16:57:15 +08001122 ::builder->SetInsertPoint(B(basicBlock));
John Bauman89401822014-05-06 15:04:28 -04001123 }
1124
Nicolas Capensac230122016-09-20 14:30:06 -04001125 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
John Bauman89401822014-05-06 15:04:28 -04001126 {
Ben Claytonea38f952019-06-17 13:56:56 +01001127 ::function = rr::createFunction("", T(ReturnType), T(Params));
Nicolas Capens52551d12018-09-13 14:30:56 -04001128
Ben Claytonac07ed82019-03-26 14:17:41 +00001129#ifdef ENABLE_RR_DEBUG_INFO
1130 ::debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(::builder, ::context, ::module, ::function));
1131#endif // ENABLE_RR_DEBUG_INFO
1132
Logan Chien191b3052018-08-31 16:57:15 +08001133 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001134 }
1135
Nicolas Capens19336542016-09-26 10:32:29 -04001136 Value *Nucleus::getArgument(unsigned int index)
John Bauman89401822014-05-06 15:04:28 -04001137 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001138 llvm::Function::arg_iterator args = ::function->arg_begin();
John Bauman89401822014-05-06 15:04:28 -04001139
1140 while(index)
1141 {
1142 args++;
1143 index--;
1144 }
1145
Nicolas Capens19336542016-09-26 10:32:29 -04001146 return V(&*args);
John Bauman89401822014-05-06 15:04:28 -04001147 }
1148
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001149 void Nucleus::createRetVoid()
John Bauman89401822014-05-06 15:04:28 -04001150 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001151 RR_DEBUG_INFO_UPDATE_LOC();
1152
Ben Claytonc958b172019-05-02 12:20:59 +01001153 ASSERT_MSG(::function->getReturnType() == T(Void::getType()), "Return type mismatch");
1154
Nicolas Capens0192d152019-03-27 14:46:07 -04001155 // Code generated after this point is unreachable, so any variables
1156 // being read can safely return an undefined value. We have to avoid
1157 // materializing variables after the terminator ret instruction.
1158 Variable::killUnmaterialized();
1159
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001160 ::builder->CreateRetVoid();
John Bauman89401822014-05-06 15:04:28 -04001161 }
1162
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001163 void Nucleus::createRet(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001164 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001165 RR_DEBUG_INFO_UPDATE_LOC();
1166
Ben Claytonc958b172019-05-02 12:20:59 +01001167 ASSERT_MSG(::function->getReturnType() == V(v)->getType(), "Return type mismatch");
1168
Nicolas Capens0192d152019-03-27 14:46:07 -04001169 // Code generated after this point is unreachable, so any variables
1170 // being read can safely return an undefined value. We have to avoid
1171 // materializing variables after the terminator ret instruction.
1172 Variable::killUnmaterialized();
1173
Logan Chien191b3052018-08-31 16:57:15 +08001174 ::builder->CreateRet(V(v));
John Bauman89401822014-05-06 15:04:28 -04001175 }
1176
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001177 void Nucleus::createBr(BasicBlock *dest)
John Bauman89401822014-05-06 15:04:28 -04001178 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001179 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001180 Variable::materializeAll();
1181
Logan Chien191b3052018-08-31 16:57:15 +08001182 ::builder->CreateBr(B(dest));
John Bauman89401822014-05-06 15:04:28 -04001183 }
1184
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001185 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001186 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001187 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001188 Variable::materializeAll();
Logan Chien191b3052018-08-31 16:57:15 +08001189 ::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
John Bauman89401822014-05-06 15:04:28 -04001190 }
1191
1192 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1193 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001194 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001195 return V(::builder->CreateAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001196 }
1197
1198 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1199 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001200 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001201 return V(::builder->CreateSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001202 }
1203
1204 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1205 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001206 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001207 return V(::builder->CreateMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001208 }
1209
1210 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1211 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001212 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001213 return V(::builder->CreateUDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001214 }
1215
1216 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1217 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001218 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001219 return V(::builder->CreateSDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001220 }
1221
1222 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1223 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001224 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001225 return V(::builder->CreateFAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001226 }
1227
1228 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1229 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001230 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001231 return V(::builder->CreateFSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001232 }
1233
1234 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1235 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001236 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001237 return V(::builder->CreateFMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001238 }
1239
1240 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1241 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001242 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001243 return V(::builder->CreateFDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001244 }
1245
1246 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1247 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001248 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001249 return V(::builder->CreateURem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001250 }
1251
1252 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1253 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001254 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001255 return V(::builder->CreateSRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001256 }
1257
1258 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1259 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001260 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001261 return V(::builder->CreateFRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001262 }
1263
1264 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1265 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001266 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001267 return V(::builder->CreateShl(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001268 }
1269
1270 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1271 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001272 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001273 return V(::builder->CreateLShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001274 }
1275
1276 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1277 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001278 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001279 return V(::builder->CreateAShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001280 }
1281
1282 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1283 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001284 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001285 return V(::builder->CreateAnd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001286 }
1287
1288 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1289 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001290 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001291 return V(::builder->CreateOr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001292 }
1293
1294 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1295 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001296 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001297 return V(::builder->CreateXor(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001298 }
1299
Nicolas Capens19336542016-09-26 10:32:29 -04001300 Value *Nucleus::createNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001301 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001302 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001303 return V(::builder->CreateNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001304 }
1305
Nicolas Capens19336542016-09-26 10:32:29 -04001306 Value *Nucleus::createFNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001307 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001308 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001309 return V(::builder->CreateFNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001310 }
1311
Nicolas Capens19336542016-09-26 10:32:29 -04001312 Value *Nucleus::createNot(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001313 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001314 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001315 return V(::builder->CreateNot(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001316 }
1317
Nicolas Capens86509d92019-03-21 13:23:50 -04001318 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001319 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001320 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001321 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001322 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001323 case Type_v2i32:
1324 case Type_v4i16:
1325 case Type_v8i8:
1326 case Type_v2f32:
1327 return createBitCast(
1328 createInsertElement(
1329 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
Nicolas Capens86509d92019-03-21 13:23:50 -04001330 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001331 0),
1332 type);
1333 case Type_v2i16:
1334 case Type_v4i8:
1335 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001336 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001337 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Nicolas Capens86509d92019-03-21 13:23:50 -04001338 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001339 i = createZExt(i, Long::getType());
1340 Value *v = createInsertElement(u, i, 0);
1341 return createBitCast(v, type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001342 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001343 // Fallthrough to non-emulated case.
1344 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001345 {
Ben Clayton99e57192019-05-03 13:25:08 +01001346 auto elTy = T(type);
1347 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
1348 if (atomic && !(elTy->isIntegerTy() || elTy->isPointerTy() || elTy->isFloatTy()))
1349 {
1350 // atomic load operand must have integer, pointer, or floating point type
1351 // Fall back to using:
1352 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1353 auto sizetTy = ::llvm::IntegerType::get(*::context, sizeof(size_t) * 8);
1354 auto intTy = ::llvm::IntegerType::get(*::context, sizeof(int) * 8);
1355 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1356 auto i8PtrTy = i8Ty->getPointerTo();
1357 auto voidTy = ::llvm::Type::getVoidTy(*::context);
1358 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
1359 auto func = ::module->getOrInsertFunction("__atomic_load", funcTy);
1360 auto size = ::module->getDataLayout().getTypeStoreSize(elTy);
1361 auto out = allocateStackVariable(type);
1362 ::builder->CreateCall(func, {
1363 ::llvm::ConstantInt::get(sizetTy, size),
1364 ::builder->CreatePointerCast(V(ptr), i8PtrTy),
1365 ::builder->CreatePointerCast(V(out), i8PtrTy),
1366 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1367 });
1368 return V(::builder->CreateLoad(V(out)));
1369 }
1370 else
1371 {
1372 auto load = new llvm::LoadInst(V(ptr), "", isVolatile, alignment);
1373 load->setAtomic(atomicOrdering(atomic, memoryOrder));
1374 return V(::builder->Insert(load));
1375 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001376 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001377 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001378 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1379 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001380 }
John Bauman89401822014-05-06 15:04:28 -04001381 }
1382
Nicolas Capens86509d92019-03-21 13:23:50 -04001383 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001384 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001385 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001386 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001387 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001388 case Type_v2i32:
1389 case Type_v4i16:
1390 case Type_v8i8:
1391 case Type_v2f32:
1392 createStore(
1393 createExtractElement(
1394 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1395 createBitCast(ptr, Pointer<Long>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001396 Long::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001397 return value;
1398 case Type_v2i16:
1399 case Type_v4i8:
1400 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001401 {
Logan Chien191b3052018-08-31 16:57:15 +08001402 createStore(
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001403 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1404 createBitCast(ptr, Pointer<Int>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001405 Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens01a97962017-07-28 17:30:51 -04001406 return value;
Nicolas Capens01a97962017-07-28 17:30:51 -04001407 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001408 // Fallthrough to non-emulated case.
1409 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001410 {
Ben Clayton99e57192019-05-03 13:25:08 +01001411 auto elTy = T(type);
1412 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
1413 if (atomic && !(elTy->isIntegerTy() || elTy->isPointerTy() || elTy->isFloatTy()))
1414 {
1415 // atomic store operand must have integer, pointer, or floating point type
1416 // Fall back to using:
1417 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1418 auto sizetTy = ::llvm::IntegerType::get(*::context, sizeof(size_t) * 8);
1419 auto intTy = ::llvm::IntegerType::get(*::context, sizeof(int) * 8);
1420 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1421 auto i8PtrTy = i8Ty->getPointerTo();
1422 auto voidTy = ::llvm::Type::getVoidTy(*::context);
1423 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
1424 auto func = ::module->getOrInsertFunction("__atomic_store", funcTy);
1425 auto size = ::module->getDataLayout().getTypeStoreSize(elTy);
1426 auto copy = allocateStackVariable(type);
1427 ::builder->CreateStore(V(value), V(copy));
1428 ::builder->CreateCall(func, {
1429 ::llvm::ConstantInt::get(sizetTy, size),
1430 ::builder->CreatePointerCast(V(ptr), i8PtrTy),
1431 ::builder->CreatePointerCast(V(copy), i8PtrTy),
1432 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1433 });
1434 }
1435 else
1436 {
1437 auto store = ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
1438 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1439 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001440
1441 return value;
1442 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001443 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001444 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1445 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001446 }
John Bauman89401822014-05-06 15:04:28 -04001447 }
1448
Ben Clayton0fc611f2019-04-18 11:23:27 -04001449 Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment)
1450 {
1451 ASSERT(V(base)->getType()->isPointerTy());
1452 ASSERT(V(offsets)->getType()->isVectorTy());
1453 ASSERT(V(mask)->getType()->isVectorTy());
1454
1455 auto numEls = V(mask)->getType()->getVectorNumElements();
1456 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
1457 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
1458 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1459 auto i8PtrTy = i8Ty->getPointerTo();
1460 auto elPtrTy = T(elTy)->getPointerTo();
1461 auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
1462 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
1463 auto i8Base = ::builder->CreatePointerCast(V(base), i8PtrTy);
1464 auto i8Ptrs = ::builder->CreateGEP(i8Base, V(offsets));
1465 auto elPtrs = ::builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1466 auto i8Mask = ::builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
1467 auto passthrough = ::llvm::Constant::getNullValue(elVecTy);
1468 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
1469 auto func = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
1470 return V(::builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough }));
1471 }
1472
1473 void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment)
1474 {
1475 ASSERT(V(base)->getType()->isPointerTy());
1476 ASSERT(V(val)->getType()->isVectorTy());
1477 ASSERT(V(offsets)->getType()->isVectorTy());
1478 ASSERT(V(mask)->getType()->isVectorTy());
1479
1480 auto numEls = V(mask)->getType()->getVectorNumElements();
1481 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
1482 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
1483 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
1484 auto i8PtrTy = i8Ty->getPointerTo();
1485 auto elVecTy = V(val)->getType();
1486 auto elTy = elVecTy->getVectorElementType();
1487 auto elPtrTy = elTy->getPointerTo();
1488 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
1489 auto i8Base = ::builder->CreatePointerCast(V(base), i8PtrTy);
1490 auto i8Ptrs = ::builder->CreateGEP(i8Base, V(offsets));
1491 auto elPtrs = ::builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1492 auto i8Mask = ::builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
1493 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
1494 auto func = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_scatter, { elVecTy, elPtrVecTy } );
1495 ::builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
1496 }
1497
Ben Claytonb16c5862019-05-08 14:01:38 +01001498 void Nucleus::createFence(std::memory_order memoryOrder)
1499 {
1500 ::builder->CreateFence(atomicOrdering(true, memoryOrder));
1501 }
1502
Nicolas Capensd294def2017-01-26 17:44:37 -08001503 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
John Bauman89401822014-05-06 15:04:28 -04001504 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001505 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001506 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens01a97962017-07-28 17:30:51 -04001507 if(sizeof(void*) == 8)
Nicolas Capensd294def2017-01-26 17:44:37 -08001508 {
Ben Claytonb1243732019-02-27 23:56:18 +00001509 // LLVM manual: "When indexing into an array, pointer or vector,
1510 // integers of any width are allowed, and they are not required to
1511 // be constant. These integers are treated as signed values where
1512 // relevant."
1513 //
1514 // Thus if we want indexes to be treated as unsigned we have to
1515 // zero-extend them ourselves.
1516 //
1517 // Note that this is not because we want to address anywhere near
1518 // 4 GB of data. Instead this is important for performance because
1519 // x86 supports automatic zero-extending of 32-bit registers to
1520 // 64-bit. Thus when indexing into an array using a uint32 is
1521 // actually faster than an int32.
1522 index = unsignedIndex ?
1523 createZExt(index, Long::getType()) :
1524 createSExt(index, Long::getType());
Nicolas Capens01a97962017-07-28 17:30:51 -04001525 }
Ben Claytonb1243732019-02-27 23:56:18 +00001526
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001527 // For non-emulated types we can rely on LLVM's GEP to calculate the
1528 // effective address correctly.
1529 if(asInternalType(type) == Type_LLVM)
Nicolas Capens01a97962017-07-28 17:30:51 -04001530 {
Ben Claytonb1243732019-02-27 23:56:18 +00001531 return V(::builder->CreateGEP(V(ptr), V(index)));
Nicolas Capensd294def2017-01-26 17:44:37 -08001532 }
1533
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001534 // For emulated types we have to multiply the index by the intended
1535 // type size ourselves to obain the byte offset.
Ben Claytonb1243732019-02-27 23:56:18 +00001536 index = (sizeof(void*) == 8) ?
1537 createMul(index, createConstantLong((int64_t)typeSize(type))) :
1538 createMul(index, createConstantInt((int)typeSize(type)));
1539
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001540 // Cast to a byte pointer, apply the byte offset, and cast back to the
1541 // original pointer type.
Logan Chien191b3052018-08-31 16:57:15 +08001542 return createBitCast(
1543 V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
1544 T(llvm::PointerType::get(T(type), 0)));
John Bauman89401822014-05-06 15:04:28 -04001545 }
1546
Chris Forbes17813932019-04-18 11:45:54 -07001547 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
John Bauman19bac1e2014-05-06 15:23:49 -04001548 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001549 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbes17813932019-04-18 11:45:54 -07001550 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1551 }
1552
Chris Forbes707ed992019-04-18 18:17:35 -07001553 Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1554 {
1555 RR_DEBUG_INFO_UPDATE_LOC();
1556 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Sub, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1557 }
1558
Chris Forbes17813932019-04-18 11:45:54 -07001559 Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1560 {
1561 RR_DEBUG_INFO_UPDATE_LOC();
1562 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::And, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1563 }
1564
1565 Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1566 {
1567 RR_DEBUG_INFO_UPDATE_LOC();
1568 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Or, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1569 }
1570
1571 Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1572 {
1573 RR_DEBUG_INFO_UPDATE_LOC();
1574 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xor, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1575 }
1576
1577 Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1578 {
1579 RR_DEBUG_INFO_UPDATE_LOC();
1580 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Min, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1581 }
1582
1583 Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1584 {
1585 RR_DEBUG_INFO_UPDATE_LOC();
1586 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Max, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1587 }
1588
Chris Forbesf31bdad2019-05-23 14:58:08 -07001589 Value *Nucleus::createAtomicUMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1590 {
1591 RR_DEBUG_INFO_UPDATE_LOC();
1592 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::UMin, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1593 }
1594
1595 Value *Nucleus::createAtomicUMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1596 {
1597 RR_DEBUG_INFO_UPDATE_LOC();
1598 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::UMax, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
1599 }
1600
1601
Chris Forbes17813932019-04-18 11:45:54 -07001602 Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1603 {
1604 RR_DEBUG_INFO_UPDATE_LOC();
1605 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
John Bauman19bac1e2014-05-06 15:23:49 -04001606 }
1607
Chris Forbesa16238d2019-04-18 16:31:54 -07001608 Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1609 {
1610 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbesc9ca99e2019-04-19 07:53:34 -07001611 // Note: AtomicCmpXchgInstruction returns a 2-member struct containing {result, success-flag}, not the result directly.
Chris Forbesa16238d2019-04-18 16:31:54 -07001612 return V(::builder->CreateExtractValue(
1613 ::builder->CreateAtomicCmpXchg(V(ptr), V(compare), V(value), atomicOrdering(true, memoryOrderEqual), atomicOrdering(true, memoryOrderUnequal)),
1614 llvm::ArrayRef<unsigned>(0u)));
1615 }
1616
Nicolas Capens19336542016-09-26 10:32:29 -04001617 Value *Nucleus::createTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001618 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001619 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001620 return V(::builder->CreateTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001621 }
1622
Nicolas Capens19336542016-09-26 10:32:29 -04001623 Value *Nucleus::createZExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001624 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001625 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001626 return V(::builder->CreateZExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001627 }
1628
Nicolas Capens19336542016-09-26 10:32:29 -04001629 Value *Nucleus::createSExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001630 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001631 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001632 return V(::builder->CreateSExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001633 }
1634
Nicolas Capens19336542016-09-26 10:32:29 -04001635 Value *Nucleus::createFPToSI(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001636 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001637 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001638 return V(::builder->CreateFPToSI(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001639 }
1640
Nicolas Capens19336542016-09-26 10:32:29 -04001641 Value *Nucleus::createSIToFP(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001642 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001643 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001644 return V(::builder->CreateSIToFP(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001645 }
1646
Nicolas Capens19336542016-09-26 10:32:29 -04001647 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001648 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001649 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001650 return V(::builder->CreateFPTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001651 }
1652
Nicolas Capens19336542016-09-26 10:32:29 -04001653 Value *Nucleus::createFPExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001654 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001655 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001656 return V(::builder->CreateFPExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001657 }
1658
Nicolas Capens19336542016-09-26 10:32:29 -04001659 Value *Nucleus::createBitCast(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001660 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001661 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04001662 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1663 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1664 // reading back as the destination type.
Logan Chien191b3052018-08-31 16:57:15 +08001665 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001666 {
1667 Value *readAddress = allocateStackVariable(destType);
Logan Chien191b3052018-08-31 16:57:15 +08001668 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1669 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001670 return createLoad(readAddress, destType);
1671 }
Logan Chien191b3052018-08-31 16:57:15 +08001672 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001673 {
Logan Chien191b3052018-08-31 16:57:15 +08001674 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1675 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001676 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1677 return createLoad(readAddress, destType);
1678 }
1679
Logan Chien191b3052018-08-31 16:57:15 +08001680 return V(::builder->CreateBitCast(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001681 }
1682
John Bauman89401822014-05-06 15:04:28 -04001683 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1684 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001685 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001686 return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001687 }
1688
1689 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1690 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001691 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001692 return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001693 }
1694
1695 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1696 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001697 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001698 return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001699 }
1700
1701 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1702 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001703 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001704 return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001705 }
1706
1707 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1708 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001709 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001710 return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001711 }
1712
1713 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1714 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001715 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001716 return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001717 }
1718
1719 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1720 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001721 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001722 return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001723 }
1724
1725 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1726 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001727 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001728 return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001729 }
1730
1731 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1732 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001733 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001734 return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001735 }
1736
1737 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1738 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001739 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001740 return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001741 }
1742
1743 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1744 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001745 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001746 return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001747 }
1748
1749 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1750 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001751 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001752 return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001753 }
1754
1755 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1756 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001757 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001758 return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001759 }
1760
1761 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1762 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001763 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001764 return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001765 }
1766
1767 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1768 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001769 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001770 return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001771 }
1772
1773 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1774 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001775 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001776 return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001777 }
1778
1779 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1780 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001781 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001782 return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001783 }
1784
1785 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1786 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001787 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001788 return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001789 }
1790
1791 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1792 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001793 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001794 return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001795 }
1796
1797 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1798 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001799 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001800 return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001801 }
1802
1803 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1804 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001805 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001806 return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001807 }
1808
1809 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1810 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001811 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001812 return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001813 }
1814
1815 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1816 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001817 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001818 return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001819 }
1820
1821 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1822 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001823 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton71008d82019-03-05 17:17:59 +00001824 return V(::builder->CreateFCmpUNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001825 }
1826
Nicolas Capense95d5342016-09-30 11:37:28 -04001827 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
John Bauman89401822014-05-06 15:04:28 -04001828 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001829 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001830 ASSERT(V(vector)->getType()->getContainedType(0) == T(type));
Logan Chien191b3052018-08-31 16:57:15 +08001831 return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001832 }
1833
1834 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1835 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001836 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001837 return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001838 }
1839
Logan Chien191b3052018-08-31 16:57:15 +08001840 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
John Bauman89401822014-05-06 15:04:28 -04001841 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001842 RR_DEBUG_INFO_UPDATE_LOC();
1843
Logan Chien191b3052018-08-31 16:57:15 +08001844 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
Nicolas Capense89cd582016-09-30 14:23:47 -04001845 const int maxSize = 16;
1846 llvm::Constant *swizzle[maxSize];
Ben Claytoneb50d252019-04-15 13:50:01 -04001847 ASSERT(size <= maxSize);
Nicolas Capense89cd582016-09-30 14:23:47 -04001848
1849 for(int i = 0; i < size; i++)
1850 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001851 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
Nicolas Capense89cd582016-09-30 14:23:47 -04001852 }
1853
1854 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
1855
Logan Chien191b3052018-08-31 16:57:15 +08001856 return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
John Bauman89401822014-05-06 15:04:28 -04001857 }
1858
Logan Chien191b3052018-08-31 16:57:15 +08001859 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001860 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001861 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001862 return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
John Bauman89401822014-05-06 15:04:28 -04001863 }
1864
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001865 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
John Bauman89401822014-05-06 15:04:28 -04001866 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001867 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001868 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
John Bauman89401822014-05-06 15:04:28 -04001869 }
1870
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001871 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
John Bauman89401822014-05-06 15:04:28 -04001872 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001873 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08001874 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
1875 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
John Bauman89401822014-05-06 15:04:28 -04001876 }
1877
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001878 void Nucleus::createUnreachable()
John Bauman89401822014-05-06 15:04:28 -04001879 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001880 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001881 ::builder->CreateUnreachable();
John Bauman89401822014-05-06 15:04:28 -04001882 }
1883
Nicolas Capensac230122016-09-20 14:30:06 -04001884 Type *Nucleus::getPointerType(Type *ElementType)
John Bauman89401822014-05-06 15:04:28 -04001885 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001886 return T(llvm::PointerType::get(T(ElementType), 0));
John Bauman89401822014-05-06 15:04:28 -04001887 }
1888
Nicolas Capens13ac2322016-10-13 14:52:12 -04001889 Value *Nucleus::createNullValue(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001890 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001891 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001892 return V(llvm::Constant::getNullValue(T(Ty)));
John Bauman89401822014-05-06 15:04:28 -04001893 }
1894
Nicolas Capens13ac2322016-10-13 14:52:12 -04001895 Value *Nucleus::createConstantLong(int64_t i)
John Bauman89401822014-05-06 15:04:28 -04001896 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001897 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001898 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001899 }
1900
Nicolas Capens13ac2322016-10-13 14:52:12 -04001901 Value *Nucleus::createConstantInt(int i)
John Bauman89401822014-05-06 15:04:28 -04001902 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001903 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001904 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001905 }
1906
Nicolas Capens13ac2322016-10-13 14:52:12 -04001907 Value *Nucleus::createConstantInt(unsigned int i)
John Bauman89401822014-05-06 15:04:28 -04001908 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001909 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001910 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001911 }
1912
Nicolas Capens13ac2322016-10-13 14:52:12 -04001913 Value *Nucleus::createConstantBool(bool b)
John Bauman89401822014-05-06 15:04:28 -04001914 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001915 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001916 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
John Bauman89401822014-05-06 15:04:28 -04001917 }
1918
Nicolas Capens13ac2322016-10-13 14:52:12 -04001919 Value *Nucleus::createConstantByte(signed char i)
John Bauman89401822014-05-06 15:04:28 -04001920 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001921 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001922 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001923 }
1924
Nicolas Capens13ac2322016-10-13 14:52:12 -04001925 Value *Nucleus::createConstantByte(unsigned char i)
John Bauman89401822014-05-06 15:04:28 -04001926 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001927 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001928 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001929 }
1930
Nicolas Capens13ac2322016-10-13 14:52:12 -04001931 Value *Nucleus::createConstantShort(short i)
John Bauman89401822014-05-06 15:04:28 -04001932 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001933 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001934 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001935 }
1936
Nicolas Capens13ac2322016-10-13 14:52:12 -04001937 Value *Nucleus::createConstantShort(unsigned short i)
John Bauman89401822014-05-06 15:04:28 -04001938 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001939 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001940 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001941 }
1942
Nicolas Capens13ac2322016-10-13 14:52:12 -04001943 Value *Nucleus::createConstantFloat(float x)
John Bauman89401822014-05-06 15:04:28 -04001944 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001945 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001946 return V(llvm::ConstantFP::get(T(Float::getType()), x));
John Bauman89401822014-05-06 15:04:28 -04001947 }
1948
Nicolas Capens13ac2322016-10-13 14:52:12 -04001949 Value *Nucleus::createNullPointer(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001950 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001951 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001952 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
John Bauman89401822014-05-06 15:04:28 -04001953 }
1954
Nicolas Capens13ac2322016-10-13 14:52:12 -04001955 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
John Bauman89401822014-05-06 15:04:28 -04001956 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001957 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001958 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1959 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001960 ASSERT(numElements <= 16 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001961 llvm::Constant *constantVector[16];
1962
Nicolas Capens69674fb2017-09-01 11:08:44 -04001963 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001964 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001965 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001966 }
1967
Nicolas Capens69674fb2017-09-01 11:08:44 -04001968 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
Nicolas Capens13ac2322016-10-13 14:52:12 -04001969 }
1970
1971 Value *Nucleus::createConstantVector(const double *constants, Type *type)
1972 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001973 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001974 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1975 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04001976 ASSERT(numElements <= 8 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001977 llvm::Constant *constantVector[8];
1978
Nicolas Capens69674fb2017-09-01 11:08:44 -04001979 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001980 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001981 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001982 }
1983
Nicolas Capens69674fb2017-09-01 11:08:44 -04001984 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
John Bauman89401822014-05-06 15:04:28 -04001985 }
1986
John Bauman19bac1e2014-05-06 15:23:49 -04001987 Type *Void::getType()
John Bauman89401822014-05-06 15:04:28 -04001988 {
Nicolas Capensac230122016-09-20 14:30:06 -04001989 return T(llvm::Type::getVoidTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04001990 }
1991
John Bauman19bac1e2014-05-06 15:23:49 -04001992 Type *Bool::getType()
John Bauman89401822014-05-06 15:04:28 -04001993 {
Nicolas Capensac230122016-09-20 14:30:06 -04001994 return T(llvm::Type::getInt1Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001995 }
1996
John Bauman19bac1e2014-05-06 15:23:49 -04001997 Type *Byte::getType()
John Bauman89401822014-05-06 15:04:28 -04001998 {
Nicolas Capensac230122016-09-20 14:30:06 -04001999 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002000 }
2001
John Bauman19bac1e2014-05-06 15:23:49 -04002002 Type *SByte::getType()
John Bauman89401822014-05-06 15:04:28 -04002003 {
Nicolas Capensac230122016-09-20 14:30:06 -04002004 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002005 }
2006
John Bauman19bac1e2014-05-06 15:23:49 -04002007 Type *Short::getType()
John Bauman89401822014-05-06 15:04:28 -04002008 {
Nicolas Capensac230122016-09-20 14:30:06 -04002009 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002010 }
2011
John Bauman19bac1e2014-05-06 15:23:49 -04002012 Type *UShort::getType()
John Bauman89401822014-05-06 15:04:28 -04002013 {
Nicolas Capensac230122016-09-20 14:30:06 -04002014 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002015 }
2016
John Bauman19bac1e2014-05-06 15:23:49 -04002017 Type *Byte4::getType()
John Bauman89401822014-05-06 15:04:28 -04002018 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002019 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04002020 }
2021
John Bauman19bac1e2014-05-06 15:23:49 -04002022 Type *SByte4::getType()
John Bauman89401822014-05-06 15:04:28 -04002023 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002024 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04002025 }
2026
John Bauman19bac1e2014-05-06 15:23:49 -04002027 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002028 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002029 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002030#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002031 return x86::paddusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002032#else
2033 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2034#endif
John Bauman89401822014-05-06 15:04:28 -04002035 }
John Bauman66b8ab22014-05-06 15:57:45 -04002036
John Bauman19bac1e2014-05-06 15:23:49 -04002037 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002038 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002039 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002040#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002041 return x86::psubusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002042#else
2043 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2044#endif
John Bauman89401822014-05-06 15:04:28 -04002045 }
2046
John Bauman19bac1e2014-05-06 15:23:49 -04002047 RValue<Int> SignMask(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04002048 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002049 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002050#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002051 return x86::pmovmskb(x);
Logan Chiene3191012018-08-24 22:01:50 +08002052#else
2053 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2054#endif
John Bauman89401822014-05-06 15:04:28 -04002055 }
2056
John Bauman19bac1e2014-05-06 15:23:49 -04002057// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002058// {
Logan Chiene3191012018-08-24 22:01:50 +08002059//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002060// return x86::pcmpgtb(x, y); // FIXME: Signedness
Logan Chiene3191012018-08-24 22:01:50 +08002061//#else
2062// return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2063//#endif
John Bauman89401822014-05-06 15:04:28 -04002064// }
John Bauman66b8ab22014-05-06 15:57:45 -04002065
John Bauman19bac1e2014-05-06 15:23:49 -04002066 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002067 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002068 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002069#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002070 return x86::pcmpeqb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002071#else
2072 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2073#endif
John Bauman89401822014-05-06 15:04:28 -04002074 }
2075
John Bauman19bac1e2014-05-06 15:23:49 -04002076 Type *Byte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002077 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002078 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002079 }
2080
John Bauman19bac1e2014-05-06 15:23:49 -04002081 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002082 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002083 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002084#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002085 return x86::paddsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002086#else
2087 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2088#endif
John Bauman89401822014-05-06 15:04:28 -04002089 }
John Bauman66b8ab22014-05-06 15:57:45 -04002090
John Bauman19bac1e2014-05-06 15:23:49 -04002091 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002092 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002093 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002094#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002095 return x86::psubsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002096#else
2097 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2098#endif
John Bauman89401822014-05-06 15:04:28 -04002099 }
2100
John Bauman19bac1e2014-05-06 15:23:49 -04002101 RValue<Int> SignMask(RValue<SByte8> x)
John Bauman89401822014-05-06 15:04:28 -04002102 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002103 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002104#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002105 return x86::pmovmskb(As<Byte8>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002106#else
2107 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2108#endif
John Bauman89401822014-05-06 15:04:28 -04002109 }
2110
John Bauman19bac1e2014-05-06 15:23:49 -04002111 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002112 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002113 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002114#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002115 return x86::pcmpgtb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002116#else
2117 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2118#endif
John Bauman89401822014-05-06 15:04:28 -04002119 }
John Bauman66b8ab22014-05-06 15:57:45 -04002120
John Bauman19bac1e2014-05-06 15:23:49 -04002121 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002122 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002123 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002124#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002125 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
Logan Chiene3191012018-08-24 22:01:50 +08002126#else
2127 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2128#endif
John Bauman89401822014-05-06 15:04:28 -04002129 }
2130
John Bauman19bac1e2014-05-06 15:23:49 -04002131 Type *SByte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002132 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002133 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002134 }
2135
John Bauman19bac1e2014-05-06 15:23:49 -04002136 Type *Byte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002137 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002138 return T(llvm::VectorType::get(T(Byte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002139 }
2140
John Bauman19bac1e2014-05-06 15:23:49 -04002141 Type *SByte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002142 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002143 return T(llvm::VectorType::get(T(SByte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002144 }
2145
Nicolas Capens16b5f152016-10-13 13:39:01 -04002146 Type *Short2::getType()
2147 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002148 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002149 }
2150
Nicolas Capens16b5f152016-10-13 13:39:01 -04002151 Type *UShort2::getType()
2152 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002153 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002154 }
2155
John Bauman19bac1e2014-05-06 15:23:49 -04002156 Short4::Short4(RValue<Int4> cast)
John Bauman89401822014-05-06 15:04:28 -04002157 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002158 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04002159 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
John Bauman89401822014-05-06 15:04:28 -04002160 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2161
Nicolas Capens01a97962017-07-28 17:30:51 -04002162 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2163 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
John Bauman89401822014-05-06 15:04:28 -04002164
John Bauman66b8ab22014-05-06 15:57:45 -04002165 storeValue(short4);
John Bauman89401822014-05-06 15:04:28 -04002166 }
2167
John Bauman19bac1e2014-05-06 15:23:49 -04002168// Short4::Short4(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002169// {
2170// }
2171
John Bauman19bac1e2014-05-06 15:23:49 -04002172 Short4::Short4(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002173 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002174 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002175 Int4 v4i32 = Int4(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002176#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002177 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
Logan Chiene3191012018-08-24 22:01:50 +08002178#else
2179 Value *v = v4i32.loadValue();
2180 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
2181#endif
John Bauman66b8ab22014-05-06 15:57:45 -04002182
2183 storeValue(As<Short4>(Int2(v4i32)).value);
John Bauman89401822014-05-06 15:04:28 -04002184 }
2185
John Bauman19bac1e2014-05-06 15:23:49 -04002186 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002187 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002188 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002189#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002190 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2191
2192 return x86::psllw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002193#else
2194 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
2195#endif
John Bauman89401822014-05-06 15:04:28 -04002196 }
2197
John Bauman19bac1e2014-05-06 15:23:49 -04002198 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002199 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002200 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002201#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002202 return x86::psraw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002203#else
2204 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2205#endif
John Bauman89401822014-05-06 15:04:28 -04002206 }
2207
John Bauman19bac1e2014-05-06 15:23:49 -04002208 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002209 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002210 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002211#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002212 return x86::pmaxsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002213#else
2214 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
2215#endif
John Bauman89401822014-05-06 15:04:28 -04002216 }
2217
John Bauman19bac1e2014-05-06 15:23:49 -04002218 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002219 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002220 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002221#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002222 return x86::pminsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002223#else
2224 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
2225#endif
John Bauman89401822014-05-06 15:04:28 -04002226 }
2227
John Bauman19bac1e2014-05-06 15:23:49 -04002228 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002229 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002230 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002231#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002232 return x86::paddsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002233#else
2234 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2235#endif
John Bauman89401822014-05-06 15:04:28 -04002236 }
2237
John Bauman19bac1e2014-05-06 15:23:49 -04002238 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002239 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002240 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002241#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002242 return x86::psubsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002243#else
2244 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2245#endif
John Bauman89401822014-05-06 15:04:28 -04002246 }
2247
John Bauman19bac1e2014-05-06 15:23:49 -04002248 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002249 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002250 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002251#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002252 return x86::pmulhw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002253#else
2254 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2255#endif
John Bauman89401822014-05-06 15:04:28 -04002256 }
2257
John Bauman19bac1e2014-05-06 15:23:49 -04002258 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002259 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002260 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002261#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002262 return x86::pmaddwd(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002263#else
2264 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
2265#endif
John Bauman89401822014-05-06 15:04:28 -04002266 }
2267
Nicolas Capens33438a62017-09-27 11:47:35 -04002268 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002269 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002270 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002271#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002272 auto result = x86::packsswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002273#else
2274 auto result = V(lowerPack(V(x.value), V(y.value), true));
2275#endif
Nicolas Capens01a97962017-07-28 17:30:51 -04002276 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
John Bauman89401822014-05-06 15:04:28 -04002277 }
2278
Nicolas Capens33438a62017-09-27 11:47:35 -04002279 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2280 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002281 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002282#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002283 auto result = x86::packuswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002284#else
2285 auto result = V(lowerPack(V(x.value), V(y.value), false));
2286#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002287 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
2288 }
2289
John Bauman19bac1e2014-05-06 15:23:49 -04002290 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002291 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002292 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002293#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002294 return x86::pcmpgtw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002295#else
2296 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
2297#endif
John Bauman89401822014-05-06 15:04:28 -04002298 }
2299
John Bauman19bac1e2014-05-06 15:23:49 -04002300 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002301 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002302 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002303#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002304 return x86::pcmpeqw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002305#else
2306 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
2307#endif
John Bauman89401822014-05-06 15:04:28 -04002308 }
2309
John Bauman19bac1e2014-05-06 15:23:49 -04002310 Type *Short4::getType()
John Bauman89401822014-05-06 15:04:28 -04002311 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002312 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002313 }
2314
John Bauman19bac1e2014-05-06 15:23:49 -04002315 UShort4::UShort4(RValue<Float4> cast, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04002316 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002317 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002318 if(saturate)
2319 {
Logan Chiena8385ed2018-09-26 19:22:54 +08002320#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002321 if(CPUID::supportsSSE4_1())
2322 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002323 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
Nicolas Capens33438a62017-09-27 11:47:35 -04002324 *this = As<Short4>(PackUnsigned(int4, int4));
John Bauman89401822014-05-06 15:04:28 -04002325 }
2326 else
Logan Chiena8385ed2018-09-26 19:22:54 +08002327#endif
John Bauman89401822014-05-06 15:04:28 -04002328 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002329 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
John Bauman89401822014-05-06 15:04:28 -04002330 }
2331 }
2332 else
2333 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002334 *this = Short4(Int4(cast));
John Bauman89401822014-05-06 15:04:28 -04002335 }
2336 }
2337
John Bauman19bac1e2014-05-06 15:23:49 -04002338 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002339 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002340 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002341#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002342 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2343
2344 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002345#else
2346 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
2347#endif
John Bauman89401822014-05-06 15:04:28 -04002348 }
2349
John Bauman19bac1e2014-05-06 15:23:49 -04002350 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002351 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002352 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002353#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002354 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
2355
2356 return x86::psrlw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002357#else
2358 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2359#endif
John Bauman89401822014-05-06 15:04:28 -04002360 }
2361
John Bauman19bac1e2014-05-06 15:23:49 -04002362 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002363 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002364 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002365 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002366 }
2367
John Bauman19bac1e2014-05-06 15:23:49 -04002368 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002369 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002370 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002371 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002372 }
2373
John Bauman19bac1e2014-05-06 15:23:49 -04002374 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002375 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002376 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002377#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002378 return x86::paddusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002379#else
2380 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2381#endif
John Bauman89401822014-05-06 15:04:28 -04002382 }
2383
John Bauman19bac1e2014-05-06 15:23:49 -04002384 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002385 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002386 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002387#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002388 return x86::psubusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002389#else
2390 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2391#endif
John Bauman89401822014-05-06 15:04:28 -04002392 }
2393
John Bauman19bac1e2014-05-06 15:23:49 -04002394 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002395 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002396 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002397#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002398 return x86::pmulhuw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002399#else
2400 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2401#endif
John Bauman89401822014-05-06 15:04:28 -04002402 }
2403
John Bauman19bac1e2014-05-06 15:23:49 -04002404 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002405 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002406 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002407#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002408 return x86::pavgw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002409#else
2410 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
2411#endif
John Bauman89401822014-05-06 15:04:28 -04002412 }
2413
John Bauman19bac1e2014-05-06 15:23:49 -04002414 Type *UShort4::getType()
John Bauman89401822014-05-06 15:04:28 -04002415 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002416 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002417 }
2418
John Bauman19bac1e2014-05-06 15:23:49 -04002419 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002420 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002421 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002422#if defined(__i386__) || defined(__x86_64__)
2423 return x86::psllw(lhs, rhs);
2424#else
2425 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
2426#endif
John Bauman89401822014-05-06 15:04:28 -04002427 }
2428
John Bauman19bac1e2014-05-06 15:23:49 -04002429 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002430 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002431 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002432#if defined(__i386__) || defined(__x86_64__)
2433 return x86::psraw(lhs, rhs);
2434#else
2435 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
2436#endif
John Bauman89401822014-05-06 15:04:28 -04002437 }
2438
John Bauman19bac1e2014-05-06 15:23:49 -04002439 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002440 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002441 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002442#if defined(__i386__) || defined(__x86_64__)
2443 return x86::pmaddwd(x, y);
2444#else
2445 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
2446#endif
John Bauman89401822014-05-06 15:04:28 -04002447 }
2448
John Bauman19bac1e2014-05-06 15:23:49 -04002449 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002450 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002451 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002452#if defined(__i386__) || defined(__x86_64__)
2453 return x86::pmulhw(x, y);
2454#else
2455 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2456#endif
John Bauman89401822014-05-06 15:04:28 -04002457 }
2458
John Bauman19bac1e2014-05-06 15:23:49 -04002459 Type *Short8::getType()
John Bauman89401822014-05-06 15:04:28 -04002460 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002461 return T(llvm::VectorType::get(T(Short::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002462 }
2463
John Bauman19bac1e2014-05-06 15:23:49 -04002464 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002465 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002466 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002467#if defined(__i386__) || defined(__x86_64__)
2468 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
2469#else
2470 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
2471#endif
John Bauman89401822014-05-06 15:04:28 -04002472 }
2473
John Bauman19bac1e2014-05-06 15:23:49 -04002474 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002475 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002476 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002477#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002478 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
Logan Chiene3191012018-08-24 22:01:50 +08002479#else
2480 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
2481#endif
John Bauman89401822014-05-06 15:04:28 -04002482 }
2483
John Bauman19bac1e2014-05-06 15:23:49 -04002484 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
John Bauman89401822014-05-06 15:04:28 -04002485 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002486 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense89cd582016-09-30 14:23:47 -04002487 int pshufb[16] =
2488 {
2489 select0 + 0,
2490 select0 + 1,
2491 select1 + 0,
2492 select1 + 1,
2493 select2 + 0,
2494 select2 + 1,
2495 select3 + 0,
2496 select3 + 1,
2497 select4 + 0,
2498 select4 + 1,
2499 select5 + 0,
2500 select5 + 1,
2501 select6 + 0,
2502 select6 + 1,
2503 select7 + 0,
2504 select7 + 1,
2505 };
John Bauman89401822014-05-06 15:04:28 -04002506
2507 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
Nicolas Capense89cd582016-09-30 14:23:47 -04002508 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
John Bauman89401822014-05-06 15:04:28 -04002509 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
2510
2511 return RValue<UShort8>(short8);
2512 }
2513
John Bauman19bac1e2014-05-06 15:23:49 -04002514 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04002515 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002516 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002517#if defined(__i386__) || defined(__x86_64__)
2518 return x86::pmulhuw(x, y);
2519#else
2520 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2521#endif
John Bauman89401822014-05-06 15:04:28 -04002522 }
2523
John Bauman19bac1e2014-05-06 15:23:49 -04002524 Type *UShort8::getType()
John Bauman89401822014-05-06 15:04:28 -04002525 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002526 return T(llvm::VectorType::get(T(UShort::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002527 }
2528
Nicolas Capens96d4e092016-11-18 14:22:38 -05002529 RValue<Int> operator++(Int &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002530 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002531 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002532 RValue<Int> res = val;
2533
Logan Chien191b3052018-08-31 16:57:15 +08002534 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002535 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002536
2537 return res;
2538 }
2539
Nicolas Capens96d4e092016-11-18 14:22:38 -05002540 const Int &operator++(Int &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002541 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002542 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002543 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002544 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002545
2546 return val;
2547 }
2548
Nicolas Capens96d4e092016-11-18 14:22:38 -05002549 RValue<Int> operator--(Int &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002550 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002551 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002552 RValue<Int> res = val;
2553
Logan Chien191b3052018-08-31 16:57:15 +08002554 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002555 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002556
2557 return res;
2558 }
2559
Nicolas Capens96d4e092016-11-18 14:22:38 -05002560 const Int &operator--(Int &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002561 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002562 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002563 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002564 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002565
2566 return val;
2567 }
2568
John Bauman19bac1e2014-05-06 15:23:49 -04002569 RValue<Int> RoundInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002570 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002571 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002572#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002573 return x86::cvtss2si(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002574#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002575 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002576#endif
John Bauman89401822014-05-06 15:04:28 -04002577 }
2578
John Bauman19bac1e2014-05-06 15:23:49 -04002579 Type *Int::getType()
John Bauman89401822014-05-06 15:04:28 -04002580 {
Nicolas Capensac230122016-09-20 14:30:06 -04002581 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002582 }
2583
John Bauman19bac1e2014-05-06 15:23:49 -04002584 Type *Long::getType()
John Bauman89401822014-05-06 15:04:28 -04002585 {
Nicolas Capensac230122016-09-20 14:30:06 -04002586 return T(llvm::Type::getInt64Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002587 }
2588
John Bauman19bac1e2014-05-06 15:23:49 -04002589 UInt::UInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002590 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002591 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002592 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2593 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
John Bauman89401822014-05-06 15:04:28 -04002594
Alexis Hetu764d1422016-09-28 08:44:22 -04002595 // Smallest positive value representable in UInt, but not in Int
2596 const unsigned int ustart = 0x80000000u;
2597 const float ustartf = float(ustart);
2598
2599 // If the value is negative, store 0, otherwise store the result of the conversion
2600 storeValue((~(As<Int>(cast) >> 31) &
2601 // Check if the value can be represented as an Int
2602 IfThenElse(cast >= ustartf,
2603 // If the value is too large, subtract ustart and re-add it after conversion.
2604 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2605 // Otherwise, just convert normally
2606 Int(cast))).value);
John Bauman89401822014-05-06 15:04:28 -04002607 }
2608
Nicolas Capens96d4e092016-11-18 14:22:38 -05002609 RValue<UInt> operator++(UInt &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002610 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002611 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002612 RValue<UInt> res = val;
2613
Logan Chien191b3052018-08-31 16:57:15 +08002614 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002615 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002616
2617 return res;
2618 }
2619
Nicolas Capens96d4e092016-11-18 14:22:38 -05002620 const UInt &operator++(UInt &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002621 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002622 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002623 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002624 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002625
2626 return val;
2627 }
2628
Nicolas Capens96d4e092016-11-18 14:22:38 -05002629 RValue<UInt> operator--(UInt &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002630 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002631 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002632 RValue<UInt> res = val;
2633
Logan Chien191b3052018-08-31 16:57:15 +08002634 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002635 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002636
2637 return res;
2638 }
2639
Nicolas Capens96d4e092016-11-18 14:22:38 -05002640 const UInt &operator--(UInt &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002641 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002642 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002643 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002644 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002645
2646 return val;
2647 }
2648
John Bauman19bac1e2014-05-06 15:23:49 -04002649// RValue<UInt> RoundUInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002650// {
Logan Chiene3191012018-08-24 22:01:50 +08002651//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002652// return x86::cvtss2si(val); // FIXME: Unsigned
Logan Chiene3191012018-08-24 22:01:50 +08002653//#else
2654// return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
2655//#endif
John Bauman89401822014-05-06 15:04:28 -04002656// }
2657
John Bauman19bac1e2014-05-06 15:23:49 -04002658 Type *UInt::getType()
John Bauman89401822014-05-06 15:04:28 -04002659 {
Nicolas Capensac230122016-09-20 14:30:06 -04002660 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002661 }
2662
John Bauman19bac1e2014-05-06 15:23:49 -04002663// Int2::Int2(RValue<Int> cast)
2664// {
John Bauman19bac1e2014-05-06 15:23:49 -04002665// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2666// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
John Bauman66b8ab22014-05-06 15:57:45 -04002667//
Nicolas Capense89cd582016-09-30 14:23:47 -04002668// int shuffle[2] = {0, 0};
2669// Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
John Bauman19bac1e2014-05-06 15:23:49 -04002670//
John Bauman66b8ab22014-05-06 15:57:45 -04002671// storeValue(replicate);
John Bauman19bac1e2014-05-06 15:23:49 -04002672// }
John Bauman89401822014-05-06 15:04:28 -04002673
John Bauman19bac1e2014-05-06 15:23:49 -04002674 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002675 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002676 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002677#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002678 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
2679
2680 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002681#else
2682 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
2683#endif
John Bauman89401822014-05-06 15:04:28 -04002684 }
2685
John Bauman19bac1e2014-05-06 15:23:49 -04002686 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002687 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002688 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002689#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002690 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
2691
2692 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002693#else
2694 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
2695#endif
John Bauman89401822014-05-06 15:04:28 -04002696 }
2697
John Bauman19bac1e2014-05-06 15:23:49 -04002698 Type *Int2::getType()
John Bauman89401822014-05-06 15:04:28 -04002699 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002700 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002701 }
2702
John Bauman19bac1e2014-05-06 15:23:49 -04002703 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002704 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002705 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002706#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002707 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
2708
2709 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002710#else
2711 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
2712#endif
John Bauman89401822014-05-06 15:04:28 -04002713 }
2714
John Bauman19bac1e2014-05-06 15:23:49 -04002715 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002716 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002717 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002718#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002719 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
2720
2721 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002722#else
2723 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
2724#endif
John Bauman89401822014-05-06 15:04:28 -04002725 }
2726
John Bauman19bac1e2014-05-06 15:23:49 -04002727 Type *UInt2::getType()
John Bauman89401822014-05-06 15:04:28 -04002728 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002729 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002730 }
2731
Nicolas Capenscb986762017-01-20 11:34:37 -05002732 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002733 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002734 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002735#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002736 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002737 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002738 *this = x86::pmovzxbd(As<Byte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002739 }
2740 else
Logan Chiene3191012018-08-24 22:01:50 +08002741#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002742 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002743 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Nicolas Capens01a97962017-07-28 17:30:51 -04002744 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002745 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002746
Nicolas Capense89cd582016-09-30 14:23:47 -04002747 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002748 Value *c = Nucleus::createBitCast(b, Short8::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002749 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002750
Nicolas Capens01a97962017-07-28 17:30:51 -04002751 *this = As<Int4>(d);
2752 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002753 }
2754
Nicolas Capenscb986762017-01-20 11:34:37 -05002755 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002756 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002757 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002758#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002759 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002760 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002761 *this = x86::pmovsxbd(As<SByte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002762 }
2763 else
Logan Chiene3191012018-08-24 22:01:50 +08002764#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002765 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002766 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2767 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
2768 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002769
Nicolas Capense89cd582016-09-30 14:23:47 -04002770 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002771 Value *c = Nucleus::createBitCast(b, Short8::getType());
2772 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002773
Nicolas Capens01a97962017-07-28 17:30:51 -04002774 *this = As<Int4>(d) >> 24;
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002775 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002776 }
2777
Nicolas Capenscb986762017-01-20 11:34:37 -05002778 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002779 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002780 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002781#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002782 if(CPUID::supportsSSE4_1())
2783 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002784 *this = x86::pmovsxwd(As<Short8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002785 }
2786 else
Logan Chiene3191012018-08-24 22:01:50 +08002787#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002788 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002789 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002790 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2791 *this = As<Int4>(c) >> 16;
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002792 }
2793 }
2794
Nicolas Capenscb986762017-01-20 11:34:37 -05002795 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002796 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002797 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002798#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002799 if(CPUID::supportsSSE4_1())
2800 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002801 *this = x86::pmovzxwd(As<UShort8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002802 }
2803 else
Logan Chiene3191012018-08-24 22:01:50 +08002804#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002805 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002806 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002807 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2808 *this = As<Int4>(c);
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002809 }
2810 }
2811
Nicolas Capenscb986762017-01-20 11:34:37 -05002812 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002813 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002814 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002815 Value *vector = loadValue();
2816 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2817
Nicolas Capense89cd582016-09-30 14:23:47 -04002818 int swizzle[4] = {0, 0, 0, 0};
2819 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002820
2821 storeValue(replicate);
2822 }
2823
John Bauman19bac1e2014-05-06 15:23:49 -04002824 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002825 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002826 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002827#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002828 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002829#else
2830 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
2831#endif
John Bauman89401822014-05-06 15:04:28 -04002832 }
2833
John Bauman19bac1e2014-05-06 15:23:49 -04002834 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002835 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002836 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002837#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002838 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002839#else
2840 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2841#endif
John Bauman89401822014-05-06 15:04:28 -04002842 }
2843
John Bauman19bac1e2014-05-06 15:23:49 -04002844 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2845 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002846 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002847 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002848 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2849 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2850 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002851 }
2852
2853 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2854 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002855 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002856 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2857 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2858 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
2859 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002860 }
2861
2862 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2863 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002864 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002865 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2866 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2867 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
2868 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002869 }
2870
2871 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2872 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002873 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002874 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2875 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2876 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2877 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002878 }
2879
2880 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2881 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002882 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04002883 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2884 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2885 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
2886 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002887 }
2888
2889 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2890 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002891 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002892 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2893 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2894 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
2895 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002896 }
2897
2898 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2899 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002900 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002901#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002902 if(CPUID::supportsSSE4_1())
2903 {
2904 return x86::pmaxsd(x, y);
2905 }
2906 else
Logan Chiene3191012018-08-24 22:01:50 +08002907#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002908 {
2909 RValue<Int4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002910 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002911 }
2912 }
2913
2914 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2915 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002916 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002917#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002918 if(CPUID::supportsSSE4_1())
2919 {
2920 return x86::pminsd(x, y);
2921 }
2922 else
Logan Chiene3191012018-08-24 22:01:50 +08002923#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002924 {
2925 RValue<Int4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002926 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002927 }
2928 }
2929
2930 RValue<Int4> RoundInt(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002931 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002932 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002933#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002934 return x86::cvtps2dq(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002935#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002936 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002937#endif
John Bauman89401822014-05-06 15:04:28 -04002938 }
2939
Chris Forbese86b6dc2019-03-01 09:08:47 -08002940 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2941 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002942 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002943 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2944 return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2945 }
2946
2947 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2948 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002949 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08002950 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2951 return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2952 }
2953
Nicolas Capens33438a62017-09-27 11:47:35 -04002954 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04002955 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002956 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002957#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002958 return x86::packssdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002959#else
2960 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
2961#endif
John Bauman89401822014-05-06 15:04:28 -04002962 }
2963
Nicolas Capens33438a62017-09-27 11:47:35 -04002964 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
2965 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002966 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002967#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002968 return x86::packusdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002969#else
2970 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
2971#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002972 }
2973
John Bauman19bac1e2014-05-06 15:23:49 -04002974 RValue<Int> SignMask(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04002975 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002976 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002977#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002978 return x86::movmskps(As<Float4>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002979#else
2980 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2981#endif
John Bauman89401822014-05-06 15:04:28 -04002982 }
2983
John Bauman19bac1e2014-05-06 15:23:49 -04002984 Type *Int4::getType()
John Bauman89401822014-05-06 15:04:28 -04002985 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002986 return T(llvm::VectorType::get(T(Int::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002987 }
2988
Nicolas Capenscb986762017-01-20 11:34:37 -05002989 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002990 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002991 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002992 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2993 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
John Bauman89401822014-05-06 15:04:28 -04002994
Alexis Hetu764d1422016-09-28 08:44:22 -04002995 // Smallest positive value representable in UInt, but not in Int
2996 const unsigned int ustart = 0x80000000u;
2997 const float ustartf = float(ustart);
2998
2999 // Check if the value can be represented as an Int
3000 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3001 // If the value is too large, subtract ustart and re-add it after conversion.
3002 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
3003 // Otherwise, just convert normally
3004 (~uiValue & Int4(cast));
3005 // If the value is negative, store 0, otherwise store the result of the conversion
3006 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
John Bauman89401822014-05-06 15:04:28 -04003007 }
3008
Ben Clayton88816fa2019-05-15 17:08:14 +01003009 UInt4::UInt4(RValue<UInt> rhs) : XYZW(this)
3010 {
3011 RR_DEBUG_INFO_UPDATE_LOC();
3012 Value *vector = loadValue();
3013 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3014
3015 int swizzle[4] = {0, 0, 0, 0};
3016 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
3017
3018 storeValue(replicate);
3019 }
3020
John Bauman19bac1e2014-05-06 15:23:49 -04003021 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003022 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003023 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003024#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003025 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08003026#else
3027 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
3028#endif
John Bauman89401822014-05-06 15:04:28 -04003029 }
3030
John Bauman19bac1e2014-05-06 15:23:49 -04003031 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003032 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003033 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003034#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003035 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08003036#else
3037 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
3038#endif
John Bauman89401822014-05-06 15:04:28 -04003039 }
3040
John Bauman19bac1e2014-05-06 15:23:49 -04003041 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3042 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003043 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04003044 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04003045 // Restore the following line when LLVM is updated to a version where this issue is fixed.
3046 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
3047 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04003048 }
3049
3050 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3051 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003052 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003053 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
3054 }
3055
3056 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3057 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003058 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04003059 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
3060 // Restore the following line when LLVM is updated to a version where this issue is fixed.
3061 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
3062 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04003063 }
3064
3065 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3066 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003067 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003068 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
3069 }
3070
3071 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3072 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003073 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens197226a2016-04-27 23:08:50 -04003074 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
3075 // Restore the following line when LLVM is updated to a version where this issue is fixed.
3076 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
3077 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04003078 }
3079
3080 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3081 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003082 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003083 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
3084 }
3085
3086 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3087 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003088 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003089#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003090 if(CPUID::supportsSSE4_1())
3091 {
3092 return x86::pmaxud(x, y);
3093 }
3094 else
Logan Chiene3191012018-08-24 22:01:50 +08003095#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003096 {
3097 RValue<UInt4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003098 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04003099 }
3100 }
3101
3102 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3103 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003104 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003105#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003106 if(CPUID::supportsSSE4_1())
3107 {
3108 return x86::pminud(x, y);
3109 }
3110 else
Logan Chiene3191012018-08-24 22:01:50 +08003111#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003112 {
3113 RValue<UInt4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003114 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04003115 }
3116 }
3117
John Bauman19bac1e2014-05-06 15:23:49 -04003118 Type *UInt4::getType()
John Bauman89401822014-05-06 15:04:28 -04003119 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003120 return T(llvm::VectorType::get(T(UInt::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003121 }
3122
Alexis Hetu734e2572018-12-20 14:00:49 -05003123 Type *Half::getType()
3124 {
3125 return T(llvm::Type::getInt16Ty(*::context));
3126 }
3127
Nicolas Capens05b3d662016-02-25 23:58:33 -05003128 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003129 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003130 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003131#if defined(__i386__) || defined(__x86_64__)
3132 if(exactAtPow2)
3133 {
3134 // rcpss uses a piecewise-linear approximation which minimizes the relative error
3135 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3136 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3137 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003138 return x86::rcpss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003139#else
3140 return As<Float>(V(lowerRCP(V(x.value))));
3141#endif
John Bauman89401822014-05-06 15:04:28 -04003142 }
John Bauman66b8ab22014-05-06 15:57:45 -04003143
John Bauman19bac1e2014-05-06 15:23:49 -04003144 RValue<Float> RcpSqrt_pp(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003145 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003146 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003147#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003148 return x86::rsqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003149#else
3150 return As<Float>(V(lowerRSQRT(V(x.value))));
3151#endif
John Bauman89401822014-05-06 15:04:28 -04003152 }
3153
John Bauman19bac1e2014-05-06 15:23:49 -04003154 RValue<Float> Sqrt(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003155 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003156 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003157#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003158 return x86::sqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003159#else
3160 return As<Float>(V(lowerSQRT(V(x.value))));
3161#endif
John Bauman89401822014-05-06 15:04:28 -04003162 }
3163
John Bauman19bac1e2014-05-06 15:23:49 -04003164 RValue<Float> Round(RValue<Float> x)
3165 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003166 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003167#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003168 if(CPUID::supportsSSE4_1())
3169 {
3170 return x86::roundss(x, 0);
3171 }
3172 else
3173 {
3174 return Float4(Round(Float4(x))).x;
3175 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003176#else
3177 return RValue<Float>(V(lowerRound(V(x.value))));
3178#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003179 }
3180
3181 RValue<Float> Trunc(RValue<Float> x)
3182 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003183 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003184#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003185 if(CPUID::supportsSSE4_1())
3186 {
3187 return x86::roundss(x, 3);
3188 }
3189 else
3190 {
3191 return Float(Int(x)); // Rounded toward zero
3192 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003193#else
3194 return RValue<Float>(V(lowerTrunc(V(x.value))));
3195#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003196 }
3197
3198 RValue<Float> Frac(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003199 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003200 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003201#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003202 if(CPUID::supportsSSE4_1())
3203 {
3204 return x - x86::floorss(x);
3205 }
3206 else
3207 {
John Bauman19bac1e2014-05-06 15:23:49 -04003208 return Float4(Frac(Float4(x))).x;
John Bauman89401822014-05-06 15:04:28 -04003209 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003210#else
3211 // x - floor(x) can be 1.0 for very small negative x.
3212 // Clamp against the value just below 1.0.
3213 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
3214#endif
John Bauman89401822014-05-06 15:04:28 -04003215 }
3216
John Bauman19bac1e2014-05-06 15:23:49 -04003217 RValue<Float> Floor(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003218 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003219 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003220#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003221 if(CPUID::supportsSSE4_1())
3222 {
3223 return x86::floorss(x);
3224 }
3225 else
3226 {
3227 return Float4(Floor(Float4(x))).x;
3228 }
Logan Chien40a60052018-09-26 19:03:53 +08003229#else
3230 return RValue<Float>(V(lowerFloor(V(x.value))));
3231#endif
John Bauman89401822014-05-06 15:04:28 -04003232 }
3233
John Bauman19bac1e2014-05-06 15:23:49 -04003234 RValue<Float> Ceil(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003235 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003236 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003237#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003238 if(CPUID::supportsSSE4_1())
3239 {
3240 return x86::ceilss(x);
3241 }
3242 else
Logan Chiene3191012018-08-24 22:01:50 +08003243#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003244 {
3245 return Float4(Ceil(Float4(x))).x;
3246 }
John Bauman89401822014-05-06 15:04:28 -04003247 }
3248
John Bauman19bac1e2014-05-06 15:23:49 -04003249 Type *Float::getType()
John Bauman89401822014-05-06 15:04:28 -04003250 {
Nicolas Capensac230122016-09-20 14:30:06 -04003251 return T(llvm::Type::getFloatTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04003252 }
3253
John Bauman19bac1e2014-05-06 15:23:49 -04003254 Type *Float2::getType()
John Bauman89401822014-05-06 15:04:28 -04003255 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003256 return T(Type_v2f32);
John Bauman89401822014-05-06 15:04:28 -04003257 }
3258
Nicolas Capenscb986762017-01-20 11:34:37 -05003259 Float4::Float4(RValue<Float> rhs) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04003260 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003261 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04003262 Value *vector = loadValue();
John Bauman89401822014-05-06 15:04:28 -04003263 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3264
Nicolas Capense89cd582016-09-30 14:23:47 -04003265 int swizzle[4] = {0, 0, 0, 0};
3266 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
John Bauman89401822014-05-06 15:04:28 -04003267
John Bauman66b8ab22014-05-06 15:57:45 -04003268 storeValue(replicate);
John Bauman89401822014-05-06 15:04:28 -04003269 }
3270
John Bauman19bac1e2014-05-06 15:23:49 -04003271 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003272 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003273 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003274#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003275 return x86::maxps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003276#else
3277 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
3278#endif
John Bauman89401822014-05-06 15:04:28 -04003279 }
3280
John Bauman19bac1e2014-05-06 15:23:49 -04003281 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003282 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003283 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003284#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003285 return x86::minps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003286#else
3287 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
3288#endif
John Bauman89401822014-05-06 15:04:28 -04003289 }
3290
Nicolas Capens05b3d662016-02-25 23:58:33 -05003291 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003292 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003293 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003294#if defined(__i386__) || defined(__x86_64__)
3295 if(exactAtPow2)
3296 {
3297 // rcpps uses a piecewise-linear approximation which minimizes the relative error
3298 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3299 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3300 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003301 return x86::rcpps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003302#else
3303 return As<Float4>(V(lowerRCP(V(x.value))));
3304#endif
John Bauman89401822014-05-06 15:04:28 -04003305 }
John Bauman66b8ab22014-05-06 15:57:45 -04003306
John Bauman19bac1e2014-05-06 15:23:49 -04003307 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003308 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003309 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003310#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003311 return x86::rsqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003312#else
3313 return As<Float4>(V(lowerRSQRT(V(x.value))));
3314#endif
John Bauman89401822014-05-06 15:04:28 -04003315 }
3316
John Bauman19bac1e2014-05-06 15:23:49 -04003317 RValue<Float4> Sqrt(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003318 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003319 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003320#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003321 return x86::sqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003322#else
3323 return As<Float4>(V(lowerSQRT(V(x.value))));
3324#endif
John Bauman89401822014-05-06 15:04:28 -04003325 }
3326
John Bauman19bac1e2014-05-06 15:23:49 -04003327 RValue<Int> SignMask(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003328 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003329 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003330#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003331 return x86::movmskps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003332#else
3333 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
3334#endif
John Bauman89401822014-05-06 15:04:28 -04003335 }
3336
John Bauman19bac1e2014-05-06 15:23:49 -04003337 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003338 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003339 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003340 // return As<Int4>(x86::cmpeqps(x, y));
3341 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
3342 }
3343
John Bauman19bac1e2014-05-06 15:23:49 -04003344 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003345 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003346 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003347 // return As<Int4>(x86::cmpltps(x, y));
3348 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
3349 }
3350
John Bauman19bac1e2014-05-06 15:23:49 -04003351 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003352 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003353 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003354 // return As<Int4>(x86::cmpleps(x, y));
3355 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
3356 }
3357
John Bauman19bac1e2014-05-06 15:23:49 -04003358 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003359 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003360 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003361 // return As<Int4>(x86::cmpneqps(x, y));
3362 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
3363 }
3364
John Bauman19bac1e2014-05-06 15:23:49 -04003365 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003366 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003367 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003368 // return As<Int4>(x86::cmpnltps(x, y));
3369 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
3370 }
3371
John Bauman19bac1e2014-05-06 15:23:49 -04003372 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003373 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003374 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003375 // return As<Int4>(x86::cmpnleps(x, y));
3376 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
3377 }
3378
Ben Claytonec1aeb82019-03-04 19:33:27 +00003379 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3380 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003381 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003382 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUEQ(x.value, y.value), Int4::getType()));
3383 }
3384
3385 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3386 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003387 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003388 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULT(x.value, y.value), Int4::getType()));
3389 }
3390
3391 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3392 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003393 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003394 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULE(x.value, y.value), Int4::getType()));
3395 }
3396
3397 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3398 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003399 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003400 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUNE(x.value, y.value), Int4::getType()));
3401 }
3402
3403 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3404 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003405 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003406 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGE(x.value, y.value), Int4::getType()));
3407 }
3408
3409 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3410 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003411 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003412 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGT(x.value, y.value), Int4::getType()));
3413 }
3414
John Bauman19bac1e2014-05-06 15:23:49 -04003415 RValue<Float4> Round(RValue<Float4> x)
3416 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003417 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003418#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003419 if(CPUID::supportsSSE4_1())
3420 {
3421 return x86::roundps(x, 0);
3422 }
3423 else
3424 {
3425 return Float4(RoundInt(x));
3426 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003427#else
3428 return RValue<Float4>(V(lowerRound(V(x.value))));
3429#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003430 }
3431
3432 RValue<Float4> Trunc(RValue<Float4> x)
3433 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003434 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003435#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003436 if(CPUID::supportsSSE4_1())
3437 {
3438 return x86::roundps(x, 3);
3439 }
3440 else
3441 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003442 return Float4(Int4(x));
John Bauman19bac1e2014-05-06 15:23:49 -04003443 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003444#else
3445 return RValue<Float4>(V(lowerTrunc(V(x.value))));
3446#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003447 }
3448
3449 RValue<Float4> Frac(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003450 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003451 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb9230422017-07-17 10:27:33 -04003452 Float4 frc;
3453
Logan Chien40a60052018-09-26 19:03:53 +08003454#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003455 if(CPUID::supportsSSE4_1())
3456 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003457 frc = x - Floor(x);
John Bauman89401822014-05-06 15:04:28 -04003458 }
3459 else
3460 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003461 frc = x - Float4(Int4(x)); // Signed fractional part.
John Bauman89401822014-05-06 15:04:28 -04003462
Nicolas Capensb9230422017-07-17 10:27:33 -04003463 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
John Bauman89401822014-05-06 15:04:28 -04003464 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003465#else
3466 frc = x - Floor(x);
3467#endif
Nicolas Capensb9230422017-07-17 10:27:33 -04003468
3469 // x - floor(x) can be 1.0 for very small negative x.
3470 // Clamp against the value just below 1.0.
3471 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
John Bauman89401822014-05-06 15:04:28 -04003472 }
3473
John Bauman19bac1e2014-05-06 15:23:49 -04003474 RValue<Float4> Floor(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003475 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003476 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003477#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003478 if(CPUID::supportsSSE4_1())
3479 {
3480 return x86::floorps(x);
3481 }
3482 else
3483 {
John Bauman19bac1e2014-05-06 15:23:49 -04003484 return x - Frac(x);
John Bauman89401822014-05-06 15:04:28 -04003485 }
Logan Chien40a60052018-09-26 19:03:53 +08003486#else
3487 return RValue<Float4>(V(lowerFloor(V(x.value))));
3488#endif
John Bauman89401822014-05-06 15:04:28 -04003489 }
3490
John Bauman19bac1e2014-05-06 15:23:49 -04003491 RValue<Float4> Ceil(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003492 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003493 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003494#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003495 if(CPUID::supportsSSE4_1())
3496 {
3497 return x86::ceilps(x);
3498 }
3499 else
Logan Chiene3191012018-08-24 22:01:50 +08003500#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003501 {
3502 return -Floor(-x);
3503 }
John Bauman89401822014-05-06 15:04:28 -04003504 }
3505
Ben Claytona2c8b772019-04-09 13:42:36 -04003506 RValue<Float4> Sin(RValue<Float4> v)
3507 {
3508 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sin, { V(v.value)->getType() } );
3509 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3510 }
3511
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003512 RValue<Float4> Cos(RValue<Float4> v)
3513 {
3514 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cos, { V(v.value)->getType() } );
3515 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3516 }
3517
Ben Clayton14740062019-04-09 13:48:41 -04003518 RValue<Float4> Tan(RValue<Float4> v)
3519 {
3520 return Sin(v) / Cos(v);
3521 }
3522
Ben Claytoneafae472019-04-09 14:22:38 -04003523 static RValue<Float4> TransformFloat4PerElement(RValue<Float4> v, const char* name)
Ben Claytonf9350d72019-04-09 14:19:02 -04003524 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003525 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), ::llvm::ArrayRef<llvm::Type*>(T(Float::getType())), false);
Ben Claytoneafae472019-04-09 14:22:38 -04003526 auto func = ::module->getOrInsertFunction(name, funcTy);
Ben Claytonf9350d72019-04-09 14:19:02 -04003527 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3528 for (uint64_t i = 0; i < 4; i++)
3529 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003530 auto el = ::builder->CreateCall(func, V(Nucleus::createExtractElement(v.value, Float::getType(), i)));
3531 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytonf9350d72019-04-09 14:19:02 -04003532 }
3533 return RValue<Float4>(V(out));
3534 }
3535
Ben Claytoneafae472019-04-09 14:22:38 -04003536 RValue<Float4> Asin(RValue<Float4> v)
3537 {
3538 return TransformFloat4PerElement(v, "asinf");
3539 }
3540
3541 RValue<Float4> Acos(RValue<Float4> v)
3542 {
3543 return TransformFloat4PerElement(v, "acosf");
3544 }
3545
Ben Clayton749b4e02019-04-09 14:27:43 -04003546 RValue<Float4> Atan(RValue<Float4> v)
3547 {
3548 return TransformFloat4PerElement(v, "atanf");
3549 }
3550
Ben Claytond9636972019-04-09 15:09:54 -04003551 RValue<Float4> Sinh(RValue<Float4> v)
3552 {
3553 return TransformFloat4PerElement(v, "sinhf");
3554 }
3555
Ben Clayton900ea2c2019-04-09 15:25:36 -04003556 RValue<Float4> Cosh(RValue<Float4> v)
3557 {
3558 return TransformFloat4PerElement(v, "coshf");
3559 }
3560
Ben Clayton3928bd92019-04-09 15:27:41 -04003561 RValue<Float4> Tanh(RValue<Float4> v)
3562 {
3563 return TransformFloat4PerElement(v, "tanhf");
3564 }
3565
Ben Claytonf6d77ab2019-04-09 15:30:04 -04003566 RValue<Float4> Asinh(RValue<Float4> v)
3567 {
3568 return TransformFloat4PerElement(v, "asinhf");
3569 }
3570
Ben Clayton28ebcb02019-04-09 15:33:38 -04003571 RValue<Float4> Acosh(RValue<Float4> v)
3572 {
3573 return TransformFloat4PerElement(v, "acoshf");
3574 }
3575
Ben Claytonfa6a5392019-04-09 15:35:24 -04003576 RValue<Float4> Atanh(RValue<Float4> v)
3577 {
3578 return TransformFloat4PerElement(v, "atanhf");
3579 }
3580
Ben Claytona520c3e2019-04-09 15:43:45 -04003581 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3582 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003583 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3584 paramTys.push_back(T(Float::getType()));
3585 paramTys.push_back(T(Float::getType()));
3586 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), paramTys, false);
Ben Claytona520c3e2019-04-09 15:43:45 -04003587 auto func = ::module->getOrInsertFunction("atan2f", funcTy);
3588 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3589 for (uint64_t i = 0; i < 4; i++)
3590 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003591 auto el = ::builder->CreateCall2(func, ARGS(
3592 V(Nucleus::createExtractElement(x.value, Float::getType(), i)),
3593 V(Nucleus::createExtractElement(y.value, Float::getType(), i))
3594 ));
3595 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytona520c3e2019-04-09 15:43:45 -04003596 }
3597 return RValue<Float4>(V(out));
3598 }
3599
Ben Claytonbfe94f02019-04-09 15:52:12 -04003600 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3601 {
Ben Clayton7579db12019-05-02 08:37:12 +01003602 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::pow, { T(Float4::getType()) });
Ben Claytonc38fc122019-04-11 08:58:49 -04003603 return RValue<Float4>(V(::builder->CreateCall2(func, ARGS(V(x.value), V(y.value)))));
Ben Claytonbfe94f02019-04-09 15:52:12 -04003604 }
3605
Ben Clayton242f0022019-04-09 16:00:53 -04003606 RValue<Float4> Exp(RValue<Float4> v)
3607 {
3608 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003609 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton242f0022019-04-09 16:00:53 -04003610 }
3611
Ben Clayton2c1da722019-04-09 16:03:03 -04003612 RValue<Float4> Log(RValue<Float4> v)
3613 {
3614 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003615 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Clayton2c1da722019-04-09 16:03:03 -04003616 }
3617
Ben Claytonf40b56c2019-04-09 16:06:55 -04003618 RValue<Float4> Exp2(RValue<Float4> v)
3619 {
3620 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::exp2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003621 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytonf40b56c2019-04-09 16:06:55 -04003622 }
3623
Ben Claytone17acfe2019-04-09 16:09:13 -04003624 RValue<Float4> Log2(RValue<Float4> v)
3625 {
3626 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::log2, { T(Float4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003627 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
Ben Claytone17acfe2019-04-09 16:09:13 -04003628 }
3629
Ben Clayton60958262019-04-10 14:53:30 -04003630 RValue<UInt4> Ctlz(RValue<UInt4> v, bool isZeroUndef)
3631 {
Ben Clayton7579db12019-05-02 08:37:12 +01003632 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::ctlz, { T(UInt4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003633 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton60958262019-04-10 14:53:30 -04003634 V(v.value),
3635 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003636 ))));
Ben Clayton60958262019-04-10 14:53:30 -04003637 }
3638
Ben Clayton3f007c42019-04-10 14:54:23 -04003639 RValue<UInt4> Cttz(RValue<UInt4> v, bool isZeroUndef)
3640 {
Ben Clayton7579db12019-05-02 08:37:12 +01003641 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cttz, { T(UInt4::getType()) } );
Ben Claytonc38fc122019-04-11 08:58:49 -04003642 return RValue<UInt4>(V(::builder->CreateCall2(func, ARGS(
Ben Clayton3f007c42019-04-10 14:54:23 -04003643 V(v.value),
3644 isZeroUndef ? ::llvm::ConstantInt::getTrue(*::context) : ::llvm::ConstantInt::getFalse(*::context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003645 ))));
Ben Clayton3f007c42019-04-10 14:54:23 -04003646 }
3647
John Bauman19bac1e2014-05-06 15:23:49 -04003648 Type *Float4::getType()
John Bauman89401822014-05-06 15:04:28 -04003649 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003650 return T(llvm::VectorType::get(T(Float::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003651 }
3652
John Bauman89401822014-05-06 15:04:28 -04003653 RValue<Long> Ticks()
3654 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003655 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003656 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
John Bauman89401822014-05-06 15:04:28 -04003657
Nicolas Capens2ab69ee2016-09-26 11:45:17 -04003658 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
John Bauman89401822014-05-06 15:04:28 -04003659 }
Ben Claytond853c122019-04-16 17:51:49 -04003660
3661 RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3662 {
3663 // Note: this should work for 32-bit pointers as well because 'inttoptr'
3664 // is defined to truncate (and zero extend) if necessary.
3665 auto ptrAsInt = ::llvm::ConstantInt::get(::llvm::Type::getInt64Ty(*::context), reinterpret_cast<uintptr_t>(ptr));
3666 return RValue<Pointer<Byte>>(V(::builder->CreateIntToPtr(ptrAsInt, T(Pointer<Byte>::getType()))));
3667 }
3668
3669 Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
3670 {
3671 ::llvm::SmallVector<::llvm::Type*, 8> paramTys;
3672 for (auto ty : argTys) { paramTys.push_back(T(ty)); }
3673 auto funcTy = ::llvm::FunctionType::get(T(retTy), paramTys, false);
3674
3675 auto funcPtrTy = funcTy->getPointerTo();
3676 auto funcPtr = ::builder->CreatePointerCast(V(fptr.value), funcPtrTy);
3677
3678 ::llvm::SmallVector<::llvm::Value*, 8> arguments;
3679 for (auto arg : args) { arguments.push_back(V(arg)); }
3680 return V(::builder->CreateCall(funcPtr, arguments));
3681 }
John Bauman89401822014-05-06 15:04:28 -04003682}
3683
Nicolas Capens48461502018-08-06 14:20:45 -04003684namespace rr
John Bauman89401822014-05-06 15:04:28 -04003685{
Logan Chiene3191012018-08-24 22:01:50 +08003686#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003687 namespace x86
3688 {
John Bauman19bac1e2014-05-06 15:23:49 -04003689 RValue<Int> cvtss2si(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003690 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003691 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
John Bauman66b8ab22014-05-06 15:57:45 -04003692
John Bauman89401822014-05-06 15:04:28 -04003693 Float4 vector;
3694 vector.x = val;
3695
Logan Chien813d5032018-08-31 17:19:45 +08003696 return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
John Bauman89401822014-05-06 15:04:28 -04003697 }
3698
John Bauman19bac1e2014-05-06 15:23:49 -04003699 RValue<Int4> cvtps2dq(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003700 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003701 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
John Bauman89401822014-05-06 15:04:28 -04003702
Logan Chien813d5032018-08-31 17:19:45 +08003703 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003704 }
3705
John Bauman19bac1e2014-05-06 15:23:49 -04003706 RValue<Float> rcpss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003707 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003708 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
John Bauman89401822014-05-06 15:04:28 -04003709
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003710 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003711
Logan Chien813d5032018-08-31 17:19:45 +08003712 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003713 }
3714
John Bauman19bac1e2014-05-06 15:23:49 -04003715 RValue<Float> sqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003716 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003717 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3718 return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003719 }
3720
John Bauman19bac1e2014-05-06 15:23:49 -04003721 RValue<Float> rsqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003722 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003723 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
John Bauman66b8ab22014-05-06 15:57:45 -04003724
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003725 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman89401822014-05-06 15:04:28 -04003726
Logan Chien813d5032018-08-31 17:19:45 +08003727 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003728 }
3729
John Bauman19bac1e2014-05-06 15:23:49 -04003730 RValue<Float4> rcpps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003731 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003732 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003733
Logan Chien813d5032018-08-31 17:19:45 +08003734 return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003735 }
3736
John Bauman19bac1e2014-05-06 15:23:49 -04003737 RValue<Float4> sqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003738 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003739 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
John Bauman66b8ab22014-05-06 15:57:45 -04003740
Logan Chien813d5032018-08-31 17:19:45 +08003741 return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003742 }
3743
John Bauman19bac1e2014-05-06 15:23:49 -04003744 RValue<Float4> rsqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003745 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003746 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003747
Logan Chien813d5032018-08-31 17:19:45 +08003748 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003749 }
3750
John Bauman19bac1e2014-05-06 15:23:49 -04003751 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003752 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003753 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
John Bauman89401822014-05-06 15:04:28 -04003754
Logan Chien813d5032018-08-31 17:19:45 +08003755 return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003756 }
3757
John Bauman19bac1e2014-05-06 15:23:49 -04003758 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003759 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003760 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
John Bauman89401822014-05-06 15:04:28 -04003761
Logan Chien813d5032018-08-31 17:19:45 +08003762 return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003763 }
3764
John Bauman19bac1e2014-05-06 15:23:49 -04003765 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003766 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003767 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
John Bauman89401822014-05-06 15:04:28 -04003768
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003769 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
John Bauman89401822014-05-06 15:04:28 -04003770 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
3771
Logan Chien813d5032018-08-31 17:19:45 +08003772 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003773 }
3774
John Bauman19bac1e2014-05-06 15:23:49 -04003775 RValue<Float> floorss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003776 {
3777 return roundss(val, 1);
3778 }
3779
John Bauman19bac1e2014-05-06 15:23:49 -04003780 RValue<Float> ceilss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003781 {
3782 return roundss(val, 2);
3783 }
3784
John Bauman19bac1e2014-05-06 15:23:49 -04003785 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003786 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003787 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
John Bauman89401822014-05-06 15:04:28 -04003788
Logan Chien813d5032018-08-31 17:19:45 +08003789 return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
John Bauman89401822014-05-06 15:04:28 -04003790 }
3791
John Bauman19bac1e2014-05-06 15:23:49 -04003792 RValue<Float4> floorps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003793 {
3794 return roundps(val, 1);
3795 }
3796
John Bauman19bac1e2014-05-06 15:23:49 -04003797 RValue<Float4> ceilps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003798 {
3799 return roundps(val, 2);
3800 }
3801
Alexis Hetu0f448072016-03-18 10:56:08 -04003802 RValue<Int4> pabsd(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04003803 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003804 return RValue<Int4>(V(lowerPABS(V(x.value))));
John Bauman89401822014-05-06 15:04:28 -04003805 }
3806
John Bauman19bac1e2014-05-06 15:23:49 -04003807 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003808 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003809 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
John Bauman89401822014-05-06 15:04:28 -04003810
Logan Chien813d5032018-08-31 17:19:45 +08003811 return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003812 }
John Bauman66b8ab22014-05-06 15:57:45 -04003813
John Bauman19bac1e2014-05-06 15:23:49 -04003814 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003815 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003816 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
John Bauman89401822014-05-06 15:04:28 -04003817
Logan Chien813d5032018-08-31 17:19:45 +08003818 return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003819 }
3820
John Bauman19bac1e2014-05-06 15:23:49 -04003821 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003822 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003823 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
John Bauman89401822014-05-06 15:04:28 -04003824
Logan Chien813d5032018-08-31 17:19:45 +08003825 return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003826 }
John Bauman66b8ab22014-05-06 15:57:45 -04003827
John Bauman19bac1e2014-05-06 15:23:49 -04003828 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003829 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003830 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
John Bauman89401822014-05-06 15:04:28 -04003831
Logan Chien813d5032018-08-31 17:19:45 +08003832 return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003833 }
3834
John Bauman19bac1e2014-05-06 15:23:49 -04003835 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003836 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003837 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
John Bauman89401822014-05-06 15:04:28 -04003838
Logan Chien813d5032018-08-31 17:19:45 +08003839 return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003840 }
John Bauman66b8ab22014-05-06 15:57:45 -04003841
John Bauman19bac1e2014-05-06 15:23:49 -04003842 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003843 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003844 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
John Bauman89401822014-05-06 15:04:28 -04003845
Logan Chien813d5032018-08-31 17:19:45 +08003846 return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003847 }
John Bauman66b8ab22014-05-06 15:57:45 -04003848
John Bauman19bac1e2014-05-06 15:23:49 -04003849 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003850 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003851 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
John Bauman89401822014-05-06 15:04:28 -04003852
Logan Chien813d5032018-08-31 17:19:45 +08003853 return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003854 }
John Bauman66b8ab22014-05-06 15:57:45 -04003855
John Bauman19bac1e2014-05-06 15:23:49 -04003856 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003857 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003858 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
John Bauman89401822014-05-06 15:04:28 -04003859
Logan Chien813d5032018-08-31 17:19:45 +08003860 return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
John Bauman19bac1e2014-05-06 15:23:49 -04003861 }
3862
3863 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003864 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003865 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
John Bauman89401822014-05-06 15:04:28 -04003866 }
3867
John Bauman19bac1e2014-05-06 15:23:49 -04003868 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003869 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003870 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman89401822014-05-06 15:04:28 -04003871 }
3872
John Bauman19bac1e2014-05-06 15:23:49 -04003873 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003874 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003875 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman89401822014-05-06 15:04:28 -04003876 }
3877
John Bauman19bac1e2014-05-06 15:23:49 -04003878 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003879 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003880 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003881 }
3882
John Bauman19bac1e2014-05-06 15:23:49 -04003883 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003884 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003885 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003886 }
3887
John Bauman19bac1e2014-05-06 15:23:49 -04003888 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003889 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003890 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003891 }
3892
John Bauman19bac1e2014-05-06 15:23:49 -04003893 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003894 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08003895 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04003896 }
3897
John Bauman19bac1e2014-05-06 15:23:49 -04003898 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
John Bauman89401822014-05-06 15:04:28 -04003899 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003900 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003901
Logan Chien813d5032018-08-31 17:19:45 +08003902 return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003903 }
3904
John Bauman19bac1e2014-05-06 15:23:49 -04003905 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003906 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003907 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003908
Logan Chien813d5032018-08-31 17:19:45 +08003909 return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003910 }
3911
John Bauman19bac1e2014-05-06 15:23:49 -04003912 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003913 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003914 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
John Bauman89401822014-05-06 15:04:28 -04003915
Logan Chien813d5032018-08-31 17:19:45 +08003916 return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003917 }
3918
Nicolas Capens33438a62017-09-27 11:47:35 -04003919 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003920 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003921 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
John Bauman89401822014-05-06 15:04:28 -04003922
Logan Chien813d5032018-08-31 17:19:45 +08003923 return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003924 }
3925
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003926 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003927 {
3928 if(CPUID::supportsSSE4_1())
3929 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003930 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
John Bauman66b8ab22014-05-06 15:57:45 -04003931
Logan Chien813d5032018-08-31 17:19:45 +08003932 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003933 }
3934 else
3935 {
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003936 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
3937 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
3938
3939 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
John Bauman89401822014-05-06 15:04:28 -04003940 }
3941 }
3942
John Bauman19bac1e2014-05-06 15:23:49 -04003943 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003944 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003945 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003946
Logan Chien813d5032018-08-31 17:19:45 +08003947 return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003948 }
3949
John Bauman19bac1e2014-05-06 15:23:49 -04003950 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003951 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003952 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003953
Logan Chien813d5032018-08-31 17:19:45 +08003954 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003955 }
3956
John Bauman19bac1e2014-05-06 15:23:49 -04003957 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003958 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003959 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003960
Logan Chien813d5032018-08-31 17:19:45 +08003961 return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003962 }
3963
John Bauman19bac1e2014-05-06 15:23:49 -04003964 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003965 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003966 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003967
Logan Chien813d5032018-08-31 17:19:45 +08003968 return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003969 }
3970
John Bauman19bac1e2014-05-06 15:23:49 -04003971 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003972 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003973 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003974
Logan Chien813d5032018-08-31 17:19:45 +08003975 return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003976 }
3977
John Bauman19bac1e2014-05-06 15:23:49 -04003978 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003979 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003980 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003981
Logan Chien813d5032018-08-31 17:19:45 +08003982 return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003983 }
3984
John Bauman19bac1e2014-05-06 15:23:49 -04003985 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003986 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003987 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003988
Logan Chien813d5032018-08-31 17:19:45 +08003989 return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003990 }
3991
John Bauman19bac1e2014-05-06 15:23:49 -04003992 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003993 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003994 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003995
Logan Chien813d5032018-08-31 17:19:45 +08003996 return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003997 }
3998
John Bauman19bac1e2014-05-06 15:23:49 -04003999 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004000 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004001 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04004002
Logan Chien813d5032018-08-31 17:19:45 +08004003 return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004004 }
4005
John Bauman19bac1e2014-05-06 15:23:49 -04004006 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004007 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04004008 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04004009
Logan Chien813d5032018-08-31 17:19:45 +08004010 return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004011 }
4012
John Bauman19bac1e2014-05-06 15:23:49 -04004013 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004014 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004015 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04004016
Logan Chien813d5032018-08-31 17:19:45 +08004017 return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004018 }
4019
John Bauman19bac1e2014-05-06 15:23:49 -04004020 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004021 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04004022 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04004023
Logan Chien813d5032018-08-31 17:19:45 +08004024 return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004025 }
4026
John Bauman19bac1e2014-05-06 15:23:49 -04004027 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
4028 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004029 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004030 }
4031
4032 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
4033 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004034 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004035 }
4036
4037 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
4038 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004039 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004040 }
4041
4042 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
4043 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004044 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004045 }
4046
4047 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004048 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004049 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04004050
Logan Chien813d5032018-08-31 17:19:45 +08004051 return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004052 }
4053
John Bauman19bac1e2014-05-06 15:23:49 -04004054 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004055 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004056 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004057
Logan Chien813d5032018-08-31 17:19:45 +08004058 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004059 }
4060
John Bauman19bac1e2014-05-06 15:23:49 -04004061 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004062 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004063 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004064
Logan Chien813d5032018-08-31 17:19:45 +08004065 return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004066 }
4067
John Bauman19bac1e2014-05-06 15:23:49 -04004068 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004069 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004070 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04004071
Logan Chien813d5032018-08-31 17:19:45 +08004072 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004073 }
4074
John Bauman19bac1e2014-05-06 15:23:49 -04004075 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04004076 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004077 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004078
Logan Chien813d5032018-08-31 17:19:45 +08004079 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004080 }
4081
John Bauman19bac1e2014-05-06 15:23:49 -04004082 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004083 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004084 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004085
Logan Chien813d5032018-08-31 17:19:45 +08004086 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004087 }
4088
John Bauman19bac1e2014-05-06 15:23:49 -04004089 RValue<Int> movmskps(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04004090 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004091 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
John Bauman89401822014-05-06 15:04:28 -04004092
Logan Chien813d5032018-08-31 17:19:45 +08004093 return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
John Bauman89401822014-05-06 15:04:28 -04004094 }
4095
John Bauman19bac1e2014-05-06 15:23:49 -04004096 RValue<Int> pmovmskb(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04004097 {
Nicolas Capens01a97962017-07-28 17:30:51 -04004098 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
John Bauman89401822014-05-06 15:04:28 -04004099
Logan Chien813d5032018-08-31 17:19:45 +08004100 return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
John Bauman89401822014-05-06 15:04:28 -04004101 }
4102
Nicolas Capens01a97962017-07-28 17:30:51 -04004103 RValue<Int4> pmovzxbd(RValue<Byte16> x)
John Bauman89401822014-05-06 15:04:28 -04004104 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004105 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004106 }
4107
Nicolas Capens01a97962017-07-28 17:30:51 -04004108 RValue<Int4> pmovsxbd(RValue<SByte16> x)
John Bauman89401822014-05-06 15:04:28 -04004109 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004110 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004111 }
4112
Nicolas Capens01a97962017-07-28 17:30:51 -04004113 RValue<Int4> pmovzxwd(RValue<UShort8> x)
John Bauman89401822014-05-06 15:04:28 -04004114 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004115 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004116 }
4117
Nicolas Capens01a97962017-07-28 17:30:51 -04004118 RValue<Int4> pmovsxwd(RValue<Short8> x)
John Bauman89401822014-05-06 15:04:28 -04004119 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004120 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004121 }
4122 }
Logan Chiene3191012018-08-24 22:01:50 +08004123#endif // defined(__i386__) || defined(__x86_64__)
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004124
Ben Clayton60a3d6f2019-02-26 17:24:46 +00004125#ifdef ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004126 // extractAll returns a vector containing the extracted n scalar value of
4127 // the vector vec.
4128 static std::vector<Value*> extractAll(Value* vec, int n)
4129 {
4130 std::vector<Value*> elements;
4131 elements.reserve(n);
4132 for (int i = 0; i < n; i++)
4133 {
4134 auto el = V(::builder->CreateExtractElement(V(vec), i));
4135 elements.push_back(el);
4136 }
4137 return elements;
4138 }
4139
Ben Claytonca8e3d72019-05-14 16:51:05 +01004140 // toInt returns all the integer values in vals extended to a native width
4141 // integer.
4142 static std::vector<Value*> toInt(const std::vector<Value*>& vals, bool isSigned)
4143 {
4144 auto intTy = ::llvm::Type::getIntNTy(*::context, sizeof(int) * 8); // Natural integer width.
4145 std::vector<Value*> elements;
4146 elements.reserve(vals.size());
4147 for (auto v : vals)
4148 {
4149 if (isSigned)
4150 {
4151 elements.push_back(V(::builder->CreateSExt(V(v), intTy)));
4152 }
4153 else
4154 {
4155 elements.push_back(V(::builder->CreateZExt(V(v), intTy)));
4156 }
4157 }
4158 return elements;
4159 }
4160
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004161 // toDouble returns all the float values in vals extended to doubles.
4162 static std::vector<Value*> toDouble(const std::vector<Value*>& vals)
4163 {
4164 auto doubleTy = ::llvm::Type::getDoubleTy(*::context);
4165 std::vector<Value*> elements;
4166 elements.reserve(vals.size());
4167 for (auto v : vals)
4168 {
4169 elements.push_back(V(::builder->CreateFPExt(V(v), doubleTy)));
4170 }
4171 return elements;
4172 }
4173
Ben Claytonca8e3d72019-05-14 16:51:05 +01004174 std::vector<Value*> PrintValue::Ty<Byte4>::val(const RValue<Byte4>& v) { return toInt(extractAll(v.value, 4), false); }
4175 std::vector<Value*> PrintValue::Ty<Int>::val(const RValue<Int>& v) { return toInt({v.value}, true); }
4176 std::vector<Value*> PrintValue::Ty<Int2>::val(const RValue<Int2>& v) { return toInt(extractAll(v.value, 2), true); }
4177 std::vector<Value*> PrintValue::Ty<Int4>::val(const RValue<Int4>& v) { return toInt(extractAll(v.value, 4), true); }
4178 std::vector<Value*> PrintValue::Ty<UInt>::val(const RValue<UInt>& v) { return toInt({v.value}, false); }
4179 std::vector<Value*> PrintValue::Ty<UInt2>::val(const RValue<UInt2>& v) { return toInt(extractAll(v.value, 2), false); }
4180 std::vector<Value*> PrintValue::Ty<UInt4>::val(const RValue<UInt4>& v) { return toInt(extractAll(v.value, 4), false); }
4181 std::vector<Value*> PrintValue::Ty<Short4>::val(const RValue<Short4>& v) { return toInt(extractAll(v.value, 4), true); }
4182 std::vector<Value*> PrintValue::Ty<UShort4>::val(const RValue<UShort4>& v) { return toInt(extractAll(v.value, 4), false); }
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004183 std::vector<Value*> PrintValue::Ty<Float>::val(const RValue<Float>& v) { return toDouble({v.value}); }
4184 std::vector<Value*> PrintValue::Ty<Float4>::val(const RValue<Float4>& v) { return toDouble(extractAll(v.value, 4)); }
Ben Claytonbc0cbb92019-05-15 17:12:57 +01004185 std::vector<Value*> PrintValue::Ty<const char*>::val(const char* v) { return {V(::builder->CreateGlobalStringPtr(v))}; }
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004186
4187 void Printv(const char* function, const char* file, int line, const char* fmt, std::initializer_list<PrintValue> args)
4188 {
4189 // LLVM types used below.
4190 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
Ben Claytonca8e3d72019-05-14 16:51:05 +01004191 auto intTy = ::llvm::Type::getIntNTy(*::context, sizeof(int) * 8); // Natural integer width.
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004192 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
4193 auto funcTy = ::llvm::FunctionType::get(i32Ty, {i8PtrTy}, true);
4194
4195 auto func = ::module->getOrInsertFunction("printf", funcTy);
4196
4197 // Build the printf format message string.
4198 std::string str;
4199 if (file != nullptr) { str += (line > 0) ? "%s:%d " : "%s "; }
4200 if (function != nullptr) { str += "%s "; }
4201 str += fmt;
4202
4203 // Perform subsitution on all '{n}' bracketed indices in the format
4204 // message.
4205 int i = 0;
4206 for (const PrintValue& arg : args)
4207 {
4208 str = replace(str, "{" + std::to_string(i++) + "}", arg.format);
4209 }
4210
4211 ::llvm::SmallVector<::llvm::Value*, 8> vals;
4212
4213 // The format message is always the first argument.
4214 vals.push_back(::builder->CreateGlobalStringPtr(str));
4215
4216 // Add optional file, line and function info if provided.
4217 if (file != nullptr)
4218 {
4219 vals.push_back(::builder->CreateGlobalStringPtr(file));
4220 if (line > 0)
4221 {
4222 vals.push_back(::llvm::ConstantInt::get(intTy, line));
4223 }
4224 }
4225 if (function != nullptr)
4226 {
4227 vals.push_back(::builder->CreateGlobalStringPtr(function));
4228 }
4229
4230 // Add all format arguments.
4231 for (const PrintValue& arg : args)
4232 {
4233 for (auto val : arg.values)
4234 {
4235 vals.push_back(V(val));
4236 }
4237 }
4238
4239 ::builder->CreateCall(func, vals);
4240 }
4241#endif // ENABLE_RR_PRINT
4242
Ben Claytonac07ed82019-03-26 14:17:41 +00004243 void Break()
4244 {
4245 auto trap = ::llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::trap);
4246 builder->CreateCall(trap);
4247 }
4248
4249 void Nop()
4250 {
4251 auto voidTy = ::llvm::Type::getVoidTy(*context);
4252 auto funcTy = ::llvm::FunctionType::get(voidTy, {}, false);
4253 auto func = ::module->getOrInsertFunction("nop", funcTy);
4254 builder->CreateCall(func);
4255 }
4256
// EmitDebugLocation forwards to DebugInfo::EmitLocation() when debug info is
// compiled in (ENABLE_RR_DEBUG_INFO) and a debugInfo instance exists.
// Otherwise it does nothing.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
    if (debugInfo == nullptr)
    {
        return;
    }

    debugInfo->EmitLocation();
#endif  // ENABLE_RR_DEBUG_INFO
}
4266
4267 void EmitDebugVariable(Value* value)
4268 {
4269#ifdef ENABLE_RR_DEBUG_INFO
4270 if (debugInfo != nullptr)
4271 {
4272 debugInfo->EmitVariable(value);
4273 }
4274#endif // ENABLE_RR_DEBUG_INFO
4275 }
4276
// FlushDebug forwards to DebugInfo::Flush() when debug info is compiled in
// (ENABLE_RR_DEBUG_INFO) and a debugInfo instance exists. Otherwise it does
// nothing.
void FlushDebug()
{
#ifdef ENABLE_RR_DEBUG_INFO
    if (debugInfo == nullptr)
    {
        return;
    }

    debugInfo->Flush();
#endif  // ENABLE_RR_DEBUG_INFO
}
4286
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004287} // namespace rr
4288
4289// ------------------------------ Coroutines ------------------------------
4290
4291namespace {
4292
4293 struct CoroutineState
4294 {
4295 llvm::Function *await = nullptr;
4296 llvm::Function *destroy = nullptr;
4297 llvm::Value *handle = nullptr;
4298 llvm::Value *id = nullptr;
4299 llvm::Value *promise = nullptr;
4300 llvm::BasicBlock *suspendBlock = nullptr;
4301 llvm::BasicBlock *endBlock = nullptr;
4302 llvm::BasicBlock *destroyBlock = nullptr;
4303 };
4304 CoroutineState coroutine;
4305
4306 // Magic values retuned by llvm.coro.suspend.
4307 // See: https://llvm.org/docs/Coroutines.html#llvm-coro-suspend-intrinsic
4308 enum SuspendAction
4309 {
4310 SuspendActionSuspend = -1,
4311 SuspendActionResume = 0,
4312 SuspendActionDestroy = 1
4313 };
4314
4315} // anonymous namespace
4316
4317namespace rr {
4318
4319void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params)
4320{
4321 // Types
4322 auto voidTy = ::llvm::Type::getVoidTy(*::context);
4323 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
4324 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
4325 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
4326 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
4327 auto promiseTy = T(YieldType);
4328 auto promisePtrTy = promiseTy->getPointerTo();
4329 auto handleTy = i8PtrTy;
4330 auto boolTy = i1Ty;
4331
4332 // LLVM intrinsics
4333 auto coro_id = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_id);
4334 auto coro_size = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_size, {i32Ty});
4335 auto coro_begin = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::coro_begin);
4336 auto coro_resume = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_resume);
4337 auto coro_end = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_end);
4338 auto coro_free = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_free);
4339 auto coro_destroy = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_destroy);
4340 auto coro_promise = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_promise);
4341 auto coro_done = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_done);
4342 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_suspend);
4343
4344 auto allocFrameTy = ::llvm::FunctionType::get(i8PtrTy, {i32Ty}, false);
4345 auto allocFrame = ::module->getOrInsertFunction("coroutine_alloc_frame", allocFrameTy);
4346 auto freeFrameTy = ::llvm::FunctionType::get(voidTy, {i8PtrTy}, false);
4347 auto freeFrame = ::module->getOrInsertFunction("coroutine_free_frame", freeFrameTy);
4348
4349 // Build the coroutine_await() function:
4350 //
4351 // bool coroutine_await(CoroutineHandle* handle, YieldType* out)
4352 // {
4353 // if (llvm.coro.done(handle))
4354 // {
4355 // return false;
4356 // }
4357 // else
4358 // {
4359 // *value = (T*)llvm.coro.promise(handle);
4360 // llvm.coro.resume(handle);
4361 // return true;
4362 // }
4363 // }
4364 //
Ben Claytonea38f952019-06-17 13:56:56 +01004365 ::coroutine.await = rr::createFunction("coroutine_await", boolTy, {handleTy, promisePtrTy});
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004366 {
4367 auto args = ::coroutine.await->arg_begin();
4368 auto handle = args++;
4369 auto outPtr = args++;
4370 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "co_await", ::coroutine.await));
4371 auto doneBlock = llvm::BasicBlock::Create(*::context, "done", ::coroutine.await);
4372 auto resumeBlock = llvm::BasicBlock::Create(*::context, "resume", ::coroutine.await);
4373
4374 auto done = ::builder->CreateCall(coro_done, {handle}, "done");
4375 ::builder->CreateCondBr(done, doneBlock, resumeBlock);
4376
4377 ::builder->SetInsertPoint(doneBlock);
4378 ::builder->CreateRet(::llvm::ConstantInt::getFalse(i1Ty));
4379
4380 ::builder->SetInsertPoint(resumeBlock);
4381 auto promiseAlignment = ::llvm::ConstantInt::get(i32Ty, 4); // TODO: Get correct alignment.
4382 auto promisePtr = ::builder->CreateCall(coro_promise, {handle, promiseAlignment, ::llvm::ConstantInt::get(i1Ty, 0)});
4383 auto promise = ::builder->CreateLoad(::builder->CreatePointerCast(promisePtr, promisePtrTy));
4384 ::builder->CreateStore(promise, outPtr);
4385 ::builder->CreateCall(coro_resume, {handle});
4386 ::builder->CreateRet(::llvm::ConstantInt::getTrue(i1Ty));
4387 }
4388
4389 // Build the coroutine_destroy() function:
4390 //
4391 // void coroutine_destroy(CoroutineHandle* handle)
4392 // {
4393 // llvm.coro.destroy(handle);
4394 // }
4395 //
Ben Claytonea38f952019-06-17 13:56:56 +01004396 ::coroutine.destroy = rr::createFunction("coroutine_destroy", voidTy, {handleTy});
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004397 {
4398 auto handle = ::coroutine.destroy->arg_begin();
4399 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::coroutine.destroy));
4400 ::builder->CreateCall(coro_destroy, {handle});
4401 ::builder->CreateRetVoid();
4402 }
4403
4404 // Begin building the main coroutine_begin() function.
4405 //
4406 // CoroutineHandle* coroutine_begin(<Arguments>)
4407 // {
4408 // YieldType promise;
4409 // auto id = llvm.coro.id(0, &promise, nullptr, nullptr);
4410 // void* frame = coroutine_alloc_frame(llvm.coro.size.i32());
4411 // CoroutineHandle *handle = llvm.coro.begin(id, frame);
4412 //
4413 // ... <REACTOR CODE> ...
4414 //
4415 // end:
4416 // SuspendAction action = llvm.coro.suspend(none, true /* final */); // <-- RESUME POINT
4417 // switch (action)
4418 // {
4419 // case SuspendActionResume:
4420 // UNREACHABLE(); // Illegal to resume after final suspend.
4421 // case SuspendActionDestroy:
4422 // goto destroy;
4423 // default: // (SuspendActionSuspend)
4424 // goto suspend;
4425 // }
4426 //
4427 // destroy:
4428 // coroutine_free_frame(llvm.coro.free(id, handle));
4429 // goto suspend;
4430 //
4431 // suspend:
4432 // llvm.coro.end(handle, false);
4433 // return handle;
4434 // }
4435 //
Ben Claytonea38f952019-06-17 13:56:56 +01004436 ::function = rr::createFunction("coroutine_begin", handleTy, T(Params));
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004437
4438#ifdef ENABLE_RR_DEBUG_INFO
4439 ::debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(::builder, ::context, ::module, ::function));
4440#endif // ENABLE_RR_DEBUG_INFO
4441
4442 auto entryBlock = llvm::BasicBlock::Create(*::context, "coroutine", ::function);
4443 ::coroutine.suspendBlock = llvm::BasicBlock::Create(*::context, "suspend", ::function);
4444 ::coroutine.endBlock = llvm::BasicBlock::Create(*::context, "end", ::function);
4445 ::coroutine.destroyBlock = llvm::BasicBlock::Create(*::context, "destroy", ::function);
4446
4447 ::builder->SetInsertPoint(entryBlock);
4448 Variable::materializeAll();
4449 ::coroutine.promise = ::builder->CreateAlloca(T(YieldType), nullptr, "promise");
4450 ::coroutine.id = ::builder->CreateCall(coro_id, {
4451 ::llvm::ConstantInt::get(i32Ty, 0),
4452 ::builder->CreatePointerCast(::coroutine.promise, i8PtrTy),
4453 ::llvm::ConstantPointerNull::get(i8PtrTy),
4454 ::llvm::ConstantPointerNull::get(i8PtrTy),
4455 });
4456 auto size = ::builder->CreateCall(coro_size, {});
4457 auto frame = ::builder->CreateCall(allocFrame, {size});
4458 ::coroutine.handle = ::builder->CreateCall(coro_begin, {::coroutine.id, frame});
4459
4460 // Build the suspend block
4461 ::builder->SetInsertPoint(::coroutine.suspendBlock);
4462 ::builder->CreateCall(coro_end, {::coroutine.handle, ::llvm::ConstantInt::get(i1Ty, 0)});
4463 ::builder->CreateRet(::coroutine.handle);
4464
4465 // Build the end block
4466 ::builder->SetInsertPoint(::coroutine.endBlock);
4467 auto action = ::builder->CreateCall(coro_suspend, {
4468 ::llvm::ConstantTokenNone::get(*::context),
4469 ::llvm::ConstantInt::get(i1Ty, 1), // final: true
4470 });
4471 auto switch_ = ::builder->CreateSwitch(action, ::coroutine.suspendBlock, 3);
4472 // switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), trapBlock); // TODO: Trap attempting to resume after final suspend
4473 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), ::coroutine.destroyBlock);
4474
4475 // Build the destroy block
4476 ::builder->SetInsertPoint(::coroutine.destroyBlock);
4477 auto memory = ::builder->CreateCall(coro_free, {::coroutine.id, ::coroutine.handle});
4478 ::builder->CreateCall(freeFrame, {memory});
4479 ::builder->CreateBr(::coroutine.suspendBlock);
4480
4481 // Switch back to the entry block for reactor codegen.
4482 ::builder->SetInsertPoint(entryBlock);
John Bauman89401822014-05-06 15:04:28 -04004483}
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004484
4485void Nucleus::yield(Value* val)
4486{
4487 ASSERT_MSG(::coroutine.id != nullptr, "yield() can only be called when building a Coroutine");
4488
4489 // promise = val;
4490 //
4491 // auto action = llvm.coro.suspend(none, false /* final */); // <-- RESUME POINT
4492 // switch (action)
4493 // {
4494 // case SuspendActionResume:
4495 // goto resume;
4496 // case SuspendActionDestroy:
4497 // goto destroy;
4498 // default: // (SuspendActionSuspend)
4499 // goto suspend;
4500 // }
4501 // resume:
4502 //
4503
4504 RR_DEBUG_INFO_UPDATE_LOC();
4505 Variable::materializeAll();
4506
4507 // Types
4508 auto i1Ty = ::llvm::Type::getInt1Ty(*::context);
4509 auto i8Ty = ::llvm::Type::getInt8Ty(*::context);
4510
4511 // Intrinsics
4512 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(::module, ::llvm::Intrinsic::coro_suspend);
4513
4514 // Create a block to resume execution.
4515 auto resumeBlock = llvm::BasicBlock::Create(*::context, "resume", ::function);
4516
4517 // Store the promise (yield value)
4518 ::builder->CreateStore(V(val), ::coroutine.promise);
4519 auto action = ::builder->CreateCall(coro_suspend, {
4520 ::llvm::ConstantTokenNone::get(*::context),
4521 ::llvm::ConstantInt::get(i1Ty, 0), // final: true
4522 });
4523 auto switch_ = ::builder->CreateSwitch(action, ::coroutine.suspendBlock, 3);
4524 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), resumeBlock);
4525 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), ::coroutine.destroyBlock);
4526
4527 // Continue building in the resume block.
4528 ::builder->SetInsertPoint(resumeBlock);
4529}
4530
4531Routine* Nucleus::acquireCoroutine(const char *name, bool runOptimizations)
4532{
4533 ASSERT_MSG(::coroutine.id != nullptr, "acquireCoroutine() called without a call to createCoroutine()");
4534
4535 ::builder->CreateBr(::coroutine.endBlock);
4536
4537#ifdef ENABLE_RR_DEBUG_INFO
4538 if (debugInfo != nullptr)
4539 {
4540 debugInfo->Finalize();
4541 }
4542#endif // ENABLE_RR_DEBUG_INFO
4543
4544 if(false)
4545 {
4546 std::error_code error;
4547 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
4548 ::module->print(file, 0);
4549 }
4550
4551 // Run manadory coroutine transforms.
4552 llvm::legacy::PassManager pm;
4553 pm.add(llvm::createCoroEarlyPass());
4554 pm.add(llvm::createCoroSplitPass());
4555 pm.add(llvm::createCoroElidePass());
4556 pm.add(llvm::createBarrierNoopPass());
4557 pm.add(llvm::createCoroCleanupPass());
4558 pm.run(*::module);
4559
4560 if(runOptimizations)
4561 {
4562 optimize();
4563 }
4564
4565 if(false)
4566 {
4567 std::error_code error;
4568 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
4569 ::module->print(file, 0);
4570 }
4571
4572 llvm::Function *funcs[Nucleus::CoroutineEntryCount];
4573 funcs[Nucleus::CoroutineEntryBegin] = ::function;
4574 funcs[Nucleus::CoroutineEntryAwait] = ::coroutine.await;
4575 funcs[Nucleus::CoroutineEntryDestroy] = ::coroutine.destroy;
4576 Routine *routine = ::reactorJIT->acquireRoutine(funcs, Nucleus::CoroutineEntryCount);
4577
4578 ::coroutine = CoroutineState{};
4579
4580 return routine;
4581}
4582
4583} // namespace rr