Blame - llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp - toolchain/llvm-project

blob: 1fbaae2ba33be56649c96b0c59588511b5eac433 [file] [log] [blame]

Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1	//===- AMDGPULibCalls.cpp -------------------------------------------------===//
				2	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	//
				9	/// \file
Adrian Prantl	5f8f34e4	2018-05-01 15:54:18 +0000	[diff] [blame]	10	/// This file does AMD library function optimizations.
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#define DEBUG_TYPE "amdgpu-simplifylib"
				15
				16	#include "AMDGPU.h"
				17	#include "AMDGPULibFunc.h"
				18	#include "llvm/Analysis/AliasAnalysis.h"
				19	#include "llvm/Analysis/Loads.h"
				20	#include "llvm/ADT/StringSet.h"
				21	#include "llvm/ADT/StringRef.h"
				22	#include "llvm/IR/Constants.h"
				23	#include "llvm/IR/DerivedTypes.h"
				24	#include "llvm/IR/Instructions.h"
				25	#include "llvm/IR/IRBuilder.h"
				26	#include "llvm/IR/Function.h"
				27	#include "llvm/IR/LLVMContext.h"
				28	#include "llvm/IR/Module.h"
				29	#include "llvm/IR/ValueSymbolTable.h"
				30	#include "llvm/Support/Debug.h"
				31	#include "llvm/Support/raw_ostream.h"
Stanislav Mekhanoshin	1d8cf2b	2017-09-29 23:40:19 +0000	[diff] [blame]	32	#include "llvm/Target/TargetOptions.h"
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	33	#include <vector>
				34	#include <cmath>
				35
				36	using namespace llvm;
				37
				38	static cl::opt<bool> EnablePreLink("amdgpu-prelink",
				39	cl::desc("Enable pre-link mode optimizations"),
				40	cl::init(false),
				41	cl::Hidden);
				42
				43	static cl::list<std::string> UseNative("amdgpu-use-native",
				44	cl::desc("Comma separated list of functions to replace with native, or all"),
				45	cl::CommaSeparated, cl::ValueOptional,
				46	cl::Hidden);
				47
				48	#define MATH_PI 3.14159265358979323846264338327950288419716939937511
				49	#define MATH_E 2.71828182845904523536028747135266249775724709369996
				50	#define MATH_SQRT2 1.41421356237309504880168872420969807856967187537695
				51
				52	#define MATH_LOG2E 1.4426950408889634073599246810018921374266459541529859
				53	#define MATH_LOG10E 0.4342944819032518276511289189166050822943970058036665
				54	// Value of log2(10)
				55	#define MATH_LOG2_10 3.3219280948873623478703194294893901758648313930245806
				56	// Value of 1 / log2(10)
				57	#define MATH_RLOG2_10 0.3010299956639811952137388947244930267681898814621085
				58	// Value of 1 / M_LOG2E_F = 1 / log2(e)
				59	#define MATH_RLOG2_E 0.6931471805599453094172321214581765680755001343602552
				60
				61	namespace llvm {
				62
				63	class AMDGPULibCalls {
				64	private:
				65
				66	typedef llvm::AMDGPULibFunc FuncInfo;
				67
				68	// -fuse-native.
				69	bool AllNative = false;
				70
				71	bool useNativeFunc(const StringRef F) const;
				72
				73	// Return a pointer (pointer expr) to the function if function defintion with
				74	// "FuncName" exists. It may create a new function prototype in pre-link mode.
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	75	Constant getFunction(Module M, const FuncInfo& fInfo);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	76
				77	// Replace a normal function with its native version.
				78	bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
				79
				80	bool parseFunctionName(const StringRef& FMangledName,
				81	FuncInfo FInfo=nullptr /out*/);
				82
				83	bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
				84
				85	/* Specialized optimizations */
				86
				87	// recip (half or native)
				88	bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				89
				90	// divide (half or native)
				91	bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				92
				93	// pow/powr/pown
				94	bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				95
				96	// rootn
				97	bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				98
				99	// fma/mad
				100	bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				101
				102	// -fuse-native for sincos
				103	bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
				104
				105	// evaluate calls if calls' arguments are constants.
				106	bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
				107	double& Res1, Constant copr0, Constant copr1, Constant *copr2);
				108	bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
				109
				110	// exp
				111	bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				112
				113	// exp2
				114	bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				115
				116	// exp10
				117	bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				118
				119	// log
				120	bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				121
				122	// log2
				123	bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				124
				125	// log10
				126	bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				127
				128	// sqrt
				129	bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				130
				131	// sin/cos
				132	bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
				133
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	134	// __read_pipe/__write_pipe
				135	bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
				136
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	137	// Get insertion point at entry.
				138	BasicBlock::iterator getEntryIns(CallInst * UI);
				139	// Insert an Alloc instruction.
				140	AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
				141	// Get a scalar native builtin signle argument FP function
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	142	Constant* getNativeFunction(Module* M, const FuncInfo &FInfo);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	143
				144	protected:
				145	CallInst *CI;
				146
				147	bool isUnsafeMath(const CallInst *CI) const;
				148
				149	void replaceCall(Value *With) {
				150	CI->replaceAllUsesWith(With);
				151	CI->eraseFromParent();
				152	}
				153
				154	public:
				155	bool fold(CallInst CI, AliasAnalysis AA = nullptr);
				156
				157	void initNativeFuncs();
				158
				159	// Replace a normal math function call with that native version
				160	bool useNative(CallInst *CI);
				161	};
				162
				163	} // end llvm namespace
				164
				165	namespace {
				166
				167	class AMDGPUSimplifyLibCalls : public FunctionPass {
				168
				169	AMDGPULibCalls Simplifier;
				170
Stanislav Mekhanoshin	1d8cf2b	2017-09-29 23:40:19 +0000	[diff] [blame]	171	const TargetOptions Options;
				172
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	173	public:
				174	static char ID; // Pass identification
				175
Stanislav Mekhanoshin	1d8cf2b	2017-09-29 23:40:19 +0000	[diff] [blame]	176	AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
				177	: FunctionPass(ID), Options(Opt) {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	178	initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
				179	}
				180
				181	void getAnalysisUsage(AnalysisUsage &AU) const override {
				182	AU.addRequired<AAResultsWrapperPass>();
				183	}
				184
				185	bool runOnFunction(Function &M) override;
				186	};
				187
				188	class AMDGPUUseNativeCalls : public FunctionPass {
				189
				190	AMDGPULibCalls Simplifier;
				191
				192	public:
				193	static char ID; // Pass identification
				194
				195	AMDGPUUseNativeCalls() : FunctionPass(ID) {
				196	initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
				197	Simplifier.initNativeFuncs();
				198	}
				199
				200	bool runOnFunction(Function &F) override;
				201	};
				202
				203	} // end anonymous namespace.
				204
				205	char AMDGPUSimplifyLibCalls::ID = 0;
				206	char AMDGPUUseNativeCalls::ID = 0;
				207
				208	INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
				209	"Simplify well-known AMD library calls", false, false)
				210	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
				211	INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
				212	"Simplify well-known AMD library calls", false, false)
				213
				214	INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
				215	"Replace builtin math calls with that native versions.",
				216	false, false)
				217
				218	template <typename IRB>
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	219	static CallInst CreateCallEx(IRB &B, Value Callee, Value *Arg,
Benjamin Kramer	51ebcaa	2017-11-24 14:55:41 +0000	[diff] [blame]	220	const Twine &Name = "") {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	221	CallInst *R = B.CreateCall(Callee, Arg, Name);
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	222	if (Function* F = dyn_cast<Function>(Callee))
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	223	R->setCallingConv(F->getCallingConv());
				224	return R;
				225	}
				226
				227	template <typename IRB>
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	228	static CallInst CreateCallEx2(IRB &B, Value Callee, Value Arg1, Value Arg2,
				229	const Twine &Name = "") {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	230	CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	231	if (Function* F = dyn_cast<Function>(Callee))
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	232	R->setCallingConv(F->getCallingConv());
				233	return R;
				234	}
				235
				236	// Data structures for table-driven optimizations.
				237	// FuncTbl works for both f32 and f64 functions with 1 input argument
				238
				239	struct TableEntry {
				240	double result;
				241	double input;
				242	};
				243
				244	/* a list of {result, input} */
				245	static const TableEntry tbl_acos[] = {
				246	{MATH_PI/2.0, 0.0},
				247	{MATH_PI/2.0, -0.0},
				248	{0.0, 1.0},
				249	{MATH_PI, -1.0}
				250	};
				251	static const TableEntry tbl_acosh[] = {
				252	{0.0, 1.0}
				253	};
				254	static const TableEntry tbl_acospi[] = {
				255	{0.5, 0.0},
				256	{0.5, -0.0},
				257	{0.0, 1.0},
				258	{1.0, -1.0}
				259	};
				260	static const TableEntry tbl_asin[] = {
				261	{0.0, 0.0},
				262	{-0.0, -0.0},
				263	{MATH_PI/2.0, 1.0},
				264	{-MATH_PI/2.0, -1.0}
				265	};
				266	static const TableEntry tbl_asinh[] = {
				267	{0.0, 0.0},
				268	{-0.0, -0.0}
				269	};
				270	static const TableEntry tbl_asinpi[] = {
				271	{0.0, 0.0},
				272	{-0.0, -0.0},
				273	{0.5, 1.0},
				274	{-0.5, -1.0}
				275	};
				276	static const TableEntry tbl_atan[] = {
				277	{0.0, 0.0},
				278	{-0.0, -0.0},
				279	{MATH_PI/4.0, 1.0},
				280	{-MATH_PI/4.0, -1.0}
				281	};
				282	static const TableEntry tbl_atanh[] = {
				283	{0.0, 0.0},
				284	{-0.0, -0.0}
				285	};
				286	static const TableEntry tbl_atanpi[] = {
				287	{0.0, 0.0},
				288	{-0.0, -0.0},
				289	{0.25, 1.0},
				290	{-0.25, -1.0}
				291	};
				292	static const TableEntry tbl_cbrt[] = {
				293	{0.0, 0.0},
				294	{-0.0, -0.0},
				295	{1.0, 1.0},
				296	{-1.0, -1.0},
				297	};
				298	static const TableEntry tbl_cos[] = {
				299	{1.0, 0.0},
				300	{1.0, -0.0}
				301	};
				302	static const TableEntry tbl_cosh[] = {
				303	{1.0, 0.0},
				304	{1.0, -0.0}
				305	};
				306	static const TableEntry tbl_cospi[] = {
				307	{1.0, 0.0},
				308	{1.0, -0.0}
				309	};
				310	static const TableEntry tbl_erfc[] = {
				311	{1.0, 0.0},
				312	{1.0, -0.0}
				313	};
				314	static const TableEntry tbl_erf[] = {
				315	{0.0, 0.0},
				316	{-0.0, -0.0}
				317	};
				318	static const TableEntry tbl_exp[] = {
				319	{1.0, 0.0},
				320	{1.0, -0.0},
				321	{MATH_E, 1.0}
				322	};
				323	static const TableEntry tbl_exp2[] = {
				324	{1.0, 0.0},
				325	{1.0, -0.0},
				326	{2.0, 1.0}
				327	};
				328	static const TableEntry tbl_exp10[] = {
				329	{1.0, 0.0},
				330	{1.0, -0.0},
				331	{10.0, 1.0}
				332	};
				333	static const TableEntry tbl_expm1[] = {
				334	{0.0, 0.0},
				335	{-0.0, -0.0}
				336	};
				337	static const TableEntry tbl_log[] = {
				338	{0.0, 1.0},
				339	{1.0, MATH_E}
				340	};
				341	static const TableEntry tbl_log2[] = {
				342	{0.0, 1.0},
				343	{1.0, 2.0}
				344	};
				345	static const TableEntry tbl_log10[] = {
				346	{0.0, 1.0},
				347	{1.0, 10.0}
				348	};
				349	static const TableEntry tbl_rsqrt[] = {
				350	{1.0, 1.0},
				351	{1.0/MATH_SQRT2, 2.0}
				352	};
				353	static const TableEntry tbl_sin[] = {
				354	{0.0, 0.0},
				355	{-0.0, -0.0}
				356	};
				357	static const TableEntry tbl_sinh[] = {
				358	{0.0, 0.0},
				359	{-0.0, -0.0}
				360	};
				361	static const TableEntry tbl_sinpi[] = {
				362	{0.0, 0.0},
				363	{-0.0, -0.0}
				364	};
				365	static const TableEntry tbl_sqrt[] = {
				366	{0.0, 0.0},
				367	{1.0, 1.0},
				368	{MATH_SQRT2, 2.0}
				369	};
				370	static const TableEntry tbl_tan[] = {
				371	{0.0, 0.0},
				372	{-0.0, -0.0}
				373	};
				374	static const TableEntry tbl_tanh[] = {
				375	{0.0, 0.0},
				376	{-0.0, -0.0}
				377	};
				378	static const TableEntry tbl_tanpi[] = {
				379	{0.0, 0.0},
				380	{-0.0, -0.0}
				381	};
				382	static const TableEntry tbl_tgamma[] = {
				383	{1.0, 1.0},
				384	{1.0, 2.0},
				385	{2.0, 3.0},
				386	{6.0, 4.0}
				387	};
				388
				389	static bool HasNative(AMDGPULibFunc::EFuncId id) {
				390	switch(id) {
				391	case AMDGPULibFunc::EI_DIVIDE:
				392	case AMDGPULibFunc::EI_COS:
				393	case AMDGPULibFunc::EI_EXP:
				394	case AMDGPULibFunc::EI_EXP2:
				395	case AMDGPULibFunc::EI_EXP10:
				396	case AMDGPULibFunc::EI_LOG:
				397	case AMDGPULibFunc::EI_LOG2:
				398	case AMDGPULibFunc::EI_LOG10:
				399	case AMDGPULibFunc::EI_POWR:
				400	case AMDGPULibFunc::EI_RECIP:
				401	case AMDGPULibFunc::EI_RSQRT:
				402	case AMDGPULibFunc::EI_SIN:
				403	case AMDGPULibFunc::EI_SINCOS:
				404	case AMDGPULibFunc::EI_SQRT:
				405	case AMDGPULibFunc::EI_TAN:
				406	return true;
				407	default:;
				408	}
				409	return false;
				410	}
				411
				412	struct TableRef {
				413	size_t size;
				414	const TableEntry *table; // variable size: from 0 to (size - 1)
				415
				416	TableRef() : size(0), table(nullptr) {}
				417
				418	template <size_t N>
				419	TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
				420	};
				421
				422	static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
				423	switch(id) {
				424	case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos);
				425	case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh);
				426	case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi);
				427	case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin);
				428	case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh);
				429	case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi);
				430	case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan);
				431	case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh);
				432	case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi);
				433	case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt);
				434	case AMDGPULibFunc::EI_NCOS:
				435	case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos);
				436	case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh);
				437	case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi);
				438	case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc);
				439	case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf);
				440	case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp);
				441	case AMDGPULibFunc::EI_NEXP2:
				442	case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2);
				443	case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10);
				444	case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1);
				445	case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log);
				446	case AMDGPULibFunc::EI_NLOG2:
				447	case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2);
				448	case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10);
				449	case AMDGPULibFunc::EI_NRSQRT:
				450	case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt);
				451	case AMDGPULibFunc::EI_NSIN:
				452	case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin);
				453	case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh);
				454	case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi);
				455	case AMDGPULibFunc::EI_NSQRT:
				456	case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt);
				457	case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan);
				458	case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh);
				459	case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi);
				460	case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma);
				461	default:;
				462	}
				463	return TableRef();
				464	}
				465
				466	static inline int getVecSize(const AMDGPULibFunc& FInfo) {
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	467	return FInfo.getLeads()[0].VectorSize;
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	468	}
				469
				470	static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	471	return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	472	}
				473
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	474	Constant AMDGPULibCalls::getFunction(Module M, const FuncInfo& fInfo) {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	475	// If we are doing PreLinkOpt, the function is external. So it is safe to
				476	// use getOrInsertFunction() at this stage.
				477
				478	return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
				479	: AMDGPULibFunc::getFunction(M, fInfo);
				480	}
				481
				482	bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
				483	FuncInfo *FInfo) {
				484	return AMDGPULibFunc::parse(FMangledName, *FInfo);
				485	}
				486
				487	bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
				488	if (auto Op = dyn_cast<FPMathOperator>(CI))
Sanjay Patel	629c411	2017-11-06 16:27:15 +0000	[diff] [blame]	489	if (Op->isFast())
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	490	return true;
				491	const Function *F = CI->getParent()->getParent();
				492	Attribute Attr = F->getFnAttribute("unsafe-fp-math");
				493	return Attr.getValueAsString() == "true";
				494	}
				495
				496	bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
				497	return AllNative \|\|
				498	std::find(UseNative.begin(), UseNative.end(), F) != UseNative.end();
				499	}
				500
				501	void AMDGPULibCalls::initNativeFuncs() {
				502	AllNative = useNativeFunc("all") \|\|
				503	(UseNative.getNumOccurrences() && UseNative.size() == 1 &&
				504	UseNative.begin()->empty());
				505	}
				506
				507	bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
				508	bool native_sin = useNativeFunc("sin");
				509	bool native_cos = useNativeFunc("cos");
				510
				511	if (native_sin && native_cos) {
				512	Module *M = aCI->getModule();
				513	Value *opr0 = aCI->getArgOperand(0);
				514
				515	AMDGPULibFunc nf;
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	516	nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
				517	nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	518
				519	nf.setPrefix(AMDGPULibFunc::NATIVE);
				520	nf.setId(AMDGPULibFunc::EI_SIN);
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	521	Constant *sinExpr = getFunction(M, nf);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	522
				523	nf.setPrefix(AMDGPULibFunc::NATIVE);
				524	nf.setId(AMDGPULibFunc::EI_COS);
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	525	Constant *cosExpr = getFunction(M, nf);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	526	if (sinExpr && cosExpr) {
				527	Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
				528	Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
				529	new StoreInst(cosval, aCI->getArgOperand(1), aCI);
				530
				531	DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
				532	<< " with native version of sin/cos");
				533
				534	replaceCall(sinval);
				535	return true;
				536	}
				537	}
				538	return false;
				539	}
				540
				541	bool AMDGPULibCalls::useNative(CallInst *aCI) {
				542	CI = aCI;
				543	Function *Callee = aCI->getCalledFunction();
				544
				545	FuncInfo FInfo;
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	546	if (!parseFunctionName(Callee->getName(), &FInfo) \|\| !FInfo.isMangled() \|\|
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	547	FInfo.getPrefix() != AMDGPULibFunc::NOPFX \|\|
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	548	getArgType(FInfo) == AMDGPULibFunc::F64 \|\| !HasNative(FInfo.getId()) \|\|
				549	!(AllNative \|\| useNativeFunc(FInfo.getName()))) {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	550	return false;
				551	}
				552
				553	if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
				554	return sincosUseNative(aCI, FInfo);
				555
				556	FInfo.setPrefix(AMDGPULibFunc::NATIVE);
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	557	Constant *F = getFunction(aCI->getModule(), FInfo);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	558	if (!F)
				559	return false;
				560
				561	aCI->setCalledFunction(F);
				562	DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
				563	<< " with native version");
				564	return true;
				565	}
				566
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	567	// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
				568	// builtin, with appended type size and alignment arguments, where 2 or 4
				569	// indicates the original number of arguments. The library has optimized version
				570	// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
				571	// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
				572	// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
				573	// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
				574	bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
				575	FuncInfo &FInfo) {
				576	auto *Callee = CI->getCalledFunction();
				577	if (!Callee->isDeclaration())
				578	return false;
				579
				580	assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
				581	auto *M = Callee->getParent();
				582	auto &Ctx = M->getContext();
				583	std::string Name = Callee->getName();
				584	auto NumArg = CI->getNumArgOperands();
				585	if (NumArg != 4 && NumArg != 6)
				586	return false;
				587	auto *PacketSize = CI->getArgOperand(NumArg - 2);
				588	auto *PacketAlign = CI->getArgOperand(NumArg - 1);
				589	if (!isa<ConstantInt>(PacketSize) \|\| !isa<ConstantInt>(PacketAlign))
				590	return false;
				591	unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
				592	unsigned Align = cast<ConstantInt>(PacketAlign)->getZExtValue();
				593	if (Size != Align \|\| !isPowerOf2_32(Size))
				594	return false;
				595
				596	Type *PtrElemTy;
				597	if (Size <= 8)
				598	PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
				599	else
				600	PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8);
				601	unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
				602	auto PtrArg = CI->getArgOperand(PtrArgLoc);
				603	unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
				604	auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
				605
				606	SmallVector<llvm::Type *, 6> ArgTys;
				607	for (unsigned I = 0; I != PtrArgLoc; ++I)
				608	ArgTys.push_back(CI->getArgOperand(I)->getType());
				609	ArgTys.push_back(PtrTy);
				610
				611	Name = Name + "_" + std::to_string(Size);
				612	auto *FTy = FunctionType::get(Callee->getReturnType(),
				613	ArrayRef<Type *>(ArgTys), false);
				614	AMDGPULibFunc NewLibFunc(Name, FTy);
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	615	auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	616	if (!F)
				617	return false;
				618
				619	auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
				620	SmallVector<Value *, 6> Args;
				621	for (unsigned I = 0; I != PtrArgLoc; ++I)
				622	Args.push_back(CI->getArgOperand(I));
				623	Args.push_back(BCast);
				624
				625	auto *NCI = B.CreateCall(F, Args);
				626	NCI->setAttributes(CI->getAttributes());
				627	CI->replaceAllUsesWith(NCI);
				628	CI->dropAllReferences();
				629	CI->eraseFromParent();
				630
				631	return true;
				632	}
				633
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	634	// This function returns false if no change; return true otherwise.
				635	bool AMDGPULibCalls::fold(CallInst CI, AliasAnalysis AA) {
				636	this->CI = CI;
				637	Function *Callee = CI->getCalledFunction();
				638
				639	// Ignore indirect calls.
				640	if (Callee == 0) return false;
				641
				642	FuncInfo FInfo;
				643	if (!parseFunctionName(Callee->getName(), &FInfo))
				644	return false;
				645
				646	// Further check the number of arguments to see if they match.
				647	if (CI->getNumArgOperands() != FInfo.getNumArgs())
				648	return false;
				649
				650	BasicBlock *BB = CI->getParent();
				651	LLVMContext &Context = CI->getParent()->getContext();
				652	IRBuilder<> B(Context);
				653
				654	// Set the builder to the instruction after the call.
				655	B.SetInsertPoint(BB, CI->getIterator());
				656
				657	// Copy fast flags from the original call.
				658	if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
				659	B.setFastMathFlags(FPOp->getFastMathFlags());
				660
				661	if (TDOFold(CI, FInfo))
				662	return true;
				663
				664	// Under unsafe-math, evaluate calls if possible.
				665	// According to Brian Sumner, we can do this for all f32 function calls
				666	// using host's double function calls.
				667	if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
				668	return true;
				669
				670	// Specilized optimizations for each function call
				671	switch (FInfo.getId()) {
				672	case AMDGPULibFunc::EI_RECIP:
				673	// skip vector function
				674	assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE \|\|
				675	FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
				676	"recip must be an either native or half function");
				677	return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
				678
				679	case AMDGPULibFunc::EI_DIVIDE:
				680	// skip vector function
				681	assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE \|\|
				682	FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
				683	"divide must be an either native or half function");
				684	return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
				685
				686	case AMDGPULibFunc::EI_POW:
				687	case AMDGPULibFunc::EI_POWR:
				688	case AMDGPULibFunc::EI_POWN:
				689	return fold_pow(CI, B, FInfo);
				690
				691	case AMDGPULibFunc::EI_ROOTN:
				692	// skip vector function
				693	return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
				694
				695	case AMDGPULibFunc::EI_FMA:
				696	case AMDGPULibFunc::EI_MAD:
				697	case AMDGPULibFunc::EI_NFMA:
				698	// skip vector function
				699	return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
				700
				701	case AMDGPULibFunc::EI_SQRT:
				702	return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
				703	case AMDGPULibFunc::EI_COS:
				704	case AMDGPULibFunc::EI_SIN:
				705	if ((getArgType(FInfo) == AMDGPULibFunc::F32 \|\|
				706	getArgType(FInfo) == AMDGPULibFunc::F64)
				707	&& (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
				708	return fold_sincos(CI, B, AA);
				709
				710	break;
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	711	case AMDGPULibFunc::EI_READ_PIPE_2:
				712	case AMDGPULibFunc::EI_READ_PIPE_4:
				713	case AMDGPULibFunc::EI_WRITE_PIPE_2:
				714	case AMDGPULibFunc::EI_WRITE_PIPE_4:
				715	return fold_read_write_pipe(CI, B, FInfo);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	716
				717	default:
				718	break;
				719	}
				720
				721	return false;
				722	}
				723
				724	bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
				725	// Table-Driven optimization
				726	const TableRef tr = getOptTable(FInfo.getId());
				727	if (tr.size==0)
				728	return false;
				729
				730	int const sz = (int)tr.size;
				731	const TableEntry * const ftbl = tr.table;
				732	Value *opr0 = CI->getArgOperand(0);
				733
				734	if (getVecSize(FInfo) > 1) {
				735	if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
				736	SmallVector<double, 0> DVal;
				737	for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
				738	ConstantFP *eltval = dyn_cast<ConstantFP>(
				739	CV->getElementAsConstant((unsigned)eltNo));
				740	assert(eltval && "Non-FP arguments in math function!");
				741	bool found = false;
				742	for (int i=0; i < sz; ++i) {
				743	if (eltval->isExactlyValue(ftbl[i].input)) {
				744	DVal.push_back(ftbl[i].result);
				745	found = true;
				746	break;
				747	}
				748	}
				749	if (!found) {
				750	// This vector constants not handled yet.
				751	return false;
				752	}
				753	}
				754	LLVMContext &context = CI->getParent()->getParent()->getContext();
				755	Constant *nval;
				756	if (getArgType(FInfo) == AMDGPULibFunc::F32) {
				757	SmallVector<float, 0> FVal;
				758	for (unsigned i = 0; i < DVal.size(); ++i) {
				759	FVal.push_back((float)DVal[i]);
				760	}
				761	ArrayRef<float> tmp(FVal);
				762	nval = ConstantDataVector::get(context, tmp);
				763	} else { // F64
				764	ArrayRef<double> tmp(DVal);
				765	nval = ConstantDataVector::get(context, tmp);
				766	}
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	767	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << nval << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	768	replaceCall(nval);
				769	return true;
				770	}
				771	} else {
				772	// Scalar version
				773	if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
				774	for (int i = 0; i < sz; ++i) {
				775	if (CF->isExactlyValue(ftbl[i].input)) {
				776	Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	777	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << nval << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	778	replaceCall(nval);
				779	return true;
				780	}
				781	}
				782	}
				783	}
				784
				785	return false;
				786	}
				787
				788	bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
				789	Module *M = CI->getModule();
				790	if (getArgType(FInfo) != AMDGPULibFunc::F32 \|\|
				791	FInfo.getPrefix() != AMDGPULibFunc::NOPFX \|\|
				792	!HasNative(FInfo.getId()))
				793	return false;
				794
				795	AMDGPULibFunc nf = FInfo;
				796	nf.setPrefix(AMDGPULibFunc::NATIVE);
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	797	if (Constant *FPExpr = getFunction(M, nf)) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	798	LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	799
				800	CI->setCalledFunction(FPExpr);
				801
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	802	LLVM_DEBUG(dbgs() << *CI << '\n');
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	803
				804	return true;
				805	}
				806	return false;
				807	}
				808
				809	// [native_]half_recip(c) ==> 1.0/c
				810	bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
				811	const FuncInfo &FInfo) {
				812	Value *opr0 = CI->getArgOperand(0);
				813	if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
				814	// Just create a normal div. Later, InstCombine will be able
				815	// to compute the divide into a constant (avoid check float infinity
				816	// or subnormal at this point).
				817	Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
				818	opr0,
				819	"recip2div");
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	820	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << nval << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	821	replaceCall(nval);
				822	return true;
				823	}
				824	return false;
				825	}
				826
				827	// [native_]half_divide(x, c) ==> x/c
				828	bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
				829	const FuncInfo &FInfo) {
				830	Value *opr0 = CI->getArgOperand(0);
				831	Value *opr1 = CI->getArgOperand(1);
				832	ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
				833	ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
				834
				835	if ((CF0 && CF1) \|\| // both are constants
				836	(CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
				837	// CF1 is constant && f32 divide
				838	{
				839	Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
				840	opr1, "__div2recip");
				841	Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
				842	replaceCall(nval);
				843	return true;
				844	}
				845	return false;
				846	}
				847
				848	namespace llvm {
				849	static double log2(double V) {
				850	#if _XOPEN_SOURCE >= 600 \|\| _ISOC99_SOURCE \|\| _POSIX_C_SOURCE >= 200112L
				851	return ::log2(V);
				852	#else
				853	return log(V) / 0.693147180559945309417;
				854	#endif
				855	}
				856	}
				857
				858	bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
				859	const FuncInfo &FInfo) {
				860	assert((FInfo.getId() == AMDGPULibFunc::EI_POW \|\|
				861	FInfo.getId() == AMDGPULibFunc::EI_POWR \|\|
				862	FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
				863	"fold_pow: encounter a wrong function call");
				864
				865	Value opr0, opr1;
				866	ConstantFP *CF;
				867	ConstantInt *CINT;
				868	ConstantAggregateZero *CZero;
				869	Type *eltType;
				870
				871	opr0 = CI->getArgOperand(0);
				872	opr1 = CI->getArgOperand(1);
				873	CZero = dyn_cast<ConstantAggregateZero>(opr1);
				874	if (getVecSize(FInfo) == 1) {
				875	eltType = opr0->getType();
				876	CF = dyn_cast<ConstantFP>(opr1);
				877	CINT = dyn_cast<ConstantInt>(opr1);
				878	} else {
				879	VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
				880	assert(VTy && "Oprand of vector function should be of vectortype");
				881	eltType = VTy->getElementType();
				882	ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
				883
				884	// Now, only Handle vector const whose elements have the same value.
				885	CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
				886	CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
				887	}
				888
				889	// No unsafe math , no constant argument, do nothing
				890	if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
				891	return false;
				892
				893	// 0x1111111 means that we don't do anything for this call.
				894	int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
				895
				896	if ((CF && CF->isZero()) \|\| (CINT && ci_opr1 == 0) \|\| CZero) {
				897	// pow/powr/pown(x, 0) == 1
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	898	LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	899	Constant *cnval = ConstantFP::get(eltType, 1.0);
				900	if (getVecSize(FInfo) > 1) {
				901	cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
				902	}
				903	replaceCall(cnval);
				904	return true;
				905	}
				906	if ((CF && CF->isExactlyValue(1.0)) \|\| (CINT && ci_opr1 == 1)) {
				907	// pow/powr/pown(x, 1.0) = x
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	908	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr0 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	909	replaceCall(opr0);
				910	return true;
				911	}
				912	if ((CF && CF->isExactlyValue(2.0)) \|\| (CINT && ci_opr1 == 2)) {
				913	// pow/powr/pown(x, 2.0) = x*x
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	914	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr0 << " * " << *opr0
				915	<< "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	916	Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
				917	replaceCall(nval);
				918	return true;
				919	}
				920	if ((CF && CF->isExactlyValue(-1.0)) \|\| (CINT && ci_opr1 == -1)) {
				921	// pow/powr/pown(x, -1.0) = 1.0/x
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	922	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> 1 / " << opr0 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	923	Constant *cnval = ConstantFP::get(eltType, 1.0);
				924	if (getVecSize(FInfo) > 1) {
				925	cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
				926	}
				927	Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
				928	replaceCall(nval);
				929	return true;
				930	}
				931
				932	Module *M = CI->getModule();
				933	if (CF && (CF->isExactlyValue(0.5) \|\| CF->isExactlyValue(-0.5))) {
				934	// pow[r](x, [-]0.5) = sqrt(x)
				935	bool issqrt = CF->isExactlyValue(0.5);
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	936	if (Constant *FPExpr = getFunction(M,
				937	AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
				938	: AMDGPULibFunc::EI_RSQRT, FInfo))) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	939	LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
				940	<< FInfo.getName().c_str() << "(" << *opr0 << ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	941	Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
				942	: "__pow2rsqrt");
				943	replaceCall(nval);
				944	return true;
				945	}
				946	}
				947
				948	if (!isUnsafeMath(CI))
				949	return false;
				950
				951	// Unsafe Math optimization
				952
				953	// Remember that ci_opr1 is set if opr1 is integral
				954	if (CF) {
				955	double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
				956	? (double)CF->getValueAPF().convertToFloat()
				957	: CF->getValueAPF().convertToDouble();
				958	int ival = (int)dval;
				959	if ((double)ival == dval) {
				960	ci_opr1 = ival;
				961	} else
				962	ci_opr1 = 0x11111111;
				963	}
				964
				965	// pow/powr/pown(x, c) = [1/](xx..x); where
				966	// trunc(c) == c && the number of x == c && \|c\| <= 12
				967	unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
				968	if (abs_opr1 <= 12) {
				969	Constant *cnval;
				970	Value *nval;
				971	if (abs_opr1 == 0) {
				972	cnval = ConstantFP::get(eltType, 1.0);
				973	if (getVecSize(FInfo) > 1) {
				974	cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
				975	}
				976	nval = cnval;
				977	} else {
				978	Value *valx2 = nullptr;
				979	nval = nullptr;
				980	while (abs_opr1 > 0) {
				981	valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
				982	if (abs_opr1 & 1) {
				983	nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
				984	}
				985	abs_opr1 >>= 1;
				986	}
				987	}
				988
				989	if (ci_opr1 < 0) {
				990	cnval = ConstantFP::get(eltType, 1.0);
				991	if (getVecSize(FInfo) > 1) {
				992	cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
				993	}
				994	nval = B.CreateFDiv(cnval, nval, "__1powprod");
				995	}
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	996	LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
				997	<< ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
				998	<< ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	999	replaceCall(nval);
				1000	return true;
				1001	}
				1002
				1003	// powr ---> exp2(y * log2(x))
				1004	// pown/pow ---> powr(fabs(x), y) \| (x & ((int)y << 31))
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	1005	Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2,
				1006	FInfo));
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1007	if (!ExpExpr)
				1008	return false;
				1009
				1010	bool needlog = false;
				1011	bool needabs = false;
				1012	bool needcopysign = false;
				1013	Constant *cnval = nullptr;
				1014	if (getVecSize(FInfo) == 1) {
				1015	CF = dyn_cast<ConstantFP>(opr0);
				1016
				1017	if (CF) {
				1018	double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
				1019	? (double)CF->getValueAPF().convertToFloat()
				1020	: CF->getValueAPF().convertToDouble();
				1021
				1022	V = log2(std::abs(V));
				1023	cnval = ConstantFP::get(eltType, V);
				1024	needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
				1025	CF->isNegative();
				1026	} else {
				1027	needlog = true;
				1028	needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
				1029	(!CF \|\| CF->isNegative());
				1030	}
				1031	} else {
				1032	ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
				1033
				1034	if (!CDV) {
				1035	needlog = true;
				1036	needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
				1037	} else {
				1038	assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
				1039	"Wrong vector size detected");
				1040
				1041	SmallVector<double, 0> DVal;
				1042	for (int i=0; i < getVecSize(FInfo); ++i) {
				1043	double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
				1044	? (double)CDV->getElementAsFloat(i)
				1045	: CDV->getElementAsDouble(i);
				1046	if (V < 0.0) needcopysign = true;
				1047	V = log2(std::abs(V));
				1048	DVal.push_back(V);
				1049	}
				1050	if (getArgType(FInfo) == AMDGPULibFunc::F32) {
				1051	SmallVector<float, 0> FVal;
				1052	for (unsigned i=0; i < DVal.size(); ++i) {
				1053	FVal.push_back((float)DVal[i]);
				1054	}
				1055	ArrayRef<float> tmp(FVal);
				1056	cnval = ConstantDataVector::get(M->getContext(), tmp);
				1057	} else {
				1058	ArrayRef<double> tmp(DVal);
				1059	cnval = ConstantDataVector::get(M->getContext(), tmp);
				1060	}
				1061	}
				1062	}
				1063
				1064	if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
				1065	// We cannot handle corner cases for a general pow() function, give up
				1066	// unless y is a constant integral value. Then proceed as if it were pown.
				1067	if (getVecSize(FInfo) == 1) {
				1068	if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
				1069	double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
				1070	? (double)CF->getValueAPF().convertToFloat()
				1071	: CF->getValueAPF().convertToDouble();
				1072	if (y != (double)(int64_t)y)
				1073	return false;
				1074	} else
				1075	return false;
				1076	} else {
				1077	if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
				1078	for (int i=0; i < getVecSize(FInfo); ++i) {
				1079	double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
				1080	? (double)CDV->getElementAsFloat(i)
				1081	: CDV->getElementAsDouble(i);
				1082	if (y != (double)(int64_t)y)
				1083	return false;
				1084	}
				1085	} else
				1086	return false;
				1087	}
				1088	}
				1089
				1090	Value *nval;
				1091	if (needabs) {
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	1092	Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS,
				1093	FInfo));
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1094	if (!AbsExpr)
				1095	return false;
				1096	nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
				1097	} else {
				1098	nval = cnval ? cnval : opr0;
				1099	}
				1100	if (needlog) {
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	1101	Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2,
				1102	FInfo));
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1103	if (!LogExpr)
				1104	return false;
				1105	nval = CreateCallEx(B,LogExpr, nval, "__log2");
				1106	}
				1107
				1108	if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
				1109	// convert int(32) to fp(f32 or f64)
				1110	opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
				1111	}
				1112	nval = B.CreateFMul(opr1, nval, "__ylogx");
				1113	nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
				1114
				1115	if (needcopysign) {
				1116	Value *opr_n;
				1117	Type* rTy = opr0->getType();
				1118	Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
				1119	Type *nTy = nTyS;
				1120	if (const VectorType *vTy = dyn_cast<VectorType>(rTy))
				1121	nTy = VectorType::get(nTyS, vTy->getNumElements());
				1122	unsigned size = nTy->getScalarSizeInBits();
				1123	opr_n = CI->getArgOperand(1);
				1124	if (opr_n->getType()->isIntegerTy())
				1125	opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
				1126	else
				1127	opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
				1128
				1129	Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
				1130	sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
				1131	nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
				1132	nval = B.CreateBitCast(nval, opr0->getType());
				1133	}
				1134
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1135	LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
				1136	<< "exp2(" << opr1 << " log2(" << *opr0 << "))\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1137	replaceCall(nval);
				1138
				1139	return true;
				1140	}
				1141
				1142	bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
				1143	const FuncInfo &FInfo) {
				1144	Value *opr0 = CI->getArgOperand(0);
				1145	Value *opr1 = CI->getArgOperand(1);
				1146
				1147	ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
				1148	if (!CINT) {
				1149	return false;
				1150	}
				1151	int ci_opr1 = (int)CINT->getSExtValue();
				1152	if (ci_opr1 == 1) { // rootn(x, 1) = x
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1153	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr0 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1154	replaceCall(opr0);
				1155	return true;
				1156	}
				1157	if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
				1158	std::vector<const Type*> ParamsTys;
				1159	ParamsTys.push_back(opr0->getType());
				1160	Module *M = CI->getModule();
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	1161	if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT,
				1162	FInfo))) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1163	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> sqrt(" << opr0 << ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1164	Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
				1165	replaceCall(nval);
				1166	return true;
				1167	}
				1168	} else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
				1169	Module *M = CI->getModule();
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	1170	if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT,
				1171	FInfo))) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1172	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> cbrt(" << opr0 << ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1173	Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
				1174	replaceCall(nval);
				1175	return true;
				1176	}
				1177	} else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1178	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> 1.0 / " << opr0 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1179	Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
				1180	opr0,
				1181	"__rootn2div");
				1182	replaceCall(nval);
				1183	return true;
				1184	} else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
				1185	std::vector<const Type*> ParamsTys;
				1186	ParamsTys.push_back(opr0->getType());
				1187	Module *M = CI->getModule();
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	1188	if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT,
				1189	FInfo))) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1190	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> rsqrt(" << opr0
				1191	<< ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1192	Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
				1193	replaceCall(nval);
				1194	return true;
				1195	}
				1196	}
				1197	return false;
				1198	}
				1199
				1200	bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
				1201	const FuncInfo &FInfo) {
				1202	Value *opr0 = CI->getArgOperand(0);
				1203	Value *opr1 = CI->getArgOperand(1);
				1204	Value *opr2 = CI->getArgOperand(2);
				1205
				1206	ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
				1207	ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
				1208	if ((CF0 && CF0->isZero()) \|\| (CF1 && CF1->isZero())) {
				1209	// fma/mad(a, b, c) = c if a=0 \|\| b=0
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1210	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr2 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1211	replaceCall(opr2);
				1212	return true;
				1213	}
				1214	if (CF0 && CF0->isExactlyValue(1.0f)) {
				1215	// fma/mad(a, b, c) = b+c if a=1
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1216	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr1 << " + " << *opr2
				1217	<< "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1218	Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
				1219	replaceCall(nval);
				1220	return true;
				1221	}
				1222	if (CF1 && CF1->isExactlyValue(1.0f)) {
				1223	// fma/mad(a, b, c) = a+c if b=1
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1224	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr0 << " + " << *opr2
				1225	<< "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1226	Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
				1227	replaceCall(nval);
				1228	return true;
				1229	}
				1230	if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
				1231	if (CF->isZero()) {
				1232	// fma/mad(a, b, c) = a*b if c=0
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1233	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr0 << " * "
				1234	<< *opr1 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1235	Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
				1236	replaceCall(nval);
				1237	return true;
				1238	}
				1239	}
				1240
				1241	return false;
				1242	}
				1243
				1244	// Get a scalar native builtin signle argument FP function
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	1245	Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) {
Stanislav Mekhanoshin	312c557	2017-08-28 18:00:08 +0000	[diff] [blame]	1246	if (getArgType(FInfo) == AMDGPULibFunc::F64 \|\| !HasNative(FInfo.getId()))
				1247	return nullptr;
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1248	FuncInfo nf = FInfo;
				1249	nf.setPrefix(AMDGPULibFunc::NATIVE);
				1250	return getFunction(M, nf);
				1251	}
				1252
				1253	// fold sqrt -> native_sqrt (x)
				1254	bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
				1255	const FuncInfo &FInfo) {
Stanislav Mekhanoshin	312c557	2017-08-28 18:00:08 +0000	[diff] [blame]	1256	if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1257	(FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	1258	if (Constant *FPExpr = getNativeFunction(
				1259	CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1260	Value *opr0 = CI->getArgOperand(0);
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1261	LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
				1262	<< "sqrt(" << *opr0 << ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1263	Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
				1264	replaceCall(nval);
				1265	return true;
				1266	}
				1267	}
				1268	return false;
				1269	}
				1270
				1271	// fold sin, cos -> sincos.
				1272	bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
				1273	AliasAnalysis *AA) {
				1274	AMDGPULibFunc fInfo;
				1275	if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
				1276	return false;
				1277
				1278	assert(fInfo.getId() == AMDGPULibFunc::EI_SIN \|\|
				1279	fInfo.getId() == AMDGPULibFunc::EI_COS);
				1280	bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
				1281
				1282	Value *CArgVal = CI->getArgOperand(0);
				1283	BasicBlock * const CBB = CI->getParent();
				1284
				1285	int const MaxScan = 30;
				1286
				1287	{ // fold in load value.
				1288	LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
				1289	if (LI && LI->getParent() == CBB) {
				1290	BasicBlock::iterator BBI = LI->getIterator();
				1291	Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
				1292	if (AvailableVal) {
				1293	CArgVal->replaceAllUsesWith(AvailableVal);
				1294	if (CArgVal->getNumUses() == 0)
				1295	LI->eraseFromParent();
				1296	CArgVal = CI->getArgOperand(0);
				1297	}
				1298	}
				1299	}
				1300
				1301	Module *M = CI->getModule();
				1302	fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
				1303	std::string const PairName = fInfo.mangle();
				1304
				1305	CallInst *UI = nullptr;
				1306	for (User* U : CArgVal->users()) {
				1307	CallInst *XI = dyn_cast_or_null<CallInst>(U);
				1308	if (!XI \|\| XI == CI \|\| XI->getParent() != CBB)
				1309	continue;
				1310
				1311	Function *UCallee = XI->getCalledFunction();
				1312	if (!UCallee \|\| !UCallee->getName().equals(PairName))
				1313	continue;
				1314
				1315	BasicBlock::iterator BBI = CI->getIterator();
				1316	if (BBI == CI->getParent()->begin())
				1317	break;
				1318	--BBI;
				1319	for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
				1320	if (cast<Instruction>(BBI) == XI) {
				1321	UI = XI;
				1322	break;
				1323	}
				1324	}
				1325	if (UI) break;
				1326	}
				1327
				1328	if (!UI) return false;
				1329
				1330	// Merge the sin and cos.
				1331
				1332	// for OpenCL 2.0 we have only generic implementation of sincos
				1333	// function.
				1334	AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	1335	nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	1336	Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1337	if (!Fsincos) return false;
				1338
				1339	BasicBlock::iterator ItOld = B.GetInsertPoint();
				1340	AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
				1341	B.SetInsertPoint(UI);
				1342
				1343	Value *P = Alloc;
James Y Knight	fadf250	2019-01-31 21:51:58 +0000	[diff] [blame]	1344	Type *PTy = Fsincos->getFunctionType()->getParamType(1);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1345	// The allocaInst allocates the memory in private address space. This need
				1346	// to be bitcasted to point to the address space of cos pointer type.
				1347	// In OpenCL 2.0 this is generic, while in 1.2 that is private.
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	1348	if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1349	P = B.CreateAddrSpaceCast(Alloc, PTy);
				1350	CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
				1351
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1352	LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << CI << ", " << UI << ") with "
				1353	<< *Call << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1354
				1355	if (!isSin) { // CI->cos, UI->sin
				1356	B.SetInsertPoint(&*ItOld);
				1357	UI->replaceAllUsesWith(&*Call);
				1358	Instruction *Reload = B.CreateLoad(Alloc);
				1359	CI->replaceAllUsesWith(Reload);
				1360	UI->eraseFromParent();
				1361	CI->eraseFromParent();
				1362	} else { // CI->sin, UI->cos
				1363	Instruction *Reload = B.CreateLoad(Alloc);
				1364	UI->replaceAllUsesWith(Reload);
				1365	CI->replaceAllUsesWith(Call);
				1366	UI->eraseFromParent();
				1367	CI->eraseFromParent();
				1368	}
				1369	return true;
				1370	}
				1371
				1372	// Get insertion point at entry.
				1373	BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
				1374	Function * Func = UI->getParent()->getParent();
				1375	BasicBlock * BB = &Func->getEntryBlock();
				1376	assert(BB && "Entry block not found!");
				1377	BasicBlock::iterator ItNew = BB->begin();
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1378	return ItNew;
				1379	}
				1380
				1381	// Insert a AllocsInst at the beginning of function entry block.
				1382	AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
				1383	const char *prefix) {
				1384	BasicBlock::iterator ItNew = getEntryIns(UI);
				1385	Function *UCallee = UI->getCalledFunction();
				1386	Type *RetType = UCallee->getReturnType();
				1387	B.SetInsertPoint(&*ItNew);
				1388	AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
				1389	std::string(prefix) + UI->getName());
				1390	Alloc->setAlignment(UCallee->getParent()->getDataLayout()
				1391	.getTypeAllocSize(RetType));
				1392	return Alloc;
				1393	}
				1394
				1395	bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
				1396	double& Res0, double& Res1,
				1397	Constant copr0, Constant copr1,
				1398	Constant *copr2) {
				1399	// By default, opr0/opr1/opr3 holds values of float/double type.
				1400	// If they are not float/double, each function has to its
				1401	// operand separately.
				1402	double opr0=0.0, opr1=0.0, opr2=0.0;
				1403	ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
				1404	ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
				1405	ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
				1406	if (fpopr0) {
				1407	opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
				1408	? fpopr0->getValueAPF().convertToDouble()
				1409	: (double)fpopr0->getValueAPF().convertToFloat();
				1410	}
				1411
				1412	if (fpopr1) {
				1413	opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
				1414	? fpopr1->getValueAPF().convertToDouble()
				1415	: (double)fpopr1->getValueAPF().convertToFloat();
				1416	}
				1417
				1418	if (fpopr2) {
				1419	opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
				1420	? fpopr2->getValueAPF().convertToDouble()
				1421	: (double)fpopr2->getValueAPF().convertToFloat();
				1422	}
				1423
				1424	switch (FInfo.getId()) {
				1425	default : return false;
				1426
				1427	case AMDGPULibFunc::EI_ACOS:
				1428	Res0 = acos(opr0);
				1429	return true;
				1430
				1431	case AMDGPULibFunc::EI_ACOSH:
				1432	// acosh(x) == log(x + sqrt(x*x - 1))
				1433	Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
				1434	return true;
				1435
				1436	case AMDGPULibFunc::EI_ACOSPI:
				1437	Res0 = acos(opr0) / MATH_PI;
				1438	return true;
				1439
				1440	case AMDGPULibFunc::EI_ASIN:
				1441	Res0 = asin(opr0);
				1442	return true;
				1443
				1444	case AMDGPULibFunc::EI_ASINH:
				1445	// asinh(x) == log(x + sqrt(x*x + 1))
				1446	Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
				1447	return true;
				1448
				1449	case AMDGPULibFunc::EI_ASINPI:
				1450	Res0 = asin(opr0) / MATH_PI;
				1451	return true;
				1452
				1453	case AMDGPULibFunc::EI_ATAN:
				1454	Res0 = atan(opr0);
				1455	return true;
				1456
				1457	case AMDGPULibFunc::EI_ATANH:
				1458	// atanh(x) == (log(x+1) - log(x-1))/2;
				1459	Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
				1460	return true;
				1461
				1462	case AMDGPULibFunc::EI_ATANPI:
				1463	Res0 = atan(opr0) / MATH_PI;
				1464	return true;
				1465
				1466	case AMDGPULibFunc::EI_CBRT:
				1467	Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
				1468	return true;
				1469
				1470	case AMDGPULibFunc::EI_COS:
				1471	Res0 = cos(opr0);
				1472	return true;
				1473
				1474	case AMDGPULibFunc::EI_COSH:
				1475	Res0 = cosh(opr0);
				1476	return true;
				1477
				1478	case AMDGPULibFunc::EI_COSPI:
				1479	Res0 = cos(MATH_PI * opr0);
				1480	return true;
				1481
				1482	case AMDGPULibFunc::EI_EXP:
				1483	Res0 = exp(opr0);
				1484	return true;
				1485
				1486	case AMDGPULibFunc::EI_EXP2:
				1487	Res0 = pow(2.0, opr0);
				1488	return true;
				1489
				1490	case AMDGPULibFunc::EI_EXP10:
				1491	Res0 = pow(10.0, opr0);
				1492	return true;
				1493
				1494	case AMDGPULibFunc::EI_EXPM1:
				1495	Res0 = exp(opr0) - 1.0;
				1496	return true;
				1497
				1498	case AMDGPULibFunc::EI_LOG:
				1499	Res0 = log(opr0);
				1500	return true;
				1501
				1502	case AMDGPULibFunc::EI_LOG2:
				1503	Res0 = log(opr0) / log(2.0);
				1504	return true;
				1505
				1506	case AMDGPULibFunc::EI_LOG10:
				1507	Res0 = log(opr0) / log(10.0);
				1508	return true;
				1509
				1510	case AMDGPULibFunc::EI_RSQRT:
				1511	Res0 = 1.0 / sqrt(opr0);
				1512	return true;
				1513
				1514	case AMDGPULibFunc::EI_SIN:
				1515	Res0 = sin(opr0);
				1516	return true;
				1517
				1518	case AMDGPULibFunc::EI_SINH:
				1519	Res0 = sinh(opr0);
				1520	return true;
				1521
				1522	case AMDGPULibFunc::EI_SINPI:
				1523	Res0 = sin(MATH_PI * opr0);
				1524	return true;
				1525
				1526	case AMDGPULibFunc::EI_SQRT:
				1527	Res0 = sqrt(opr0);
				1528	return true;
				1529
				1530	case AMDGPULibFunc::EI_TAN:
				1531	Res0 = tan(opr0);
				1532	return true;
				1533
				1534	case AMDGPULibFunc::EI_TANH:
				1535	Res0 = tanh(opr0);
				1536	return true;
				1537
				1538	case AMDGPULibFunc::EI_TANPI:
				1539	Res0 = tan(MATH_PI * opr0);
				1540	return true;
				1541
				1542	case AMDGPULibFunc::EI_RECIP:
				1543	Res0 = 1.0 / opr0;
				1544	return true;
				1545
				1546	// two-arg functions
				1547	case AMDGPULibFunc::EI_DIVIDE:
				1548	Res0 = opr0 / opr1;
				1549	return true;
				1550
				1551	case AMDGPULibFunc::EI_POW:
				1552	case AMDGPULibFunc::EI_POWR:
				1553	Res0 = pow(opr0, opr1);
				1554	return true;
				1555
				1556	case AMDGPULibFunc::EI_POWN: {
				1557	if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
				1558	double val = (double)iopr1->getSExtValue();
				1559	Res0 = pow(opr0, val);
				1560	return true;
				1561	}
				1562	return false;
				1563	}
				1564
				1565	case AMDGPULibFunc::EI_ROOTN: {
				1566	if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
				1567	double val = (double)iopr1->getSExtValue();
				1568	Res0 = pow(opr0, 1.0 / val);
				1569	return true;
				1570	}
				1571	return false;
				1572	}
				1573
				1574	// with ptr arg
				1575	case AMDGPULibFunc::EI_SINCOS:
				1576	Res0 = sin(opr0);
				1577	Res1 = cos(opr0);
				1578	return true;
				1579
				1580	// three-arg functions
				1581	case AMDGPULibFunc::EI_FMA:
				1582	case AMDGPULibFunc::EI_MAD:
				1583	Res0 = opr0 * opr1 + opr2;
				1584	return true;
				1585	}
				1586
				1587	return false;
				1588	}
				1589
				1590	bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
				1591	int numArgs = (int)aCI->getNumArgOperands();
				1592	if (numArgs > 3)
				1593	return false;
				1594
				1595	Constant *copr0 = nullptr;
				1596	Constant *copr1 = nullptr;
				1597	Constant *copr2 = nullptr;
				1598	if (numArgs > 0) {
				1599	if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
				1600	return false;
				1601	}
				1602
				1603	if (numArgs > 1) {
				1604	if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
				1605	if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
				1606	return false;
				1607	}
				1608	}
				1609
				1610	if (numArgs > 2) {
				1611	if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
				1612	return false;
				1613	}
				1614
				1615	// At this point, all arguments to aCI are constants.
				1616
				1617	// max vector size is 16, and sincos will generate two results.
				1618	double DVal0[16], DVal1[16];
				1619	bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
				1620	if (getVecSize(FInfo) == 1) {
				1621	if (!evaluateScalarMathFunc(FInfo, DVal0[0],
				1622	DVal1[0], copr0, copr1, copr2)) {
				1623	return false;
				1624	}
				1625	} else {
				1626	ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
				1627	ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
				1628	ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
				1629	for (int i=0; i < getVecSize(FInfo); ++i) {
				1630	Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
				1631	Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
				1632	Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
				1633	if (!evaluateScalarMathFunc(FInfo, DVal0[i],
				1634	DVal1[i], celt0, celt1, celt2)) {
				1635	return false;
				1636	}
				1637	}
				1638	}
				1639
				1640	LLVMContext &context = CI->getParent()->getParent()->getContext();
				1641	Constant nval0, nval1;
				1642	if (getVecSize(FInfo) == 1) {
				1643	nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
				1644	if (hasTwoResults)
				1645	nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
				1646	} else {
				1647	if (getArgType(FInfo) == AMDGPULibFunc::F32) {
				1648	SmallVector <float, 0> FVal0, FVal1;
				1649	for (int i=0; i < getVecSize(FInfo); ++i)
				1650	FVal0.push_back((float)DVal0[i]);
				1651	ArrayRef<float> tmp0(FVal0);
				1652	nval0 = ConstantDataVector::get(context, tmp0);
				1653	if (hasTwoResults) {
				1654	for (int i=0; i < getVecSize(FInfo); ++i)
				1655	FVal1.push_back((float)DVal1[i]);
				1656	ArrayRef<float> tmp1(FVal1);
				1657	nval1 = ConstantDataVector::get(context, tmp1);
				1658	}
				1659	} else {
				1660	ArrayRef<double> tmp0(DVal0);
				1661	nval0 = ConstantDataVector::get(context, tmp0);
				1662	if (hasTwoResults) {
				1663	ArrayRef<double> tmp1(DVal1);
				1664	nval1 = ConstantDataVector::get(context, tmp1);
				1665	}
				1666	}
				1667	}
				1668
				1669	if (hasTwoResults) {
				1670	// sincos
				1671	assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
				1672	"math function with ptr arg not supported yet");
				1673	new StoreInst(nval1, aCI->getArgOperand(1), aCI);
				1674	}
				1675
				1676	replaceCall(nval0);
				1677	return true;
				1678	}
				1679
				1680	// Public interface to the Simplify LibCalls pass.
Stanislav Mekhanoshin	1d8cf2b	2017-09-29 23:40:19 +0000	[diff] [blame]	1681	FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
				1682	return new AMDGPUSimplifyLibCalls(Opt);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1683	}
				1684
				1685	FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
				1686	return new AMDGPUUseNativeCalls();
				1687	}
				1688
Stanislav Mekhanoshin	1d8cf2b	2017-09-29 23:40:19 +0000	[diff] [blame]	1689	static bool setFastFlags(Function &F, const TargetOptions &Options) {
				1690	AttrBuilder B;
				1691
				1692	if (Options.UnsafeFPMath \|\| Options.NoInfsFPMath)
				1693	B.addAttribute("no-infs-fp-math", "true");
				1694	if (Options.UnsafeFPMath \|\| Options.NoNaNsFPMath)
				1695	B.addAttribute("no-nans-fp-math", "true");
				1696	if (Options.UnsafeFPMath) {
				1697	B.addAttribute("less-precise-fpmad", "true");
				1698	B.addAttribute("unsafe-fp-math", "true");
				1699	}
				1700
				1701	if (!B.hasAttributes())
				1702	return false;
				1703
				1704	F.addAttributes(AttributeList::FunctionIndex, B);
				1705
				1706	return true;
				1707	}
				1708
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1709	bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
				1710	if (skipFunction(F))
				1711	return false;
				1712
				1713	bool Changed = false;
				1714	auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
				1715
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1716	LLVM_DEBUG(dbgs() << "AMDIC: process function ";
				1717	F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1718
Stanislav Mekhanoshin	1d8cf2b	2017-09-29 23:40:19 +0000	[diff] [blame]	1719	if (!EnablePreLink)
				1720	Changed \|= setFastFlags(F, Options);
				1721
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1722	for (auto &BB : F) {
				1723	for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
				1724	// Ignore non-calls.
				1725	CallInst *CI = dyn_cast<CallInst>(I);
				1726	++I;
				1727	if (!CI) continue;
				1728
				1729	// Ignore indirect calls.
				1730	Function *Callee = CI->getCalledFunction();
				1731	if (Callee == 0) continue;
				1732
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1733	LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
				1734	dbgs().flush());
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1735	if(Simplifier.fold(CI, AA))
				1736	Changed = true;
				1737	}
				1738	}
				1739	return Changed;
				1740	}
				1741
				1742	bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
				1743	if (skipFunction(F) \|\| UseNative.empty())
				1744	return false;
				1745
				1746	bool Changed = false;
				1747	for (auto &BB : F) {
				1748	for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
				1749	// Ignore non-calls.
				1750	CallInst *CI = dyn_cast<CallInst>(I);
				1751	++I;
				1752	if (!CI) continue;
				1753
				1754	// Ignore indirect calls.
				1755	Function *Callee = CI->getCalledFunction();
				1756	if (Callee == 0) continue;
				1757
				1758	if(Simplifier.useNative(CI))
				1759	Changed = true;
				1760	}
				1761	}
				1762	return Changed;
				1763	}