Blame - llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp - toolchain/llvm-project

blob: 0c56927dea02b221594e2cd80eaf8cfb76285180 [file] [log] [blame]

Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1	//===- AMDGPULibCalls.cpp -------------------------------------------------===//
				2	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	//
				9	/// \file
Adrian Prantl	5f8f34e4	2018-05-01 15:54:18 +0000	[diff] [blame]	10	/// This file does AMD library function optimizations.
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	11	//
				12	//===----------------------------------------------------------------------===//
				13
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	14	#include "AMDGPU.h"
				15	#include "AMDGPULibFunc.h"
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	16	#include "AMDGPUSubtarget.h"
Reid Kleckner	05da2fe	2019-11-13 13:15:01 -0800	[diff] [blame]	17	#include "llvm/ADT/StringRef.h"
				18	#include "llvm/ADT/StringSet.h"
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	19	#include "llvm/Analysis/AliasAnalysis.h"
				20	#include "llvm/Analysis/Loads.h"
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	21	#include "llvm/IR/Constants.h"
				22	#include "llvm/IR/DerivedTypes.h"
Reid Kleckner	05da2fe	2019-11-13 13:15:01 -0800	[diff] [blame]	23	#include "llvm/IR/Function.h"
				24	#include "llvm/IR/IRBuilder.h"
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	25	#include "llvm/IR/Instructions.h"
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	26	#include "llvm/IR/Intrinsics.h"
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	27	#include "llvm/IR/LLVMContext.h"
				28	#include "llvm/IR/Module.h"
				29	#include "llvm/IR/ValueSymbolTable.h"
Reid Kleckner	05da2fe	2019-11-13 13:15:01 -0800	[diff] [blame]	30	#include "llvm/InitializePasses.h"
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	31	#include "llvm/Support/Debug.h"
Evandro Menezes	c57a9dc	2019-10-09 20:00:43 +0000	[diff] [blame]	32	#include "llvm/Support/MathExtras.h"
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	33	#include "llvm/Support/raw_ostream.h"
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	34	#include "llvm/Target/TargetMachine.h"
Stanislav Mekhanoshin	1d8cf2b	2017-09-29 23:40:19 +0000	[diff] [blame]	35	#include "llvm/Target/TargetOptions.h"
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	36	#include <cmath>
Reid Kleckner	05da2fe	2019-11-13 13:15:01 -0800	[diff] [blame]	37	#include <vector>
				38
				39	#define DEBUG_TYPE "amdgpu-simplifylib"
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	40
				41	using namespace llvm;
				42
				43	static cl::opt<bool> EnablePreLink("amdgpu-prelink",
				44	cl::desc("Enable pre-link mode optimizations"),
				45	cl::init(false),
				46	cl::Hidden);
				47
				48	static cl::list<std::string> UseNative("amdgpu-use-native",
				49	cl::desc("Comma separated list of functions to replace with native, or all"),
				50	cl::CommaSeparated, cl::ValueOptional,
				51	cl::Hidden);
				52
// Short names for the llvm::numbers constants used by the folding tables.
#define MATH_PI      numbers::pi
#define MATH_E       numbers::e
#define MATH_SQRT2   numbers::sqrt2
#define MATH_SQRT1_2 numbers::inv_sqrt2
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	57
				58	namespace llvm {
				59
				60	class AMDGPULibCalls {
				61	private:
				62
				63	typedef llvm::AMDGPULibFunc FuncInfo;
				64
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	65	const TargetMachine *TM;
				66
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	67	// -fuse-native.
				68	bool AllNative = false;
				69
				70	bool useNativeFunc(const StringRef F) const;
				71
				72	// Return a pointer (pointer expr) to the function if function defintion with
				73	// "FuncName" exists. It may create a new function prototype in pre-link mode.
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	74	FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	75
				76	// Replace a normal function with its native version.
				77	bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
				78
				79	bool parseFunctionName(const StringRef& FMangledName,
				80	FuncInfo FInfo=nullptr /out*/);
				81
				82	bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
				83
				84	/* Specialized optimizations */
				85
				86	// recip (half or native)
				87	bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				88
				89	// divide (half or native)
				90	bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				91
				92	// pow/powr/pown
				93	bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				94
				95	// rootn
				96	bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				97
				98	// fma/mad
				99	bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				100
				101	// -fuse-native for sincos
				102	bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
				103
				104	// evaluate calls if calls' arguments are constants.
				105	bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
				106	double& Res1, Constant copr0, Constant copr1, Constant *copr2);
				107	bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
				108
				109	// exp
				110	bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				111
				112	// exp2
				113	bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				114
				115	// exp10
				116	bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				117
				118	// log
				119	bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				120
				121	// log2
				122	bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				123
				124	// log10
				125	bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				126
				127	// sqrt
				128	bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
				129
				130	// sin/cos
				131	bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
				132
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	133	// __read_pipe/__write_pipe
				134	bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
				135
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	136	// llvm.amdgcn.wavefrontsize
				137	bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
				138
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	139	// Get insertion point at entry.
				140	BasicBlock::iterator getEntryIns(CallInst * UI);
				141	// Insert an Alloc instruction.
				142	AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
				143	// Get a scalar native builtin signle argument FP function
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	144	FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	145
				146	protected:
				147	CallInst *CI;
				148
				149	bool isUnsafeMath(const CallInst *CI) const;
				150
				151	void replaceCall(Value *With) {
				152	CI->replaceAllUsesWith(With);
				153	CI->eraseFromParent();
				154	}
				155
				156	public:
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	157	AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
				158
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	159	bool fold(CallInst CI, AliasAnalysis AA = nullptr);
				160
				161	void initNativeFuncs();
				162
				163	// Replace a normal math function call with that native version
				164	bool useNative(CallInst *CI);
				165	};
				166
				167	} // end llvm namespace
				168
				169	namespace {
				170
				171	class AMDGPUSimplifyLibCalls : public FunctionPass {
				172
Stanislav Mekhanoshin	1d8cf2b	2017-09-29 23:40:19 +0000	[diff] [blame]	173	const TargetOptions Options;
				174
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	175	AMDGPULibCalls Simplifier;
				176
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	177	public:
				178	static char ID; // Pass identification
				179
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	180	AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions(),
				181	const TargetMachine *TM = nullptr)
				182	: FunctionPass(ID), Options(Opt), Simplifier(TM) {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	183	initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
				184	}
				185
				186	void getAnalysisUsage(AnalysisUsage &AU) const override {
				187	AU.addRequired<AAResultsWrapperPass>();
				188	}
				189
				190	bool runOnFunction(Function &M) override;
				191	};
				192
				193	class AMDGPUUseNativeCalls : public FunctionPass {
				194
				195	AMDGPULibCalls Simplifier;
				196
				197	public:
				198	static char ID; // Pass identification
				199
				200	AMDGPUUseNativeCalls() : FunctionPass(ID) {
				201	initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
				202	Simplifier.initNativeFuncs();
				203	}
				204
				205	bool runOnFunction(Function &F) override;
				206	};
				207
				208	} // end anonymous namespace.
				209
				210	char AMDGPUSimplifyLibCalls::ID = 0;
				211	char AMDGPUUseNativeCalls::ID = 0;
				212
				213	INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
				214	"Simplify well-known AMD library calls", false, false)
				215	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
				216	INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
				217	"Simplify well-known AMD library calls", false, false)
				218
				219	INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
				220	"Replace builtin math calls with that native versions.",
				221	false, false)
				222
				223	template <typename IRB>
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	224	static CallInst CreateCallEx(IRB &B, FunctionCallee Callee, Value Arg,
Benjamin Kramer	51ebcaa	2017-11-24 14:55:41 +0000	[diff] [blame]	225	const Twine &Name = "") {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	226	CallInst *R = B.CreateCall(Callee, Arg, Name);
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	227	if (Function *F = dyn_cast<Function>(Callee.getCallee()))
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	228	R->setCallingConv(F->getCallingConv());
				229	return R;
				230	}
				231
				232	template <typename IRB>
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	233	static CallInst CreateCallEx2(IRB &B, FunctionCallee Callee, Value Arg1,
				234	Value *Arg2, const Twine &Name = "") {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	235	CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	236	if (Function *F = dyn_cast<Function>(Callee.getCallee()))
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	237	R->setCallingConv(F->getCallingConv());
				238	return R;
				239	}
				240
				241	// Data structures for table-driven optimizations.
				242	// FuncTbl works for both f32 and f64 functions with 1 input argument
				243
				244	struct TableEntry {
				245	double result;
				246	double input;
				247	};
				248
				249	/* a list of {result, input} */
				250	static const TableEntry tbl_acos[] = {
Evandro Menezes	c57a9dc	2019-10-09 20:00:43 +0000	[diff] [blame]	251	{MATH_PI / 2.0, 0.0},
				252	{MATH_PI / 2.0, -0.0},
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	253	{0.0, 1.0},
				254	{MATH_PI, -1.0}
				255	};
				256	static const TableEntry tbl_acosh[] = {
				257	{0.0, 1.0}
				258	};
				259	static const TableEntry tbl_acospi[] = {
				260	{0.5, 0.0},
				261	{0.5, -0.0},
				262	{0.0, 1.0},
				263	{1.0, -1.0}
				264	};
				265	static const TableEntry tbl_asin[] = {
				266	{0.0, 0.0},
				267	{-0.0, -0.0},
Evandro Menezes	c57a9dc	2019-10-09 20:00:43 +0000	[diff] [blame]	268	{MATH_PI / 2.0, 1.0},
				269	{-MATH_PI / 2.0, -1.0}
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	270	};
				271	static const TableEntry tbl_asinh[] = {
				272	{0.0, 0.0},
				273	{-0.0, -0.0}
				274	};
				275	static const TableEntry tbl_asinpi[] = {
				276	{0.0, 0.0},
				277	{-0.0, -0.0},
				278	{0.5, 1.0},
				279	{-0.5, -1.0}
				280	};
				281	static const TableEntry tbl_atan[] = {
				282	{0.0, 0.0},
				283	{-0.0, -0.0},
Evandro Menezes	c57a9dc	2019-10-09 20:00:43 +0000	[diff] [blame]	284	{MATH_PI / 4.0, 1.0},
				285	{-MATH_PI / 4.0, -1.0}
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	286	};
				287	static const TableEntry tbl_atanh[] = {
				288	{0.0, 0.0},
				289	{-0.0, -0.0}
				290	};
				291	static const TableEntry tbl_atanpi[] = {
				292	{0.0, 0.0},
				293	{-0.0, -0.0},
				294	{0.25, 1.0},
				295	{-0.25, -1.0}
				296	};
				297	static const TableEntry tbl_cbrt[] = {
				298	{0.0, 0.0},
				299	{-0.0, -0.0},
				300	{1.0, 1.0},
				301	{-1.0, -1.0},
				302	};
				303	static const TableEntry tbl_cos[] = {
				304	{1.0, 0.0},
				305	{1.0, -0.0}
				306	};
				307	static const TableEntry tbl_cosh[] = {
				308	{1.0, 0.0},
				309	{1.0, -0.0}
				310	};
				311	static const TableEntry tbl_cospi[] = {
				312	{1.0, 0.0},
				313	{1.0, -0.0}
				314	};
				315	static const TableEntry tbl_erfc[] = {
				316	{1.0, 0.0},
				317	{1.0, -0.0}
				318	};
				319	static const TableEntry tbl_erf[] = {
				320	{0.0, 0.0},
				321	{-0.0, -0.0}
				322	};
				323	static const TableEntry tbl_exp[] = {
				324	{1.0, 0.0},
				325	{1.0, -0.0},
				326	{MATH_E, 1.0}
				327	};
				328	static const TableEntry tbl_exp2[] = {
				329	{1.0, 0.0},
				330	{1.0, -0.0},
				331	{2.0, 1.0}
				332	};
				333	static const TableEntry tbl_exp10[] = {
				334	{1.0, 0.0},
				335	{1.0, -0.0},
				336	{10.0, 1.0}
				337	};
				338	static const TableEntry tbl_expm1[] = {
				339	{0.0, 0.0},
				340	{-0.0, -0.0}
				341	};
				342	static const TableEntry tbl_log[] = {
				343	{0.0, 1.0},
				344	{1.0, MATH_E}
				345	};
				346	static const TableEntry tbl_log2[] = {
				347	{0.0, 1.0},
				348	{1.0, 2.0}
				349	};
				350	static const TableEntry tbl_log10[] = {
				351	{0.0, 1.0},
				352	{1.0, 10.0}
				353	};
				354	static const TableEntry tbl_rsqrt[] = {
				355	{1.0, 1.0},
Evandro Menezes	c57a9dc	2019-10-09 20:00:43 +0000	[diff] [blame]	356	{MATH_SQRT1_2, 2.0}
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	357	};
				358	static const TableEntry tbl_sin[] = {
				359	{0.0, 0.0},
				360	{-0.0, -0.0}
				361	};
				362	static const TableEntry tbl_sinh[] = {
				363	{0.0, 0.0},
				364	{-0.0, -0.0}
				365	};
				366	static const TableEntry tbl_sinpi[] = {
				367	{0.0, 0.0},
				368	{-0.0, -0.0}
				369	};
				370	static const TableEntry tbl_sqrt[] = {
				371	{0.0, 0.0},
				372	{1.0, 1.0},
				373	{MATH_SQRT2, 2.0}
				374	};
				375	static const TableEntry tbl_tan[] = {
				376	{0.0, 0.0},
				377	{-0.0, -0.0}
				378	};
				379	static const TableEntry tbl_tanh[] = {
				380	{0.0, 0.0},
				381	{-0.0, -0.0}
				382	};
				383	static const TableEntry tbl_tanpi[] = {
				384	{0.0, 0.0},
				385	{-0.0, -0.0}
				386	};
				387	static const TableEntry tbl_tgamma[] = {
				388	{1.0, 1.0},
				389	{1.0, 2.0},
				390	{2.0, 3.0},
				391	{6.0, 4.0}
				392	};
				393
				394	static bool HasNative(AMDGPULibFunc::EFuncId id) {
				395	switch(id) {
				396	case AMDGPULibFunc::EI_DIVIDE:
				397	case AMDGPULibFunc::EI_COS:
				398	case AMDGPULibFunc::EI_EXP:
				399	case AMDGPULibFunc::EI_EXP2:
				400	case AMDGPULibFunc::EI_EXP10:
				401	case AMDGPULibFunc::EI_LOG:
				402	case AMDGPULibFunc::EI_LOG2:
				403	case AMDGPULibFunc::EI_LOG10:
				404	case AMDGPULibFunc::EI_POWR:
				405	case AMDGPULibFunc::EI_RECIP:
				406	case AMDGPULibFunc::EI_RSQRT:
				407	case AMDGPULibFunc::EI_SIN:
				408	case AMDGPULibFunc::EI_SINCOS:
				409	case AMDGPULibFunc::EI_SQRT:
				410	case AMDGPULibFunc::EI_TAN:
				411	return true;
				412	default:;
				413	}
				414	return false;
				415	}
				416
				417	struct TableRef {
				418	size_t size;
				419	const TableEntry *table; // variable size: from 0 to (size - 1)
				420
				421	TableRef() : size(0), table(nullptr) {}
				422
				423	template <size_t N>
				424	TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
				425	};
				426
				427	static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
				428	switch(id) {
				429	case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos);
				430	case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh);
				431	case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi);
				432	case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin);
				433	case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh);
				434	case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi);
				435	case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan);
				436	case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh);
				437	case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi);
				438	case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt);
				439	case AMDGPULibFunc::EI_NCOS:
				440	case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos);
				441	case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh);
				442	case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi);
				443	case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc);
				444	case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf);
				445	case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp);
				446	case AMDGPULibFunc::EI_NEXP2:
				447	case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2);
				448	case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10);
				449	case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1);
				450	case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log);
				451	case AMDGPULibFunc::EI_NLOG2:
				452	case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2);
				453	case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10);
				454	case AMDGPULibFunc::EI_NRSQRT:
				455	case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt);
				456	case AMDGPULibFunc::EI_NSIN:
				457	case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin);
				458	case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh);
				459	case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi);
				460	case AMDGPULibFunc::EI_NSQRT:
				461	case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt);
				462	case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan);
				463	case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh);
				464	case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi);
				465	case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma);
				466	default:;
				467	}
				468	return TableRef();
				469	}
				470
				471	static inline int getVecSize(const AMDGPULibFunc& FInfo) {
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	472	return FInfo.getLeads()[0].VectorSize;
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	473	}
				474
				475	static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	476	return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	477	}
				478
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	479	FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	480	// If we are doing PreLinkOpt, the function is external. So it is safe to
				481	// use getOrInsertFunction() at this stage.
				482
				483	return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
				484	: AMDGPULibFunc::getFunction(M, fInfo);
				485	}
				486
				487	bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
				488	FuncInfo *FInfo) {
				489	return AMDGPULibFunc::parse(FMangledName, *FInfo);
				490	}
				491
				492	bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
				493	if (auto Op = dyn_cast<FPMathOperator>(CI))
Sanjay Patel	629c411	2017-11-06 16:27:15 +0000	[diff] [blame]	494	if (Op->isFast())
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	495	return true;
				496	const Function *F = CI->getParent()->getParent();
				497	Attribute Attr = F->getFnAttribute("unsafe-fp-math");
				498	return Attr.getValueAsString() == "true";
				499	}
				500
				501	bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
				502	return AllNative \|\|
				503	std::find(UseNative.begin(), UseNative.end(), F) != UseNative.end();
				504	}
				505
				506	void AMDGPULibCalls::initNativeFuncs() {
				507	AllNative = useNativeFunc("all") \|\|
				508	(UseNative.getNumOccurrences() && UseNative.size() == 1 &&
				509	UseNative.begin()->empty());
				510	}
				511
				512	bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
				513	bool native_sin = useNativeFunc("sin");
				514	bool native_cos = useNativeFunc("cos");
				515
				516	if (native_sin && native_cos) {
				517	Module *M = aCI->getModule();
				518	Value *opr0 = aCI->getArgOperand(0);
				519
				520	AMDGPULibFunc nf;
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	521	nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
				522	nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	523
				524	nf.setPrefix(AMDGPULibFunc::NATIVE);
				525	nf.setId(AMDGPULibFunc::EI_SIN);
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	526	FunctionCallee sinExpr = getFunction(M, nf);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	527
				528	nf.setPrefix(AMDGPULibFunc::NATIVE);
				529	nf.setId(AMDGPULibFunc::EI_COS);
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	530	FunctionCallee cosExpr = getFunction(M, nf);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	531	if (sinExpr && cosExpr) {
				532	Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
				533	Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
				534	new StoreInst(cosval, aCI->getArgOperand(1), aCI);
				535
				536	DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
				537	<< " with native version of sin/cos");
				538
				539	replaceCall(sinval);
				540	return true;
				541	}
				542	}
				543	return false;
				544	}
				545
				546	bool AMDGPULibCalls::useNative(CallInst *aCI) {
				547	CI = aCI;
				548	Function *Callee = aCI->getCalledFunction();
				549
				550	FuncInfo FInfo;
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	551	if (!parseFunctionName(Callee->getName(), &FInfo) \|\| !FInfo.isMangled() \|\|
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	552	FInfo.getPrefix() != AMDGPULibFunc::NOPFX \|\|
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	553	getArgType(FInfo) == AMDGPULibFunc::F64 \|\| !HasNative(FInfo.getId()) \|\|
				554	!(AllNative \|\| useNativeFunc(FInfo.getName()))) {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	555	return false;
				556	}
				557
				558	if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
				559	return sincosUseNative(aCI, FInfo);
				560
				561	FInfo.setPrefix(AMDGPULibFunc::NATIVE);
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	562	FunctionCallee F = getFunction(aCI->getModule(), FInfo);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	563	if (!F)
				564	return false;
				565
				566	aCI->setCalledFunction(F);
				567	DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
				568	<< " with native version");
				569	return true;
				570	}
				571
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	572	// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
				573	// builtin, with appended type size and alignment arguments, where 2 or 4
				574	// indicates the original number of arguments. The library has optimized version
				575	// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
				576	// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
				577	// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
				578	// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
				579	bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
				580	FuncInfo &FInfo) {
				581	auto *Callee = CI->getCalledFunction();
				582	if (!Callee->isDeclaration())
				583	return false;
				584
				585	assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
				586	auto *M = Callee->getParent();
				587	auto &Ctx = M->getContext();
				588	std::string Name = Callee->getName();
				589	auto NumArg = CI->getNumArgOperands();
				590	if (NumArg != 4 && NumArg != 6)
				591	return false;
				592	auto *PacketSize = CI->getArgOperand(NumArg - 2);
				593	auto *PacketAlign = CI->getArgOperand(NumArg - 1);
				594	if (!isa<ConstantInt>(PacketSize) \|\| !isa<ConstantInt>(PacketAlign))
				595	return false;
				596	unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
				597	unsigned Align = cast<ConstantInt>(PacketAlign)->getZExtValue();
				598	if (Size != Align \|\| !isPowerOf2_32(Size))
				599	return false;
				600
				601	Type *PtrElemTy;
				602	if (Size <= 8)
				603	PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
				604	else
				605	PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8);
				606	unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
				607	auto PtrArg = CI->getArgOperand(PtrArgLoc);
				608	unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
				609	auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
				610
				611	SmallVector<llvm::Type *, 6> ArgTys;
				612	for (unsigned I = 0; I != PtrArgLoc; ++I)
				613	ArgTys.push_back(CI->getArgOperand(I)->getType());
				614	ArgTys.push_back(PtrTy);
				615
				616	Name = Name + "_" + std::to_string(Size);
				617	auto *FTy = FunctionType::get(Callee->getReturnType(),
				618	ArrayRef<Type *>(ArgTys), false);
				619	AMDGPULibFunc NewLibFunc(Name, FTy);
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	620	FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	621	if (!F)
				622	return false;
				623
				624	auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
				625	SmallVector<Value *, 6> Args;
				626	for (unsigned I = 0; I != PtrArgLoc; ++I)
				627	Args.push_back(CI->getArgOperand(I));
				628	Args.push_back(BCast);
				629
				630	auto *NCI = B.CreateCall(F, Args);
				631	NCI->setAttributes(CI->getAttributes());
				632	CI->replaceAllUsesWith(NCI);
				633	CI->dropAllReferences();
				634	CI->eraseFromParent();
				635
				636	return true;
				637	}
				638
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	639	// This function returns false if no change; return true otherwise.
				640	bool AMDGPULibCalls::fold(CallInst CI, AliasAnalysis AA) {
				641	this->CI = CI;
				642	Function *Callee = CI->getCalledFunction();
				643
				644	// Ignore indirect calls.
				645	if (Callee == 0) return false;
				646
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	647	BasicBlock *BB = CI->getParent();
				648	LLVMContext &Context = CI->getParent()->getContext();
				649	IRBuilder<> B(Context);
				650
				651	// Set the builder to the instruction after the call.
				652	B.SetInsertPoint(BB, CI->getIterator());
				653
				654	// Copy fast flags from the original call.
				655	if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
				656	B.setFastMathFlags(FPOp->getFastMathFlags());
				657
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	658	switch (Callee->getIntrinsicID()) {
				659	default:
				660	break;
				661	case Intrinsic::amdgcn_wavefrontsize:
				662	return !EnablePreLink && fold_wavefrontsize(CI, B);
				663	}
				664
				665	FuncInfo FInfo;
				666	if (!parseFunctionName(Callee->getName(), &FInfo))
				667	return false;
				668
				669	// Further check the number of arguments to see if they match.
				670	if (CI->getNumArgOperands() != FInfo.getNumArgs())
				671	return false;
				672
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	673	if (TDOFold(CI, FInfo))
				674	return true;
				675
				676	// Under unsafe-math, evaluate calls if possible.
				677	// According to Brian Sumner, we can do this for all f32 function calls
				678	// using host's double function calls.
				679	if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
				680	return true;
				681
				682	// Specilized optimizations for each function call
				683	switch (FInfo.getId()) {
				684	case AMDGPULibFunc::EI_RECIP:
				685	// skip vector function
				686	assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE \|\|
				687	FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
				688	"recip must be an either native or half function");
				689	return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
				690
				691	case AMDGPULibFunc::EI_DIVIDE:
				692	// skip vector function
				693	assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE \|\|
				694	FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
				695	"divide must be an either native or half function");
				696	return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
				697
				698	case AMDGPULibFunc::EI_POW:
				699	case AMDGPULibFunc::EI_POWR:
				700	case AMDGPULibFunc::EI_POWN:
				701	return fold_pow(CI, B, FInfo);
				702
				703	case AMDGPULibFunc::EI_ROOTN:
				704	// skip vector function
				705	return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
				706
				707	case AMDGPULibFunc::EI_FMA:
				708	case AMDGPULibFunc::EI_MAD:
				709	case AMDGPULibFunc::EI_NFMA:
				710	// skip vector function
				711	return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
				712
				713	case AMDGPULibFunc::EI_SQRT:
				714	return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
				715	case AMDGPULibFunc::EI_COS:
				716	case AMDGPULibFunc::EI_SIN:
				717	if ((getArgType(FInfo) == AMDGPULibFunc::F32 \|\|
				718	getArgType(FInfo) == AMDGPULibFunc::F64)
				719	&& (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
				720	return fold_sincos(CI, B, AA);
				721
				722	break;
Yaxun Liu	fc5121a	2017-09-06 00:30:27 +0000	[diff] [blame]	723	case AMDGPULibFunc::EI_READ_PIPE_2:
				724	case AMDGPULibFunc::EI_READ_PIPE_4:
				725	case AMDGPULibFunc::EI_WRITE_PIPE_2:
				726	case AMDGPULibFunc::EI_WRITE_PIPE_4:
				727	return fold_read_write_pipe(CI, B, FInfo);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	728
				729	default:
				730	break;
				731	}
				732
				733	return false;
				734	}
				735
				736	bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
				737	// Table-Driven optimization
				738	const TableRef tr = getOptTable(FInfo.getId());
				739	if (tr.size==0)
				740	return false;
				741
				742	int const sz = (int)tr.size;
				743	const TableEntry * const ftbl = tr.table;
				744	Value *opr0 = CI->getArgOperand(0);
				745
				746	if (getVecSize(FInfo) > 1) {
				747	if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
				748	SmallVector<double, 0> DVal;
				749	for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
				750	ConstantFP *eltval = dyn_cast<ConstantFP>(
				751	CV->getElementAsConstant((unsigned)eltNo));
				752	assert(eltval && "Non-FP arguments in math function!");
				753	bool found = false;
				754	for (int i=0; i < sz; ++i) {
				755	if (eltval->isExactlyValue(ftbl[i].input)) {
				756	DVal.push_back(ftbl[i].result);
				757	found = true;
				758	break;
				759	}
				760	}
				761	if (!found) {
				762	// This vector constants not handled yet.
				763	return false;
				764	}
				765	}
				766	LLVMContext &context = CI->getParent()->getParent()->getContext();
				767	Constant *nval;
				768	if (getArgType(FInfo) == AMDGPULibFunc::F32) {
				769	SmallVector<float, 0> FVal;
				770	for (unsigned i = 0; i < DVal.size(); ++i) {
				771	FVal.push_back((float)DVal[i]);
				772	}
				773	ArrayRef<float> tmp(FVal);
				774	nval = ConstantDataVector::get(context, tmp);
				775	} else { // F64
				776	ArrayRef<double> tmp(DVal);
				777	nval = ConstantDataVector::get(context, tmp);
				778	}
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	779	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << nval << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	780	replaceCall(nval);
				781	return true;
				782	}
				783	} else {
				784	// Scalar version
				785	if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
				786	for (int i = 0; i < sz; ++i) {
				787	if (CF->isExactlyValue(ftbl[i].input)) {
				788	Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	789	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << nval << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	790	replaceCall(nval);
				791	return true;
				792	}
				793	}
				794	}
				795	}
				796
				797	return false;
				798	}
				799
				800	bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
				801	Module *M = CI->getModule();
				802	if (getArgType(FInfo) != AMDGPULibFunc::F32 \|\|
				803	FInfo.getPrefix() != AMDGPULibFunc::NOPFX \|\|
				804	!HasNative(FInfo.getId()))
				805	return false;
				806
				807	AMDGPULibFunc nf = FInfo;
				808	nf.setPrefix(AMDGPULibFunc::NATIVE);
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	809	if (FunctionCallee FPExpr = getFunction(M, nf)) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	810	LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	811
				812	CI->setCalledFunction(FPExpr);
				813
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	814	LLVM_DEBUG(dbgs() << *CI << '\n');
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	815
				816	return true;
				817	}
				818	return false;
				819	}
				820
				821	// [native_]half_recip(c) ==> 1.0/c
				822	bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
				823	const FuncInfo &FInfo) {
				824	Value *opr0 = CI->getArgOperand(0);
				825	if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
				826	// Just create a normal div. Later, InstCombine will be able
				827	// to compute the divide into a constant (avoid check float infinity
				828	// or subnormal at this point).
				829	Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
				830	opr0,
				831	"recip2div");
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	832	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << nval << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	833	replaceCall(nval);
				834	return true;
				835	}
				836	return false;
				837	}
				838
				839	// [native_]half_divide(x, c) ==> x/c
				840	bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
				841	const FuncInfo &FInfo) {
				842	Value *opr0 = CI->getArgOperand(0);
				843	Value *opr1 = CI->getArgOperand(1);
				844	ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
				845	ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
				846
				847	if ((CF0 && CF1) \|\| // both are constants
				848	(CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
				849	// CF1 is constant && f32 divide
				850	{
				851	Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
				852	opr1, "__div2recip");
				853	Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
				854	replaceCall(nval);
				855	return true;
				856	}
				857	return false;
				858	}
				859
				860	namespace llvm {
				861	static double log2(double V) {
David Tenty	ae79a2c	2019-07-12 20:12:15 +0000	[diff] [blame]	862	#if _XOPEN_SOURCE >= 600 \|\| defined(_ISOC99_SOURCE) \|\| _POSIX_C_SOURCE >= 200112L
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	863	return ::log2(V);
				864	#else
Evandro Menezes	c57a9dc	2019-10-09 20:00:43 +0000	[diff] [blame]	865	return log(V) / numbers::ln2;
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	866	#endif
				867	}
				868	}
				869
				870	bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
				871	const FuncInfo &FInfo) {
				872	assert((FInfo.getId() == AMDGPULibFunc::EI_POW \|\|
				873	FInfo.getId() == AMDGPULibFunc::EI_POWR \|\|
				874	FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
				875	"fold_pow: encounter a wrong function call");
				876
				877	Value opr0, opr1;
				878	ConstantFP *CF;
				879	ConstantInt *CINT;
				880	ConstantAggregateZero *CZero;
				881	Type *eltType;
				882
				883	opr0 = CI->getArgOperand(0);
				884	opr1 = CI->getArgOperand(1);
				885	CZero = dyn_cast<ConstantAggregateZero>(opr1);
				886	if (getVecSize(FInfo) == 1) {
				887	eltType = opr0->getType();
				888	CF = dyn_cast<ConstantFP>(opr1);
				889	CINT = dyn_cast<ConstantInt>(opr1);
				890	} else {
				891	VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
				892	assert(VTy && "Oprand of vector function should be of vectortype");
				893	eltType = VTy->getElementType();
				894	ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
				895
				896	// Now, only Handle vector const whose elements have the same value.
				897	CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
				898	CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
				899	}
				900
				901	// No unsafe math , no constant argument, do nothing
				902	if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
				903	return false;
				904
				905	// 0x1111111 means that we don't do anything for this call.
				906	int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
				907
				908	if ((CF && CF->isZero()) \|\| (CINT && ci_opr1 == 0) \|\| CZero) {
				909	// pow/powr/pown(x, 0) == 1
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	910	LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	911	Constant *cnval = ConstantFP::get(eltType, 1.0);
				912	if (getVecSize(FInfo) > 1) {
				913	cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
				914	}
				915	replaceCall(cnval);
				916	return true;
				917	}
				918	if ((CF && CF->isExactlyValue(1.0)) \|\| (CINT && ci_opr1 == 1)) {
				919	// pow/powr/pown(x, 1.0) = x
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	920	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr0 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	921	replaceCall(opr0);
				922	return true;
				923	}
				924	if ((CF && CF->isExactlyValue(2.0)) \|\| (CINT && ci_opr1 == 2)) {
				925	// pow/powr/pown(x, 2.0) = x*x
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	926	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr0 << " * " << *opr0
				927	<< "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	928	Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
				929	replaceCall(nval);
				930	return true;
				931	}
				932	if ((CF && CF->isExactlyValue(-1.0)) \|\| (CINT && ci_opr1 == -1)) {
				933	// pow/powr/pown(x, -1.0) = 1.0/x
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	934	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> 1 / " << opr0 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	935	Constant *cnval = ConstantFP::get(eltType, 1.0);
				936	if (getVecSize(FInfo) > 1) {
				937	cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
				938	}
				939	Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
				940	replaceCall(nval);
				941	return true;
				942	}
				943
				944	Module *M = CI->getModule();
				945	if (CF && (CF->isExactlyValue(0.5) \|\| CF->isExactlyValue(-0.5))) {
				946	// pow[r](x, [-]0.5) = sqrt(x)
				947	bool issqrt = CF->isExactlyValue(0.5);
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	948	if (FunctionCallee FPExpr =
				949	getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
				950	: AMDGPULibFunc::EI_RSQRT,
				951	FInfo))) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	952	LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
				953	<< FInfo.getName().c_str() << "(" << *opr0 << ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	954	Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
				955	: "__pow2rsqrt");
				956	replaceCall(nval);
				957	return true;
				958	}
				959	}
				960
				961	if (!isUnsafeMath(CI))
				962	return false;
				963
				964	// Unsafe Math optimization
				965
				966	// Remember that ci_opr1 is set if opr1 is integral
				967	if (CF) {
				968	double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
				969	? (double)CF->getValueAPF().convertToFloat()
				970	: CF->getValueAPF().convertToDouble();
				971	int ival = (int)dval;
				972	if ((double)ival == dval) {
				973	ci_opr1 = ival;
				974	} else
				975	ci_opr1 = 0x11111111;
				976	}
				977
				978	// pow/powr/pown(x, c) = [1/](xx..x); where
				979	// trunc(c) == c && the number of x == c && \|c\| <= 12
				980	unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
				981	if (abs_opr1 <= 12) {
				982	Constant *cnval;
				983	Value *nval;
				984	if (abs_opr1 == 0) {
				985	cnval = ConstantFP::get(eltType, 1.0);
				986	if (getVecSize(FInfo) > 1) {
				987	cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
				988	}
				989	nval = cnval;
				990	} else {
				991	Value *valx2 = nullptr;
				992	nval = nullptr;
				993	while (abs_opr1 > 0) {
				994	valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
				995	if (abs_opr1 & 1) {
				996	nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
				997	}
				998	abs_opr1 >>= 1;
				999	}
				1000	}
				1001
				1002	if (ci_opr1 < 0) {
				1003	cnval = ConstantFP::get(eltType, 1.0);
				1004	if (getVecSize(FInfo) > 1) {
				1005	cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
				1006	}
				1007	nval = B.CreateFDiv(cnval, nval, "__1powprod");
				1008	}
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1009	LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
				1010	<< ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
				1011	<< ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1012	replaceCall(nval);
				1013	return true;
				1014	}
				1015
				1016	// powr ---> exp2(y * log2(x))
				1017	// pown/pow ---> powr(fabs(x), y) \| (x & ((int)y << 31))
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	1018	FunctionCallee ExpExpr =
				1019	getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1020	if (!ExpExpr)
				1021	return false;
				1022
				1023	bool needlog = false;
				1024	bool needabs = false;
				1025	bool needcopysign = false;
				1026	Constant *cnval = nullptr;
				1027	if (getVecSize(FInfo) == 1) {
				1028	CF = dyn_cast<ConstantFP>(opr0);
				1029
				1030	if (CF) {
				1031	double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
				1032	? (double)CF->getValueAPF().convertToFloat()
				1033	: CF->getValueAPF().convertToDouble();
				1034
				1035	V = log2(std::abs(V));
				1036	cnval = ConstantFP::get(eltType, V);
				1037	needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
				1038	CF->isNegative();
				1039	} else {
				1040	needlog = true;
				1041	needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
				1042	(!CF \|\| CF->isNegative());
				1043	}
				1044	} else {
				1045	ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
				1046
				1047	if (!CDV) {
				1048	needlog = true;
				1049	needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
				1050	} else {
				1051	assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
				1052	"Wrong vector size detected");
				1053
				1054	SmallVector<double, 0> DVal;
				1055	for (int i=0; i < getVecSize(FInfo); ++i) {
				1056	double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
				1057	? (double)CDV->getElementAsFloat(i)
				1058	: CDV->getElementAsDouble(i);
				1059	if (V < 0.0) needcopysign = true;
				1060	V = log2(std::abs(V));
				1061	DVal.push_back(V);
				1062	}
				1063	if (getArgType(FInfo) == AMDGPULibFunc::F32) {
				1064	SmallVector<float, 0> FVal;
				1065	for (unsigned i=0; i < DVal.size(); ++i) {
				1066	FVal.push_back((float)DVal[i]);
				1067	}
				1068	ArrayRef<float> tmp(FVal);
				1069	cnval = ConstantDataVector::get(M->getContext(), tmp);
				1070	} else {
				1071	ArrayRef<double> tmp(DVal);
				1072	cnval = ConstantDataVector::get(M->getContext(), tmp);
				1073	}
				1074	}
				1075	}
				1076
				1077	if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
				1078	// We cannot handle corner cases for a general pow() function, give up
				1079	// unless y is a constant integral value. Then proceed as if it were pown.
				1080	if (getVecSize(FInfo) == 1) {
				1081	if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
				1082	double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
				1083	? (double)CF->getValueAPF().convertToFloat()
				1084	: CF->getValueAPF().convertToDouble();
				1085	if (y != (double)(int64_t)y)
				1086	return false;
				1087	} else
				1088	return false;
				1089	} else {
				1090	if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
				1091	for (int i=0; i < getVecSize(FInfo); ++i) {
				1092	double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
				1093	? (double)CDV->getElementAsFloat(i)
				1094	: CDV->getElementAsDouble(i);
				1095	if (y != (double)(int64_t)y)
				1096	return false;
				1097	}
				1098	} else
				1099	return false;
				1100	}
				1101	}
				1102
				1103	Value *nval;
				1104	if (needabs) {
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	1105	FunctionCallee AbsExpr =
				1106	getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1107	if (!AbsExpr)
				1108	return false;
				1109	nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
				1110	} else {
				1111	nval = cnval ? cnval : opr0;
				1112	}
				1113	if (needlog) {
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	1114	FunctionCallee LogExpr =
				1115	getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1116	if (!LogExpr)
				1117	return false;
				1118	nval = CreateCallEx(B,LogExpr, nval, "__log2");
				1119	}
				1120
				1121	if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
				1122	// convert int(32) to fp(f32 or f64)
				1123	opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
				1124	}
				1125	nval = B.CreateFMul(opr1, nval, "__ylogx");
				1126	nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
				1127
				1128	if (needcopysign) {
				1129	Value *opr_n;
				1130	Type* rTy = opr0->getType();
				1131	Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
				1132	Type *nTy = nTyS;
				1133	if (const VectorType *vTy = dyn_cast<VectorType>(rTy))
				1134	nTy = VectorType::get(nTyS, vTy->getNumElements());
				1135	unsigned size = nTy->getScalarSizeInBits();
				1136	opr_n = CI->getArgOperand(1);
				1137	if (opr_n->getType()->isIntegerTy())
				1138	opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
				1139	else
				1140	opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
				1141
				1142	Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
				1143	sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
				1144	nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
				1145	nval = B.CreateBitCast(nval, opr0->getType());
				1146	}
				1147
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1148	LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
				1149	<< "exp2(" << opr1 << " log2(" << *opr0 << "))\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1150	replaceCall(nval);
				1151
				1152	return true;
				1153	}
				1154
				1155	bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
				1156	const FuncInfo &FInfo) {
				1157	Value *opr0 = CI->getArgOperand(0);
				1158	Value *opr1 = CI->getArgOperand(1);
				1159
				1160	ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
				1161	if (!CINT) {
				1162	return false;
				1163	}
				1164	int ci_opr1 = (int)CINT->getSExtValue();
				1165	if (ci_opr1 == 1) { // rootn(x, 1) = x
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1166	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr0 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1167	replaceCall(opr0);
				1168	return true;
				1169	}
				1170	if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1171	Module *M = CI->getModule();
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	1172	if (FunctionCallee FPExpr =
				1173	getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1174	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> sqrt(" << opr0 << ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1175	Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
				1176	replaceCall(nval);
				1177	return true;
				1178	}
				1179	} else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
				1180	Module *M = CI->getModule();
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	1181	if (FunctionCallee FPExpr =
				1182	getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1183	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> cbrt(" << opr0 << ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1184	Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
				1185	replaceCall(nval);
				1186	return true;
				1187	}
				1188	} else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1189	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> 1.0 / " << opr0 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1190	Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
				1191	opr0,
				1192	"__rootn2div");
				1193	replaceCall(nval);
				1194	return true;
				1195	} else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1196	Module *M = CI->getModule();
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	1197	if (FunctionCallee FPExpr =
				1198	getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1199	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> rsqrt(" << opr0
				1200	<< ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1201	Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
				1202	replaceCall(nval);
				1203	return true;
				1204	}
				1205	}
				1206	return false;
				1207	}
				1208
				1209	bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
				1210	const FuncInfo &FInfo) {
				1211	Value *opr0 = CI->getArgOperand(0);
				1212	Value *opr1 = CI->getArgOperand(1);
				1213	Value *opr2 = CI->getArgOperand(2);
				1214
				1215	ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
				1216	ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
				1217	if ((CF0 && CF0->isZero()) \|\| (CF1 && CF1->isZero())) {
				1218	// fma/mad(a, b, c) = c if a=0 \|\| b=0
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1219	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr2 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1220	replaceCall(opr2);
				1221	return true;
				1222	}
				1223	if (CF0 && CF0->isExactlyValue(1.0f)) {
				1224	// fma/mad(a, b, c) = b+c if a=1
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1225	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr1 << " + " << *opr2
				1226	<< "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1227	Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
				1228	replaceCall(nval);
				1229	return true;
				1230	}
				1231	if (CF1 && CF1->isExactlyValue(1.0f)) {
				1232	// fma/mad(a, b, c) = a+c if b=1
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1233	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr0 << " + " << *opr2
				1234	<< "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1235	Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
				1236	replaceCall(nval);
				1237	return true;
				1238	}
				1239	if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
				1240	if (CF->isZero()) {
				1241	// fma/mad(a, b, c) = a*b if c=0
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1242	LLVM_DEBUG(errs() << "AMDIC: " << CI << " ---> " << opr0 << " * "
				1243	<< *opr1 << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1244	Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
				1245	replaceCall(nval);
				1246	return true;
				1247	}
				1248	}
				1249
				1250	return false;
				1251	}
				1252
				1253	// Get a scalar native builtin signle argument FP function
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	1254	FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
				1255	const FuncInfo &FInfo) {
Stanislav Mekhanoshin	312c557	2017-08-28 18:00:08 +0000	[diff] [blame]	1256	if (getArgType(FInfo) == AMDGPULibFunc::F64 \|\| !HasNative(FInfo.getId()))
				1257	return nullptr;
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1258	FuncInfo nf = FInfo;
				1259	nf.setPrefix(AMDGPULibFunc::NATIVE);
				1260	return getFunction(M, nf);
				1261	}
				1262
				1263	// fold sqrt -> native_sqrt (x)
				1264	bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
				1265	const FuncInfo &FInfo) {
Stanislav Mekhanoshin	312c557	2017-08-28 18:00:08 +0000	[diff] [blame]	1266	if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1267	(FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	1268	if (FunctionCallee FPExpr = getNativeFunction(
				1269	CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1270	Value *opr0 = CI->getArgOperand(0);
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1271	LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
				1272	<< "sqrt(" << *opr0 << ")\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1273	Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
				1274	replaceCall(nval);
				1275	return true;
				1276	}
				1277	}
				1278	return false;
				1279	}
				1280
				1281	// fold sin, cos -> sincos.
				1282	bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
				1283	AliasAnalysis *AA) {
				1284	AMDGPULibFunc fInfo;
				1285	if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
				1286	return false;
				1287
				1288	assert(fInfo.getId() == AMDGPULibFunc::EI_SIN \|\|
				1289	fInfo.getId() == AMDGPULibFunc::EI_COS);
				1290	bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
				1291
				1292	Value *CArgVal = CI->getArgOperand(0);
				1293	BasicBlock * const CBB = CI->getParent();
				1294
				1295	int const MaxScan = 30;
				1296
				1297	{ // fold in load value.
				1298	LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
				1299	if (LI && LI->getParent() == CBB) {
				1300	BasicBlock::iterator BBI = LI->getIterator();
				1301	Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
				1302	if (AvailableVal) {
				1303	CArgVal->replaceAllUsesWith(AvailableVal);
				1304	if (CArgVal->getNumUses() == 0)
				1305	LI->eraseFromParent();
				1306	CArgVal = CI->getArgOperand(0);
				1307	}
				1308	}
				1309	}
				1310
				1311	Module *M = CI->getModule();
				1312	fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
				1313	std::string const PairName = fInfo.mangle();
				1314
				1315	CallInst *UI = nullptr;
				1316	for (User* U : CArgVal->users()) {
				1317	CallInst *XI = dyn_cast_or_null<CallInst>(U);
				1318	if (!XI \|\| XI == CI \|\| XI->getParent() != CBB)
				1319	continue;
				1320
				1321	Function *UCallee = XI->getCalledFunction();
				1322	if (!UCallee \|\| !UCallee->getName().equals(PairName))
				1323	continue;
				1324
				1325	BasicBlock::iterator BBI = CI->getIterator();
				1326	if (BBI == CI->getParent()->begin())
				1327	break;
				1328	--BBI;
				1329	for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
				1330	if (cast<Instruction>(BBI) == XI) {
				1331	UI = XI;
				1332	break;
				1333	}
				1334	}
				1335	if (UI) break;
				1336	}
				1337
				1338	if (!UI) return false;
				1339
				1340	// Merge the sin and cos.
				1341
				1342	// for OpenCL 2.0 we have only generic implementation of sincos
				1343	// function.
				1344	AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	1345	nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	1346	FunctionCallee Fsincos = getFunction(M, nf);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1347	if (!Fsincos) return false;
				1348
				1349	BasicBlock::iterator ItOld = B.GetInsertPoint();
				1350	AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
				1351	B.SetInsertPoint(UI);
				1352
				1353	Value *P = Alloc;
James Y Knight	1368022	2019-02-01 02:28:03 +0000	[diff] [blame]	1354	Type *PTy = Fsincos.getFunctionType()->getParamType(1);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1355	// The allocaInst allocates the memory in private address space. This need
				1356	// to be bitcasted to point to the address space of cos pointer type.
				1357	// In OpenCL 2.0 this is generic, while in 1.2 that is private.
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	1358	if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1359	P = B.CreateAddrSpaceCast(Alloc, PTy);
				1360	CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
				1361
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1362	LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << CI << ", " << UI << ") with "
				1363	<< *Call << "\n");
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1364
				1365	if (!isSin) { // CI->cos, UI->sin
				1366	B.SetInsertPoint(&*ItOld);
				1367	UI->replaceAllUsesWith(&*Call);
James Y Knight	14359ef	2019-02-01 20:44:24 +0000	[diff] [blame]	1368	Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1369	CI->replaceAllUsesWith(Reload);
				1370	UI->eraseFromParent();
				1371	CI->eraseFromParent();
				1372	} else { // CI->sin, UI->cos
James Y Knight	14359ef	2019-02-01 20:44:24 +0000	[diff] [blame]	1373	Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1374	UI->replaceAllUsesWith(Reload);
				1375	CI->replaceAllUsesWith(Call);
				1376	UI->eraseFromParent();
				1377	CI->eraseFromParent();
				1378	}
				1379	return true;
				1380	}
				1381
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	1382	bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
				1383	if (!TM)
				1384	return false;
				1385
				1386	StringRef CPU = TM->getTargetCPU();
				1387	StringRef Features = TM->getTargetFeatureString();
				1388	if ((CPU.empty() \|\| CPU.equals_lower("generic")) &&
				1389	(Features.empty() \|\|
				1390	Features.find_lower("wavefrontsize") == StringRef::npos))
				1391	return false;
				1392
				1393	Function *F = CI->getParent()->getParent();
				1394	const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
				1395	unsigned N = ST.getWavefrontSize();
				1396
				1397	LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
				1398	<< N << "\n");
				1399
				1400	CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
				1401	CI->eraseFromParent();
				1402	return true;
				1403	}
				1404
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1405	// Get insertion point at entry.
				1406	BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
				1407	Function * Func = UI->getParent()->getParent();
				1408	BasicBlock * BB = &Func->getEntryBlock();
				1409	assert(BB && "Entry block not found!");
				1410	BasicBlock::iterator ItNew = BB->begin();
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1411	return ItNew;
				1412	}
				1413
				1414	// Insert a AllocsInst at the beginning of function entry block.
				1415	AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
				1416	const char *prefix) {
				1417	BasicBlock::iterator ItNew = getEntryIns(UI);
				1418	Function *UCallee = UI->getCalledFunction();
				1419	Type *RetType = UCallee->getReturnType();
				1420	B.SetInsertPoint(&*ItNew);
				1421	AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
				1422	std::string(prefix) + UI->getName());
Guillaume Chatelet	ab11b91	2019-09-30 13:34:44 +0000	[diff] [blame]	1423	Alloc->setAlignment(MaybeAlign(
				1424	UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1425	return Alloc;
				1426	}
				1427
				1428	bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
				1429	double& Res0, double& Res1,
				1430	Constant copr0, Constant copr1,
				1431	Constant *copr2) {
				1432	// By default, opr0/opr1/opr3 holds values of float/double type.
				1433	// If they are not float/double, each function has to its
				1434	// operand separately.
				1435	double opr0=0.0, opr1=0.0, opr2=0.0;
				1436	ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
				1437	ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
				1438	ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
				1439	if (fpopr0) {
				1440	opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
				1441	? fpopr0->getValueAPF().convertToDouble()
				1442	: (double)fpopr0->getValueAPF().convertToFloat();
				1443	}
				1444
				1445	if (fpopr1) {
				1446	opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
				1447	? fpopr1->getValueAPF().convertToDouble()
				1448	: (double)fpopr1->getValueAPF().convertToFloat();
				1449	}
				1450
				1451	if (fpopr2) {
				1452	opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
				1453	? fpopr2->getValueAPF().convertToDouble()
				1454	: (double)fpopr2->getValueAPF().convertToFloat();
				1455	}
				1456
				1457	switch (FInfo.getId()) {
				1458	default : return false;
				1459
				1460	case AMDGPULibFunc::EI_ACOS:
				1461	Res0 = acos(opr0);
				1462	return true;
				1463
				1464	case AMDGPULibFunc::EI_ACOSH:
				1465	// acosh(x) == log(x + sqrt(x*x - 1))
				1466	Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
				1467	return true;
				1468
				1469	case AMDGPULibFunc::EI_ACOSPI:
				1470	Res0 = acos(opr0) / MATH_PI;
				1471	return true;
				1472
				1473	case AMDGPULibFunc::EI_ASIN:
				1474	Res0 = asin(opr0);
				1475	return true;
				1476
				1477	case AMDGPULibFunc::EI_ASINH:
				1478	// asinh(x) == log(x + sqrt(x*x + 1))
				1479	Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
				1480	return true;
				1481
				1482	case AMDGPULibFunc::EI_ASINPI:
				1483	Res0 = asin(opr0) / MATH_PI;
				1484	return true;
				1485
				1486	case AMDGPULibFunc::EI_ATAN:
				1487	Res0 = atan(opr0);
				1488	return true;
				1489
				1490	case AMDGPULibFunc::EI_ATANH:
				1491	// atanh(x) == (log(x+1) - log(x-1))/2;
				1492	Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
				1493	return true;
				1494
				1495	case AMDGPULibFunc::EI_ATANPI:
				1496	Res0 = atan(opr0) / MATH_PI;
				1497	return true;
				1498
				1499	case AMDGPULibFunc::EI_CBRT:
				1500	Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
				1501	return true;
				1502
				1503	case AMDGPULibFunc::EI_COS:
				1504	Res0 = cos(opr0);
				1505	return true;
				1506
				1507	case AMDGPULibFunc::EI_COSH:
				1508	Res0 = cosh(opr0);
				1509	return true;
				1510
				1511	case AMDGPULibFunc::EI_COSPI:
				1512	Res0 = cos(MATH_PI * opr0);
				1513	return true;
				1514
				1515	case AMDGPULibFunc::EI_EXP:
				1516	Res0 = exp(opr0);
				1517	return true;
				1518
				1519	case AMDGPULibFunc::EI_EXP2:
				1520	Res0 = pow(2.0, opr0);
				1521	return true;
				1522
				1523	case AMDGPULibFunc::EI_EXP10:
				1524	Res0 = pow(10.0, opr0);
				1525	return true;
				1526
				1527	case AMDGPULibFunc::EI_EXPM1:
				1528	Res0 = exp(opr0) - 1.0;
				1529	return true;
				1530
				1531	case AMDGPULibFunc::EI_LOG:
				1532	Res0 = log(opr0);
				1533	return true;
				1534
				1535	case AMDGPULibFunc::EI_LOG2:
				1536	Res0 = log(opr0) / log(2.0);
				1537	return true;
				1538
				1539	case AMDGPULibFunc::EI_LOG10:
				1540	Res0 = log(opr0) / log(10.0);
				1541	return true;
				1542
				1543	case AMDGPULibFunc::EI_RSQRT:
				1544	Res0 = 1.0 / sqrt(opr0);
				1545	return true;
				1546
				1547	case AMDGPULibFunc::EI_SIN:
				1548	Res0 = sin(opr0);
				1549	return true;
				1550
				1551	case AMDGPULibFunc::EI_SINH:
				1552	Res0 = sinh(opr0);
				1553	return true;
				1554
				1555	case AMDGPULibFunc::EI_SINPI:
				1556	Res0 = sin(MATH_PI * opr0);
				1557	return true;
				1558
				1559	case AMDGPULibFunc::EI_SQRT:
				1560	Res0 = sqrt(opr0);
				1561	return true;
				1562
				1563	case AMDGPULibFunc::EI_TAN:
				1564	Res0 = tan(opr0);
				1565	return true;
				1566
				1567	case AMDGPULibFunc::EI_TANH:
				1568	Res0 = tanh(opr0);
				1569	return true;
				1570
				1571	case AMDGPULibFunc::EI_TANPI:
				1572	Res0 = tan(MATH_PI * opr0);
				1573	return true;
				1574
				1575	case AMDGPULibFunc::EI_RECIP:
				1576	Res0 = 1.0 / opr0;
				1577	return true;
				1578
				1579	// two-arg functions
				1580	case AMDGPULibFunc::EI_DIVIDE:
				1581	Res0 = opr0 / opr1;
				1582	return true;
				1583
				1584	case AMDGPULibFunc::EI_POW:
				1585	case AMDGPULibFunc::EI_POWR:
				1586	Res0 = pow(opr0, opr1);
				1587	return true;
				1588
				1589	case AMDGPULibFunc::EI_POWN: {
				1590	if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
				1591	double val = (double)iopr1->getSExtValue();
				1592	Res0 = pow(opr0, val);
				1593	return true;
				1594	}
				1595	return false;
				1596	}
				1597
				1598	case AMDGPULibFunc::EI_ROOTN: {
				1599	if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
				1600	double val = (double)iopr1->getSExtValue();
				1601	Res0 = pow(opr0, 1.0 / val);
				1602	return true;
				1603	}
				1604	return false;
				1605	}
				1606
				1607	// with ptr arg
				1608	case AMDGPULibFunc::EI_SINCOS:
				1609	Res0 = sin(opr0);
				1610	Res1 = cos(opr0);
				1611	return true;
				1612
				1613	// three-arg functions
				1614	case AMDGPULibFunc::EI_FMA:
				1615	case AMDGPULibFunc::EI_MAD:
				1616	Res0 = opr0 * opr1 + opr2;
				1617	return true;
				1618	}
				1619
				1620	return false;
				1621	}
				1622
				1623	bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
				1624	int numArgs = (int)aCI->getNumArgOperands();
				1625	if (numArgs > 3)
				1626	return false;
				1627
				1628	Constant *copr0 = nullptr;
				1629	Constant *copr1 = nullptr;
				1630	Constant *copr2 = nullptr;
				1631	if (numArgs > 0) {
				1632	if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
				1633	return false;
				1634	}
				1635
				1636	if (numArgs > 1) {
				1637	if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
				1638	if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
				1639	return false;
				1640	}
				1641	}
				1642
				1643	if (numArgs > 2) {
				1644	if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
				1645	return false;
				1646	}
				1647
				1648	// At this point, all arguments to aCI are constants.
				1649
				1650	// max vector size is 16, and sincos will generate two results.
				1651	double DVal0[16], DVal1[16];
				1652	bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
				1653	if (getVecSize(FInfo) == 1) {
				1654	if (!evaluateScalarMathFunc(FInfo, DVal0[0],
				1655	DVal1[0], copr0, copr1, copr2)) {
				1656	return false;
				1657	}
				1658	} else {
				1659	ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
				1660	ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
				1661	ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
				1662	for (int i=0; i < getVecSize(FInfo); ++i) {
				1663	Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
				1664	Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
				1665	Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
				1666	if (!evaluateScalarMathFunc(FInfo, DVal0[i],
				1667	DVal1[i], celt0, celt1, celt2)) {
				1668	return false;
				1669	}
				1670	}
				1671	}
				1672
				1673	LLVMContext &context = CI->getParent()->getParent()->getContext();
				1674	Constant nval0, nval1;
				1675	if (getVecSize(FInfo) == 1) {
				1676	nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
				1677	if (hasTwoResults)
				1678	nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
				1679	} else {
				1680	if (getArgType(FInfo) == AMDGPULibFunc::F32) {
				1681	SmallVector <float, 0> FVal0, FVal1;
				1682	for (int i=0; i < getVecSize(FInfo); ++i)
				1683	FVal0.push_back((float)DVal0[i]);
				1684	ArrayRef<float> tmp0(FVal0);
				1685	nval0 = ConstantDataVector::get(context, tmp0);
				1686	if (hasTwoResults) {
				1687	for (int i=0; i < getVecSize(FInfo); ++i)
				1688	FVal1.push_back((float)DVal1[i]);
				1689	ArrayRef<float> tmp1(FVal1);
				1690	nval1 = ConstantDataVector::get(context, tmp1);
				1691	}
				1692	} else {
				1693	ArrayRef<double> tmp0(DVal0);
				1694	nval0 = ConstantDataVector::get(context, tmp0);
				1695	if (hasTwoResults) {
				1696	ArrayRef<double> tmp1(DVal1);
				1697	nval1 = ConstantDataVector::get(context, tmp1);
				1698	}
				1699	}
				1700	}
				1701
				1702	if (hasTwoResults) {
				1703	// sincos
				1704	assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
				1705	"math function with ptr arg not supported yet");
				1706	new StoreInst(nval1, aCI->getArgOperand(1), aCI);
				1707	}
				1708
				1709	replaceCall(nval0);
				1710	return true;
				1711	}
				1712
				1713	// Public interface to the Simplify LibCalls pass.
Stanislav Mekhanoshin	a9191c8	2019-06-17 17:57:50 +0000	[diff] [blame]	1714	FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt,
				1715	const TargetMachine *TM) {
				1716	return new AMDGPUSimplifyLibCalls(Opt, TM);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1717	}
				1718
				1719	FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
				1720	return new AMDGPUUseNativeCalls();
				1721	}
				1722
Stanislav Mekhanoshin	1d8cf2b	2017-09-29 23:40:19 +0000	[diff] [blame]	1723	static bool setFastFlags(Function &F, const TargetOptions &Options) {
				1724	AttrBuilder B;
				1725
				1726	if (Options.UnsafeFPMath \|\| Options.NoInfsFPMath)
				1727	B.addAttribute("no-infs-fp-math", "true");
				1728	if (Options.UnsafeFPMath \|\| Options.NoNaNsFPMath)
				1729	B.addAttribute("no-nans-fp-math", "true");
				1730	if (Options.UnsafeFPMath) {
				1731	B.addAttribute("less-precise-fpmad", "true");
				1732	B.addAttribute("unsafe-fp-math", "true");
				1733	}
				1734
				1735	if (!B.hasAttributes())
				1736	return false;
				1737
				1738	F.addAttributes(AttributeList::FunctionIndex, B);
				1739
				1740	return true;
				1741	}
				1742
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1743	bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
				1744	if (skipFunction(F))
				1745	return false;
				1746
				1747	bool Changed = false;
				1748	auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
				1749
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1750	LLVM_DEBUG(dbgs() << "AMDIC: process function ";
				1751	F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1752
Stanislav Mekhanoshin	1d8cf2b	2017-09-29 23:40:19 +0000	[diff] [blame]	1753	if (!EnablePreLink)
				1754	Changed \|= setFastFlags(F, Options);
				1755
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1756	for (auto &BB : F) {
				1757	for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
				1758	// Ignore non-calls.
				1759	CallInst *CI = dyn_cast<CallInst>(I);
				1760	++I;
				1761	if (!CI) continue;
				1762
				1763	// Ignore indirect calls.
				1764	Function *Callee = CI->getCalledFunction();
				1765	if (Callee == 0) continue;
				1766
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	1767	LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
				1768	dbgs().flush());
Stanislav Mekhanoshin	7f37794	2017-08-11 16:42:09 +0000	[diff] [blame]	1769	if(Simplifier.fold(CI, AA))
				1770	Changed = true;
				1771	}
				1772	}
				1773	return Changed;
				1774	}
				1775
				1776	bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
				1777	if (skipFunction(F) \|\| UseNative.empty())
				1778	return false;
				1779
				1780	bool Changed = false;
				1781	for (auto &BB : F) {
				1782	for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
				1783	// Ignore non-calls.
				1784	CallInst *CI = dyn_cast<CallInst>(I);
				1785	++I;
				1786	if (!CI) continue;
				1787
				1788	// Ignore indirect calls.
				1789	Function *Callee = CI->getCalledFunction();
				1790	if (Callee == 0) continue;
				1791
				1792	if(Simplifier.useNative(CI))
				1793	Changed = true;
				1794	}
				1795	}
				1796	return Changed;
				1797	}