Blame - lib/Target/PowerPC/PPCISelLowering.cpp - platform/external/llvm

blob: 6a35bafaf4a4fe6c76977f75154a83f4086d0af1 [file] [log] [blame]

Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1	//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
Chris Lattner	081ce94	2007-12-29 20:36:04 +0000	[diff] [blame]	5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements the PPCISelLowering class.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "PPCISelLowering.h"
				15	#include "PPCMachineFunctionInfo.h"
				16	#include "PPCPredicates.h"
				17	#include "PPCTargetMachine.h"
				18	#include "PPCPerfectShuffle.h"
Owen Anderson	1636de9	2007-09-07 04:06:50 +0000	[diff] [blame]	19	#include "llvm/ADT/STLExtras.h"
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	20	#include "llvm/ADT/VectorExtras.h"
				21	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				22	#include "llvm/CodeGen/CallingConvLower.h"
				23	#include "llvm/CodeGen/MachineFrameInfo.h"
				24	#include "llvm/CodeGen/MachineFunction.h"
				25	#include "llvm/CodeGen/MachineInstrBuilder.h"
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	26	#include "llvm/CodeGen/MachineRegisterInfo.h"
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	27	#include "llvm/CodeGen/SelectionDAG.h"
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	28	#include "llvm/Constants.h"
				29	#include "llvm/Function.h"
				30	#include "llvm/Intrinsics.h"
				31	#include "llvm/Support/MathExtras.h"
				32	#include "llvm/Target/TargetOptions.h"
				33	#include "llvm/Support/CommandLine.h"
				34	using namespace llvm;
				35
				36	static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
				37	cl::desc("enable preincrement load/store generation on PPC (experimental)"),
				38	cl::Hidden);
				39
				40	PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
				41	: TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
				42
				43	setPow2DivIsCheap();
				44
				45	// Use _setjmp/_longjmp instead of setjmp/longjmp.
				46	setUseUnderscoreSetJmp(true);
				47	setUseUnderscoreLongJmp(true);
				48
				49	// Set up the register classes.
				50	addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
				51	addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
				52	addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
				53
				54	// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
				55	setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
				56	setLoadXAction(ISD::SEXTLOAD, MVT::i8, Expand);
Chris Lattner	3bc0850	2008-01-17 19:59:44 +0000	[diff] [blame]	57
				58	setTruncStoreAction(MVT::f64, MVT::f32, Expand);
				59
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	60	// PowerPC has pre-inc load and store's.
				61	setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
				62	setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
				63	setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
				64	setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
				65	setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
				66	setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
				67	setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
				68	setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
				69	setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
				70	setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
				71
				72	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				73	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
				74
Dale Johannesen	472d15d	2007-10-06 01:24:11 +0000	[diff] [blame]	75	// Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg)
				76	setConvertAction(MVT::ppcf128, MVT::f64, Expand);
				77	setConvertAction(MVT::ppcf128, MVT::f32, Expand);
Dale Johannesen	3d8578b	2007-10-10 01:01:31 +0000	[diff] [blame]	78	// This is used in the ppcf128->int sequence. Note it has different semantics
				79	// from FP_ROUND: that rounds to nearest, this rounds to zero.
				80	setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
Dale Johannesen	472d15d	2007-10-06 01:24:11 +0000	[diff] [blame]	81
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	82	// PowerPC has no intrinsics for these particular operations
				83	setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
				84	setOperationAction(ISD::MEMSET, MVT::Other, Expand);
				85	setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
				86
				87	// PowerPC has no SREM/UREM instructions
				88	setOperationAction(ISD::SREM, MVT::i32, Expand);
				89	setOperationAction(ISD::UREM, MVT::i32, Expand);
				90	setOperationAction(ISD::SREM, MVT::i64, Expand);
				91	setOperationAction(ISD::UREM, MVT::i64, Expand);
Dan Gohman	c9130bb	2007-10-08 17:28:24 +0000	[diff] [blame]	92
				93	// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
				94	setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
				95	setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
				96	setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
				97	setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
				98	setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
				99	setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
				100	setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
				101	setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	102
Dan Gohman	2f7b198	2007-10-11 23:21:31 +0000	[diff] [blame]	103	// We don't support sin/cos/sqrt/fmod/pow
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	104	setOperationAction(ISD::FSIN , MVT::f64, Expand);
				105	setOperationAction(ISD::FCOS , MVT::f64, Expand);
				106	setOperationAction(ISD::FREM , MVT::f64, Expand);
Dan Gohman	2f7b198	2007-10-11 23:21:31 +0000	[diff] [blame]	107	setOperationAction(ISD::FPOW , MVT::f64, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	108	setOperationAction(ISD::FSIN , MVT::f32, Expand);
				109	setOperationAction(ISD::FCOS , MVT::f32, Expand);
				110	setOperationAction(ISD::FREM , MVT::f32, Expand);
Dan Gohman	2f7b198	2007-10-11 23:21:31 +0000	[diff] [blame]	111	setOperationAction(ISD::FPOW , MVT::f32, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	112
				113	// If we're enabling GP optimizations, use hardware square root
				114	if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
				115	setOperationAction(ISD::FSQRT, MVT::f64, Expand);
				116	setOperationAction(ISD::FSQRT, MVT::f32, Expand);
				117	}
				118
				119	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
				120	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
				121
				122	// PowerPC does not have BSWAP, CTPOP or CTTZ
				123	setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
				124	setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
				125	setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
				126	setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
				127	setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
				128	setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
				129
				130	// PowerPC does not have ROTR
				131	setOperationAction(ISD::ROTR, MVT::i32 , Expand);
				132
				133	// PowerPC does not have Select
				134	setOperationAction(ISD::SELECT, MVT::i32, Expand);
				135	setOperationAction(ISD::SELECT, MVT::i64, Expand);
				136	setOperationAction(ISD::SELECT, MVT::f32, Expand);
				137	setOperationAction(ISD::SELECT, MVT::f64, Expand);
				138
				139	// PowerPC wants to turn select_cc of FP into fsel when possible.
				140	setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
				141	setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
				142
				143	// PowerPC wants to optimize integer setcc a bit
				144	setOperationAction(ISD::SETCC, MVT::i32, Custom);
				145
				146	// PowerPC does not have BRCOND which requires SetCC
				147	setOperationAction(ISD::BRCOND, MVT::Other, Expand);
				148
				149	setOperationAction(ISD::BR_JT, MVT::Other, Expand);
				150
				151	// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
				152	setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
				153
				154	// PowerPC does not have [U\|S]INT_TO_FP
				155	setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
				156	setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
				157
				158	setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
				159	setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
				160	setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
				161	setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
				162
				163	// We cannot sextinreg(i1). Expand to shifts.
				164	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
				165
				166	// Support label based line numbers.
				167	setOperationAction(ISD::LOCATION, MVT::Other, Expand);
				168	setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
Nicolas Geoffray	6186476	2007-12-21 12:19:44 +0000	[diff] [blame]	169
				170	setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
				171	setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
				172	setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
				173	setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
				174
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	175
				176	// We want to legalize GlobalAddress and ConstantPool nodes into the
				177	// appropriate instructions to materialize the address.
				178	setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
				179	setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
				180	setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
				181	setOperationAction(ISD::JumpTable, MVT::i32, Custom);
				182	setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
				183	setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
				184	setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
				185	setOperationAction(ISD::JumpTable, MVT::i64, Custom);
				186
				187	// RET must be custom lowered, to meet ABI requirements
				188	setOperationAction(ISD::RET , MVT::Other, Custom);
Duncan Sands	38947cd	2007-07-27 12:58:54 +0000	[diff] [blame]	189
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	190	// VASTART needs to be custom lowered to use the VarArgsFrameIndex
				191	setOperationAction(ISD::VASTART , MVT::Other, Custom);
				192
				193	// VAARG is custom lowered with ELF 32 ABI
				194	if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
				195	setOperationAction(ISD::VAARG, MVT::Other, Custom);
				196	else
				197	setOperationAction(ISD::VAARG, MVT::Other, Expand);
				198
				199	// Use the default implementation.
				200	setOperationAction(ISD::VACOPY , MVT::Other, Expand);
				201	setOperationAction(ISD::VAEND , MVT::Other, Expand);
				202	setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
				203	setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
				204	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
				205	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
				206
				207	// We want to custom lower some of our intrinsics.
				208	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
				209
				210	if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
				211	// They also have instructions for converting between i64 and fp.
				212	setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
				213	setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
				214	setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
				215	setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
				216	setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
				217
				218	// FIXME: disable this lowered code. This generates 64-bit register values,
				219	// and we don't model the fact that the top part is clobbered by calls. We
				220	// need to flag these together so that the value isn't live across a call.
				221	//setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
				222
				223	// To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
				224	setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
				225	} else {
				226	// PowerPC does not have FP_TO_UINT on 32-bit implementations.
				227	setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
				228	}
				229
				230	if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
Chris Lattner	c882caf	2007-10-19 04:08:28 +0000	[diff] [blame]	231	// 64-bit PowerPC implementations can support i64 types directly
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	232	addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
				233	// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
				234	setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
				235	} else {
Chris Lattner	c882caf	2007-10-19 04:08:28 +0000	[diff] [blame]	236	// 32-bit PowerPC wants to expand i64 shifts itself.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	237	setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
				238	setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
				239	setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
				240	}
				241
				242	if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
				243	// First set operation action for all vector types to expand. Then we
				244	// will selectively turn on ones that can be effectively codegen'd.
				245	for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
				246	VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
				247	// add/sub are legal for all supported vector VT's.
				248	setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
				249	setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
				250
				251	// We promote all shuffles to v16i8.
				252	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
				253	AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);
				254
				255	// We promote all non-typed operations to v4i32.
				256	setOperationAction(ISD::AND , (MVT::ValueType)VT, Promote);
				257	AddPromotedToType (ISD::AND , (MVT::ValueType)VT, MVT::v4i32);
				258	setOperationAction(ISD::OR , (MVT::ValueType)VT, Promote);
				259	AddPromotedToType (ISD::OR , (MVT::ValueType)VT, MVT::v4i32);
				260	setOperationAction(ISD::XOR , (MVT::ValueType)VT, Promote);
				261	AddPromotedToType (ISD::XOR , (MVT::ValueType)VT, MVT::v4i32);
				262	setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Promote);
				263	AddPromotedToType (ISD::LOAD , (MVT::ValueType)VT, MVT::v4i32);
				264	setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
				265	AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
				266	setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
				267	AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);
				268
				269	// No other operations are legal.
				270	setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
				271	setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
				272	setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
				273	setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
				274	setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
				275	setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
Evan Cheng	c5912e3	2007-07-30 07:51:22 +0000	[diff] [blame]	276	setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	277	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				278	setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				279	setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
Dan Gohman	c9130bb	2007-10-08 17:28:24 +0000	[diff] [blame]	280	setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
				281	setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
				282	setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
				283	setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	284	setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
Dan Gohman	4e22ac4	2007-10-12 14:08:57 +0000	[diff] [blame]	285	setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
				286	setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
				287	setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
				288	setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	289	}
				290
				291	// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
				292	// with merges, splats, etc.
				293	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
				294
				295	setOperationAction(ISD::AND , MVT::v4i32, Legal);
				296	setOperationAction(ISD::OR , MVT::v4i32, Legal);
				297	setOperationAction(ISD::XOR , MVT::v4i32, Legal);
				298	setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
				299	setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
				300	setOperationAction(ISD::STORE , MVT::v4i32, Legal);
				301
				302	addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
				303	addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
				304	addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
				305	addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
				306
				307	setOperationAction(ISD::MUL, MVT::v4f32, Legal);
				308	setOperationAction(ISD::MUL, MVT::v4i32, Custom);
				309	setOperationAction(ISD::MUL, MVT::v8i16, Custom);
				310	setOperationAction(ISD::MUL, MVT::v16i8, Custom);
				311
				312	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
				313	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
				314
				315	setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
				316	setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
				317	setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
				318	setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
				319	}
				320
				321	setSetCCResultType(MVT::i32);
				322	setShiftAmountType(MVT::i32);
				323	setSetCCResultContents(ZeroOrOneSetCCResult);
				324
				325	if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
				326	setStackPointerRegisterToSaveRestore(PPC::X1);
				327	setExceptionPointerRegister(PPC::X3);
				328	setExceptionSelectorRegister(PPC::X4);
				329	} else {
				330	setStackPointerRegisterToSaveRestore(PPC::R1);
				331	setExceptionPointerRegister(PPC::R3);
				332	setExceptionSelectorRegister(PPC::R4);
				333	}
				334
				335	// We have target-specific dag combine patterns for the following nodes:
				336	setTargetDAGCombine(ISD::SINT_TO_FP);
				337	setTargetDAGCombine(ISD::STORE);
				338	setTargetDAGCombine(ISD::BR_CC);
				339	setTargetDAGCombine(ISD::BSWAP);
				340
Dale Johannesen	6f3c7bf	2007-10-19 00:59:18 +0000	[diff] [blame]	341	// Darwin long double math library functions have $LDBL128 appended.
				342	if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
Duncan Sands	37a3f47	2008-01-10 10:28:30 +0000	[diff] [blame]	343	setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
Dale Johannesen	6f3c7bf	2007-10-19 00:59:18 +0000	[diff] [blame]	344	setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
				345	setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
Duncan Sands	37a3f47	2008-01-10 10:28:30 +0000	[diff] [blame]	346	setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
				347	setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
Dale Johannesen	6f3c7bf	2007-10-19 00:59:18 +0000	[diff] [blame]	348	}
				349
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	350	computeRegisterProperties();
				351	}
				352
				353	const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
				354	switch (Opcode) {
				355	default: return 0;
				356	case PPCISD::FSEL: return "PPCISD::FSEL";
				357	case PPCISD::FCFID: return "PPCISD::FCFID";
				358	case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
				359	case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
				360	case PPCISD::STFIWX: return "PPCISD::STFIWX";
				361	case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
				362	case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
				363	case PPCISD::VPERM: return "PPCISD::VPERM";
				364	case PPCISD::Hi: return "PPCISD::Hi";
				365	case PPCISD::Lo: return "PPCISD::Lo";
				366	case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
				367	case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
				368	case PPCISD::SRL: return "PPCISD::SRL";
				369	case PPCISD::SRA: return "PPCISD::SRA";
				370	case PPCISD::SHL: return "PPCISD::SHL";
				371	case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
				372	case PPCISD::STD_32: return "PPCISD::STD_32";
				373	case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF";
				374	case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho";
				375	case PPCISD::MTCTR: return "PPCISD::MTCTR";
				376	case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho";
				377	case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF";
				378	case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
				379	case PPCISD::MFCR: return "PPCISD::MFCR";
				380	case PPCISD::VCMP: return "PPCISD::VCMP";
				381	case PPCISD::VCMPo: return "PPCISD::VCMPo";
				382	case PPCISD::LBRX: return "PPCISD::LBRX";
				383	case PPCISD::STBRX: return "PPCISD::STBRX";
				384	case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
				385	}
				386	}
				387
				388	//===----------------------------------------------------------------------===//
				389	// Node matching predicates, for use by the tblgen matching code.
				390	//===----------------------------------------------------------------------===//
				391
				392	/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
				393	static bool isFloatingPointZero(SDOperand Op) {
				394	if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
Dale Johannesen	df8a831	2007-08-31 04:03:46 +0000	[diff] [blame]	395	return CFP->getValueAPF().isZero();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	396	else if (ISD::isEXTLoad(Op.Val) \|\| ISD::isNON_EXTLoad(Op.Val)) {
				397	// Maybe this has already been legalized into the constant pool?
				398	if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
				399	if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
Dale Johannesen	df8a831	2007-08-31 04:03:46 +0000	[diff] [blame]	400	return CFP->getValueAPF().isZero();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	401	}
				402	return false;
				403	}
				404
				405	/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
				406	/// true if Op is undef or if it matches the specified value.
				407	static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
				408	return Op.getOpcode() == ISD::UNDEF \|\|
				409	cast<ConstantSDNode>(Op)->getValue() == Val;
				410	}
				411
				412	/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
				413	/// VPKUHUM instruction.
				414	bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
				415	if (!isUnary) {
				416	for (unsigned i = 0; i != 16; ++i)
				417	if (!isConstantOrUndef(N->getOperand(i), i*2+1))
				418	return false;
				419	} else {
				420	for (unsigned i = 0; i != 8; ++i)
				421	if (!isConstantOrUndef(N->getOperand(i), i*2+1) \|\|
				422	!isConstantOrUndef(N->getOperand(i+8), i*2+1))
				423	return false;
				424	}
				425	return true;
				426	}
				427
				428	/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
				429	/// VPKUWUM instruction.
				430	bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
				431	if (!isUnary) {
				432	for (unsigned i = 0; i != 16; i += 2)
				433	if (!isConstantOrUndef(N->getOperand(i ), i*2+2) \|\|
				434	!isConstantOrUndef(N->getOperand(i+1), i*2+3))
				435	return false;
				436	} else {
				437	for (unsigned i = 0; i != 8; i += 2)
				438	if (!isConstantOrUndef(N->getOperand(i ), i*2+2) \|\|
				439	!isConstantOrUndef(N->getOperand(i+1), i*2+3) \|\|
				440	!isConstantOrUndef(N->getOperand(i+8), i*2+2) \|\|
				441	!isConstantOrUndef(N->getOperand(i+9), i*2+3))
				442	return false;
				443	}
				444	return true;
				445	}
				446
				447	/// isVMerge - Common function, used to match vmrg* shuffles.
				448	///
				449	static bool isVMerge(SDNode *N, unsigned UnitSize,
				450	unsigned LHSStart, unsigned RHSStart) {
				451	assert(N->getOpcode() == ISD::BUILD_VECTOR &&
				452	N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
				453	assert((UnitSize == 1 \|\| UnitSize == 2 \|\| UnitSize == 4) &&
				454	"Unsupported merge size!");
				455
				456	for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
				457	for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
				458	if (!isConstantOrUndef(N->getOperand(iUnitSize2+j),
				459	LHSStart+j+i*UnitSize) \|\|
				460	!isConstantOrUndef(N->getOperand(iUnitSize2+UnitSize+j),
				461	RHSStart+j+i*UnitSize))
				462	return false;
				463	}
				464	return true;
				465	}
				466
				467	/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
				468	/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
				469	bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
				470	if (!isUnary)
				471	return isVMerge(N, UnitSize, 8, 24);
				472	return isVMerge(N, UnitSize, 8, 8);
				473	}
				474
				475	/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
				476	/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
				477	bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
				478	if (!isUnary)
				479	return isVMerge(N, UnitSize, 0, 16);
				480	return isVMerge(N, UnitSize, 0, 0);
				481	}
				482
				483
				484	/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
				485	/// amount, otherwise return -1.
				486	int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
				487	assert(N->getOpcode() == ISD::BUILD_VECTOR &&
				488	N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
				489	// Find the first non-undef value in the shuffle mask.
				490	unsigned i;
				491	for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
				492	/search/;
				493
				494	if (i == 16) return -1; // all undef.
				495
				496	// Otherwise, check to see if the rest of the elements are consequtively
				497	// numbered from this value.
				498	unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
				499	if (ShiftAmt < i) return -1;
				500	ShiftAmt -= i;
				501
				502	if (!isUnary) {
				503	// Check the rest of the elements to see if they are consequtive.
				504	for (++i; i != 16; ++i)
				505	if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
				506	return -1;
				507	} else {
				508	// Check the rest of the elements to see if they are consequtive.
				509	for (++i; i != 16; ++i)
				510	if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
				511	return -1;
				512	}
				513
				514	return ShiftAmt;
				515	}
				516
				517	/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
				518	/// specifies a splat of a single element that is suitable for input to
				519	/// VSPLTB/VSPLTH/VSPLTW.
				520	bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
				521	assert(N->getOpcode() == ISD::BUILD_VECTOR &&
				522	N->getNumOperands() == 16 &&
				523	(EltSize == 1 \|\| EltSize == 2 \|\| EltSize == 4));
				524
				525	// This is a splat operation if each element of the permute is the same, and
				526	// if the value doesn't reference the second vector.
				527	unsigned ElementBase = 0;
				528	SDOperand Elt = N->getOperand(0);
				529	if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
				530	ElementBase = EltV->getValue();
				531	else
				532	return false; // FIXME: Handle UNDEF elements too!
				533
				534	if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
				535	return false;
				536
				537	// Check that they are consequtive.
				538	for (unsigned i = 1; i != EltSize; ++i) {
				539	if (!isa<ConstantSDNode>(N->getOperand(i)) \|\|
				540	cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
				541	return false;
				542	}
				543
				544	assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
				545	for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
				546	if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
				547	assert(isa<ConstantSDNode>(N->getOperand(i)) &&
				548	"Invalid VECTOR_SHUFFLE mask!");
				549	for (unsigned j = 0; j != EltSize; ++j)
				550	if (N->getOperand(i+j) != N->getOperand(j))
				551	return false;
				552	}
				553
				554	return true;
				555	}
				556
Evan Cheng	c5912e3	2007-07-30 07:51:22 +0000	[diff] [blame]	557	/// isAllNegativeZeroVector - Returns true if all elements of build_vector
				558	/// are -0.0.
				559	bool PPC::isAllNegativeZeroVector(SDNode *N) {
				560	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				561	if (PPC::isSplatShuffleMask(N, N->getNumOperands()))
				562	if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N))
Dale Johannesen	df8a831	2007-08-31 04:03:46 +0000	[diff] [blame]	563	return CFP->getValueAPF().isNegZero();
Evan Cheng	c5912e3	2007-07-30 07:51:22 +0000	[diff] [blame]	564	return false;
				565	}
				566
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	567	/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
				568	/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
				569	unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
				570	assert(isSplatShuffleMask(N, EltSize));
				571	return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
				572	}
				573
				574	/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
				575	/// by using a vspltis[bhw] instruction of the specified element size, return
				576	/// the constant being splatted. The ByteSize field indicates the number of
				577	/// bytes of each element [124] -> [bhw].
				578	SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
				579	SDOperand OpVal(0, 0);
				580
				581	// If ByteSize of the splat is bigger than the element size of the
				582	// build_vector, then we have a case where we are checking for a splat where
				583	// multiple elements of the buildvector are folded together into a single
				584	// logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
				585	unsigned EltSize = 16/N->getNumOperands();
				586	if (EltSize < ByteSize) {
				587	unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
				588	SDOperand UniquedVals[4];
				589	assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
				590
				591	// See if all of the elements in the buildvector agree across.
				592	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
				593	if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
				594	// If the element isn't a constant, bail fully out.
				595	if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();
				596
				597
				598	if (UniquedVals[i&(Multiple-1)].Val == 0)
				599	UniquedVals[i&(Multiple-1)] = N->getOperand(i);
				600	else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
				601	return SDOperand(); // no match.
				602	}
				603
				604	// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
				605	// either constant or undef values that are identical for each chunk. See
				606	// if these chunks can form into a larger vspltis*.
				607
				608	// Check to see if all of the leading entries are either 0 or -1. If
				609	// neither, then this won't fit into the immediate field.
				610	bool LeadingZero = true;
				611	bool LeadingOnes = true;
				612	for (unsigned i = 0; i != Multiple-1; ++i) {
				613	if (UniquedVals[i].Val == 0) continue; // Must have been undefs.
				614
				615	LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
				616	LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
				617	}
				618	// Finally, check the least significant entry.
				619	if (LeadingZero) {
				620	if (UniquedVals[Multiple-1].Val == 0)
				621	return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
				622	int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();
				623	if (Val < 16)
				624	return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
				625	}
				626	if (LeadingOnes) {
				627	if (UniquedVals[Multiple-1].Val == 0)
				628	return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
				629	int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();
				630	if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
				631	return DAG.getTargetConstant(Val, MVT::i32);
				632	}
				633
				634	return SDOperand();
				635	}
				636
				637	// Check to see if this buildvec has a single non-undef value in its elements.
				638	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
				639	if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
				640	if (OpVal.Val == 0)
				641	OpVal = N->getOperand(i);
				642	else if (OpVal != N->getOperand(i))
				643	return SDOperand();
				644	}
				645
				646	if (OpVal.Val == 0) return SDOperand(); // All UNDEF: use implicit def.
				647
				648	unsigned ValSizeInBytes = 0;
				649	uint64_t Value = 0;
				650	if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
				651	Value = CN->getValue();
				652	ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
				653	} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
				654	assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
Dale Johannesen	df8a831	2007-08-31 04:03:46 +0000	[diff] [blame]	655	Value = FloatToBits(CN->getValueAPF().convertToFloat());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	656	ValSizeInBytes = 4;
				657	}
				658
				659	// If the splat value is larger than the element value, then we can never do
				660	// this splat. The only case that we could fit the replicated bits into our
				661	// immediate field for would be zero, and we prefer to use vxor for it.
				662	if (ValSizeInBytes < ByteSize) return SDOperand();
				663
				664	// If the element value is larger than the splat value, cut it in half and
				665	// check to see if the two halves are equal. Continue doing this until we
				666	// get to ByteSize. This allows us to handle 0x01010101 as 0x01.
				667	while (ValSizeInBytes > ByteSize) {
				668	ValSizeInBytes >>= 1;
				669
				670	// If the top half equals the bottom half, we're still ok.
				671	if (((Value >> (ValSizeInBytes8)) & ((1 << (8ValSizeInBytes))-1)) !=
				672	(Value & ((1 << (8*ValSizeInBytes))-1)))
				673	return SDOperand();
				674	}
				675
				676	// Properly sign extend the value.
				677	int ShAmt = (4-ByteSize)*8;
				678	int MaskVal = ((int)Value << ShAmt) >> ShAmt;
				679
				680	// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
				681	if (MaskVal == 0) return SDOperand();
				682
				683	// Finally, if this value fits in a 5 bit sext field, return it
				684	if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
				685	return DAG.getTargetConstant(MaskVal, MVT::i32);
				686	return SDOperand();
				687	}
				688
				689	//===----------------------------------------------------------------------===//
				690	// Addressing Mode Selection
				691	//===----------------------------------------------------------------------===//
				692
				693	/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
				694	/// or 64-bit immediate, and if the value can be accurately represented as a
				695	/// sign extension from a 16-bit value. If so, this returns true and the
				696	/// immediate.
				697	static bool isIntS16Immediate(SDNode *N, short &Imm) {
				698	if (N->getOpcode() != ISD::Constant)
				699	return false;
				700
				701	Imm = (short)cast<ConstantSDNode>(N)->getValue();
				702	if (N->getValueType(0) == MVT::i32)
				703	return Imm == (int32_t)cast<ConstantSDNode>(N)->getValue();
				704	else
				705	return Imm == (int64_t)cast<ConstantSDNode>(N)->getValue();
				706	}
				707	static bool isIntS16Immediate(SDOperand Op, short &Imm) {
				708	return isIntS16Immediate(Op.Val, Imm);
				709	}
				710
				711
				712	/// SelectAddressRegReg - Given the specified addressed, check to see if it
				713	/// can be represented as an indexed [r+r] operation. Returns false if it
				714	/// can be more efficiently represented with [r+imm].
				715	bool PPCTargetLowering::SelectAddressRegReg(SDOperand N, SDOperand &Base,
				716	SDOperand &Index,
				717	SelectionDAG &DAG) {
				718	short imm = 0;
				719	if (N.getOpcode() == ISD::ADD) {
				720	if (isIntS16Immediate(N.getOperand(1), imm))
				721	return false; // r+i
				722	if (N.getOperand(1).getOpcode() == PPCISD::Lo)
				723	return false; // r+i
				724
				725	Base = N.getOperand(0);
				726	Index = N.getOperand(1);
				727	return true;
				728	} else if (N.getOpcode() == ISD::OR) {
				729	if (isIntS16Immediate(N.getOperand(1), imm))
				730	return false; // r+i can fold it if we can.
				731
				732	// If this is an or of disjoint bitfields, we can codegen this as an add
				733	// (for better address arithmetic) if the LHS and RHS of the OR are provably
				734	// disjoint.
				735	uint64_t LHSKnownZero, LHSKnownOne;
				736	uint64_t RHSKnownZero, RHSKnownOne;
				737	DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
				738
				739	if (LHSKnownZero) {
				740	DAG.ComputeMaskedBits(N.getOperand(1), ~0U, RHSKnownZero, RHSKnownOne);
				741	// If all of the bits are known zero on the LHS or RHS, the add won't
				742	// carry.
				743	if ((LHSKnownZero \| RHSKnownZero) == ~0U) {
				744	Base = N.getOperand(0);
				745	Index = N.getOperand(1);
				746	return true;
				747	}
				748	}
				749	}
				750
				751	return false;
				752	}
				753
				754	/// Returns true if the address N can be represented by a base register plus
				755	/// a signed 16-bit displacement [r+imm], and if it is not better
				756	/// represented as reg+reg.
				757	bool PPCTargetLowering::SelectAddressRegImm(SDOperand N, SDOperand &Disp,
				758	SDOperand &Base, SelectionDAG &DAG){
				759	// If this can be more profitably realized as r+r, fail.
				760	if (SelectAddressRegReg(N, Disp, Base, DAG))
				761	return false;
				762
				763	if (N.getOpcode() == ISD::ADD) {
				764	short imm = 0;
				765	if (isIntS16Immediate(N.getOperand(1), imm)) {
				766	Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
				767	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
				768	Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
				769	} else {
				770	Base = N.getOperand(0);
				771	}
				772	return true; // [r+i]
				773	} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
				774	// Match LOAD (ADD (X, Lo(G))).
				775	assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()
				776	&& "Cannot handle constant offsets yet!");
				777	Disp = N.getOperand(1).getOperand(0); // The global address.
				778	assert(Disp.getOpcode() == ISD::TargetGlobalAddress \|\|
				779	Disp.getOpcode() == ISD::TargetConstantPool \|\|
				780	Disp.getOpcode() == ISD::TargetJumpTable);
				781	Base = N.getOperand(0);
				782	return true; // [&g+r]
				783	}
				784	} else if (N.getOpcode() == ISD::OR) {
				785	short imm = 0;
				786	if (isIntS16Immediate(N.getOperand(1), imm)) {
				787	// If this is an or of disjoint bitfields, we can codegen this as an add
				788	// (for better address arithmetic) if the LHS and RHS of the OR are
				789	// provably disjoint.
				790	uint64_t LHSKnownZero, LHSKnownOne;
				791	DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
				792	if ((LHSKnownZero\|~(unsigned)imm) == ~0U) {
				793	// If all of the bits are known zero on the LHS or RHS, the add won't
				794	// carry.
				795	Base = N.getOperand(0);
				796	Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
				797	return true;
				798	}
				799	}
				800	} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
				801	// Loading from a constant address.
				802
				803	// If this address fits entirely in a 16-bit sext immediate field, codegen
				804	// this as "d, 0"
				805	short Imm;
				806	if (isIntS16Immediate(CN, Imm)) {
				807	Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
				808	Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
				809	return true;
				810	}
				811
				812	// Handle 32-bit sext immediates with LIS + addr mode.
				813	if (CN->getValueType(0) == MVT::i32 \|\|
				814	(int64_t)CN->getValue() == (int)CN->getValue()) {
				815	int Addr = (int)CN->getValue();
				816
				817	// Otherwise, break this down into an LIS + disp.
				818	Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
				819
				820	Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
				821	unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
				822	Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);
				823	return true;
				824	}
				825	}
				826
				827	Disp = DAG.getTargetConstant(0, getPointerTy());
				828	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
				829	Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
				830	else
				831	Base = N;
				832	return true; // [r+0]
				833	}
				834
				835	/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
				836	/// represented as an indexed [r+r] operation.
				837	bool PPCTargetLowering::SelectAddressRegRegOnly(SDOperand N, SDOperand &Base,
				838	SDOperand &Index,
				839	SelectionDAG &DAG) {
				840	// Check to see if we can easily represent this as an [r+r] address. This
				841	// will fail if it thinks that the address is more profitably represented as
				842	// reg+imm, e.g. where imm = 0.
				843	if (SelectAddressRegReg(N, Base, Index, DAG))
				844	return true;
				845
				846	// If the operand is an addition, always emit this as [r+r], since this is
				847	// better (for code size, and execution, as the memop does the add for free)
				848	// than emitting an explicit add.
				849	if (N.getOpcode() == ISD::ADD) {
				850	Base = N.getOperand(0);
				851	Index = N.getOperand(1);
				852	return true;
				853	}
				854
				855	// Otherwise, do it the hard way, using R0 as the base register.
				856	Base = DAG.getRegister(PPC::R0, N.getValueType());
				857	Index = N;
				858	return true;
				859	}
				860
				861	/// SelectAddressRegImmShift - Returns true if the address N can be
				862	/// represented by a base register plus a signed 14-bit displacement
				863	/// [r+imm*4]. Suitable for use by STD and friends.
				864	bool PPCTargetLowering::SelectAddressRegImmShift(SDOperand N, SDOperand &Disp,
				865	SDOperand &Base,
				866	SelectionDAG &DAG) {
				867	// If this can be more profitably realized as r+r, fail.
				868	if (SelectAddressRegReg(N, Disp, Base, DAG))
				869	return false;
				870
				871	if (N.getOpcode() == ISD::ADD) {
				872	short imm = 0;
				873	if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
				874	Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
				875	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
				876	Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
				877	} else {
				878	Base = N.getOperand(0);
				879	}
				880	return true; // [r+i]
				881	} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
				882	// Match LOAD (ADD (X, Lo(G))).
				883	assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()
				884	&& "Cannot handle constant offsets yet!");
				885	Disp = N.getOperand(1).getOperand(0); // The global address.
				886	assert(Disp.getOpcode() == ISD::TargetGlobalAddress \|\|
				887	Disp.getOpcode() == ISD::TargetConstantPool \|\|
				888	Disp.getOpcode() == ISD::TargetJumpTable);
				889	Base = N.getOperand(0);
				890	return true; // [&g+r]
				891	}
				892	} else if (N.getOpcode() == ISD::OR) {
				893	short imm = 0;
				894	if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
				895	// If this is an or of disjoint bitfields, we can codegen this as an add
				896	// (for better address arithmetic) if the LHS and RHS of the OR are
				897	// provably disjoint.
				898	uint64_t LHSKnownZero, LHSKnownOne;
				899	DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
				900	if ((LHSKnownZero\|~(unsigned)imm) == ~0U) {
				901	// If all of the bits are known zero on the LHS or RHS, the add won't
				902	// carry.
				903	Base = N.getOperand(0);
				904	Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
				905	return true;
				906	}
				907	}
				908	} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
				909	// Loading from a constant address. Verify low two bits are clear.
				910	if ((CN->getValue() & 3) == 0) {
				911	// If this address fits entirely in a 14-bit sext immediate field, codegen
				912	// this as "d, 0"
				913	short Imm;
				914	if (isIntS16Immediate(CN, Imm)) {
				915	Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
				916	Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
				917	return true;
				918	}
				919
				920	// Fold the low-part of 32-bit absolute addresses into addr mode.
				921	if (CN->getValueType(0) == MVT::i32 \|\|
				922	(int64_t)CN->getValue() == (int)CN->getValue()) {
				923	int Addr = (int)CN->getValue();
				924
				925	// Otherwise, break this down into an LIS + disp.
				926	Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
				927
				928	Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
				929	unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
				930	Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);
				931	return true;
				932	}
				933	}
				934	}
				935
				936	Disp = DAG.getTargetConstant(0, getPointerTy());
				937	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
				938	Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
				939	else
				940	Base = N;
				941	return true; // [r+0]
				942	}
				943
				944
				945	/// getPreIndexedAddressParts - returns true by value, base pointer and
				946	/// offset pointer and addressing mode by reference if the node's address
				947	/// can be legally represented as pre-indexed load / store address.
				948	bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
				949	SDOperand &Offset,
				950	ISD::MemIndexedMode &AM,
				951	SelectionDAG &DAG) {
				952	// Disabled by default for now.
				953	if (!EnablePPCPreinc) return false;
				954
				955	SDOperand Ptr;
				956	MVT::ValueType VT;
				957	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
				958	Ptr = LD->getBasePtr();
				959	VT = LD->getLoadedVT();
				960
				961	} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
				962	ST = ST;
				963	Ptr = ST->getBasePtr();
				964	VT = ST->getStoredVT();
				965	} else
				966	return false;
				967
				968	// PowerPC doesn't have preinc load/store instructions for vectors.
				969	if (MVT::isVector(VT))
				970	return false;
				971
				972	// TODO: Check reg+reg first.
				973
				974	// LDU/STU use reg+imm*4, others use reg+imm.
				975	if (VT != MVT::i64) {
				976	// reg + imm
				977	if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
				978	return false;
				979	} else {
				980	// reg + imm * 4.
				981	if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
				982	return false;
				983	}
				984
				985	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
				986	// PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
				987	// sext i32 to i64 when addr mode is r+i.
				988	if (LD->getValueType(0) == MVT::i64 && LD->getLoadedVT() == MVT::i32 &&
				989	LD->getExtensionType() == ISD::SEXTLOAD &&
				990	isa<ConstantSDNode>(Offset))
				991	return false;
				992	}
				993
				994	AM = ISD::PRE_INC;
				995	return true;
				996	}
				997
				998	//===----------------------------------------------------------------------===//
				999	// LowerOperation implementation
				1000	//===----------------------------------------------------------------------===//
				1001
				1002	static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
				1003	MVT::ValueType PtrVT = Op.getValueType();
				1004	ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
				1005	Constant *C = CP->getConstVal();
				1006	SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
				1007	SDOperand Zero = DAG.getConstant(0, PtrVT);
				1008
				1009	const TargetMachine &TM = DAG.getTarget();
				1010
				1011	SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero);
				1012	SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero);
				1013
				1014	// If this is a non-darwin platform, we don't support non-static relo models
				1015	// yet.
				1016	if (TM.getRelocationModel() == Reloc::Static \|\|
				1017	!TM.getSubtarget<PPCSubtarget>().isDarwin()) {
				1018	// Generate non-pic code that has direct accesses to the constant pool.
				1019	// The address of the global is just (hi(&g)+lo(&g)).
				1020	return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
				1021	}
				1022
				1023	if (TM.getRelocationModel() == Reloc::PIC_) {
				1024	// With PIC, the first instruction is actually "GR+hi(&G)".
				1025	Hi = DAG.getNode(ISD::ADD, PtrVT,
				1026	DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
				1027	}
				1028
				1029	Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
				1030	return Lo;
				1031	}
				1032
				1033	static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
				1034	MVT::ValueType PtrVT = Op.getValueType();
				1035	JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
				1036	SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
				1037	SDOperand Zero = DAG.getConstant(0, PtrVT);
				1038
				1039	const TargetMachine &TM = DAG.getTarget();
				1040
				1041	SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero);
				1042	SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero);
				1043
				1044	// If this is a non-darwin platform, we don't support non-static relo models
				1045	// yet.
				1046	if (TM.getRelocationModel() == Reloc::Static \|\|
				1047	!TM.getSubtarget<PPCSubtarget>().isDarwin()) {
				1048	// Generate non-pic code that has direct accesses to the constant pool.
				1049	// The address of the global is just (hi(&g)+lo(&g)).
				1050	return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
				1051	}
				1052
				1053	if (TM.getRelocationModel() == Reloc::PIC_) {
				1054	// With PIC, the first instruction is actually "GR+hi(&G)".
				1055	Hi = DAG.getNode(ISD::ADD, PtrVT,
				1056	DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
				1057	}
				1058
				1059	Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
				1060	return Lo;
				1061	}
				1062
				1063	static SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
				1064	assert(0 && "TLS not implemented for PPC.");
				1065	}
				1066
				1067	static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
				1068	MVT::ValueType PtrVT = Op.getValueType();
				1069	GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
				1070	GlobalValue *GV = GSDN->getGlobal();
				1071	SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
				1072	SDOperand Zero = DAG.getConstant(0, PtrVT);
				1073
				1074	const TargetMachine &TM = DAG.getTarget();
				1075
				1076	SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero);
				1077	SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero);
				1078
				1079	// If this is a non-darwin platform, we don't support non-static relo models
				1080	// yet.
				1081	if (TM.getRelocationModel() == Reloc::Static \|\|
				1082	!TM.getSubtarget<PPCSubtarget>().isDarwin()) {
				1083	// Generate non-pic code that has direct accesses to globals.
				1084	// The address of the global is just (hi(&g)+lo(&g)).
				1085	return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
				1086	}
				1087
				1088	if (TM.getRelocationModel() == Reloc::PIC_) {
				1089	// With PIC, the first instruction is actually "GR+hi(&G)".
				1090	Hi = DAG.getNode(ISD::ADD, PtrVT,
				1091	DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
				1092	}
				1093
				1094	Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
				1095
				1096	if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
				1097	return Lo;
				1098
				1099	// If the global is weak or external, we have to go through the lazy
				1100	// resolution stub.
				1101	return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, NULL, 0);
				1102	}
				1103
				1104	static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
				1105	ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
				1106
				1107	// If we're comparing for equality to zero, expose the fact that this is
				1108	// implented as a ctlz/srl pair on ppc, so that the dag combiner can
				1109	// fold the new nodes.
				1110	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
				1111	if (C->isNullValue() && CC == ISD::SETEQ) {
				1112	MVT::ValueType VT = Op.getOperand(0).getValueType();
				1113	SDOperand Zext = Op.getOperand(0);
				1114	if (VT < MVT::i32) {
				1115	VT = MVT::i32;
				1116	Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
				1117	}
				1118	unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
				1119	SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
				1120	SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
				1121	DAG.getConstant(Log2b, MVT::i32));
				1122	return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
				1123	}
				1124	// Leave comparisons against 0 and -1 alone for now, since they're usually
				1125	// optimized. FIXME: revisit this when we can custom lower all setcc
				1126	// optimizations.
				1127	if (C->isAllOnesValue() \|\| C->isNullValue())
				1128	return SDOperand();
				1129	}
				1130
				1131	// If we have an integer seteq/setne, turn it into a compare against zero
				1132	// by xor'ing the rhs with the lhs, which is faster than setting a
				1133	// condition register, reading it back out, and masking the correct bit. The
				1134	// normal approach here uses sub to do this instead of xor. Using xor exposes
				1135	// the result to other bit-twiddling opportunities.
				1136	MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
				1137	if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ \|\| CC == ISD::SETNE)) {
				1138	MVT::ValueType VT = Op.getValueType();
				1139	SDOperand Sub = DAG.getNode(ISD::XOR, LHSVT, Op.getOperand(0),
				1140	Op.getOperand(1));
				1141	return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
				1142	}
				1143	return SDOperand();
				1144	}
				1145
				1146	static SDOperand LowerVAARG(SDOperand Op, SelectionDAG &DAG,
				1147	int VarArgsFrameIndex,
				1148	int VarArgsStackOffset,
				1149	unsigned VarArgsNumGPR,
				1150	unsigned VarArgsNumFPR,
				1151	const PPCSubtarget &Subtarget) {
				1152
				1153	assert(0 && "VAARG in ELF32 ABI not implemented yet!");
				1154	}
				1155
				1156	static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
				1157	int VarArgsFrameIndex,
				1158	int VarArgsStackOffset,
				1159	unsigned VarArgsNumGPR,
				1160	unsigned VarArgsNumFPR,
				1161	const PPCSubtarget &Subtarget) {
				1162
				1163	if (Subtarget.isMachoABI()) {
				1164	// vastart just stores the address of the VarArgsFrameIndex slot into the
				1165	// memory location argument.
				1166	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				1167	SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
				1168	SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
				1169	return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
				1170	SV->getOffset());
				1171	}
				1172
				1173	// For ELF 32 ABI we follow the layout of the va_list struct.
				1174	// We suppose the given va_list is already allocated.
				1175	//
				1176	// typedef struct {
				1177	// char gpr; /* index into the array of 8 GPRs
				1178	// * stored in the register save area
				1179	// * gpr=0 corresponds to r3,
				1180	// * gpr=1 to r4, etc.
				1181	// */
				1182	// char fpr; /* index into the array of 8 FPRs
				1183	// * stored in the register save area
				1184	// * fpr=0 corresponds to f1,
				1185	// * fpr=1 to f2, etc.
				1186	// */
				1187	// char *overflow_arg_area;
				1188	// /* location on stack that holds
				1189	// * the next overflow argument
				1190	// */
				1191	// char *reg_save_area;
				1192	// /* where r3:r10 and f1:f8 (if saved)
				1193	// * are stored
				1194	// */
				1195	// } va_list[1];
				1196
				1197
				1198	SDOperand ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
				1199	SDOperand ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);
				1200
				1201
				1202	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				1203
				1204	SDOperand StackOffset = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
				1205	SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
				1206
				1207	SDOperand ConstFrameOffset = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8,
				1208	PtrVT);
				1209	SDOperand ConstStackOffset = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8 - 1,
				1210	PtrVT);
				1211	SDOperand ConstFPROffset = DAG.getConstant(1, PtrVT);
				1212
				1213	SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
				1214
				1215	// Store first byte : number of int regs
				1216	SDOperand firstStore = DAG.getStore(Op.getOperand(0), ArgGPR,
				1217	Op.getOperand(1), SV->getValue(),
				1218	SV->getOffset());
				1219	SDOperand nextPtr = DAG.getNode(ISD::ADD, PtrVT, Op.getOperand(1),
				1220	ConstFPROffset);
				1221
				1222	// Store second byte : number of float regs
				1223	SDOperand secondStore = DAG.getStore(firstStore, ArgFPR, nextPtr,
				1224	SV->getValue(), SV->getOffset());
				1225	nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstStackOffset);
				1226
				1227	// Store second word : arguments given on stack
				1228	SDOperand thirdStore = DAG.getStore(secondStore, StackOffset, nextPtr,
				1229	SV->getValue(), SV->getOffset());
				1230	nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstFrameOffset);
				1231
				1232	// Store third word : arguments given in registers
				1233	return DAG.getStore(thirdStore, FR, nextPtr, SV->getValue(),
				1234	SV->getOffset());
				1235
				1236	}
				1237
				1238	#include "PPCGenCallingConv.inc"
				1239
				1240	/// GetFPR - Get the set of FP registers that should be allocated for arguments,
				1241	/// depending on which subtarget is selected.
				1242	static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
				1243	if (Subtarget.isMachoABI()) {
				1244	static const unsigned FPR[] = {
				1245	PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
				1246	PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
				1247	};
				1248	return FPR;
				1249	}
				1250
				1251
				1252	static const unsigned FPR[] = {
				1253	PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
				1254	PPC::F8
				1255	};
				1256	return FPR;
				1257	}
				1258
				1259	static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
				1260	int &VarArgsFrameIndex,
				1261	int &VarArgsStackOffset,
				1262	unsigned &VarArgsNumGPR,
				1263	unsigned &VarArgsNumFPR,
				1264	const PPCSubtarget &Subtarget) {
				1265	// TODO: add description of PPC stack frame format, or at least some docs.
				1266	//
				1267	MachineFunction &MF = DAG.getMachineFunction();
				1268	MachineFrameInfo *MFI = MF.getFrameInfo();
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1269	MachineRegisterInfo &RegInfo = MF.getRegInfo();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1270	SmallVector<SDOperand, 8> ArgValues;
				1271	SDOperand Root = Op.getOperand(0);
				1272
				1273	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				1274	bool isPPC64 = PtrVT == MVT::i64;
				1275	bool isMachoABI = Subtarget.isMachoABI();
				1276	bool isELF32_ABI = Subtarget.isELF32_ABI();
				1277	unsigned PtrByteSize = isPPC64 ? 8 : 4;
				1278
				1279	unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
				1280
				1281	static const unsigned GPR_32[] = { // 32-bit registers.
				1282	PPC::R3, PPC::R4, PPC::R5, PPC::R6,
				1283	PPC::R7, PPC::R8, PPC::R9, PPC::R10,
				1284	};
				1285	static const unsigned GPR_64[] = { // 64-bit registers.
				1286	PPC::X3, PPC::X4, PPC::X5, PPC::X6,
				1287	PPC::X7, PPC::X8, PPC::X9, PPC::X10,
				1288	};
				1289
				1290	static const unsigned *FPR = GetFPR(Subtarget);
				1291
				1292	static const unsigned VR[] = {
				1293	PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
				1294	PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
				1295	};
				1296
Owen Anderson	1636de9	2007-09-07 04:06:50 +0000	[diff] [blame]	1297	const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1298	const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
Owen Anderson	1636de9	2007-09-07 04:06:50 +0000	[diff] [blame]	1299	const unsigned Num_VR_Regs = array_lengthof( VR);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1300
				1301	unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
				1302
				1303	const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
				1304
				1305	// Add DAG nodes to load the arguments or copy them out of registers. On
				1306	// entry to a function on PPC, the arguments start after the linkage area,
				1307	// although the first ones are often in registers.
				1308	//
				1309	// In the ELF 32 ABI, GPRs and stack are double word align: an argument
				1310	// represented with two words (long long or double) must be copied to an
				1311	// even GPR_idx value or to an even ArgOffset value.
				1312
				1313	for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
				1314	SDOperand ArgVal;
				1315	bool needsLoad = false;
				1316	MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
				1317	unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
				1318	unsigned ArgSize = ObjSize;
				1319	unsigned Flags = cast<ConstantSDNode>(Op.getOperand(ArgNo+3))->getValue();
				1320	unsigned AlignFlag = 1 << ISD::ParamFlags::OrigAlignmentOffs;
				1321	// See if next argument requires stack alignment in ELF
				1322	bool Expand = (ObjectVT == MVT::f64) \|\| ((ArgNo + 1 < e) &&
				1323	(cast<ConstantSDNode>(Op.getOperand(ArgNo+4))->getValue() & AlignFlag) &&
				1324	(!(Flags & AlignFlag)));
				1325
				1326	unsigned CurArgOffset = ArgOffset;
				1327	switch (ObjectVT) {
				1328	default: assert(0 && "Unhandled argument type!");
				1329	case MVT::i32:
				1330	// Double word align in ELF
				1331	if (Expand && isELF32_ABI) GPR_idx += (GPR_idx % 2);
				1332	if (GPR_idx != Num_GPR_Regs) {
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1333	unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
				1334	RegInfo.addLiveIn(GPR[GPR_idx], VReg);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1335	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
				1336	++GPR_idx;
				1337	} else {
				1338	needsLoad = true;
				1339	ArgSize = PtrByteSize;
				1340	}
				1341	// Stack align in ELF
				1342	if (needsLoad && Expand && isELF32_ABI)
				1343	ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
				1344	// All int arguments reserve stack space in Macho ABI.
				1345	if (isMachoABI \|\| needsLoad) ArgOffset += PtrByteSize;
				1346	break;
				1347
				1348	case MVT::i64: // PPC64
				1349	if (GPR_idx != Num_GPR_Regs) {
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1350	unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
				1351	RegInfo.addLiveIn(GPR[GPR_idx], VReg);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1352	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
				1353	++GPR_idx;
				1354	} else {
				1355	needsLoad = true;
				1356	}
				1357	// All int arguments reserve stack space in Macho ABI.
				1358	if (isMachoABI \|\| needsLoad) ArgOffset += 8;
				1359	break;
				1360
				1361	case MVT::f32:
				1362	case MVT::f64:
				1363	// Every 4 bytes of argument space consumes one of the GPRs available for
				1364	// argument passing.
				1365	if (GPR_idx != Num_GPR_Regs && isMachoABI) {
				1366	++GPR_idx;
				1367	if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
				1368	++GPR_idx;
				1369	}
				1370	if (FPR_idx != Num_FPR_Regs) {
				1371	unsigned VReg;
				1372	if (ObjectVT == MVT::f32)
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1373	VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1374	else
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1375	VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
				1376	RegInfo.addLiveIn(FPR[FPR_idx], VReg);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1377	ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
				1378	++FPR_idx;
				1379	} else {
				1380	needsLoad = true;
				1381	}
				1382
				1383	// Stack align in ELF
				1384	if (needsLoad && Expand && isELF32_ABI)
				1385	ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
				1386	// All FP arguments reserve stack space in Macho ABI.
				1387	if (isMachoABI \|\| needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
				1388	break;
				1389	case MVT::v4f32:
				1390	case MVT::v4i32:
				1391	case MVT::v8i16:
				1392	case MVT::v16i8:
				1393	// Note that vector arguments in registers don't reserve stack space.
				1394	if (VR_idx != Num_VR_Regs) {
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1395	unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
				1396	RegInfo.addLiveIn(VR[VR_idx], VReg);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1397	ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
				1398	++VR_idx;
				1399	} else {
				1400	// This should be simple, but requires getting 16-byte aligned stack
				1401	// values.
				1402	assert(0 && "Loading VR argument not implemented yet!");
				1403	needsLoad = true;
				1404	}
				1405	break;
				1406	}
				1407
				1408	// We need to load the argument to a virtual register if we determined above
				1409	// that we ran out of physical registers of the appropriate type
				1410	if (needsLoad) {
				1411	// If the argument is actually used, emit a load from the right stack
				1412	// slot.
				1413	if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
				1414	int FI = MFI->CreateFixedObject(ObjSize,
				1415	CurArgOffset + (ArgSize - ObjSize));
				1416	SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
				1417	ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
				1418	} else {
				1419	// Don't emit a dead load.
				1420	ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
				1421	}
				1422	}
				1423
				1424	ArgValues.push_back(ArgVal);
				1425	}
				1426
				1427	// If the function takes variable number of arguments, make a frame index for
				1428	// the start of the first vararg value... for expansion of llvm.va_start.
				1429	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1430	if (isVarArg) {
				1431
				1432	int depth;
				1433	if (isELF32_ABI) {
				1434	VarArgsNumGPR = GPR_idx;
				1435	VarArgsNumFPR = FPR_idx;
				1436
				1437	// Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
				1438	// pointer.
				1439	depth = -(Num_GPR_Regs * MVT::getSizeInBits(PtrVT)/8 +
				1440	Num_FPR_Regs * MVT::getSizeInBits(MVT::f64)/8 +
				1441	MVT::getSizeInBits(PtrVT)/8);
				1442
				1443	VarArgsStackOffset = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
				1444	ArgOffset);
				1445
				1446	}
				1447	else
				1448	depth = ArgOffset;
				1449
				1450	VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
				1451	depth);
				1452	SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
				1453
				1454	SmallVector<SDOperand, 8> MemOps;
				1455
				1456	// In ELF 32 ABI, the fixed integer arguments of a variadic function are
				1457	// stored to the VarArgsFrameIndex on the stack.
				1458	if (isELF32_ABI) {
				1459	for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
				1460	SDOperand Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
				1461	SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
				1462	MemOps.push_back(Store);
				1463	// Increment the address by four for the next argument to store
				1464	SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
				1465	FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
				1466	}
				1467	}
				1468
				1469	// If this function is vararg, store any remaining integer argument regs
				1470	// to their spots on the stack so that they may be loaded by dereferencing the
				1471	// result of va_next.
				1472	for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
				1473	unsigned VReg;
				1474	if (isPPC64)
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1475	VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1476	else
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1477	VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1478
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1479	RegInfo.addLiveIn(GPR[GPR_idx], VReg);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1480	SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
				1481	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1482	MemOps.push_back(Store);
				1483	// Increment the address by four for the next argument to store
				1484	SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
				1485	FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
				1486	}
				1487
				1488	// In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
				1489	// on the stack.
				1490	if (isELF32_ABI) {
				1491	for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
				1492	SDOperand Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
				1493	SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
				1494	MemOps.push_back(Store);
				1495	// Increment the address by eight for the next argument to store
				1496	SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8,
				1497	PtrVT);
				1498	FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
				1499	}
				1500
				1501	for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
				1502	unsigned VReg;
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1503	VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1504
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1505	RegInfo.addLiveIn(FPR[FPR_idx], VReg);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1506	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::f64);
				1507	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1508	MemOps.push_back(Store);
				1509	// Increment the address by eight for the next argument to store
				1510	SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8,
				1511	PtrVT);
				1512	FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
				1513	}
				1514	}
				1515
				1516	if (!MemOps.empty())
				1517	Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
				1518	}
				1519
				1520	ArgValues.push_back(Root);
				1521
				1522	// Return the new list of results.
				1523	std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
				1524	Op.Val->value_end());
				1525	return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
				1526	}
				1527
				1528	/// isCallCompatibleAddress - Return the immediate to use if the specified
				1529	/// 32-bit value is representable in the immediate field of a BxA instruction.
				1530	static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
				1531	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
				1532	if (!C) return 0;
				1533
				1534	int Addr = C->getValue();
				1535	if ((Addr & 3) != 0 \|\| // Low 2 bits are implicitly zero.
				1536	(Addr << 6 >> 6) != Addr)
				1537	return 0; // Top 6 bits have to be sext of immediate.
				1538
Evan Cheng	282c646	2007-10-22 19:46:19 +0000	[diff] [blame]	1539	return DAG.getConstant((int)C->getValue() >> 2,
				1540	DAG.getTargetLoweringInfo().getPointerTy()).Val;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1541	}
				1542
				1543
				1544	static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
				1545	const PPCSubtarget &Subtarget) {
				1546	SDOperand Chain = Op.getOperand(0);
				1547	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1548	SDOperand Callee = Op.getOperand(4);
				1549	unsigned NumOps = (Op.getNumOperands() - 5) / 2;
				1550
				1551	bool isMachoABI = Subtarget.isMachoABI();
				1552	bool isELF32_ABI = Subtarget.isELF32_ABI();
				1553
				1554	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				1555	bool isPPC64 = PtrVT == MVT::i64;
				1556	unsigned PtrByteSize = isPPC64 ? 8 : 4;
				1557
				1558	// args_to_use will accumulate outgoing args for the PPCISD::CALL case in
				1559	// SelectExpr to use to put the arguments in the appropriate registers.
				1560	std::vector<SDOperand> args_to_use;
				1561
				1562	// Count how many bytes are to be pushed on the stack, including the linkage
				1563	// area, and parameter passing area. We start with 24/48 bytes, which is
				1564	// prereserved space for [SP][CR][LR][3 x unused].
				1565	unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
				1566
				1567	// Add up all the space actually used.
				1568	for (unsigned i = 0; i != NumOps; ++i) {
				1569	unsigned ArgSize =MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;
				1570	ArgSize = std::max(ArgSize, PtrByteSize);
				1571	NumBytes += ArgSize;
				1572	}
				1573
				1574	// The prolog code of the callee may store up to 8 GPR argument registers to
				1575	// the stack, allowing va_start to index over them in memory if its varargs.
				1576	// Because we cannot tell if this is needed on the caller side, we have to
				1577	// conservatively assume that it is needed. As such, make sure we have at
				1578	// least enough stack space for the caller to store the 8 GPRs.
				1579	NumBytes = std::max(NumBytes,
				1580	PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
				1581
				1582	// Adjust the stack pointer for the new arguments...
				1583	// These operations are automatically eliminated by the prolog/epilog pass
				1584	Chain = DAG.getCALLSEQ_START(Chain,
				1585	DAG.getConstant(NumBytes, PtrVT));
				1586
				1587	// Set up a copy of the stack pointer for use loading and storing any
				1588	// arguments that may not fit in the registers available for argument
				1589	// passing.
				1590	SDOperand StackPtr;
				1591	if (isPPC64)
				1592	StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
				1593	else
				1594	StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
				1595
				1596	// Figure out which arguments are going to go in registers, and which in
				1597	// memory. Also, if this is a vararg function, floating point operations
				1598	// must be stored to our stack, and loaded into integer regs as well, if
				1599	// any integer regs are available for argument passing.
				1600	unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
				1601	unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
				1602
				1603	static const unsigned GPR_32[] = { // 32-bit registers.
				1604	PPC::R3, PPC::R4, PPC::R5, PPC::R6,
				1605	PPC::R7, PPC::R8, PPC::R9, PPC::R10,
				1606	};
				1607	static const unsigned GPR_64[] = { // 64-bit registers.
				1608	PPC::X3, PPC::X4, PPC::X5, PPC::X6,
				1609	PPC::X7, PPC::X8, PPC::X9, PPC::X10,
				1610	};
				1611	static const unsigned *FPR = GetFPR(Subtarget);
				1612
				1613	static const unsigned VR[] = {
				1614	PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
				1615	PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
				1616	};
Owen Anderson	1636de9	2007-09-07 04:06:50 +0000	[diff] [blame]	1617	const unsigned NumGPRs = array_lengthof(GPR_32);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1618	const unsigned NumFPRs = isMachoABI ? 13 : 8;
Owen Anderson	1636de9	2007-09-07 04:06:50 +0000	[diff] [blame]	1619	const unsigned NumVRs = array_lengthof( VR);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1620
				1621	const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
				1622
				1623	std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
				1624	SmallVector<SDOperand, 8> MemOpChains;
				1625	for (unsigned i = 0; i != NumOps; ++i) {
				1626	bool inMem = false;
				1627	SDOperand Arg = Op.getOperand(5+2*i);
				1628	unsigned Flags = cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
				1629	unsigned AlignFlag = 1 << ISD::ParamFlags::OrigAlignmentOffs;
				1630	// See if next argument requires stack alignment in ELF
				1631	unsigned next = 5+2*(i+1)+1;
				1632	bool Expand = (Arg.getValueType() == MVT::f64) \|\| ((i + 1 < NumOps) &&
				1633	(cast<ConstantSDNode>(Op.getOperand(next))->getValue() & AlignFlag) &&
				1634	(!(Flags & AlignFlag)));
				1635
				1636	// PtrOff will be used to store the current argument to the stack if a
				1637	// register cannot be found for it.
				1638	SDOperand PtrOff;
				1639
				1640	// Stack align in ELF 32
				1641	if (isELF32_ABI && Expand)
				1642	PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
				1643	StackPtr.getValueType());
				1644	else
				1645	PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
				1646
				1647	PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
				1648
				1649	// On PPC64, promote integers to 64-bit values.
				1650	if (isPPC64 && Arg.getValueType() == MVT::i32) {
				1651	unsigned ExtOp = (Flags & 1) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
				1652
				1653	Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
				1654	}
				1655
				1656	switch (Arg.getValueType()) {
				1657	default: assert(0 && "Unexpected ValueType for argument!");
				1658	case MVT::i32:
				1659	case MVT::i64:
				1660	// Double word align in ELF
				1661	if (isELF32_ABI && Expand) GPR_idx += (GPR_idx % 2);
				1662	if (GPR_idx != NumGPRs) {
				1663	RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
				1664	} else {
				1665	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1666	inMem = true;
				1667	}
				1668	if (inMem \|\| isMachoABI) {
				1669	// Stack align in ELF
				1670	if (isELF32_ABI && Expand)
				1671	ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
				1672
				1673	ArgOffset += PtrByteSize;
				1674	}
				1675	break;
				1676	case MVT::f32:
				1677	case MVT::f64:
				1678	if (isVarArg) {
				1679	// Float varargs need to be promoted to double.
				1680	if (Arg.getValueType() == MVT::f32)
				1681	Arg = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Arg);
				1682	}
				1683
				1684	if (FPR_idx != NumFPRs) {
				1685	RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
				1686
				1687	if (isVarArg) {
				1688	SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
				1689	MemOpChains.push_back(Store);
				1690
				1691	// Float varargs are always shadowed in available integer registers
				1692	if (GPR_idx != NumGPRs) {
				1693	SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
				1694	MemOpChains.push_back(Load.getValue(1));
				1695	if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
				1696	Load));
				1697	}
				1698	if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
				1699	SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());
				1700	PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff, ConstFour);
				1701	SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
				1702	MemOpChains.push_back(Load.getValue(1));
				1703	if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
				1704	Load));
				1705	}
				1706	} else {
				1707	// If we have any FPRs remaining, we may also have GPRs remaining.
				1708	// Args passed in FPRs consume either 1 (f32) or 2 (f64) available
				1709	// GPRs.
				1710	if (isMachoABI) {
				1711	if (GPR_idx != NumGPRs)
				1712	++GPR_idx;
				1713	if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
				1714	!isPPC64) // PPC64 has 64-bit GPR's obviously :)
				1715	++GPR_idx;
				1716	}
				1717	}
				1718	} else {
				1719	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1720	inMem = true;
				1721	}
				1722	if (inMem \|\| isMachoABI) {
				1723	// Stack align in ELF
				1724	if (isELF32_ABI && Expand)
				1725	ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
				1726	if (isPPC64)
				1727	ArgOffset += 8;
				1728	else
				1729	ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
				1730	}
				1731	break;
				1732	case MVT::v4f32:
				1733	case MVT::v4i32:
				1734	case MVT::v8i16:
				1735	case MVT::v16i8:
				1736	assert(!isVarArg && "Don't support passing vectors to varargs yet!");
				1737	assert(VR_idx != NumVRs &&
				1738	"Don't support passing more than 12 vector args yet!");
				1739	RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
				1740	break;
				1741	}
				1742	}
				1743	if (!MemOpChains.empty())
				1744	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1745	&MemOpChains[0], MemOpChains.size());
				1746
				1747	// Build a sequence of copy-to-reg nodes chained together with token chain
				1748	// and flag operands which copy the outgoing args into the appropriate regs.
				1749	SDOperand InFlag;
				1750	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1751	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1752	InFlag);
				1753	InFlag = Chain.getValue(1);
				1754	}
				1755
				1756	// With the ELF 32 ABI, set CR6 to true if this is a vararg call.
				1757	if (isVarArg && isELF32_ABI) {
				1758	SDOperand SetCR(DAG.getTargetNode(PPC::SETCR, MVT::i32), 0);
				1759	Chain = DAG.getCopyToReg(Chain, PPC::CR6, SetCR, InFlag);
				1760	InFlag = Chain.getValue(1);
				1761	}
				1762
				1763	std::vector<MVT::ValueType> NodeTys;
				1764	NodeTys.push_back(MVT::Other); // Returns a chain
				1765	NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
				1766
				1767	SmallVector<SDOperand, 8> Ops;
				1768	unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF;
				1769
				1770	// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
				1771	// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
				1772	// node so that legalize doesn't hack it.
Nicolas Geoffray	455a2e0	2007-12-21 12:22:29 +0000	[diff] [blame]	1773	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
				1774	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
				1775	else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1776	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
				1777	else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
				1778	// If this is an absolute destination address, use the munged value.
				1779	Callee = SDOperand(Dest, 0);
				1780	else {
				1781	// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
				1782	// to do the call, we can't use PPCISD::CALL.
				1783	SDOperand MTCTROps[] = {Chain, Callee, InFlag};
				1784	Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, MTCTROps, 2+(InFlag.Val!=0));
				1785	InFlag = Chain.getValue(1);
				1786
				1787	// Copy the callee address into R12 on darwin.
				1788	if (isMachoABI) {
				1789	Chain = DAG.getCopyToReg(Chain, PPC::R12, Callee, InFlag);
				1790	InFlag = Chain.getValue(1);
				1791	}
				1792
				1793	NodeTys.clear();
				1794	NodeTys.push_back(MVT::Other);
				1795	NodeTys.push_back(MVT::Flag);
				1796	Ops.push_back(Chain);
				1797	CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
				1798	Callee.Val = 0;
				1799	}
				1800
				1801	// If this is a direct call, pass the chain and the callee.
				1802	if (Callee.Val) {
				1803	Ops.push_back(Chain);
				1804	Ops.push_back(Callee);
				1805	}
				1806
				1807	// Add argument registers to the end of the list so that they are known live
				1808	// into the call.
				1809	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1810	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1811	RegsToPass[i].second.getValueType()));
				1812
				1813	if (InFlag.Val)
				1814	Ops.push_back(InFlag);
				1815	Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
				1816	InFlag = Chain.getValue(1);
				1817
Bill Wendling	22f8deb	2007-11-13 00:44:25 +0000	[diff] [blame]	1818	Chain = DAG.getCALLSEQ_END(Chain,
				1819	DAG.getConstant(NumBytes, PtrVT),
				1820	DAG.getConstant(0, PtrVT),
				1821	InFlag);
				1822	if (Op.Val->getValueType(0) != MVT::Other)
				1823	InFlag = Chain.getValue(1);
				1824
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1825	SDOperand ResultVals[3];
				1826	unsigned NumResults = 0;
				1827	NodeTys.clear();
				1828
				1829	// If the call has results, copy the values out of the ret val registers.
				1830	switch (Op.Val->getValueType(0)) {
				1831	default: assert(0 && "Unexpected ret value!");
				1832	case MVT::Other: break;
				1833	case MVT::i32:
				1834	if (Op.Val->getValueType(1) == MVT::i32) {
				1835	Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
				1836	ResultVals[0] = Chain.getValue(0);
				1837	Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32,
				1838	Chain.getValue(2)).getValue(1);
				1839	ResultVals[1] = Chain.getValue(0);
				1840	NumResults = 2;
				1841	NodeTys.push_back(MVT::i32);
				1842	} else {
				1843	Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
				1844	ResultVals[0] = Chain.getValue(0);
				1845	NumResults = 1;
				1846	}
				1847	NodeTys.push_back(MVT::i32);
				1848	break;
				1849	case MVT::i64:
				1850	Chain = DAG.getCopyFromReg(Chain, PPC::X3, MVT::i64, InFlag).getValue(1);
				1851	ResultVals[0] = Chain.getValue(0);
				1852	NumResults = 1;
				1853	NodeTys.push_back(MVT::i64);
				1854	break;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1855	case MVT::f64:
Dale Johannesen	ac77b27	2007-10-05 20:04:43 +0000	[diff] [blame]	1856	if (Op.Val->getValueType(1) == MVT::f64) {
				1857	Chain = DAG.getCopyFromReg(Chain, PPC::F1, MVT::f64, InFlag).getValue(1);
				1858	ResultVals[0] = Chain.getValue(0);
				1859	Chain = DAG.getCopyFromReg(Chain, PPC::F2, MVT::f64,
				1860	Chain.getValue(2)).getValue(1);
				1861	ResultVals[1] = Chain.getValue(0);
				1862	NumResults = 2;
				1863	NodeTys.push_back(MVT::f64);
				1864	NodeTys.push_back(MVT::f64);
				1865	break;
				1866	}
				1867	// else fall through
				1868	case MVT::f32:
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1869	Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),
				1870	InFlag).getValue(1);
				1871	ResultVals[0] = Chain.getValue(0);
				1872	NumResults = 1;
				1873	NodeTys.push_back(Op.Val->getValueType(0));
				1874	break;
				1875	case MVT::v4f32:
				1876	case MVT::v4i32:
				1877	case MVT::v8i16:
				1878	case MVT::v16i8:
				1879	Chain = DAG.getCopyFromReg(Chain, PPC::V2, Op.Val->getValueType(0),
				1880	InFlag).getValue(1);
				1881	ResultVals[0] = Chain.getValue(0);
				1882	NumResults = 1;
				1883	NodeTys.push_back(Op.Val->getValueType(0));
				1884	break;
				1885	}
				1886
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1887	NodeTys.push_back(MVT::Other);
				1888
				1889	// If the function returns void, just return the chain.
				1890	if (NumResults == 0)
				1891	return Chain;
				1892
				1893	// Otherwise, merge everything together with a MERGE_VALUES node.
				1894	ResultVals[NumResults++] = Chain;
				1895	SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
				1896	ResultVals, NumResults);
				1897	return Res.getValue(Op.ResNo);
				1898	}
				1899
				1900	static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
				1901	SmallVector<CCValAssign, 16> RVLocs;
				1902	unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
				1903	bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
				1904	CCState CCInfo(CC, isVarArg, TM, RVLocs);
				1905	CCInfo.AnalyzeReturn(Op.Val, RetCC_PPC);
				1906
				1907	// If this is the first return lowered for this function, add the regs to the
				1908	// liveout set for the function.
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1909	if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1910	for (unsigned i = 0; i != RVLocs.size(); ++i)
Chris Lattner	1b98919	2007-12-31 04:13:23 +0000	[diff] [blame]	1911	DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1912	}
				1913
				1914	SDOperand Chain = Op.getOperand(0);
				1915	SDOperand Flag;
				1916
				1917	// Copy the result values into the output registers.
				1918	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				1919	CCValAssign &VA = RVLocs[i];
				1920	assert(VA.isRegLoc() && "Can only return in registers!");
				1921	Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
				1922	Flag = Chain.getValue(1);
				1923	}
				1924
				1925	if (Flag.Val)
				1926	return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain, Flag);
				1927	else
				1928	return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain);
				1929	}
				1930
				1931	static SDOperand LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG,
				1932	const PPCSubtarget &Subtarget) {
				1933	// When we pop the dynamic allocation we need to restore the SP link.
				1934
				1935	// Get the corect type for pointers.
				1936	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				1937
				1938	// Construct the stack pointer operand.
				1939	bool IsPPC64 = Subtarget.isPPC64();
				1940	unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
				1941	SDOperand StackPtr = DAG.getRegister(SP, PtrVT);
				1942
				1943	// Get the operands for the STACKRESTORE.
				1944	SDOperand Chain = Op.getOperand(0);
				1945	SDOperand SaveSP = Op.getOperand(1);
				1946
				1947	// Load the old link SP.
				1948	SDOperand LoadLinkSP = DAG.getLoad(PtrVT, Chain, StackPtr, NULL, 0);
				1949
				1950	// Restore the stack pointer.
				1951	Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), SP, SaveSP);
				1952
				1953	// Store the old link SP.
				1954	return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0);
				1955	}
				1956
				1957	static SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG,
				1958	const PPCSubtarget &Subtarget) {
				1959	MachineFunction &MF = DAG.getMachineFunction();
				1960	bool IsPPC64 = Subtarget.isPPC64();
				1961	bool isMachoABI = Subtarget.isMachoABI();
				1962
				1963	// Get current frame pointer save index. The users of this index will be
				1964	// primarily DYNALLOC instructions.
				1965	PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
				1966	int FPSI = FI->getFramePointerSaveIndex();
				1967
				1968	// If the frame pointer save index hasn't been defined yet.
				1969	if (!FPSI) {
				1970	// Find out what the fix offset of the frame pointer save area.
				1971	int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);
				1972
				1973	// Allocate the frame index for frame pointer save area.
				1974	FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
				1975	// Save the result.
				1976	FI->setFramePointerSaveIndex(FPSI);
				1977	}
				1978
				1979	// Get the inputs.
				1980	SDOperand Chain = Op.getOperand(0);
				1981	SDOperand Size = Op.getOperand(1);
				1982
				1983	// Get the corect type for pointers.
				1984	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				1985	// Negate the size.
				1986	SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT,
				1987	DAG.getConstant(0, PtrVT), Size);
				1988	// Construct a node for the frame pointer save index.
				1989	SDOperand FPSIdx = DAG.getFrameIndex(FPSI, PtrVT);
				1990	// Build a DYNALLOC node.
				1991	SDOperand Ops[3] = { Chain, NegSize, FPSIdx };
				1992	SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
				1993	return DAG.getNode(PPCISD::DYNALLOC, VTs, Ops, 3);
				1994	}
				1995
				1996
				1997	/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
				1998	/// possible.
				1999	static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
				2000	// Not FP? Not a fsel.
				2001	if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) \|\|
				2002	!MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
				2003	return SDOperand();
				2004
				2005	ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
				2006
				2007	// Cannot handle SETEQ/SETNE.
				2008	if (CC == ISD::SETEQ \|\| CC == ISD::SETNE) return SDOperand();
				2009
				2010	MVT::ValueType ResVT = Op.getValueType();
				2011	MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
				2012	SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
				2013	SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);
				2014
				2015	// If the RHS of the comparison is a 0.0, we don't need to do the
				2016	// subtraction at all.
				2017	if (isFloatingPointZero(RHS))
				2018	switch (CC) {
				2019	default: break; // SETUO etc aren't handled by fsel.
				2020	case ISD::SETULT:
				2021	case ISD::SETOLT:
				2022	case ISD::SETLT:
				2023	std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
				2024	case ISD::SETUGE:
				2025	case ISD::SETOGE:
				2026	case ISD::SETGE:
				2027	if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
				2028	LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
				2029	return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
				2030	case ISD::SETUGT:
				2031	case ISD::SETOGT:
				2032	case ISD::SETGT:
				2033	std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
				2034	case ISD::SETULE:
				2035	case ISD::SETOLE:
				2036	case ISD::SETLE:
				2037	if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
				2038	LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
				2039	return DAG.getNode(PPCISD::FSEL, ResVT,
				2040	DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
				2041	}
				2042
Chris Lattner	a216bee	2007-10-15 20:14:52 +0000	[diff] [blame]	2043	SDOperand Cmp;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2044	switch (CC) {
				2045	default: break; // SETUO etc aren't handled by fsel.
				2046	case ISD::SETULT:
				2047	case ISD::SETOLT:
				2048	case ISD::SETLT:
				2049	Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
				2050	if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
				2051	Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
				2052	return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
				2053	case ISD::SETUGE:
				2054	case ISD::SETOGE:
				2055	case ISD::SETGE:
				2056	Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
				2057	if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
				2058	Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
				2059	return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
				2060	case ISD::SETUGT:
				2061	case ISD::SETOGT:
				2062	case ISD::SETGT:
				2063	Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
				2064	if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
				2065	Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
				2066	return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
				2067	case ISD::SETULE:
				2068	case ISD::SETOLE:
				2069	case ISD::SETLE:
				2070	Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
				2071	if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
				2072	Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
				2073	return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
				2074	}
				2075	return SDOperand();
				2076	}
				2077
Chris Lattner	2877109	2007-11-28 18:44:47 +0000	[diff] [blame]	2078	// FIXME: Split this code up when LegalizeDAGTypes lands.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2079	static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
				2080	assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
				2081	SDOperand Src = Op.getOperand(0);
				2082	if (Src.getValueType() == MVT::f32)
				2083	Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
				2084
				2085	SDOperand Tmp;
				2086	switch (Op.getValueType()) {
				2087	default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
				2088	case MVT::i32:
				2089	Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
				2090	break;
				2091	case MVT::i64:
				2092	Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
				2093	break;
				2094	}
				2095
				2096	// Convert the FP value to an int value through memory.
Chris Lattner	a216bee	2007-10-15 20:14:52 +0000	[diff] [blame]	2097	SDOperand FIPtr = DAG.CreateStackTemporary(MVT::f64);
				2098
				2099	// Emit a store to the stack slot.
				2100	SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Tmp, FIPtr, NULL, 0);
				2101
				2102	// Result is a load from the stack slot. If loading 4 bytes, make sure to
				2103	// add in a bias.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2104	if (Op.getValueType() == MVT::i32)
Chris Lattner	a216bee	2007-10-15 20:14:52 +0000	[diff] [blame]	2105	FIPtr = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr,
				2106	DAG.getConstant(4, FIPtr.getValueType()));
				2107	return DAG.getLoad(Op.getValueType(), Chain, FIPtr, NULL, 0);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2108	}
				2109
Dale Johannesen	3d8578b	2007-10-10 01:01:31 +0000	[diff] [blame]	2110	static SDOperand LowerFP_ROUND_INREG(SDOperand Op, SelectionDAG &DAG) {
				2111	assert(Op.getValueType() == MVT::ppcf128);
				2112	SDNode *Node = Op.Val;
				2113	assert(Node->getOperand(0).getValueType() == MVT::ppcf128);
Chris Lattner	c882caf	2007-10-19 04:08:28 +0000	[diff] [blame]	2114	assert(Node->getOperand(0).Val->getOpcode() == ISD::BUILD_PAIR);
Dale Johannesen	3d8578b	2007-10-10 01:01:31 +0000	[diff] [blame]	2115	SDOperand Lo = Node->getOperand(0).Val->getOperand(0);
				2116	SDOperand Hi = Node->getOperand(0).Val->getOperand(1);
				2117
				2118	// This sequence changes FPSCR to do round-to-zero, adds the two halves
				2119	// of the long double, and puts FPSCR back the way it was. We do not
				2120	// actually model FPSCR.
				2121	std::vector<MVT::ValueType> NodeTys;
				2122	SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg;
				2123
				2124	NodeTys.push_back(MVT::f64); // Return register
				2125	NodeTys.push_back(MVT::Flag); // Returns a flag for later insns
				2126	Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);
				2127	MFFSreg = Result.getValue(0);
				2128	InFlag = Result.getValue(1);
				2129
				2130	NodeTys.clear();
				2131	NodeTys.push_back(MVT::Flag); // Returns a flag
				2132	Ops[0] = DAG.getConstant(31, MVT::i32);
				2133	Ops[1] = InFlag;
				2134	Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2);
				2135	InFlag = Result.getValue(0);
				2136
				2137	NodeTys.clear();
				2138	NodeTys.push_back(MVT::Flag); // Returns a flag
				2139	Ops[0] = DAG.getConstant(30, MVT::i32);
				2140	Ops[1] = InFlag;
				2141	Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2);
				2142	InFlag = Result.getValue(0);
				2143
				2144	NodeTys.clear();
				2145	NodeTys.push_back(MVT::f64); // result of add
				2146	NodeTys.push_back(MVT::Flag); // Returns a flag
				2147	Ops[0] = Lo;
				2148	Ops[1] = Hi;
				2149	Ops[2] = InFlag;
				2150	Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3);
				2151	FPreg = Result.getValue(0);
				2152	InFlag = Result.getValue(1);
				2153
				2154	NodeTys.clear();
				2155	NodeTys.push_back(MVT::f64);
				2156	Ops[0] = DAG.getConstant(1, MVT::i32);
				2157	Ops[1] = MFFSreg;
				2158	Ops[2] = FPreg;
				2159	Ops[3] = InFlag;
				2160	Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4);
				2161	FPreg = Result.getValue(0);
				2162
				2163	// We know the low half is about to be thrown away, so just use something
				2164	// convenient.
				2165	return DAG.getNode(ISD::BUILD_PAIR, Lo.getValueType(), FPreg, FPreg);
				2166	}
				2167
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2168	static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
				2169	if (Op.getOperand(0).getValueType() == MVT::i64) {
				2170	SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
				2171	SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
				2172	if (Op.getValueType() == MVT::f32)
Chris Lattner	5872a36	2008-01-17 07:00:52 +0000	[diff] [blame]	2173	FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2174	return FP;
				2175	}
				2176
				2177	assert(Op.getOperand(0).getValueType() == MVT::i32 &&
				2178	"Unhandled SINT_TO_FP type in custom expander!");
				2179	// Since we only generate this in 64-bit mode, we can take advantage of
				2180	// 64-bit registers. In particular, sign extend the input value into the
				2181	// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
				2182	// then lfd it and fcfid it.
				2183	MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
				2184	int FrameIdx = FrameInfo->CreateStackObject(8, 8);
				2185	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				2186	SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
				2187
				2188	SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
				2189	Op.getOperand(0));
				2190
				2191	// STD the extended value into the stack slot.
				2192	SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
				2193	DAG.getEntryNode(), Ext64, FIdx,
				2194	DAG.getSrcValue(NULL));
				2195	// Load the value as a double.
				2196	SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, NULL, 0);
				2197
				2198	// FCFID it and return it.
				2199	SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
				2200	if (Op.getValueType() == MVT::f32)
Chris Lattner	5872a36	2008-01-17 07:00:52 +0000	[diff] [blame]	2201	FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2202	return FP;
				2203	}
				2204
				2205	static SDOperand LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
				2206	assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
				2207	Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
				2208
				2209	// Expand into a bunch of logical ops. Note that these ops
				2210	// depend on the PPC behavior for oversized shift amounts.
				2211	SDOperand Lo = Op.getOperand(0);
				2212	SDOperand Hi = Op.getOperand(1);
				2213	SDOperand Amt = Op.getOperand(2);
				2214
				2215	SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
				2216	DAG.getConstant(32, MVT::i32), Amt);
				2217	SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
				2218	SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
				2219	SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
				2220	SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
				2221	DAG.getConstant(-32U, MVT::i32));
				2222	SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
				2223	SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
				2224	SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
				2225	SDOperand OutOps[] = { OutLo, OutHi };
				2226	return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32),
				2227	OutOps, 2);
				2228	}
				2229
				2230	static SDOperand LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) {
				2231	assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
				2232	Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
				2233
				2234	// Otherwise, expand into a bunch of logical ops. Note that these ops
				2235	// depend on the PPC behavior for oversized shift amounts.
				2236	SDOperand Lo = Op.getOperand(0);
				2237	SDOperand Hi = Op.getOperand(1);
				2238	SDOperand Amt = Op.getOperand(2);
				2239
				2240	SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
				2241	DAG.getConstant(32, MVT::i32), Amt);
				2242	SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
				2243	SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
				2244	SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
				2245	SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
				2246	DAG.getConstant(-32U, MVT::i32));
				2247	SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
				2248	SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
				2249	SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
				2250	SDOperand OutOps[] = { OutLo, OutHi };
				2251	return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32),
				2252	OutOps, 2);
				2253	}
				2254
				2255	static SDOperand LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) {
				2256	assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
				2257	Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
				2258
				2259	// Otherwise, expand into a bunch of logical ops, followed by a select_cc.
				2260	SDOperand Lo = Op.getOperand(0);
				2261	SDOperand Hi = Op.getOperand(1);
				2262	SDOperand Amt = Op.getOperand(2);
				2263
				2264	SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
				2265	DAG.getConstant(32, MVT::i32), Amt);
				2266	SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
				2267	SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
				2268	SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
				2269	SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
				2270	DAG.getConstant(-32U, MVT::i32));
				2271	SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
				2272	SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
				2273	SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
				2274	Tmp4, Tmp6, ISD::SETLE);
				2275	SDOperand OutOps[] = { OutLo, OutHi };
				2276	return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32),
				2277	OutOps, 2);
				2278	}
				2279
				2280	//===----------------------------------------------------------------------===//
				2281	// Vector related lowering.
				2282	//
				2283
				2284	// If this is a vector of constants or undefs, get the bits. A bit in
				2285	// UndefBits is set if the corresponding element of the vector is an
				2286	// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
				2287	// zero. Return true if this is not an array of constants, false if it is.
				2288	//
				2289	static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
				2290	uint64_t UndefBits[2]) {
				2291	// Start with zero'd results.
				2292	VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
				2293
				2294	unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
				2295	for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
				2296	SDOperand OpVal = BV->getOperand(i);
				2297
				2298	unsigned PartNo = i >= e/2; // In the upper 128 bits?
				2299	unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
				2300
				2301	uint64_t EltBits = 0;
				2302	if (OpVal.getOpcode() == ISD::UNDEF) {
				2303	uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
				2304	UndefBits[PartNo] \|= EltUndefBits << (SlotNo*EltBitSize);
				2305	continue;
				2306	} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
				2307	EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
				2308	} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
				2309	assert(CN->getValueType(0) == MVT::f32 &&
				2310	"Only one legal FP vector type!");
Dale Johannesen	df8a831	2007-08-31 04:03:46 +0000	[diff] [blame]	2311	EltBits = FloatToBits(CN->getValueAPF().convertToFloat());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2312	} else {
				2313	// Nonconstant element.
				2314	return true;
				2315	}
				2316
				2317	VectorBits[PartNo] \|= EltBits << (SlotNo*EltBitSize);
				2318	}
				2319
				2320	//printf("%llx %llx %llx %llx\n",
				2321	// VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
				2322	return false;
				2323	}
				2324
				2325	// If this is a splat (repetition) of a value across the whole vector, return
				2326	// the smallest size that splats it. For example, "0x01010101010101..." is a
				2327	// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
				2328	// SplatSize = 1 byte.
				2329	static bool isConstantSplat(const uint64_t Bits128[2],
				2330	const uint64_t Undef128[2],
				2331	unsigned &SplatBits, unsigned &SplatUndef,
				2332	unsigned &SplatSize) {
				2333
				2334	// Don't let undefs prevent splats from matching. See if the top 64-bits are
				2335	// the same as the lower 64-bits, ignoring undefs.
				2336	if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
				2337	return false; // Can't be a splat if two pieces don't match.
				2338
				2339	uint64_t Bits64 = Bits128[0] \| Bits128[1];
				2340	uint64_t Undef64 = Undef128[0] & Undef128[1];
				2341
				2342	// Check that the top 32-bits are the same as the lower 32-bits, ignoring
				2343	// undefs.
				2344	if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
				2345	return false; // Can't be a splat if two pieces don't match.
				2346
				2347	uint32_t Bits32 = uint32_t(Bits64) \| uint32_t(Bits64 >> 32);
				2348	uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
				2349
				2350	// If the top 16-bits are different than the lower 16-bits, ignoring
				2351	// undefs, we have an i32 splat.
				2352	if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
				2353	SplatBits = Bits32;
				2354	SplatUndef = Undef32;
				2355	SplatSize = 4;
				2356	return true;
				2357	}
				2358
				2359	uint16_t Bits16 = uint16_t(Bits32) \| uint16_t(Bits32 >> 16);
				2360	uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
				2361
				2362	// If the top 8-bits are different than the lower 8-bits, ignoring
				2363	// undefs, we have an i16 splat.
				2364	if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
				2365	SplatBits = Bits16;
				2366	SplatUndef = Undef16;
				2367	SplatSize = 2;
				2368	return true;
				2369	}
				2370
				2371	// Otherwise, we have an 8-bit splat.
				2372	SplatBits = uint8_t(Bits16) \| uint8_t(Bits16 >> 8);
				2373	SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
				2374	SplatSize = 1;
				2375	return true;
				2376	}
				2377
				2378	/// BuildSplatI - Build a canonical splati of Val with an element size of
				2379	/// SplatSize. Cast the result to VT.
				2380	static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
				2381	SelectionDAG &DAG) {
				2382	assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
				2383
				2384	static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
				2385	MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
				2386	};
				2387
				2388	MVT::ValueType ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
				2389
				2390	// Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
				2391	if (Val == -1)
				2392	SplatSize = 1;
				2393
				2394	MVT::ValueType CanonicalVT = VTys[SplatSize-1];
				2395
				2396	// Build a canonical splat for this value.
				2397	SDOperand Elt = DAG.getConstant(Val, MVT::getVectorElementType(CanonicalVT));
				2398	SmallVector<SDOperand, 8> Ops;
				2399	Ops.assign(MVT::getVectorNumElements(CanonicalVT), Elt);
				2400	SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT,
				2401	&Ops[0], Ops.size());
				2402	return DAG.getNode(ISD::BIT_CONVERT, ReqVT, Res);
				2403	}
				2404
				2405	/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
				2406	/// specified intrinsic ID.
				2407	static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
				2408	SelectionDAG &DAG,
				2409	MVT::ValueType DestVT = MVT::Other) {
				2410	if (DestVT == MVT::Other) DestVT = LHS.getValueType();
				2411	return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
				2412	DAG.getConstant(IID, MVT::i32), LHS, RHS);
				2413	}
				2414
				2415	/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
				2416	/// specified intrinsic ID.
				2417	static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
				2418	SDOperand Op2, SelectionDAG &DAG,
				2419	MVT::ValueType DestVT = MVT::Other) {
				2420	if (DestVT == MVT::Other) DestVT = Op0.getValueType();
				2421	return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
				2422	DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
				2423	}
				2424
				2425
				2426	/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
				2427	/// amount. The result has the specified value type.
				2428	static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
				2429	MVT::ValueType VT, SelectionDAG &DAG) {
				2430	// Force LHS/RHS to be the right type.
				2431	LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
				2432	RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);
				2433
				2434	SDOperand Ops[16];
				2435	for (unsigned i = 0; i != 16; ++i)
				2436	Ops[i] = DAG.getConstant(i+Amt, MVT::i32);
				2437	SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
				2438	DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops,16));
				2439	return DAG.getNode(ISD::BIT_CONVERT, VT, T);
				2440	}
				2441
				2442	// If this is a case we can't handle, return null and let the default
				2443	// expansion code take care of it. If we CAN select this case, and if it
				2444	// selects to a single instruction, return Op. Otherwise, if we can codegen
				2445	// this case more efficiently than a constant pool load, lower it to the
				2446	// sequence of ops that should be used.
				2447	static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				2448	// If this is a vector of constants or undefs, get the bits. A bit in
				2449	// UndefBits is set if the corresponding element of the vector is an
				2450	// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
				2451	// zero.
				2452	uint64_t VectorBits[2];
				2453	uint64_t UndefBits[2];
				2454	if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
				2455	return SDOperand(); // Not a constant vector.
				2456
				2457	// If this is a splat (repetition) of a value across the whole vector, return
				2458	// the smallest size that splats it. For example, "0x01010101010101..." is a
				2459	// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
				2460	// SplatSize = 1 byte.
				2461	unsigned SplatBits, SplatUndef, SplatSize;
				2462	if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
				2463	bool HasAnyUndefs = (UndefBits[0] \| UndefBits[1]) != 0;
				2464
				2465	// First, handle single instruction cases.
				2466
				2467	// All zeros?
				2468	if (SplatBits == 0) {
				2469	// Canonicalize all zero vectors to be v4i32.
				2470	if (Op.getValueType() != MVT::v4i32 \|\| HasAnyUndefs) {
				2471	SDOperand Z = DAG.getConstant(0, MVT::i32);
				2472	Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
				2473	Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
				2474	}
				2475	return Op;
				2476	}
				2477
				2478	// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
				2479	int32_t SextVal= int32_t(SplatBits << (32-8SplatSize)) >> (32-8SplatSize);
				2480	if (SextVal >= -16 && SextVal <= 15)
				2481	return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);
				2482
				2483
				2484	// Two instruction sequences.
				2485
				2486	// If this value is in the range [-32,30] and is even, use:
				2487	// tmp = VSPLTI[bhw], result = add tmp, tmp
				2488	if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
				2489	Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
				2490	return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
				2491	}
				2492
				2493	// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
				2494	// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
				2495	// for fneg/fabs.
				2496	if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
				2497	// Make -1 and vspltisw -1:
				2498	SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);
				2499
				2500	// Make the VSLW intrinsic, computing 0x8000_0000.
				2501	SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
				2502	OnesV, DAG);
				2503
				2504	// xor by OnesV to invert it.
				2505	Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
				2506	return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
				2507	}
				2508
				2509	// Check to see if this is a wide variety of vsplti*, binop self cases.
				2510	unsigned SplatBitSize = SplatSize*8;
				2511	static const signed char SplatCsts[] = {
				2512	-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
				2513	-8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
				2514	};
				2515
Owen Anderson	1636de9	2007-09-07 04:06:50 +0000	[diff] [blame]	2516	for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2517	// Indirect through the SplatCsts array so that we favor 'vsplti -1' for
				2518	// cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
				2519	int i = SplatCsts[idx];
				2520
				2521	// Figure out what shift amount will be used by altivec if shifted by i in
				2522	// this splat size.
				2523	unsigned TypeShiftAmt = i & (SplatBitSize-1);
				2524
				2525	// vsplti + shl self.
				2526	if (SextVal == (i << (int)TypeShiftAmt)) {
				2527	SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
				2528	static const unsigned IIDs[] = { // Intrinsic to use for each size.
				2529	Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
				2530	Intrinsic::ppc_altivec_vslw
				2531	};
				2532	Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
				2533	return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
				2534	}
				2535
				2536	// vsplti + srl self.
				2537	if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
				2538	SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
				2539	static const unsigned IIDs[] = { // Intrinsic to use for each size.
				2540	Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
				2541	Intrinsic::ppc_altivec_vsrw
				2542	};
				2543	Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
				2544	return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
				2545	}
				2546
				2547	// vsplti + sra self.
				2548	if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
				2549	SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
				2550	static const unsigned IIDs[] = { // Intrinsic to use for each size.
				2551	Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
				2552	Intrinsic::ppc_altivec_vsraw
				2553	};
				2554	Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
				2555	return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
				2556	}
				2557
				2558	// vsplti + rol self.
				2559	if (SextVal == (int)(((unsigned)i << TypeShiftAmt) \|
				2560	((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
				2561	SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
				2562	static const unsigned IIDs[] = { // Intrinsic to use for each size.
				2563	Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
				2564	Intrinsic::ppc_altivec_vrlw
				2565	};
				2566	Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
				2567	return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
				2568	}
				2569
				2570	// t = vsplti c, result = vsldoi t, t, 1
				2571	if (SextVal == ((i << 8) \| (i >> (TypeShiftAmt-8)))) {
				2572	SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
				2573	return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
				2574	}
				2575	// t = vsplti c, result = vsldoi t, t, 2
				2576	if (SextVal == ((i << 16) \| (i >> (TypeShiftAmt-16)))) {
				2577	SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
				2578	return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
				2579	}
				2580	// t = vsplti c, result = vsldoi t, t, 3
				2581	if (SextVal == ((i << 24) \| (i >> (TypeShiftAmt-24)))) {
				2582	SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
				2583	return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
				2584	}
				2585	}
				2586
				2587	// Three instruction sequences.
				2588
				2589	// Odd, in range [17,31]: (vsplti C)-(vsplti -16).
				2590	if (SextVal >= 0 && SextVal <= 31) {
				2591	SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG);
				2592	SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
Dale Johannesen	6fdf931	2007-10-14 01:58:32 +0000	[diff] [blame]	2593	LHS = DAG.getNode(ISD::SUB, LHS.getValueType(), LHS, RHS);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2594	return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
				2595	}
				2596	// Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
				2597	if (SextVal >= -31 && SextVal <= 0) {
				2598	SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG);
				2599	SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
Dale Johannesen	6fdf931	2007-10-14 01:58:32 +0000	[diff] [blame]	2600	LHS = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2601	return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
				2602	}
				2603	}
				2604
				2605	return SDOperand();
				2606	}
				2607
				2608	/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
				2609	/// the specified operations to build the shuffle.
				2610	static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,
				2611	SDOperand RHS, SelectionDAG &DAG) {
				2612	unsigned OpNum = (PFEntry >> 26) & 0x0F;
				2613	unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
				2614	unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
				2615
				2616	enum {
				2617	OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
				2618	OP_VMRGHW,
				2619	OP_VMRGLW,
				2620	OP_VSPLTISW0,
				2621	OP_VSPLTISW1,
				2622	OP_VSPLTISW2,
				2623	OP_VSPLTISW3,
				2624	OP_VSLDOI4,
				2625	OP_VSLDOI8,
				2626	OP_VSLDOI12
				2627	};
				2628
				2629	if (OpNum == OP_COPY) {
				2630	if (LHSID == (19+2)9+3) return LHS;
				2631	assert(LHSID == ((49+5)9+6)*9+7 && "Illegal OP_COPY!");
				2632	return RHS;
				2633	}
				2634
				2635	SDOperand OpLHS, OpRHS;
				2636	OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);
				2637	OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);
				2638
				2639	unsigned ShufIdxs[16];
				2640	switch (OpNum) {
				2641	default: assert(0 && "Unknown i32 permute!");
				2642	case OP_VMRGHW:
				2643	ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
				2644	ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
				2645	ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
				2646	ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
				2647	break;
				2648	case OP_VMRGLW:
				2649	ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
				2650	ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
				2651	ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
				2652	ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
				2653	break;
				2654	case OP_VSPLTISW0:
				2655	for (unsigned i = 0; i != 16; ++i)
				2656	ShufIdxs[i] = (i&3)+0;
				2657	break;
				2658	case OP_VSPLTISW1:
				2659	for (unsigned i = 0; i != 16; ++i)
				2660	ShufIdxs[i] = (i&3)+4;
				2661	break;
				2662	case OP_VSPLTISW2:
				2663	for (unsigned i = 0; i != 16; ++i)
				2664	ShufIdxs[i] = (i&3)+8;
				2665	break;
				2666	case OP_VSPLTISW3:
				2667	for (unsigned i = 0; i != 16; ++i)
				2668	ShufIdxs[i] = (i&3)+12;
				2669	break;
				2670	case OP_VSLDOI4:
				2671	return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);
				2672	case OP_VSLDOI8:
				2673	return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);
				2674	case OP_VSLDOI12:
				2675	return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);
				2676	}
				2677	SDOperand Ops[16];
				2678	for (unsigned i = 0; i != 16; ++i)
				2679	Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i32);
				2680
				2681	return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,
				2682	DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16));
				2683	}
				2684
				2685	/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
				2686	/// is a shuffle we can handle in a single instruction, return it. Otherwise,
				2687	/// return the code it can be lowered into. Worst case, it can always be
				2688	/// lowered into a vperm.
				2689	static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
				2690	SDOperand V1 = Op.getOperand(0);
				2691	SDOperand V2 = Op.getOperand(1);
				2692	SDOperand PermMask = Op.getOperand(2);
				2693
				2694	// Cases that are handled by instructions that take permute immediates
				2695	// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
				2696	// selected by the instruction selector.
				2697	if (V2.getOpcode() == ISD::UNDEF) {
				2698	if (PPC::isSplatShuffleMask(PermMask.Val, 1) \|\|
				2699	PPC::isSplatShuffleMask(PermMask.Val, 2) \|\|
				2700	PPC::isSplatShuffleMask(PermMask.Val, 4) \|\|
				2701	PPC::isVPKUWUMShuffleMask(PermMask.Val, true) \|\|
				2702	PPC::isVPKUHUMShuffleMask(PermMask.Val, true) \|\|
				2703	PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 \|\|
				2704	PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) \|\|
				2705	PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) \|\|
				2706	PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) \|\|
				2707	PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) \|\|
				2708	PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) \|\|
				2709	PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
				2710	return Op;
				2711	}
				2712	}
				2713
				2714	// Altivec has a variety of "shuffle immediates" that take two vector inputs
				2715	// and produce a fixed permutation. If any of these match, do not lower to
				2716	// VPERM.
				2717	if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) \|\|
				2718	PPC::isVPKUHUMShuffleMask(PermMask.Val, false) \|\|
				2719	PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 \|\|
				2720	PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) \|\|
				2721	PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) \|\|
				2722	PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) \|\|
				2723	PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) \|\|
				2724	PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) \|\|
				2725	PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
				2726	return Op;
				2727
				2728	// Check to see if this is a shuffle of 4-byte values. If so, we can use our
				2729	// perfect shuffle table to emit an optimal matching sequence.
				2730	unsigned PFIndexes[4];
				2731	bool isFourElementShuffle = true;
				2732	for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
				2733	unsigned EltNo = 8; // Start out undef.
				2734	for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
				2735	if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
				2736	continue; // Undef, ignore it.
				2737
				2738	unsigned ByteSource =
				2739	cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
				2740	if ((ByteSource & 3) != j) {
				2741	isFourElementShuffle = false;
				2742	break;
				2743	}
				2744
				2745	if (EltNo == 8) {
				2746	EltNo = ByteSource/4;
				2747	} else if (EltNo != ByteSource/4) {
				2748	isFourElementShuffle = false;
				2749	break;
				2750	}
				2751	}
				2752	PFIndexes[i] = EltNo;
				2753	}
				2754
				2755	// If this shuffle can be expressed as a shuffle of 4-byte elements, use the
				2756	// perfect shuffle vector to determine if it is cost effective to do this as
				2757	// discrete instructions, or whether we should use a vperm.
				2758	if (isFourElementShuffle) {
				2759	// Compute the index in the perfect shuffle table.
				2760	unsigned PFTableIndex =
				2761	PFIndexes[0]999+PFIndexes[1]99+PFIndexes[2]9+PFIndexes[3];
				2762
				2763	unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
				2764	unsigned Cost = (PFEntry >> 30);
				2765
				2766	// Determining when to avoid vperm is tricky. Many things affect the cost
				2767	// of vperm, particularly how many times the perm mask needs to be computed.
				2768	// For example, if the perm mask can be hoisted out of a loop or is already
				2769	// used (perhaps because there are multiple permutes with the same shuffle
				2770	// mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
				2771	// the loop requires an extra register.
				2772	//
				2773	// As a compromise, we only emit discrete instructions if the shuffle can be
				2774	// generated in 3 or fewer operations. When we have loop information
				2775	// available, if this block is within a loop, we should avoid using vperm
				2776	// for 3-operation perms and use a constant pool load instead.
				2777	if (Cost < 3)
				2778	return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);
				2779	}
				2780
				2781	// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
				2782	// vector that will get spilled to the constant pool.
				2783	if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
				2784
				2785	// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
				2786	// that it is in input element units, not in bytes. Convert now.
				2787	MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
				2788	unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
				2789
				2790	SmallVector<SDOperand, 16> ResultMask;
				2791	for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
				2792	unsigned SrcElt;
				2793	if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
				2794	SrcElt = 0;
				2795	else
				2796	SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
				2797
				2798	for (unsigned j = 0; j != BytesPerElement; ++j)
				2799	ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
				2800	MVT::i8));
				2801	}
				2802
				2803	SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
				2804	&ResultMask[0], ResultMask.size());
				2805	return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
				2806	}
				2807
				2808	/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
				2809	/// altivec comparison. If it is, return true and fill in Opc/isDot with
				2810	/// information about the intrinsic.
				2811	static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,
				2812	bool &isDot) {
				2813	unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();
				2814	CompareOpc = -1;
				2815	isDot = false;
				2816	switch (IntrinsicID) {
				2817	default: return false;
				2818	// Comparison predicates.
				2819	case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
				2820	case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
				2821	case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
				2822	case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
				2823	case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
				2824	case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
				2825	case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
				2826	case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
				2827	case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
				2828	case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
				2829	case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
				2830	case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
				2831	case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
				2832
				2833	// Normal Comparisons.
				2834	case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
				2835	case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
				2836	case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
				2837	case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
				2838	case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
				2839	case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
				2840	case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
				2841	case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
				2842	case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
				2843	case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
				2844	case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
				2845	case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
				2846	case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
				2847	}
				2848	return true;
				2849	}
				2850
				2851	/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
				2852	/// lower, do it, otherwise return null.
				2853	static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
				2854	// If this is a lowered altivec predicate compare, CompareOpc is set to the
				2855	// opcode number of the comparison.
				2856	int CompareOpc;
				2857	bool isDot;
				2858	if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
				2859	return SDOperand(); // Don't custom lower most intrinsics.
				2860
				2861	// If this is a non-dot comparison, make the VCMP node and we are done.
				2862	if (!isDot) {
				2863	SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
				2864	Op.getOperand(1), Op.getOperand(2),
				2865	DAG.getConstant(CompareOpc, MVT::i32));
				2866	return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);
				2867	}
				2868
				2869	// Create the PPCISD altivec 'dot' comparison node.
				2870	SDOperand Ops[] = {
				2871	Op.getOperand(2), // LHS
				2872	Op.getOperand(3), // RHS
				2873	DAG.getConstant(CompareOpc, MVT::i32)
				2874	};
				2875	std::vector<MVT::ValueType> VTs;
				2876	VTs.push_back(Op.getOperand(2).getValueType());
				2877	VTs.push_back(MVT::Flag);
				2878	SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);
				2879
				2880	// Now that we have the comparison, emit a copy from the CR to a GPR.
				2881	// This is flagged to the above dot comparison.
				2882	SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
				2883	DAG.getRegister(PPC::CR6, MVT::i32),
				2884	CompNode.getValue(1));
				2885
				2886	// Unpack the result based on how the target uses it.
				2887	unsigned BitNo; // Bit # of CR6.
				2888	bool InvertBit; // Invert result?
				2889	switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
				2890	default: // Can't happen, don't crash on invalid number though.
				2891	case 0: // Return the value of the EQ bit of CR6.
				2892	BitNo = 0; InvertBit = false;
				2893	break;
				2894	case 1: // Return the inverted value of the EQ bit of CR6.
				2895	BitNo = 0; InvertBit = true;
				2896	break;
				2897	case 2: // Return the value of the LT bit of CR6.
				2898	BitNo = 2; InvertBit = false;
				2899	break;
				2900	case 3: // Return the inverted value of the LT bit of CR6.
				2901	BitNo = 2; InvertBit = true;
				2902	break;
				2903	}
				2904
				2905	// Shift the bit into the low position.
				2906	Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
				2907	DAG.getConstant(8-(3-BitNo), MVT::i32));
				2908	// Isolate the bit.
				2909	Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
				2910	DAG.getConstant(1, MVT::i32));
				2911
				2912	// If we are supposed to, toggle the bit.
				2913	if (InvertBit)
				2914	Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
				2915	DAG.getConstant(1, MVT::i32));
				2916	return Flags;
				2917	}
				2918
				2919	static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				2920	// Create a stack slot that is 16-byte aligned.
				2921	MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
				2922	int FrameIdx = FrameInfo->CreateStackObject(16, 16);
				2923	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				2924	SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
				2925
				2926	// Store the input value into Value#0 of the stack slot.
				2927	SDOperand Store = DAG.getStore(DAG.getEntryNode(),
				2928	Op.getOperand(0), FIdx, NULL, 0);
				2929	// Load it out.
				2930	return DAG.getLoad(Op.getValueType(), Store, FIdx, NULL, 0);
				2931	}
				2932
				2933	static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
				2934	if (Op.getValueType() == MVT::v4i32) {
				2935	SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
				2936
				2937	SDOperand Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG);
				2938	SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt.
				2939
				2940	SDOperand RHSSwap = // = vrlw RHS, 16
				2941	BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG);
				2942
				2943	// Shrinkify inputs to v8i16.
				2944	LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS);
				2945	RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS);
				2946	RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap);
				2947
				2948	// Low parts multiplied together, generating 32-bit results (we ignore the
				2949	// top parts).
				2950	SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
				2951	LHS, RHS, DAG, MVT::v4i32);
				2952
				2953	SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
				2954	LHS, RHSSwap, Zero, DAG, MVT::v4i32);
				2955	// Shift the high parts up 16 bits.
				2956	HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG);
				2957	return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd);
				2958	} else if (Op.getValueType() == MVT::v8i16) {
				2959	SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
				2960
				2961	SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG);
				2962
				2963	return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
				2964	LHS, RHS, Zero, DAG);
				2965	} else if (Op.getValueType() == MVT::v16i8) {
				2966	SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
				2967
				2968	// Multiply the even 8-bit parts, producing 16-bit sums.
				2969	SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
				2970	LHS, RHS, DAG, MVT::v8i16);
				2971	EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts);
				2972
				2973	// Multiply the odd 8-bit parts, producing 16-bit sums.
				2974	SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
				2975	LHS, RHS, DAG, MVT::v8i16);
				2976	OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts);
				2977
				2978	// Merge the results together.
				2979	SDOperand Ops[16];
				2980	for (unsigned i = 0; i != 8; ++i) {
				2981	Ops[i2 ] = DAG.getConstant(2i+1, MVT::i8);
				2982	Ops[i2+1] = DAG.getConstant(2i+1+16, MVT::i8);
				2983	}
				2984	return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts,
				2985	DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16));
				2986	} else {
				2987	assert(0 && "Unknown mul to lower!");
				2988	abort();
				2989	}
				2990	}
				2991
				2992	/// LowerOperation - Provide custom lowering hooks for some operations.
				2993	///
				2994	SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
				2995	switch (Op.getOpcode()) {
				2996	default: assert(0 && "Wasn't expecting to be able to lower this!");
				2997	case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
				2998	case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
				2999	case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
				3000	case ISD::JumpTable: return LowerJumpTable(Op, DAG);
				3001	case ISD::SETCC: return LowerSETCC(Op, DAG);
				3002	case ISD::VASTART:
				3003	return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
				3004	VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
				3005
				3006	case ISD::VAARG:
				3007	return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
				3008	VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
				3009
				3010	case ISD::FORMAL_ARGUMENTS:
				3011	return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,
				3012	VarArgsStackOffset, VarArgsNumGPR,
				3013	VarArgsNumFPR, PPCSubTarget);
				3014
				3015	case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget);
				3016	case ISD::RET: return LowerRET(Op, DAG, getTargetMachine());
				3017	case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
				3018	case ISD::DYNAMIC_STACKALLOC:
				3019	return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
				3020
				3021	case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
				3022	case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
				3023	case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
Dale Johannesen	3d8578b	2007-10-10 01:01:31 +0000	[diff] [blame]	3024	case ISD::FP_ROUND_INREG: return LowerFP_ROUND_INREG(Op, DAG);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3025
				3026	// Lower 64-bit shifts.
				3027	case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
				3028	case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
				3029	case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
				3030
				3031	// Vector-related lowering.
				3032	case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
				3033	case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
				3034	case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
				3035	case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
				3036	case ISD::MUL: return LowerMUL(Op, DAG);
				3037
Chris Lattner	f8b9337	2007-12-08 06:59:59 +0000	[diff] [blame]	3038	// Frame & Return address.
				3039	case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3040	case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
				3041	}
				3042	return SDOperand();
				3043	}
				3044
Chris Lattner	2877109	2007-11-28 18:44:47 +0000	[diff] [blame]	3045	SDNode PPCTargetLowering::ExpandOperationResult(SDNode N, SelectionDAG &DAG) {
				3046	switch (N->getOpcode()) {
				3047	default: assert(0 && "Wasn't expecting to be able to lower this!");
				3048	case ISD::FP_TO_SINT: return LowerFP_TO_SINT(SDOperand(N, 0), DAG).Val;
				3049	}
				3050	}
				3051
				3052
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3053	//===----------------------------------------------------------------------===//
				3054	// Other Lowering Code
				3055	//===----------------------------------------------------------------------===//
				3056
				3057	MachineBasicBlock *
				3058	PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
				3059	MachineBasicBlock *BB) {
				3060	const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
				3061	assert((MI->getOpcode() == PPC::SELECT_CC_I4 \|\|
				3062	MI->getOpcode() == PPC::SELECT_CC_I8 \|\|
				3063	MI->getOpcode() == PPC::SELECT_CC_F4 \|\|
				3064	MI->getOpcode() == PPC::SELECT_CC_F8 \|\|
				3065	MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
				3066	"Unexpected instr type to insert");
				3067
				3068	// To "insert" a SELECT_CC instruction, we actually have to insert the diamond
				3069	// control-flow pattern. The incoming instruction knows the destination vreg
				3070	// to set, the condition code register to branch on, the true/false values to
				3071	// select between, and a branch opcode to use.
				3072	const BasicBlock *LLVM_BB = BB->getBasicBlock();
				3073	ilist<MachineBasicBlock>::iterator It = BB;
				3074	++It;
				3075
				3076	// thisMBB:
				3077	// ...
				3078	// TrueVal = ...
				3079	// cmpTY ccX, r1, r2
				3080	// bCC copy1MBB
				3081	// fallthrough --> copy0MBB
				3082	MachineBasicBlock *thisMBB = BB;
				3083	MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
				3084	MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
				3085	unsigned SelectPred = MI->getOperand(4).getImm();
				3086	BuildMI(BB, TII->get(PPC::BCC))
				3087	.addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
				3088	MachineFunction *F = BB->getParent();
				3089	F->getBasicBlockList().insert(It, copy0MBB);
				3090	F->getBasicBlockList().insert(It, sinkMBB);
				3091	// Update machine-CFG edges by first adding all successors of the current
				3092	// block to the new block which will contain the Phi node for the select.
				3093	for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
				3094	e = BB->succ_end(); i != e; ++i)
				3095	sinkMBB->addSuccessor(*i);
				3096	// Next, remove all successors of the current block, and add the true
				3097	// and fallthrough blocks as its successors.
				3098	while(!BB->succ_empty())
				3099	BB->removeSuccessor(BB->succ_begin());
				3100	BB->addSuccessor(copy0MBB);
				3101	BB->addSuccessor(sinkMBB);
				3102
				3103	// copy0MBB:
				3104	// %FalseValue = ...
				3105	// # fallthrough to sinkMBB
				3106	BB = copy0MBB;
				3107
				3108	// Update machine-CFG edges
				3109	BB->addSuccessor(sinkMBB);
				3110
				3111	// sinkMBB:
				3112	// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
				3113	// ...
				3114	BB = sinkMBB;
				3115	BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
				3116	.addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
				3117	.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
				3118
				3119	delete MI; // The pseudo instruction is gone now.
				3120	return BB;
				3121	}
				3122
				3123	//===----------------------------------------------------------------------===//
				3124	// Target Optimization Hooks
				3125	//===----------------------------------------------------------------------===//
				3126
				3127	SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
				3128	DAGCombinerInfo &DCI) const {
				3129	TargetMachine &TM = getTargetMachine();
				3130	SelectionDAG &DAG = DCI.DAG;
				3131	switch (N->getOpcode()) {
				3132	default: break;
				3133	case PPCISD::SHL:
				3134	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
				3135	if (C->getValue() == 0) // 0 << V -> 0.
				3136	return N->getOperand(0);
				3137	}
				3138	break;
				3139	case PPCISD::SRL:
				3140	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
				3141	if (C->getValue() == 0) // 0 >>u V -> 0.
				3142	return N->getOperand(0);
				3143	}
				3144	break;
				3145	case PPCISD::SRA:
				3146	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
				3147	if (C->getValue() == 0 \|\| // 0 >>s V -> 0.
				3148	C->isAllOnesValue()) // -1 >>s V -> -1.
				3149	return N->getOperand(0);
				3150	}
				3151	break;
				3152
				3153	case ISD::SINT_TO_FP:
				3154	if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
				3155	if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
				3156	// Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
				3157	// We allow the src/dst to be either f32/f64, but the intermediate
				3158	// type must be i64.
Dale Johannesen	cbc0351	2007-10-23 23:20:14 +0000	[diff] [blame]	3159	if (N->getOperand(0).getValueType() == MVT::i64 &&
				3160	N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3161	SDOperand Val = N->getOperand(0).getOperand(0);
				3162	if (Val.getValueType() == MVT::f32) {
				3163	Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
				3164	DCI.AddToWorklist(Val.Val);
				3165	}
				3166
				3167	Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
				3168	DCI.AddToWorklist(Val.Val);
				3169	Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
				3170	DCI.AddToWorklist(Val.Val);
				3171	if (N->getValueType(0) == MVT::f32) {
Chris Lattner	5872a36	2008-01-17 07:00:52 +0000	[diff] [blame]	3172	Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val,
				3173	DAG.getIntPtrConstant(0));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3174	DCI.AddToWorklist(Val.Val);
				3175	}
				3176	return Val;
				3177	} else if (N->getOperand(0).getValueType() == MVT::i32) {
				3178	// If the intermediate type is i32, we can avoid the load/store here
				3179	// too.
				3180	}
				3181	}
				3182	}
				3183	break;
				3184	case ISD::STORE:
				3185	// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
				3186	if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
				3187	N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
Dale Johannesen	cbc0351	2007-10-23 23:20:14 +0000	[diff] [blame]	3188	N->getOperand(1).getValueType() == MVT::i32 &&
				3189	N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3190	SDOperand Val = N->getOperand(1).getOperand(0);
				3191	if (Val.getValueType() == MVT::f32) {
				3192	Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
				3193	DCI.AddToWorklist(Val.Val);
				3194	}
				3195	Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
				3196	DCI.AddToWorklist(Val.Val);
				3197
				3198	Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
				3199	N->getOperand(2), N->getOperand(3));
				3200	DCI.AddToWorklist(Val.Val);
				3201	return Val;
				3202	}
				3203
				3204	// Turn STORE (BSWAP) -> sthbrx/stwbrx.
				3205	if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
				3206	N->getOperand(1).Val->hasOneUse() &&
				3207	(N->getOperand(1).getValueType() == MVT::i32 \|\|
				3208	N->getOperand(1).getValueType() == MVT::i16)) {
				3209	SDOperand BSwapOp = N->getOperand(1).getOperand(0);
				3210	// Do an any-extend to 32-bits if this is a half-word input.
				3211	if (BSwapOp.getValueType() == MVT::i16)
				3212	BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp);
				3213
				3214	return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp,
				3215	N->getOperand(2), N->getOperand(3),
				3216	DAG.getValueType(N->getOperand(1).getValueType()));
				3217	}
				3218	break;
				3219	case ISD::BSWAP:
				3220	// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
				3221	if (ISD::isNON_EXTLoad(N->getOperand(0).Val) &&
				3222	N->getOperand(0).hasOneUse() &&
				3223	(N->getValueType(0) == MVT::i32 \|\| N->getValueType(0) == MVT::i16)) {
				3224	SDOperand Load = N->getOperand(0);
				3225	LoadSDNode *LD = cast<LoadSDNode>(Load);
				3226	// Create the byte-swapping load.
				3227	std::vector<MVT::ValueType> VTs;
				3228	VTs.push_back(MVT::i32);
				3229	VTs.push_back(MVT::Other);
				3230	SDOperand SV = DAG.getSrcValue(LD->getSrcValue(), LD->getSrcValueOffset());
				3231	SDOperand Ops[] = {
				3232	LD->getChain(), // Chain
				3233	LD->getBasePtr(), // Ptr
				3234	SV, // SrcValue
				3235	DAG.getValueType(N->getValueType(0)) // VT
				3236	};
				3237	SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops, 4);
				3238
				3239	// If this is an i16 load, insert the truncate.
				3240	SDOperand ResVal = BSLoad;
				3241	if (N->getValueType(0) == MVT::i16)
				3242	ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad);
				3243
				3244	// First, combine the bswap away. This makes the value produced by the
				3245	// load dead.
				3246	DCI.CombineTo(N, ResVal);
				3247
				3248	// Next, combine the load away, we give it a bogus result value but a real
				3249	// chain result. The result value is dead because the bswap is dead.
				3250	DCI.CombineTo(Load.Val, ResVal, BSLoad.getValue(1));
				3251
				3252	// Return N so it doesn't get rechecked!
				3253	return SDOperand(N, 0);
				3254	}
				3255
				3256	break;
				3257	case PPCISD::VCMP: {
				3258	// If a VCMPo node already exists with exactly the same operands as this
				3259	// node, use its result instead of this node (VCMPo computes both a CR6 and
				3260	// a normal output).
				3261	//
				3262	if (!N->getOperand(0).hasOneUse() &&
				3263	!N->getOperand(1).hasOneUse() &&
				3264	!N->getOperand(2).hasOneUse()) {
				3265
				3266	// Scan all of the users of the LHS, looking for VCMPo's that match.
				3267	SDNode *VCMPoNode = 0;
				3268
				3269	SDNode *LHSN = N->getOperand(0).Val;
				3270	for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
				3271	UI != E; ++UI)
				3272	if ((*UI)->getOpcode() == PPCISD::VCMPo &&
				3273	(*UI)->getOperand(1) == N->getOperand(1) &&
				3274	(*UI)->getOperand(2) == N->getOperand(2) &&
				3275	(*UI)->getOperand(0) == N->getOperand(0)) {
				3276	VCMPoNode = *UI;
				3277	break;
				3278	}
				3279
				3280	// If there is no VCMPo node, or if the flag value has a single use, don't
				3281	// transform this.
				3282	if (!VCMPoNode \|\| VCMPoNode->hasNUsesOfValue(0, 1))
				3283	break;
				3284
				3285	// Look at the (necessarily single) use of the flag value. If it has a
				3286	// chain, this transformation is more complex. Note that multiple things
				3287	// could use the value result, which we should ignore.
				3288	SDNode *FlagUser = 0;
				3289	for (SDNode::use_iterator UI = VCMPoNode->use_begin();
				3290	FlagUser == 0; ++UI) {
				3291	assert(UI != VCMPoNode->use_end() && "Didn't find user!");
				3292	SDNode User = UI;
				3293	for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
				3294	if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {
				3295	FlagUser = User;
				3296	break;
				3297	}
				3298	}
				3299	}
				3300
				3301	// If the user is a MFCR instruction, we know this is safe. Otherwise we
				3302	// give up for right now.
				3303	if (FlagUser->getOpcode() == PPCISD::MFCR)
				3304	return SDOperand(VCMPoNode, 0);
				3305	}
				3306	break;
				3307	}
				3308	case ISD::BR_CC: {
				3309	// If this is a branch on an altivec predicate comparison, lower this so
				3310	// that we don't have to do a MFCR: instead, branch directly on CR6. This
				3311	// lowering is done pre-legalize, because the legalizer lowers the predicate
				3312	// compare down to code that is difficult to reassemble.
				3313	ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
				3314	SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);
				3315	int CompareOpc;
				3316	bool isDot;
				3317
				3318	if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
				3319	isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ \|\| CC == ISD::SETNE) &&
				3320	getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
				3321	assert(isDot && "Can't compare against a vector result!");
				3322
				3323	// If this is a comparison against something other than 0/1, then we know
				3324	// that the condition is never/always true.
				3325	unsigned Val = cast<ConstantSDNode>(RHS)->getValue();
				3326	if (Val != 0 && Val != 1) {
				3327	if (CC == ISD::SETEQ) // Cond never true, remove branch.
				3328	return N->getOperand(0);
				3329	// Always !=, turn it into an unconditional branch.
				3330	return DAG.getNode(ISD::BR, MVT::Other,
				3331	N->getOperand(0), N->getOperand(4));
				3332	}
				3333
				3334	bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
				3335
				3336	// Create the PPCISD altivec 'dot' comparison node.
				3337	std::vector<MVT::ValueType> VTs;
				3338	SDOperand Ops[] = {
				3339	LHS.getOperand(2), // LHS of compare
				3340	LHS.getOperand(3), // RHS of compare
				3341	DAG.getConstant(CompareOpc, MVT::i32)
				3342	};
				3343	VTs.push_back(LHS.getOperand(2).getValueType());
				3344	VTs.push_back(MVT::Flag);
				3345	SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);
				3346
				3347	// Unpack the result based on how the target uses it.
				3348	PPC::Predicate CompOpc;
				3349	switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {
				3350	default: // Can't happen, don't crash on invalid number though.
				3351	case 0: // Branch on the value of the EQ bit of CR6.
				3352	CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
				3353	break;
				3354	case 1: // Branch on the inverted value of the EQ bit of CR6.
				3355	CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
				3356	break;
				3357	case 2: // Branch on the value of the LT bit of CR6.
				3358	CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
				3359	break;
				3360	case 3: // Branch on the inverted value of the LT bit of CR6.
				3361	CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
				3362	break;
				3363	}
				3364
				3365	return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),
				3366	DAG.getConstant(CompOpc, MVT::i32),
				3367	DAG.getRegister(PPC::CR6, MVT::i32),
				3368	N->getOperand(4), CompNode.getValue(1));
				3369	}
				3370	break;
				3371	}
				3372	}
				3373
				3374	return SDOperand();
				3375	}
				3376
				3377	//===----------------------------------------------------------------------===//
				3378	// Inline Assembly Support
				3379	//===----------------------------------------------------------------------===//
				3380
				3381	void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
				3382	uint64_t Mask,
				3383	uint64_t &KnownZero,
				3384	uint64_t &KnownOne,
				3385	const SelectionDAG &DAG,
				3386	unsigned Depth) const {
				3387	KnownZero = 0;
				3388	KnownOne = 0;
				3389	switch (Op.getOpcode()) {
				3390	default: break;
				3391	case PPCISD::LBRX: {
				3392	// lhbrx is known to have the top bits cleared out.
				3393	if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
				3394	KnownZero = 0xFFFF0000;
				3395	break;
				3396	}
				3397	case ISD::INTRINSIC_WO_CHAIN: {
				3398	switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
				3399	default: break;
				3400	case Intrinsic::ppc_altivec_vcmpbfp_p:
				3401	case Intrinsic::ppc_altivec_vcmpeqfp_p:
				3402	case Intrinsic::ppc_altivec_vcmpequb_p:
				3403	case Intrinsic::ppc_altivec_vcmpequh_p:
				3404	case Intrinsic::ppc_altivec_vcmpequw_p:
				3405	case Intrinsic::ppc_altivec_vcmpgefp_p:
				3406	case Intrinsic::ppc_altivec_vcmpgtfp_p:
				3407	case Intrinsic::ppc_altivec_vcmpgtsb_p:
				3408	case Intrinsic::ppc_altivec_vcmpgtsh_p:
				3409	case Intrinsic::ppc_altivec_vcmpgtsw_p:
				3410	case Intrinsic::ppc_altivec_vcmpgtub_p:
				3411	case Intrinsic::ppc_altivec_vcmpgtuh_p:
				3412	case Intrinsic::ppc_altivec_vcmpgtuw_p:
				3413	KnownZero = ~1U; // All bits but the low one are known to be zero.
				3414	break;
				3415	}
				3416	}
				3417	}
				3418	}
				3419
				3420
				3421	/// getConstraintType - Given a constraint, return the type of
				3422	/// constraint it is for this target.
				3423	PPCTargetLowering::ConstraintType
				3424	PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
				3425	if (Constraint.size() == 1) {
				3426	switch (Constraint[0]) {
				3427	default: break;
				3428	case 'b':
				3429	case 'r':
				3430	case 'f':
				3431	case 'v':
				3432	case 'y':
				3433	return C_RegisterClass;
				3434	}
				3435	}
				3436	return TargetLowering::getConstraintType(Constraint);
				3437	}
				3438
				3439	std::pair<unsigned, const TargetRegisterClass*>
				3440	PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
				3441	MVT::ValueType VT) const {
				3442	if (Constraint.size() == 1) {
				3443	// GCC RS6000 Constraint Letters
				3444	switch (Constraint[0]) {
				3445	case 'b': // R1-R31
				3446	case 'r': // R0-R31
				3447	if (VT == MVT::i64 && PPCSubTarget.isPPC64())
				3448	return std::make_pair(0U, PPC::G8RCRegisterClass);
				3449	return std::make_pair(0U, PPC::GPRCRegisterClass);
				3450	case 'f':
				3451	if (VT == MVT::f32)
				3452	return std::make_pair(0U, PPC::F4RCRegisterClass);
				3453	else if (VT == MVT::f64)
				3454	return std::make_pair(0U, PPC::F8RCRegisterClass);
				3455	break;
				3456	case 'v':
				3457	return std::make_pair(0U, PPC::VRRCRegisterClass);
				3458	case 'y': // crrc
				3459	return std::make_pair(0U, PPC::CRRCRegisterClass);
				3460	}
				3461	}
				3462
				3463	return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
				3464	}
				3465
				3466
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3467	/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
				3468	/// vector. If it is invalid, don't add anything to Ops.
				3469	void PPCTargetLowering::LowerAsmOperandForConstraint(SDOperand Op, char Letter,
				3470	std::vector<SDOperand>&Ops,
				3471	SelectionDAG &DAG) {
				3472	SDOperand Result(0,0);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3473	switch (Letter) {
				3474	default: break;
				3475	case 'I':
				3476	case 'J':
				3477	case 'K':
				3478	case 'L':
				3479	case 'M':
				3480	case 'N':
				3481	case 'O':
				3482	case 'P': {
				3483	ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3484	if (!CST) return; // Must be an immediate to match.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3485	unsigned Value = CST->getValue();
				3486	switch (Letter) {
				3487	default: assert(0 && "Unknown constraint letter!");
				3488	case 'I': // "I" is a signed 16-bit constant.
				3489	if ((short)Value == (int)Value)
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3490	Result = DAG.getTargetConstant(Value, Op.getValueType());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3491	break;
				3492	case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
				3493	case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
				3494	if ((short)Value == 0)
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3495	Result = DAG.getTargetConstant(Value, Op.getValueType());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3496	break;
				3497	case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
				3498	if ((Value >> 16) == 0)
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3499	Result = DAG.getTargetConstant(Value, Op.getValueType());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3500	break;
				3501	case 'M': // "M" is a constant that is greater than 31.
				3502	if (Value > 31)
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3503	Result = DAG.getTargetConstant(Value, Op.getValueType());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3504	break;
				3505	case 'N': // "N" is a positive constant that is an exact power of two.
				3506	if ((int)Value > 0 && isPowerOf2_32(Value))
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3507	Result = DAG.getTargetConstant(Value, Op.getValueType());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3508	break;
				3509	case 'O': // "O" is the constant zero.
				3510	if (Value == 0)
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3511	Result = DAG.getTargetConstant(Value, Op.getValueType());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3512	break;
				3513	case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
				3514	if ((short)-Value == (int)-Value)
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3515	Result = DAG.getTargetConstant(Value, Op.getValueType());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3516	break;
				3517	}
				3518	break;
				3519	}
				3520	}
				3521
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3522	if (Result.Val) {
				3523	Ops.push_back(Result);
				3524	return;
				3525	}
				3526
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3527	// Handle standard constraint letters.
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	3528	TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3529	}
				3530
				3531	// isLegalAddressingMode - Return true if the addressing mode represented
				3532	// by AM is legal for this target, for a load/store of the specified type.
				3533	bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
				3534	const Type *Ty) const {
				3535	// FIXME: PPC does not allow r+i addressing modes for vectors!
				3536
				3537	// PPC allows a sign-extended 16-bit immediate field.
				3538	if (AM.BaseOffs <= -(1LL << 16) \|\| AM.BaseOffs >= (1LL << 16)-1)
				3539	return false;
				3540
				3541	// No global is ever allowed as a base.
				3542	if (AM.BaseGV)
				3543	return false;
				3544
				3545	// PPC only support r+r,
				3546	switch (AM.Scale) {
				3547	case 0: // "r+i" or just "i", depending on HasBaseReg.
				3548	break;
				3549	case 1:
				3550	if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
				3551	return false;
				3552	// Otherwise we have r+r or r+i.
				3553	break;
				3554	case 2:
				3555	if (AM.HasBaseReg \|\| AM.BaseOffs) // 2r+r or 2r+i is not allowed.
				3556	return false;
				3557	// Allow 2*r as r+r.
				3558	break;
				3559	default:
				3560	// No other scales are supported.
				3561	return false;
				3562	}
				3563
				3564	return true;
				3565	}
				3566
				3567	/// isLegalAddressImmediate - Return true if the integer value can be used
				3568	/// as the offset of the target addressing mode for load / store of the
				3569	/// given type.
				3570	bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
				3571	// PPC allows a sign-extended 16-bit immediate field.
				3572	return (V > -(1 << 16) && V < (1 << 16)-1);
				3573	}
				3574
				3575	bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
				3576	return false;
				3577	}
				3578
Chris Lattner	f8b9337	2007-12-08 06:59:59 +0000	[diff] [blame]	3579	SDOperand PPCTargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
				3580	// Depths > 0 not supported yet!
				3581	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				3582	return SDOperand();
				3583
				3584	MachineFunction &MF = DAG.getMachineFunction();
				3585	PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
				3586	int RAIdx = FuncInfo->getReturnAddrSaveIndex();
				3587	if (RAIdx == 0) {
				3588	bool isPPC64 = PPCSubTarget.isPPC64();
				3589	int Offset =
				3590	PPCFrameInfo::getReturnSaveOffset(isPPC64, PPCSubTarget.isMachoABI());
				3591
				3592	// Set up a frame object for the return address.
				3593	RAIdx = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, Offset);
				3594
				3595	// Remember it for next time.
				3596	FuncInfo->setReturnAddrSaveIndex(RAIdx);
				3597
				3598	// Make sure the function really does not optimize away the store of the RA
				3599	// to the stack.
				3600	FuncInfo->setLRStoreRequired();
				3601	}
				3602
				3603	// Just load the return address off the stack.
				3604	SDOperand RetAddrFI = DAG.getFrameIndex(RAIdx, getPointerTy());
				3605	return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
				3606	}
				3607
				3608	SDOperand PPCTargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3609	// Depths > 0 not supported yet!
				3610	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				3611	return SDOperand();
				3612
				3613	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				3614	bool isPPC64 = PtrVT == MVT::i64;
				3615
				3616	MachineFunction &MF = DAG.getMachineFunction();
				3617	MachineFrameInfo *MFI = MF.getFrameInfo();
				3618	bool is31 = (NoFramePointerElim \|\| MFI->hasVarSizedObjects())
				3619	&& MFI->getStackSize();
				3620
				3621	if (isPPC64)
				3622	return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::X31 : PPC::X1,
Bill Wendling	5e28ab1	2007-08-30 00:59:19 +0000	[diff] [blame]	3623	MVT::i64);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3624	else
				3625	return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::R31 : PPC::R1,
				3626	MVT::i32);
				3627	}