Blame - lib/Target/X86/X86ISelLowering.cpp - fp2-dev/platform/external/llvm

blob: e79ee90eae8965b40181b5848355c499897381ed [file] [log] [blame]

Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1	//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file defines the interfaces that X86 uses to lower LLVM code into a
				11	// selection DAG.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "X86.h"
				16	#include "X86InstrBuilder.h"
				17	#include "X86ISelLowering.h"
				18	#include "X86MachineFunctionInfo.h"
				19	#include "X86TargetMachine.h"
				20	#include "llvm/CallingConv.h"
				21	#include "llvm/Constants.h"
				22	#include "llvm/DerivedTypes.h"
				23	#include "llvm/GlobalVariable.h"
				24	#include "llvm/Function.h"
				25	#include "llvm/Intrinsics.h"
				26	#include "llvm/ADT/VectorExtras.h"
				27	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				28	#include "llvm/CodeGen/CallingConvLower.h"
				29	#include "llvm/CodeGen/MachineFrameInfo.h"
				30	#include "llvm/CodeGen/MachineFunction.h"
				31	#include "llvm/CodeGen/MachineInstrBuilder.h"
				32	#include "llvm/CodeGen/SelectionDAG.h"
				33	#include "llvm/CodeGen/SSARegMap.h"
				34	#include "llvm/Support/MathExtras.h"
				35	#include "llvm/Target/TargetOptions.h"
				36	#include "llvm/ADT/StringExtras.h"
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	37	#include "llvm/ParameterAttributes.h"
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	38	using namespace llvm;
				39
				40	X86TargetLowering::X86TargetLowering(TargetMachine &TM)
				41	: TargetLowering(TM) {
				42	Subtarget = &TM.getSubtarget<X86Subtarget>();
				43	X86ScalarSSE = Subtarget->hasSSE2();
				44	X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
				45
				46	RegInfo = TM.getRegisterInfo();
				47
				48	// Set up the TargetLowering object.
				49
				50	// X86 is weird, it always uses i8 for shift amounts and setcc results.
				51	setShiftAmountType(MVT::i8);
				52	setSetCCResultType(MVT::i8);
				53	setSetCCResultContents(ZeroOrOneSetCCResult);
				54	setSchedulingPreference(SchedulingForRegPressure);
				55	setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
				56	setStackPointerRegisterToSaveRestore(X86StackPtr);
				57
				58	if (Subtarget->isTargetDarwin()) {
				59	// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
				60	setUseUnderscoreSetJmp(false);
				61	setUseUnderscoreLongJmp(false);
				62	} else if (Subtarget->isTargetMingw()) {
				63	// MS runtime is weird: it exports _setjmp, but longjmp!
				64	setUseUnderscoreSetJmp(true);
				65	setUseUnderscoreLongJmp(false);
				66	} else {
				67	setUseUnderscoreSetJmp(true);
				68	setUseUnderscoreLongJmp(true);
				69	}
				70
				71	// Set up the register classes.
				72	addRegisterClass(MVT::i8, X86::GR8RegisterClass);
				73	addRegisterClass(MVT::i16, X86::GR16RegisterClass);
				74	addRegisterClass(MVT::i32, X86::GR32RegisterClass);
				75	if (Subtarget->is64Bit())
				76	addRegisterClass(MVT::i64, X86::GR64RegisterClass);
				77
				78	setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
				79
				80	// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
				81	// operation.
				82	setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
				83	setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
				84	setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
				85
				86	if (Subtarget->is64Bit()) {
				87	setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
				88	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
				89	} else {
				90	if (X86ScalarSSE)
				91	// If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
				92	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
				93	else
				94	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
				95	}
				96
				97	// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
				98	// this operation.
				99	setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
				100	setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
				101	// SSE has no i16 to fp conversion, only i32
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	102	if (X86ScalarSSE) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	103	setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	104	// f32 and f64 cases are Legal, f80 case is not
				105	setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
				106	} else {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	107	setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
				108	setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
				109	}
				110
Dale Johannesen	958b08b	2007-09-19 23:55:34 +0000	[diff] [blame]	111	// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
				112	// are Legal, f80 is custom lowered.
				113	setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
				114	setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	115
				116	// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
				117	// this operation.
				118	setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
				119	setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
				120
				121	if (X86ScalarSSE) {
				122	setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	123	// f32 and f64 cases are Legal, f80 case is not
				124	setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	125	} else {
				126	setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
				127	setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
				128	}
				129
				130	// Handle FP_TO_UINT by promoting the destination to a larger signed
				131	// conversion.
				132	setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
				133	setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
				134	setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
				135
				136	if (Subtarget->is64Bit()) {
				137	setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
				138	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
				139	} else {
				140	if (X86ScalarSSE && !Subtarget->hasSSE3())
				141	// Expand FP_TO_UINT into a select.
				142	// FIXME: We would like to use a Custom expander here eventually to do
				143	// the optimal thing for SSE vs. the default expansion in the legalizer.
				144	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
				145	else
				146	// With SSE3 we can use fisttpll to convert to a signed i64.
				147	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
				148	}
				149
				150	// TODO: when we have SSE, these could be more efficient, by using movd/movq.
				151	if (!X86ScalarSSE) {
				152	setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
				153	setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
				154	}
				155
				156	setOperationAction(ISD::BR_JT , MVT::Other, Expand);
				157	setOperationAction(ISD::BRCOND , MVT::Other, Custom);
				158	setOperationAction(ISD::BR_CC , MVT::Other, Expand);
				159	setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
				160	setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
				161	if (Subtarget->is64Bit())
Christopher Lamb	0a7c866	2007-08-10 21:48:46 +0000	[diff] [blame]	162	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
				163	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
				164	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	165	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
				166	setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
				167	setOperationAction(ISD::FREM , MVT::f64 , Expand);
				168
				169	setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
				170	setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
				171	setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
				172	setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
				173	setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
				174	setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
				175	setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
				176	setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
				177	setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
				178	if (Subtarget->is64Bit()) {
				179	setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
				180	setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
				181	setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
				182	}
				183
				184	setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
				185	setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
				186
				187	// These should be promoted to a larger select which is supported.
				188	setOperationAction(ISD::SELECT , MVT::i1 , Promote);
				189	setOperationAction(ISD::SELECT , MVT::i8 , Promote);
				190	// X86 wants to expand cmov itself.
				191	setOperationAction(ISD::SELECT , MVT::i16 , Custom);
				192	setOperationAction(ISD::SELECT , MVT::i32 , Custom);
				193	setOperationAction(ISD::SELECT , MVT::f32 , Custom);
				194	setOperationAction(ISD::SELECT , MVT::f64 , Custom);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	195	setOperationAction(ISD::SELECT , MVT::f80 , Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	196	setOperationAction(ISD::SETCC , MVT::i8 , Custom);
				197	setOperationAction(ISD::SETCC , MVT::i16 , Custom);
				198	setOperationAction(ISD::SETCC , MVT::i32 , Custom);
				199	setOperationAction(ISD::SETCC , MVT::f32 , Custom);
				200	setOperationAction(ISD::SETCC , MVT::f64 , Custom);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	201	setOperationAction(ISD::SETCC , MVT::f80 , Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	202	if (Subtarget->is64Bit()) {
				203	setOperationAction(ISD::SELECT , MVT::i64 , Custom);
				204	setOperationAction(ISD::SETCC , MVT::i64 , Custom);
				205	}
				206	// X86 ret instruction may pop stack.
				207	setOperationAction(ISD::RET , MVT::Other, Custom);
				208	if (!Subtarget->is64Bit())
				209	setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
				210
				211	// Darwin ABI issue.
				212	setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
				213	setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
				214	setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
				215	setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
				216	setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
				217	if (Subtarget->is64Bit()) {
				218	setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
				219	setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
				220	setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
				221	setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
				222	}
				223	// 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
				224	setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
				225	setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
				226	setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
				227	// X86 wants to expand memset / memcpy itself.
				228	setOperationAction(ISD::MEMSET , MVT::Other, Custom);
				229	setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
				230
				231	// We don't have line number support yet.
				232	setOperationAction(ISD::LOCATION, MVT::Other, Expand);
				233	setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
				234	// FIXME - use subtarget debug flags
				235	if (!Subtarget->isTargetDarwin() &&
				236	!Subtarget->isTargetELF() &&
				237	!Subtarget->isTargetCygMing())
				238	setOperationAction(ISD::LABEL, MVT::Other, Expand);
				239
				240	setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
				241	setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
				242	setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
				243	setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
				244	if (Subtarget->is64Bit()) {
				245	// FIXME: Verify
				246	setExceptionPointerRegister(X86::RAX);
				247	setExceptionSelectorRegister(X86::RDX);
				248	} else {
				249	setExceptionPointerRegister(X86::EAX);
				250	setExceptionSelectorRegister(X86::EDX);
				251	}
Anton Korobeynikov	23ca9c5	2007-09-03 00:36:06 +0000	[diff] [blame]	252	setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	253
Duncan Sands	7407a9f	2007-09-11 14:10:23 +0000	[diff] [blame]	254	setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	255
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	256	// VASTART needs to be custom lowered to use the VarArgsFrameIndex
				257	setOperationAction(ISD::VASTART , MVT::Other, Custom);
				258	setOperationAction(ISD::VAARG , MVT::Other, Expand);
				259	setOperationAction(ISD::VAEND , MVT::Other, Expand);
				260	if (Subtarget->is64Bit())
				261	setOperationAction(ISD::VACOPY , MVT::Other, Custom);
				262	else
				263	setOperationAction(ISD::VACOPY , MVT::Other, Expand);
				264
				265	setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
				266	setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
				267	if (Subtarget->is64Bit())
				268	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
				269	if (Subtarget->isTargetCygMing())
				270	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
				271	else
				272	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
				273
				274	if (X86ScalarSSE) {
				275	// Set up the FP register classes.
				276	addRegisterClass(MVT::f32, X86::FR32RegisterClass);
				277	addRegisterClass(MVT::f64, X86::FR64RegisterClass);
				278
				279	// Use ANDPD to simulate FABS.
				280	setOperationAction(ISD::FABS , MVT::f64, Custom);
				281	setOperationAction(ISD::FABS , MVT::f32, Custom);
				282
				283	// Use XORP to simulate FNEG.
				284	setOperationAction(ISD::FNEG , MVT::f64, Custom);
				285	setOperationAction(ISD::FNEG , MVT::f32, Custom);
				286
				287	// Use ANDPD and ORPD to simulate FCOPYSIGN.
				288	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
				289	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
				290
				291	// We don't support sin/cos/fmod
				292	setOperationAction(ISD::FSIN , MVT::f64, Expand);
				293	setOperationAction(ISD::FCOS , MVT::f64, Expand);
				294	setOperationAction(ISD::FREM , MVT::f64, Expand);
				295	setOperationAction(ISD::FSIN , MVT::f32, Expand);
				296	setOperationAction(ISD::FCOS , MVT::f32, Expand);
				297	setOperationAction(ISD::FREM , MVT::f32, Expand);
				298
				299	// Expand FP immediates into loads from the stack, except for the special
				300	// cases we handle.
				301	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				302	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
Dale Johannesen	bbe2b70	2007-08-30 00:23:21 +0000	[diff] [blame]	303	addLegalFPImmediate(APFloat(+0.0)); // xorps / xorpd
Dale Johannesen	8f83a6b	2007-08-09 01:04:01 +0000	[diff] [blame]	304
				305	// Conversions to long double (in X87) go through memory.
				306	setConvertAction(MVT::f32, MVT::f80, Expand);
				307	setConvertAction(MVT::f64, MVT::f80, Expand);
				308
				309	// Conversions from long double (in X87) go through memory.
				310	setConvertAction(MVT::f80, MVT::f32, Expand);
				311	setConvertAction(MVT::f80, MVT::f64, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	312	} else {
				313	// Set up the FP register classes.
				314	addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
				315	addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
				316
				317	setOperationAction(ISD::UNDEF, MVT::f64, Expand);
				318	setOperationAction(ISD::UNDEF, MVT::f32, Expand);
				319	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
				320	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
Dale Johannesen	8f83a6b	2007-08-09 01:04:01 +0000	[diff] [blame]	321
				322	// Floating truncations need to go through memory.
				323	setConvertAction(MVT::f80, MVT::f32, Expand);
				324	setConvertAction(MVT::f64, MVT::f32, Expand);
				325	setConvertAction(MVT::f80, MVT::f64, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	326
				327	if (!UnsafeFPMath) {
				328	setOperationAction(ISD::FSIN , MVT::f64 , Expand);
				329	setOperationAction(ISD::FCOS , MVT::f64 , Expand);
				330	}
				331
				332	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				333	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
Dale Johannesen	bbe2b70	2007-08-30 00:23:21 +0000	[diff] [blame]	334	addLegalFPImmediate(APFloat(+0.0)); // FLD0
				335	addLegalFPImmediate(APFloat(+1.0)); // FLD1
				336	addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
				337	addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	338	}
				339
Dale Johannesen	4ab00bd	2007-08-05 18:49:15 +0000	[diff] [blame]	340	// Long double always uses X87.
				341	addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	342	setOperationAction(ISD::UNDEF, MVT::f80, Expand);
				343	setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
				344	setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
Dale Johannesen	4ab00bd	2007-08-05 18:49:15 +0000	[diff] [blame]	345
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	346	// First set operation action for all vector types to expand. Then we
				347	// will selectively turn on ones that can be effectively codegen'd.
				348	for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
				349	VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
				350	setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
				351	setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
				352	setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
				353	setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
				354	setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
				355	setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
				356	setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
				357	setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
				358	setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
				359	setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
				360	setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
				361	setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
				362	setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
				363	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
				364	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				365	setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				366	setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
				367	setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
				368	setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
				369	setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
				370	setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
				371	setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
				372	setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
				373	}
				374
				375	if (Subtarget->hasMMX()) {
				376	addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
				377	addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
				378	addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
				379	addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
				380
				381	// FIXME: add MMX packed arithmetics
				382
				383	setOperationAction(ISD::ADD, MVT::v8i8, Legal);
				384	setOperationAction(ISD::ADD, MVT::v4i16, Legal);
				385	setOperationAction(ISD::ADD, MVT::v2i32, Legal);
				386	setOperationAction(ISD::ADD, MVT::v1i64, Legal);
				387
				388	setOperationAction(ISD::SUB, MVT::v8i8, Legal);
				389	setOperationAction(ISD::SUB, MVT::v4i16, Legal);
				390	setOperationAction(ISD::SUB, MVT::v2i32, Legal);
				391
				392	setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
				393	setOperationAction(ISD::MUL, MVT::v4i16, Legal);
				394
				395	setOperationAction(ISD::AND, MVT::v8i8, Promote);
				396	AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
				397	setOperationAction(ISD::AND, MVT::v4i16, Promote);
				398	AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
				399	setOperationAction(ISD::AND, MVT::v2i32, Promote);
				400	AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
				401	setOperationAction(ISD::AND, MVT::v1i64, Legal);
				402
				403	setOperationAction(ISD::OR, MVT::v8i8, Promote);
				404	AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
				405	setOperationAction(ISD::OR, MVT::v4i16, Promote);
				406	AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
				407	setOperationAction(ISD::OR, MVT::v2i32, Promote);
				408	AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
				409	setOperationAction(ISD::OR, MVT::v1i64, Legal);
				410
				411	setOperationAction(ISD::XOR, MVT::v8i8, Promote);
				412	AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
				413	setOperationAction(ISD::XOR, MVT::v4i16, Promote);
				414	AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
				415	setOperationAction(ISD::XOR, MVT::v2i32, Promote);
				416	AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
				417	setOperationAction(ISD::XOR, MVT::v1i64, Legal);
				418
				419	setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
				420	AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
				421	setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
				422	AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
				423	setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
				424	AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
				425	setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
				426
				427	setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
				428	setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
				429	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
				430	setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
				431
				432	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
				433	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
				434	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
				435	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
				436
				437	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
				438	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
				439	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
				440	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
				441	}
				442
				443	if (Subtarget->hasSSE1()) {
				444	addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
				445
				446	setOperationAction(ISD::FADD, MVT::v4f32, Legal);
				447	setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
				448	setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
				449	setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
				450	setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
				451	setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	452	setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
				453	setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
				454	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
				455	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
				456	setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
				457	}
				458
				459	if (Subtarget->hasSSE2()) {
				460	addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
				461	addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
				462	addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
				463	addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
				464	addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
				465
				466	setOperationAction(ISD::ADD, MVT::v16i8, Legal);
				467	setOperationAction(ISD::ADD, MVT::v8i16, Legal);
				468	setOperationAction(ISD::ADD, MVT::v4i32, Legal);
				469	setOperationAction(ISD::ADD, MVT::v2i64, Legal);
				470	setOperationAction(ISD::SUB, MVT::v16i8, Legal);
				471	setOperationAction(ISD::SUB, MVT::v8i16, Legal);
				472	setOperationAction(ISD::SUB, MVT::v4i32, Legal);
				473	setOperationAction(ISD::SUB, MVT::v2i64, Legal);
				474	setOperationAction(ISD::MUL, MVT::v8i16, Legal);
				475	setOperationAction(ISD::FADD, MVT::v2f64, Legal);
				476	setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
				477	setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
				478	setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
				479	setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
				480	setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	481
				482	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
				483	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
				484	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
				485	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
				486	// Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
				487	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
				488
				489	// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
				490	for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
				491	setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
				492	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
				493	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
				494	}
				495	setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
				496	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
				497	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
				498	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
				499	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
				500	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
				501
				502	// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
				503	for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
				504	setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
				505	AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
				506	setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
				507	AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
				508	setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
				509	AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
				510	setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
				511	AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
				512	setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
				513	AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
				514	}
				515
				516	// Custom lower v2i64 and v2f64 selects.
				517	setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
				518	setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
				519	setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
				520	setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
				521	}
				522
				523	// We want to custom lower some of our intrinsics.
				524	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
				525
				526	// We have target-specific dag combine patterns for the following nodes:
				527	setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
				528	setTargetDAGCombine(ISD::SELECT);
				529
				530	computeRegisterProperties();
				531
				532	// FIXME: These should be based on subtarget info. Plus, the values should
				533	// be smaller when we are in optimizing for size mode.
				534	maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
				535	maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
				536	maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
				537	allowUnalignedMemoryAccesses = true; // x86 supports it!
				538	}
				539
				540
				541	//===----------------------------------------------------------------------===//
				542	// Return Value Calling Convention Implementation
				543	//===----------------------------------------------------------------------===//
				544
				545	#include "X86GenCallingConv.inc"
				546
				547	/// LowerRET - Lower an ISD::RET node.
				548	SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
				549	assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
				550
				551	SmallVector<CCValAssign, 16> RVLocs;
				552	unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
				553	bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
				554	CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
				555	CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
				556
				557
				558	// If this is the first return lowered for this function, add the regs to the
				559	// liveout set for the function.
				560	if (DAG.getMachineFunction().liveout_empty()) {
				561	for (unsigned i = 0; i != RVLocs.size(); ++i)
				562	if (RVLocs[i].isRegLoc())
				563	DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
				564	}
				565
				566	SDOperand Chain = Op.getOperand(0);
				567	SDOperand Flag;
				568
				569	// Copy the result values into the output registers.
				570	if (RVLocs.size() != 1 \|\| !RVLocs[0].isRegLoc() \|\|
				571	RVLocs[0].getLocReg() != X86::ST0) {
				572	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				573	CCValAssign &VA = RVLocs[i];
				574	assert(VA.isRegLoc() && "Can only return in registers!");
				575	Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
				576	Flag);
				577	Flag = Chain.getValue(1);
				578	}
				579	} else {
				580	// We need to handle a destination of ST0 specially, because it isn't really
				581	// a register.
				582	SDOperand Value = Op.getOperand(1);
				583
				584	// If this is an FP return with ScalarSSE, we need to move the value from
				585	// an XMM register onto the fp-stack.
				586	if (X86ScalarSSE) {
				587	SDOperand MemLoc;
				588
				589	// If this is a load into a scalarsse value, don't store the loaded value
				590	// back to the stack, only to reload it: just replace the scalar-sse load.
				591	if (ISD::isNON_EXTLoad(Value.Val) &&
				592	(Chain == Value.getValue(1) \|\| Chain == Value.getOperand(0))) {
				593	Chain = Value.getOperand(0);
				594	MemLoc = Value.getOperand(1);
				595	} else {
				596	// Spill the value to memory and reload it into top of stack.
				597	unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
				598	MachineFunction &MF = DAG.getMachineFunction();
				599	int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
				600	MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
				601	Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
				602	}
				603	SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
				604	SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
				605	Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
				606	Chain = Value.getValue(1);
				607	}
				608
				609	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				610	SDOperand Ops[] = { Chain, Value };
				611	Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
				612	Flag = Chain.getValue(1);
				613	}
				614
				615	SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
				616	if (Flag.Val)
				617	return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
				618	else
				619	return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
				620	}
				621
				622
				623	/// LowerCallResult - Lower the result values of an ISD::CALL into the
				624	/// appropriate copies out of appropriate physical registers. This assumes that
				625	/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
				626	/// being lowered. The returns a SDNode with the same number of values as the
				627	/// ISD::CALL.
				628	SDNode *X86TargetLowering::
				629	LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
				630	unsigned CallingConv, SelectionDAG &DAG) {
				631
				632	// Assign locations to each value returned by this call.
				633	SmallVector<CCValAssign, 16> RVLocs;
				634	bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
				635	CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
				636	CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
				637
				638
				639	SmallVector<SDOperand, 8> ResultVals;
				640
				641	// Copy all of the result registers out of their specified physreg.
				642	if (RVLocs.size() != 1 \|\| RVLocs[0].getLocReg() != X86::ST0) {
				643	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				644	Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
				645	RVLocs[i].getValVT(), InFlag).getValue(1);
				646	InFlag = Chain.getValue(2);
				647	ResultVals.push_back(Chain.getValue(0));
				648	}
				649	} else {
				650	// Copies from the FP stack are special, as ST0 isn't a valid register
				651	// before the fp stackifier runs.
				652
				653	// Copy ST0 into an RFP register with FP_GET_RESULT.
				654	SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
				655	SDOperand GROps[] = { Chain, InFlag };
				656	SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
				657	Chain = RetVal.getValue(1);
				658	InFlag = RetVal.getValue(2);
				659
				660	// If we are using ScalarSSE, store ST(0) to the stack and reload it into
				661	// an XMM register.
				662	if (X86ScalarSSE) {
				663	// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
				664	// shouldn't be necessary except that RFP cannot be live across
				665	// multiple blocks. When stackifier is fixed, they can be uncoupled.
				666	MachineFunction &MF = DAG.getMachineFunction();
				667	int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
				668	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				669	SDOperand Ops[] = {
				670	Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
				671	};
				672	Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
				673	RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
				674	Chain = RetVal.getValue(1);
				675	}
				676	ResultVals.push_back(RetVal);
				677	}
				678
				679	// Merge everything together with a MERGE_VALUES node.
				680	ResultVals.push_back(Chain);
				681	return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
				682	&ResultVals[0], ResultVals.size()).Val;
				683	}
				684
				685
				686	//===----------------------------------------------------------------------===//
				687	// C & StdCall Calling Convention implementation
				688	//===----------------------------------------------------------------------===//
				689	// The StdCall calling convention seems to be standard for many Windows API
				690	// routines. It differs from the C calling convention only a little: the
				691	// callee, not the caller, should clean up the stack. Symbols should also be
				692	// decorated in a special way. It doesn't support any vector arguments.
				693
				694	/// AddLiveIn - This helper function adds the specified physical register to the
				695	/// MachineFunction as a live in value. It also creates a corresponding virtual
				696	/// register for it.
				697	static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
				698	const TargetRegisterClass *RC) {
				699	assert(RC->contains(PReg) && "Not the correct regclass!");
				700	unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
				701	MF.addLiveIn(PReg, VReg);
				702	return VReg;
				703	}
				704
Rafael Espindola	03cbeb7	2007-09-14 15:48:13 +0000	[diff] [blame]	705	SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
				706	const CCValAssign &VA,
				707	MachineFrameInfo *MFI,
				708	SDOperand Root, unsigned i) {
				709	// Create the nodes corresponding to a load from this parameter slot.
				710	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				711	VA.getLocMemOffset());
				712	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
				713
				714	unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
				715
				716	if (Flags & ISD::ParamFlags::ByVal)
				717	return FIN;
				718	else
				719	return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
				720	}
				721
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	722	SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
				723	bool isStdCall) {
				724	unsigned NumArgs = Op.Val->getNumValues() - 1;
				725	MachineFunction &MF = DAG.getMachineFunction();
				726	MachineFrameInfo *MFI = MF.getFrameInfo();
				727	SDOperand Root = Op.getOperand(0);
				728	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				729
				730	// Assign locations to all of the incoming arguments.
				731	SmallVector<CCValAssign, 16> ArgLocs;
				732	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				733	getTargetMachine(), ArgLocs);
				734	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
				735
				736	SmallVector<SDOperand, 8> ArgValues;
				737	unsigned LastVal = ~0U;
				738	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				739	CCValAssign &VA = ArgLocs[i];
				740	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				741	// places.
				742	assert(VA.getValNo() != LastVal &&
				743	"Don't support value assigned to multiple locs yet");
				744	LastVal = VA.getValNo();
				745
				746	if (VA.isRegLoc()) {
				747	MVT::ValueType RegVT = VA.getLocVT();
				748	TargetRegisterClass *RC;
				749	if (RegVT == MVT::i32)
				750	RC = X86::GR32RegisterClass;
				751	else {
				752	assert(MVT::isVector(RegVT));
				753	RC = X86::VR128RegisterClass;
				754	}
				755
				756	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				757	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				758
				759	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				760	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				761	// right size.
				762	if (VA.getLocInfo() == CCValAssign::SExt)
				763	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				764	DAG.getValueType(VA.getValVT()));
				765	else if (VA.getLocInfo() == CCValAssign::ZExt)
				766	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				767	DAG.getValueType(VA.getValVT()));
				768
				769	if (VA.getLocInfo() != CCValAssign::Full)
				770	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				771
				772	ArgValues.push_back(ArgValue);
				773	} else {
				774	assert(VA.isMemLoc());
Rafael Espindola	03cbeb7	2007-09-14 15:48:13 +0000	[diff] [blame]	775	ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	776	}
				777	}
				778
				779	unsigned StackSize = CCInfo.getNextStackOffset();
				780
				781	ArgValues.push_back(Root);
				782
				783	// If the function takes variable number of arguments, make a frame index for
				784	// the start of the first vararg value... for expansion of llvm.va_start.
				785	if (isVarArg)
				786	VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
				787
				788	if (isStdCall && !isVarArg) {
				789	BytesToPopOnReturn = StackSize; // Callee pops everything..
				790	BytesCallerReserves = 0;
				791	} else {
				792	BytesToPopOnReturn = 0; // Callee pops nothing.
				793
				794	// If this is an sret function, the return should pop the hidden pointer.
				795	if (NumArgs &&
				796	(cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
				797	ISD::ParamFlags::StructReturn))
				798	BytesToPopOnReturn = 4;
				799
				800	BytesCallerReserves = StackSize;
				801	}
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	802
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	803	RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	804
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	805	X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
				806	FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	807
				808	// Return the new list of results.
				809	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				810	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				811	}
				812
				813	SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
				814	unsigned CC) {
				815	SDOperand Chain = Op.getOperand(0);
				816	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				817	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				818	SDOperand Callee = Op.getOperand(4);
				819	unsigned NumOps = (Op.getNumOperands() - 5) / 2;
				820
				821	// Analyze operands of the call, assigning locations to each operand.
				822	SmallVector<CCValAssign, 16> ArgLocs;
				823	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				824	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
				825
				826	// Get a count of how many bytes are to be pushed on the stack.
				827	unsigned NumBytes = CCInfo.getNextStackOffset();
				828
				829	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				830
				831	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				832	SmallVector<SDOperand, 8> MemOpChains;
				833
				834	SDOperand StackPtr;
				835
				836	// Walk the register/memloc assignments, inserting copies/loads.
				837	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				838	CCValAssign &VA = ArgLocs[i];
				839	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				840
				841	// Promote the value if needed.
				842	switch (VA.getLocInfo()) {
				843	default: assert(0 && "Unknown loc info!");
				844	case CCValAssign::Full: break;
				845	case CCValAssign::SExt:
				846	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				847	break;
				848	case CCValAssign::ZExt:
				849	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				850	break;
				851	case CCValAssign::AExt:
				852	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				853	break;
				854	}
				855
				856	if (VA.isRegLoc()) {
				857	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				858	} else {
				859	assert(VA.isMemLoc());
				860	if (StackPtr.Val == 0)
				861	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
Rafael Espindola	007b714	2007-09-21 15:50:22 +0000	[diff] [blame]	862
				863	MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
				864	Arg));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	865	}
				866	}
				867
				868	// If the first argument is an sret pointer, remember it.
				869	bool isSRet = NumOps &&
				870	(cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
				871	ISD::ParamFlags::StructReturn);
				872
				873	if (!MemOpChains.empty())
				874	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				875	&MemOpChains[0], MemOpChains.size());
				876
				877	// Build a sequence of copy-to-reg nodes chained together with token chain
				878	// and flag operands which copy the outgoing args into registers.
				879	SDOperand InFlag;
				880	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				881	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				882	InFlag);
				883	InFlag = Chain.getValue(1);
				884	}
				885
				886	// ELF / PIC requires GOT in the EBX register before function calls via PLT
				887	// GOT pointer.
				888	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				889	Subtarget->isPICStyleGOT()) {
				890	Chain = DAG.getCopyToReg(Chain, X86::EBX,
				891	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				892	InFlag);
				893	InFlag = Chain.getValue(1);
				894	}
				895
				896	// If the callee is a GlobalAddress node (quite common, every direct call is)
				897	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				898	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				899	// We should use extra load for direct calls to dllimported functions in
				900	// non-JIT mode.
				901	if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				902	getTargetMachine(), true))
				903	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				904	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				905	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				906
				907	// Returns a chain & a flag for retval copy to use.
				908	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				909	SmallVector<SDOperand, 8> Ops;
				910	Ops.push_back(Chain);
				911	Ops.push_back(Callee);
				912
				913	// Add argument registers to the end of the list so that they are known live
				914	// into the call.
				915	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				916	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				917	RegsToPass[i].second.getValueType()));
				918
				919	// Add an implicit use GOT pointer in EBX.
				920	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				921	Subtarget->isPICStyleGOT())
				922	Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
				923
				924	if (InFlag.Val)
				925	Ops.push_back(InFlag);
				926
				927	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				928	NodeTys, &Ops[0], Ops.size());
				929	InFlag = Chain.getValue(1);
				930
				931	// Create the CALLSEQ_END node.
				932	unsigned NumBytesForCalleeToPush = 0;
				933
				934	if (CC == CallingConv::X86_StdCall) {
				935	if (isVarArg)
				936	NumBytesForCalleeToPush = isSRet ? 4 : 0;
				937	else
				938	NumBytesForCalleeToPush = NumBytes;
				939	} else {
				940	// If this is is a call to a struct-return function, the callee
				941	// pops the hidden struct pointer, so we have to push it back.
				942	// This is common for Darwin/X86, Linux & Mingw32 targets.
				943	NumBytesForCalleeToPush = isSRet ? 4 : 0;
				944	}
				945
				946	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				947	Ops.clear();
				948	Ops.push_back(Chain);
				949	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				950	Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
				951	Ops.push_back(InFlag);
				952	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				953	InFlag = Chain.getValue(1);
				954
				955	// Handle result values, copying them out of physregs into vregs that we
				956	// return.
				957	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				958	}
				959
				960
				961	//===----------------------------------------------------------------------===//
				962	// FastCall Calling Convention implementation
				963	//===----------------------------------------------------------------------===//
				964	//
				965	// The X86 'fastcall' calling convention passes up to two integer arguments in
				966	// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
				967	// and requires that the callee pop its arguments off the stack (allowing proper
				968	// tail calls), and has the same return value conventions as C calling convs.
				969	//
				970	// This calling convention always arranges for the callee pop value to be 8n+4
				971	// bytes, which is needed for tail recursion elimination and stack alignment
				972	// reasons.
				973	SDOperand
				974	X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
				975	MachineFunction &MF = DAG.getMachineFunction();
				976	MachineFrameInfo *MFI = MF.getFrameInfo();
				977	SDOperand Root = Op.getOperand(0);
				978	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				979
				980	// Assign locations to all of the incoming arguments.
				981	SmallVector<CCValAssign, 16> ArgLocs;
				982	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				983	getTargetMachine(), ArgLocs);
				984	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);
				985
				986	SmallVector<SDOperand, 8> ArgValues;
				987	unsigned LastVal = ~0U;
				988	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				989	CCValAssign &VA = ArgLocs[i];
				990	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				991	// places.
				992	assert(VA.getValNo() != LastVal &&
				993	"Don't support value assigned to multiple locs yet");
				994	LastVal = VA.getValNo();
				995
				996	if (VA.isRegLoc()) {
				997	MVT::ValueType RegVT = VA.getLocVT();
				998	TargetRegisterClass *RC;
				999	if (RegVT == MVT::i32)
				1000	RC = X86::GR32RegisterClass;
				1001	else {
				1002	assert(MVT::isVector(RegVT));
				1003	RC = X86::VR128RegisterClass;
				1004	}
				1005
				1006	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				1007	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				1008
				1009	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				1010	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				1011	// right size.
				1012	if (VA.getLocInfo() == CCValAssign::SExt)
				1013	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				1014	DAG.getValueType(VA.getValVT()));
				1015	else if (VA.getLocInfo() == CCValAssign::ZExt)
				1016	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				1017	DAG.getValueType(VA.getValVT()));
				1018
				1019	if (VA.getLocInfo() != CCValAssign::Full)
				1020	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				1021
				1022	ArgValues.push_back(ArgValue);
				1023	} else {
				1024	assert(VA.isMemLoc());
Rafael Espindola	b53ef12	2007-09-21 14:55:38 +0000	[diff] [blame]	1025	ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1026	}
				1027	}
				1028
				1029	ArgValues.push_back(Root);
				1030
				1031	unsigned StackSize = CCInfo.getNextStackOffset();
				1032
				1033	if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
				1034	// Make sure the instruction takes 8n+4 bytes to make sure the start of the
				1035	// arguments and the arguments after the retaddr has been pushed are aligned.
				1036	if ((StackSize & 7) == 0)
				1037	StackSize += 4;
				1038	}
				1039
				1040	VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
				1041	RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1042	BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
				1043	BytesCallerReserves = 0;
				1044
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	1045	X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
				1046	FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1047
				1048	// Return the new list of results.
				1049	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				1050	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				1051	}
				1052
Rafael Espindola	ddb88da	2007-08-31 15:06:30 +0000	[diff] [blame]	1053	SDOperand
				1054	X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
				1055	const SDOperand &StackPtr,
				1056	const CCValAssign &VA,
				1057	SDOperand Chain,
				1058	SDOperand Arg) {
				1059	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				1060	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				1061	SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
				1062	unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
				1063	if (Flags & ISD::ParamFlags::ByVal) {
				1064	unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
				1065	ISD::ParamFlags::ByValAlignOffs);
				1066
Rafael Espindola	ddb88da	2007-08-31 15:06:30 +0000	[diff] [blame]	1067	unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
				1068	ISD::ParamFlags::ByValSizeOffs;
				1069
				1070	SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
				1071	SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
				1072
				1073	return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
				1074	AlignNode);
				1075	} else {
				1076	return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
				1077	}
				1078	}
				1079
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1080	SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
				1081	unsigned CC) {
				1082	SDOperand Chain = Op.getOperand(0);
				1083	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1084	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1085	SDOperand Callee = Op.getOperand(4);
				1086
				1087	// Analyze operands of the call, assigning locations to each operand.
				1088	SmallVector<CCValAssign, 16> ArgLocs;
				1089	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				1090	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);
				1091
				1092	// Get a count of how many bytes are to be pushed on the stack.
				1093	unsigned NumBytes = CCInfo.getNextStackOffset();
				1094
				1095	if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
				1096	// Make sure the instruction takes 8n+4 bytes to make sure the start of the
				1097	// arguments and the arguments after the retaddr has been pushed are aligned.
				1098	if ((NumBytes & 7) == 0)
				1099	NumBytes += 4;
				1100	}
				1101
				1102	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				1103
				1104	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				1105	SmallVector<SDOperand, 8> MemOpChains;
				1106
				1107	SDOperand StackPtr;
				1108
				1109	// Walk the register/memloc assignments, inserting copies/loads.
				1110	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1111	CCValAssign &VA = ArgLocs[i];
				1112	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				1113
				1114	// Promote the value if needed.
				1115	switch (VA.getLocInfo()) {
				1116	default: assert(0 && "Unknown loc info!");
				1117	case CCValAssign::Full: break;
				1118	case CCValAssign::SExt:
				1119	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				1120	break;
				1121	case CCValAssign::ZExt:
				1122	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				1123	break;
				1124	case CCValAssign::AExt:
				1125	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				1126	break;
				1127	}
				1128
				1129	if (VA.isRegLoc()) {
				1130	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1131	} else {
				1132	assert(VA.isMemLoc());
				1133	if (StackPtr.Val == 0)
				1134	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
Rafael Espindola	007b714	2007-09-21 15:50:22 +0000	[diff] [blame]	1135
				1136	MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
				1137	Arg));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1138	}
				1139	}
				1140
				1141	if (!MemOpChains.empty())
				1142	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1143	&MemOpChains[0], MemOpChains.size());
				1144
				1145	// Build a sequence of copy-to-reg nodes chained together with token chain
				1146	// and flag operands which copy the outgoing args into registers.
				1147	SDOperand InFlag;
				1148	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1149	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1150	InFlag);
				1151	InFlag = Chain.getValue(1);
				1152	}
				1153
				1154	// If the callee is a GlobalAddress node (quite common, every direct call is)
				1155	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				1156	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1157	// We should use extra load for direct calls to dllimported functions in
				1158	// non-JIT mode.
				1159	if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				1160	getTargetMachine(), true))
				1161	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				1162	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1163	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				1164
				1165	// ELF / PIC requires GOT in the EBX register before function calls via PLT
				1166	// GOT pointer.
				1167	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				1168	Subtarget->isPICStyleGOT()) {
				1169	Chain = DAG.getCopyToReg(Chain, X86::EBX,
				1170	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				1171	InFlag);
				1172	InFlag = Chain.getValue(1);
				1173	}
				1174
				1175	// Returns a chain & a flag for retval copy to use.
				1176	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1177	SmallVector<SDOperand, 8> Ops;
				1178	Ops.push_back(Chain);
				1179	Ops.push_back(Callee);
				1180
				1181	// Add argument registers to the end of the list so that they are known live
				1182	// into the call.
				1183	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1184	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1185	RegsToPass[i].second.getValueType()));
				1186
				1187	// Add an implicit use GOT pointer in EBX.
				1188	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				1189	Subtarget->isPICStyleGOT())
				1190	Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
				1191
				1192	if (InFlag.Val)
				1193	Ops.push_back(InFlag);
				1194
				1195	// FIXME: Do not generate X86ISD::TAILCALL for now.
				1196	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				1197	NodeTys, &Ops[0], Ops.size());
				1198	InFlag = Chain.getValue(1);
				1199
				1200	// Returns a flag for retval copy to use.
				1201	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1202	Ops.clear();
				1203	Ops.push_back(Chain);
				1204	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1205	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1206	Ops.push_back(InFlag);
				1207	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1208	InFlag = Chain.getValue(1);
				1209
				1210	// Handle result values, copying them out of physregs into vregs that we
				1211	// return.
				1212	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1213	}
				1214
				1215
				1216	//===----------------------------------------------------------------------===//
				1217	// X86-64 C Calling Convention implementation
				1218	//===----------------------------------------------------------------------===//
				1219
				1220	SDOperand
				1221	X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
				1222	MachineFunction &MF = DAG.getMachineFunction();
				1223	MachineFrameInfo *MFI = MF.getFrameInfo();
				1224	SDOperand Root = Op.getOperand(0);
				1225	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1226
				1227	static const unsigned GPR64ArgRegs[] = {
				1228	X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
				1229	};
				1230	static const unsigned XMMArgRegs[] = {
				1231	X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
				1232	X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
				1233	};
				1234
				1235
				1236	// Assign locations to all of the incoming arguments.
				1237	SmallVector<CCValAssign, 16> ArgLocs;
				1238	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				1239	getTargetMachine(), ArgLocs);
				1240	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
				1241
				1242	SmallVector<SDOperand, 8> ArgValues;
				1243	unsigned LastVal = ~0U;
				1244	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1245	CCValAssign &VA = ArgLocs[i];
				1246	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				1247	// places.
				1248	assert(VA.getValNo() != LastVal &&
				1249	"Don't support value assigned to multiple locs yet");
				1250	LastVal = VA.getValNo();
				1251
				1252	if (VA.isRegLoc()) {
				1253	MVT::ValueType RegVT = VA.getLocVT();
				1254	TargetRegisterClass *RC;
				1255	if (RegVT == MVT::i32)
				1256	RC = X86::GR32RegisterClass;
				1257	else if (RegVT == MVT::i64)
				1258	RC = X86::GR64RegisterClass;
				1259	else if (RegVT == MVT::f32)
				1260	RC = X86::FR32RegisterClass;
				1261	else if (RegVT == MVT::f64)
				1262	RC = X86::FR64RegisterClass;
				1263	else {
				1264	assert(MVT::isVector(RegVT));
				1265	if (MVT::getSizeInBits(RegVT) == 64) {
				1266	RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
				1267	RegVT = MVT::i64;
				1268	} else
				1269	RC = X86::VR128RegisterClass;
				1270	}
				1271
				1272	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				1273	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				1274
				1275	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				1276	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				1277	// right size.
				1278	if (VA.getLocInfo() == CCValAssign::SExt)
				1279	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				1280	DAG.getValueType(VA.getValVT()));
				1281	else if (VA.getLocInfo() == CCValAssign::ZExt)
				1282	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				1283	DAG.getValueType(VA.getValVT()));
				1284
				1285	if (VA.getLocInfo() != CCValAssign::Full)
				1286	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				1287
				1288	// Handle MMX values passed in GPRs.
				1289	if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
				1290	MVT::getSizeInBits(RegVT) == 64)
				1291	ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
				1292
				1293	ArgValues.push_back(ArgValue);
				1294	} else {
				1295	assert(VA.isMemLoc());
Rafael Espindola	03cbeb7	2007-09-14 15:48:13 +0000	[diff] [blame]	1296	ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1297	}
				1298	}
				1299
				1300	unsigned StackSize = CCInfo.getNextStackOffset();
				1301
				1302	// If the function takes variable number of arguments, make a frame index for
				1303	// the start of the first vararg value... for expansion of llvm.va_start.
				1304	if (isVarArg) {
				1305	unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
				1306	unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
				1307
				1308	// For X86-64, if there are vararg parameters that are passed via
				1309	// registers, then we must store them to their spots on the stack so they
				1310	// may be loaded by deferencing the result of va_next.
				1311	VarArgsGPOffset = NumIntRegs * 8;
				1312	VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
				1313	VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
				1314	RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
				1315
				1316	// Store the integer parameter registers.
				1317	SmallVector<SDOperand, 8> MemOps;
				1318	SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
				1319	SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
				1320	DAG.getConstant(VarArgsGPOffset, getPointerTy()));
				1321	for (; NumIntRegs != 6; ++NumIntRegs) {
				1322	unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
				1323	X86::GR64RegisterClass);
				1324	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
				1325	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1326	MemOps.push_back(Store);
				1327	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				1328	DAG.getConstant(8, getPointerTy()));
				1329	}
				1330
				1331	// Now store the XMM (fp + vector) parameter registers.
				1332	FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
				1333	DAG.getConstant(VarArgsFPOffset, getPointerTy()));
				1334	for (; NumXMMRegs != 8; ++NumXMMRegs) {
				1335	unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
				1336	X86::VR128RegisterClass);
				1337	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
				1338	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1339	MemOps.push_back(Store);
				1340	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				1341	DAG.getConstant(16, getPointerTy()));
				1342	}
				1343	if (!MemOps.empty())
				1344	Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1345	&MemOps[0], MemOps.size());
				1346	}
				1347
				1348	ArgValues.push_back(Root);
				1349
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1350	BytesToPopOnReturn = 0; // Callee pops nothing.
				1351	BytesCallerReserves = StackSize;
				1352
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	1353	X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
				1354	FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
				1355
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1356	// Return the new list of results.
				1357	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				1358	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				1359	}
				1360
				1361	SDOperand
				1362	X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
				1363	unsigned CC) {
				1364	SDOperand Chain = Op.getOperand(0);
				1365	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1366	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1367	SDOperand Callee = Op.getOperand(4);
				1368
				1369	// Analyze operands of the call, assigning locations to each operand.
				1370	SmallVector<CCValAssign, 16> ArgLocs;
				1371	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				1372	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
				1373
				1374	// Get a count of how many bytes are to be pushed on the stack.
				1375	unsigned NumBytes = CCInfo.getNextStackOffset();
				1376	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				1377
				1378	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				1379	SmallVector<SDOperand, 8> MemOpChains;
				1380
				1381	SDOperand StackPtr;
				1382
				1383	// Walk the register/memloc assignments, inserting copies/loads.
				1384	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1385	CCValAssign &VA = ArgLocs[i];
				1386	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				1387
				1388	// Promote the value if needed.
				1389	switch (VA.getLocInfo()) {
				1390	default: assert(0 && "Unknown loc info!");
				1391	case CCValAssign::Full: break;
				1392	case CCValAssign::SExt:
				1393	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				1394	break;
				1395	case CCValAssign::ZExt:
				1396	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				1397	break;
				1398	case CCValAssign::AExt:
				1399	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				1400	break;
				1401	}
				1402
				1403	if (VA.isRegLoc()) {
				1404	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1405	} else {
				1406	assert(VA.isMemLoc());
				1407	if (StackPtr.Val == 0)
				1408	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
Rafael Espindola	b8bcfcd	2007-08-20 15:18:24 +0000	[diff] [blame]	1409
Rafael Espindola	ddb88da	2007-08-31 15:06:30 +0000	[diff] [blame]	1410	MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
				1411	Arg));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1412	}
				1413	}
				1414
				1415	if (!MemOpChains.empty())
				1416	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1417	&MemOpChains[0], MemOpChains.size());
				1418
				1419	// Build a sequence of copy-to-reg nodes chained together with token chain
				1420	// and flag operands which copy the outgoing args into registers.
				1421	SDOperand InFlag;
				1422	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1423	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1424	InFlag);
				1425	InFlag = Chain.getValue(1);
				1426	}
				1427
				1428	if (isVarArg) {
				1429	// From AMD64 ABI document:
				1430	// For calls that may call functions that use varargs or stdargs
				1431	// (prototype-less calls or calls to functions containing ellipsis (...) in
				1432	// the declaration) %al is used as hidden argument to specify the number
				1433	// of SSE registers used. The contents of %al do not need to match exactly
				1434	// the number of registers, but must be an ubound on the number of SSE
				1435	// registers used and is in the range 0 - 8 inclusive.
				1436
				1437	// Count the number of XMM registers allocated.
				1438	static const unsigned XMMArgRegs[] = {
				1439	X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
				1440	X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
				1441	};
				1442	unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
				1443
				1444	Chain = DAG.getCopyToReg(Chain, X86::AL,
				1445	DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
				1446	InFlag = Chain.getValue(1);
				1447	}
				1448
				1449	// If the callee is a GlobalAddress node (quite common, every direct call is)
				1450	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				1451	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1452	// We should use extra load for direct calls to dllimported functions in
				1453	// non-JIT mode.
				1454	if (getTargetMachine().getCodeModel() != CodeModel::Large
				1455	&& !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				1456	getTargetMachine(), true))
				1457	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				1458	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1459	if (getTargetMachine().getCodeModel() != CodeModel::Large)
				1460	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				1461
				1462	// Returns a chain & a flag for retval copy to use.
				1463	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1464	SmallVector<SDOperand, 8> Ops;
				1465	Ops.push_back(Chain);
				1466	Ops.push_back(Callee);
				1467
				1468	// Add argument registers to the end of the list so that they are known live
				1469	// into the call.
				1470	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1471	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1472	RegsToPass[i].second.getValueType()));
				1473
				1474	if (InFlag.Val)
				1475	Ops.push_back(InFlag);
				1476
				1477	// FIXME: Do not generate X86ISD::TAILCALL for now.
				1478	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				1479	NodeTys, &Ops[0], Ops.size());
				1480	InFlag = Chain.getValue(1);
				1481
				1482	// Returns a flag for retval copy to use.
				1483	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1484	Ops.clear();
				1485	Ops.push_back(Chain);
				1486	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1487	Ops.push_back(DAG.getConstant(0, getPointerTy()));
				1488	Ops.push_back(InFlag);
				1489	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1490	InFlag = Chain.getValue(1);
				1491
				1492	// Handle result values, copying them out of physregs into vregs that we
				1493	// return.
				1494	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1495	}
				1496
				1497
				1498	//===----------------------------------------------------------------------===//
				1499	// Other Lowering Hooks
				1500	//===----------------------------------------------------------------------===//
				1501
				1502
				1503	SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	1504	MachineFunction &MF = DAG.getMachineFunction();
				1505	X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
				1506	int ReturnAddrIndex = FuncInfo->getRAIndex();
				1507
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1508	if (ReturnAddrIndex == 0) {
				1509	// Set up a frame object for the return address.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1510	if (Subtarget->is64Bit())
				1511	ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
				1512	else
				1513	ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	1514
				1515	FuncInfo->setRAIndex(ReturnAddrIndex);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1516	}
				1517
				1518	return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
				1519	}
				1520
				1521
				1522
				1523	/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
				1524	/// specific condition code. It returns a false if it cannot do a direct
				1525	/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
				1526	/// needed.
				1527	static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
				1528	unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
				1529	SelectionDAG &DAG) {
				1530	X86CC = X86::COND_INVALID;
				1531	if (!isFP) {
				1532	if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
				1533	if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
				1534	// X > -1 -> X == 0, jump !sign.
				1535	RHS = DAG.getConstant(0, RHS.getValueType());
				1536	X86CC = X86::COND_NS;
				1537	return true;
				1538	} else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
				1539	// X < 0 -> X == 0, jump on sign.
				1540	X86CC = X86::COND_S;
				1541	return true;
Dan Gohman	37b3426	2007-09-17 14:49:27 +0000	[diff] [blame]	1542	} else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
				1543	// X < 1 -> X <= 0
				1544	RHS = DAG.getConstant(0, RHS.getValueType());
				1545	X86CC = X86::COND_LE;
				1546	return true;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1547	}
				1548	}
				1549
				1550	switch (SetCCOpcode) {
				1551	default: break;
				1552	case ISD::SETEQ: X86CC = X86::COND_E; break;
				1553	case ISD::SETGT: X86CC = X86::COND_G; break;
				1554	case ISD::SETGE: X86CC = X86::COND_GE; break;
				1555	case ISD::SETLT: X86CC = X86::COND_L; break;
				1556	case ISD::SETLE: X86CC = X86::COND_LE; break;
				1557	case ISD::SETNE: X86CC = X86::COND_NE; break;
				1558	case ISD::SETULT: X86CC = X86::COND_B; break;
				1559	case ISD::SETUGT: X86CC = X86::COND_A; break;
				1560	case ISD::SETULE: X86CC = X86::COND_BE; break;
				1561	case ISD::SETUGE: X86CC = X86::COND_AE; break;
				1562	}
				1563	} else {
				1564	// On a floating point condition, the flags are set as follows:
				1565	// ZF PF CF op
				1566	// 0 \| 0 \| 0 \| X > Y
				1567	// 0 \| 0 \| 1 \| X < Y
				1568	// 1 \| 0 \| 0 \| X == Y
				1569	// 1 \| 1 \| 1 \| unordered
				1570	bool Flip = false;
				1571	switch (SetCCOpcode) {
				1572	default: break;
				1573	case ISD::SETUEQ:
				1574	case ISD::SETEQ: X86CC = X86::COND_E; break;
				1575	case ISD::SETOLT: Flip = true; // Fallthrough
				1576	case ISD::SETOGT:
				1577	case ISD::SETGT: X86CC = X86::COND_A; break;
				1578	case ISD::SETOLE: Flip = true; // Fallthrough
				1579	case ISD::SETOGE:
				1580	case ISD::SETGE: X86CC = X86::COND_AE; break;
				1581	case ISD::SETUGT: Flip = true; // Fallthrough
				1582	case ISD::SETULT:
				1583	case ISD::SETLT: X86CC = X86::COND_B; break;
				1584	case ISD::SETUGE: Flip = true; // Fallthrough
				1585	case ISD::SETULE:
				1586	case ISD::SETLE: X86CC = X86::COND_BE; break;
				1587	case ISD::SETONE:
				1588	case ISD::SETNE: X86CC = X86::COND_NE; break;
				1589	case ISD::SETUO: X86CC = X86::COND_P; break;
				1590	case ISD::SETO: X86CC = X86::COND_NP; break;
				1591	}
				1592	if (Flip)
				1593	std::swap(LHS, RHS);
				1594	}
				1595
				1596	return X86CC != X86::COND_INVALID;
				1597	}
				1598
				1599	/// hasFPCMov - is there a floating point cmov for the specific X86 condition
				1600	/// code. Current x86 isa includes the following FP cmov instructions:
				1601	/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
				1602	static bool hasFPCMov(unsigned X86CC) {
				1603	switch (X86CC) {
				1604	default:
				1605	return false;
				1606	case X86::COND_B:
				1607	case X86::COND_BE:
				1608	case X86::COND_E:
				1609	case X86::COND_P:
				1610	case X86::COND_A:
				1611	case X86::COND_AE:
				1612	case X86::COND_NE:
				1613	case X86::COND_NP:
				1614	return true;
				1615	}
				1616	}
				1617
				1618	/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
				1619	/// true if Op is undef or if its value falls within the specified range (L, H].
				1620	static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
				1621	if (Op.getOpcode() == ISD::UNDEF)
				1622	return true;
				1623
				1624	unsigned Val = cast<ConstantSDNode>(Op)->getValue();
				1625	return (Val >= Low && Val < Hi);
				1626	}
				1627
				1628	/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
				1629	/// true if Op is undef or if its value equal to the specified value.
				1630	static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
				1631	if (Op.getOpcode() == ISD::UNDEF)
				1632	return true;
				1633	return cast<ConstantSDNode>(Op)->getValue() == Val;
				1634	}
				1635
				1636	/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
				1637	/// specifies a shuffle of elements that is suitable for input to PSHUFD.
				1638	bool X86::isPSHUFDMask(SDNode *N) {
				1639	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1640
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	1641	if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1642	return false;
				1643
				1644	// Check if the value doesn't reference the second vector.
				1645	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
				1646	SDOperand Arg = N->getOperand(i);
				1647	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1648	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	1649	if (cast<ConstantSDNode>(Arg)->getValue() >= e)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1650	return false;
				1651	}
				1652
				1653	return true;
				1654	}
				1655
				1656	/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
				1657	/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
				1658	bool X86::isPSHUFHWMask(SDNode *N) {
				1659	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1660
				1661	if (N->getNumOperands() != 8)
				1662	return false;
				1663
				1664	// Lower quadword copied in order.
				1665	for (unsigned i = 0; i != 4; ++i) {
				1666	SDOperand Arg = N->getOperand(i);
				1667	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1668	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1669	if (cast<ConstantSDNode>(Arg)->getValue() != i)
				1670	return false;
				1671	}
				1672
				1673	// Upper quadword shuffled.
				1674	for (unsigned i = 4; i != 8; ++i) {
				1675	SDOperand Arg = N->getOperand(i);
				1676	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1677	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1678	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1679	if (Val < 4 \|\| Val > 7)
				1680	return false;
				1681	}
				1682
				1683	return true;
				1684	}
				1685
				1686	/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
				1687	/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
				1688	bool X86::isPSHUFLWMask(SDNode *N) {
				1689	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1690
				1691	if (N->getNumOperands() != 8)
				1692	return false;
				1693
				1694	// Upper quadword copied in order.
				1695	for (unsigned i = 4; i != 8; ++i)
				1696	if (!isUndefOrEqual(N->getOperand(i), i))
				1697	return false;
				1698
				1699	// Lower quadword shuffled.
				1700	for (unsigned i = 0; i != 4; ++i)
				1701	if (!isUndefOrInRange(N->getOperand(i), 0, 4))
				1702	return false;
				1703
				1704	return true;
				1705	}
				1706
				1707	/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
				1708	/// specifies a shuffle of elements that is suitable for input to SHUFP*.
				1709	static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
				1710	if (NumElems != 2 && NumElems != 4) return false;
				1711
				1712	unsigned Half = NumElems / 2;
				1713	for (unsigned i = 0; i < Half; ++i)
				1714	if (!isUndefOrInRange(Elems[i], 0, NumElems))
				1715	return false;
				1716	for (unsigned i = Half; i < NumElems; ++i)
				1717	if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
				1718	return false;
				1719
				1720	return true;
				1721	}
				1722
				1723	bool X86::isSHUFPMask(SDNode *N) {
				1724	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1725	return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
				1726	}
				1727
				1728	/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
				1729	/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
				1730	/// half elements to come from vector 1 (which would equal the dest.) and
				1731	/// the upper half to come from vector 2.
				1732	static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
				1733	if (NumOps != 2 && NumOps != 4) return false;
				1734
				1735	unsigned Half = NumOps / 2;
				1736	for (unsigned i = 0; i < Half; ++i)
				1737	if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
				1738	return false;
				1739	for (unsigned i = Half; i < NumOps; ++i)
				1740	if (!isUndefOrInRange(Ops[i], 0, NumOps))
				1741	return false;
				1742	return true;
				1743	}
				1744
				1745	static bool isCommutedSHUFP(SDNode *N) {
				1746	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1747	return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
				1748	}
				1749
				1750	/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
				1751	/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
				1752	bool X86::isMOVHLPSMask(SDNode *N) {
				1753	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1754
				1755	if (N->getNumOperands() != 4)
				1756	return false;
				1757
				1758	// Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
				1759	return isUndefOrEqual(N->getOperand(0), 6) &&
				1760	isUndefOrEqual(N->getOperand(1), 7) &&
				1761	isUndefOrEqual(N->getOperand(2), 2) &&
				1762	isUndefOrEqual(N->getOperand(3), 3);
				1763	}
				1764
				1765	/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
				1766	/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
				1767	/// <2, 3, 2, 3>
				1768	bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
				1769	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1770
				1771	if (N->getNumOperands() != 4)
				1772	return false;
				1773
				1774	// Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
				1775	return isUndefOrEqual(N->getOperand(0), 2) &&
				1776	isUndefOrEqual(N->getOperand(1), 3) &&
				1777	isUndefOrEqual(N->getOperand(2), 2) &&
				1778	isUndefOrEqual(N->getOperand(3), 3);
				1779	}
				1780
				1781	/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
				1782	/// specifies a shuffle of elements that is suitable for input to MOVLP{S\|D}.
				1783	bool X86::isMOVLPMask(SDNode *N) {
				1784	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1785
				1786	unsigned NumElems = N->getNumOperands();
				1787	if (NumElems != 2 && NumElems != 4)
				1788	return false;
				1789
				1790	for (unsigned i = 0; i < NumElems/2; ++i)
				1791	if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
				1792	return false;
				1793
				1794	for (unsigned i = NumElems/2; i < NumElems; ++i)
				1795	if (!isUndefOrEqual(N->getOperand(i), i))
				1796	return false;
				1797
				1798	return true;
				1799	}
				1800
				1801	/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
				1802	/// specifies a shuffle of elements that is suitable for input to MOVHP{S\|D}
				1803	/// and MOVLHPS.
				1804	bool X86::isMOVHPMask(SDNode *N) {
				1805	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1806
				1807	unsigned NumElems = N->getNumOperands();
				1808	if (NumElems != 2 && NumElems != 4)
				1809	return false;
				1810
				1811	for (unsigned i = 0; i < NumElems/2; ++i)
				1812	if (!isUndefOrEqual(N->getOperand(i), i))
				1813	return false;
				1814
				1815	for (unsigned i = 0; i < NumElems/2; ++i) {
				1816	SDOperand Arg = N->getOperand(i + NumElems/2);
				1817	if (!isUndefOrEqual(Arg, i + NumElems))
				1818	return false;
				1819	}
				1820
				1821	return true;
				1822	}
				1823
				1824	/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
				1825	/// specifies a shuffle of elements that is suitable for input to UNPCKL.
				1826	bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
				1827	bool V2IsSplat = false) {
				1828	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1829	return false;
				1830
				1831	for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
				1832	SDOperand BitI = Elts[i];
				1833	SDOperand BitI1 = Elts[i+1];
				1834	if (!isUndefOrEqual(BitI, j))
				1835	return false;
				1836	if (V2IsSplat) {
				1837	if (isUndefOrEqual(BitI1, NumElts))
				1838	return false;
				1839	} else {
				1840	if (!isUndefOrEqual(BitI1, j + NumElts))
				1841	return false;
				1842	}
				1843	}
				1844
				1845	return true;
				1846	}
				1847
				1848	bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
				1849	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1850	return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
				1851	}
				1852
				1853	/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
				1854	/// specifies a shuffle of elements that is suitable for input to UNPCKH.
				1855	bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
				1856	bool V2IsSplat = false) {
				1857	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1858	return false;
				1859
				1860	for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
				1861	SDOperand BitI = Elts[i];
				1862	SDOperand BitI1 = Elts[i+1];
				1863	if (!isUndefOrEqual(BitI, j + NumElts/2))
				1864	return false;
				1865	if (V2IsSplat) {
				1866	if (isUndefOrEqual(BitI1, NumElts))
				1867	return false;
				1868	} else {
				1869	if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
				1870	return false;
				1871	}
				1872	}
				1873
				1874	return true;
				1875	}
				1876
				1877	bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
				1878	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1879	return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
				1880	}
				1881
				1882	/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
				1883	/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
				1884	/// <0, 0, 1, 1>
				1885	bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
				1886	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1887
				1888	unsigned NumElems = N->getNumOperands();
				1889	if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
				1890	return false;
				1891
				1892	for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
				1893	SDOperand BitI = N->getOperand(i);
				1894	SDOperand BitI1 = N->getOperand(i+1);
				1895
				1896	if (!isUndefOrEqual(BitI, j))
				1897	return false;
				1898	if (!isUndefOrEqual(BitI1, j))
				1899	return false;
				1900	}
				1901
				1902	return true;
				1903	}
				1904
				1905	/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
				1906	/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
				1907	/// <2, 2, 3, 3>
				1908	bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
				1909	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1910
				1911	unsigned NumElems = N->getNumOperands();
				1912	if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
				1913	return false;
				1914
				1915	for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
				1916	SDOperand BitI = N->getOperand(i);
				1917	SDOperand BitI1 = N->getOperand(i + 1);
				1918
				1919	if (!isUndefOrEqual(BitI, j))
				1920	return false;
				1921	if (!isUndefOrEqual(BitI1, j))
				1922	return false;
				1923	}
				1924
				1925	return true;
				1926	}
				1927
				1928	/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
				1929	/// specifies a shuffle of elements that is suitable for input to MOVSS,
				1930	/// MOVSD, and MOVD, i.e. setting the lowest element.
				1931	static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
				1932	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1933	return false;
				1934
				1935	if (!isUndefOrEqual(Elts[0], NumElts))
				1936	return false;
				1937
				1938	for (unsigned i = 1; i < NumElts; ++i) {
				1939	if (!isUndefOrEqual(Elts[i], i))
				1940	return false;
				1941	}
				1942
				1943	return true;
				1944	}
				1945
				1946	bool X86::isMOVLMask(SDNode *N) {
				1947	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1948	return ::isMOVLMask(N->op_begin(), N->getNumOperands());
				1949	}
				1950
				1951	/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
				1952	/// of what x86 movss want. X86 movs requires the lowest element to be lowest
				1953	/// element of vector 2 and the other elements to come from vector 1 in order.
				1954	static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
				1955	bool V2IsSplat = false,
				1956	bool V2IsUndef = false) {
				1957	if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
				1958	return false;
				1959
				1960	if (!isUndefOrEqual(Ops[0], 0))
				1961	return false;
				1962
				1963	for (unsigned i = 1; i < NumOps; ++i) {
				1964	SDOperand Arg = Ops[i];
				1965	if (!(isUndefOrEqual(Arg, i+NumOps) \|\|
				1966	(V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) \|\|
				1967	(V2IsSplat && isUndefOrEqual(Arg, NumOps))))
				1968	return false;
				1969	}
				1970
				1971	return true;
				1972	}
				1973
				1974	static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
				1975	bool V2IsUndef = false) {
				1976	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1977	return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
				1978	V2IsSplat, V2IsUndef);
				1979	}
				1980
				1981	/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
				1982	/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
				1983	bool X86::isMOVSHDUPMask(SDNode *N) {
				1984	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1985
				1986	if (N->getNumOperands() != 4)
				1987	return false;
				1988
				1989	// Expect 1, 1, 3, 3
				1990	for (unsigned i = 0; i < 2; ++i) {
				1991	SDOperand Arg = N->getOperand(i);
				1992	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1993	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1994	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1995	if (Val != 1) return false;
				1996	}
				1997
				1998	bool HasHi = false;
				1999	for (unsigned i = 2; i < 4; ++i) {
				2000	SDOperand Arg = N->getOperand(i);
				2001	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2002	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2003	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2004	if (Val != 3) return false;
				2005	HasHi = true;
				2006	}
				2007
				2008	// Don't use movshdup if it can be done with a shufps.
				2009	return HasHi;
				2010	}
				2011
				2012	/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
				2013	/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
				2014	bool X86::isMOVSLDUPMask(SDNode *N) {
				2015	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2016
				2017	if (N->getNumOperands() != 4)
				2018	return false;
				2019
				2020	// Expect 0, 0, 2, 2
				2021	for (unsigned i = 0; i < 2; ++i) {
				2022	SDOperand Arg = N->getOperand(i);
				2023	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2024	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2025	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2026	if (Val != 0) return false;
				2027	}
				2028
				2029	bool HasHi = false;
				2030	for (unsigned i = 2; i < 4; ++i) {
				2031	SDOperand Arg = N->getOperand(i);
				2032	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2033	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2034	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2035	if (Val != 2) return false;
				2036	HasHi = true;
				2037	}
				2038
				2039	// Don't use movshdup if it can be done with a shufps.
				2040	return HasHi;
				2041	}
				2042
				2043	/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
				2044	/// specifies a identity operation on the LHS or RHS.
				2045	static bool isIdentityMask(SDNode *N, bool RHS = false) {
				2046	unsigned NumElems = N->getNumOperands();
				2047	for (unsigned i = 0; i < NumElems; ++i)
				2048	if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
				2049	return false;
				2050	return true;
				2051	}
				2052
				2053	/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
				2054	/// a splat of a single element.
				2055	static bool isSplatMask(SDNode *N) {
				2056	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2057
				2058	// This is a splat operation if each element of the permute is the same, and
				2059	// if the value doesn't reference the second vector.
				2060	unsigned NumElems = N->getNumOperands();
				2061	SDOperand ElementBase;
				2062	unsigned i = 0;
				2063	for (; i != NumElems; ++i) {
				2064	SDOperand Elt = N->getOperand(i);
				2065	if (isa<ConstantSDNode>(Elt)) {
				2066	ElementBase = Elt;
				2067	break;
				2068	}
				2069	}
				2070
				2071	if (!ElementBase.Val)
				2072	return false;
				2073
				2074	for (; i != NumElems; ++i) {
				2075	SDOperand Arg = N->getOperand(i);
				2076	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2077	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2078	if (Arg != ElementBase) return false;
				2079	}
				2080
				2081	// Make sure it is a splat of the first vector operand.
				2082	return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
				2083	}
				2084
				2085	/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
				2086	/// a splat of a single element and it's a 2 or 4 element mask.
				2087	bool X86::isSplatMask(SDNode *N) {
				2088	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2089
				2090	// We can only splat 64-bit, and 32-bit quantities with a single instruction.
				2091	if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
				2092	return false;
				2093	return ::isSplatMask(N);
				2094	}
				2095
				2096	/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
				2097	/// specifies a splat of zero element.
				2098	bool X86::isSplatLoMask(SDNode *N) {
				2099	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2100
				2101	for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
				2102	if (!isUndefOrEqual(N->getOperand(i), 0))
				2103	return false;
				2104	return true;
				2105	}
				2106
				2107	/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
				2108	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
				2109	/// instructions.
				2110	unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
				2111	unsigned NumOperands = N->getNumOperands();
				2112	unsigned Shift = (NumOperands == 4) ? 2 : 1;
				2113	unsigned Mask = 0;
				2114	for (unsigned i = 0; i < NumOperands; ++i) {
				2115	unsigned Val = 0;
				2116	SDOperand Arg = N->getOperand(NumOperands-i-1);
				2117	if (Arg.getOpcode() != ISD::UNDEF)
				2118	Val = cast<ConstantSDNode>(Arg)->getValue();
				2119	if (Val >= NumOperands) Val -= NumOperands;
				2120	Mask \|= Val;
				2121	if (i != NumOperands - 1)
				2122	Mask <<= Shift;
				2123	}
				2124
				2125	return Mask;
				2126	}
				2127
				2128	/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
				2129	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
				2130	/// instructions.
				2131	unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
				2132	unsigned Mask = 0;
				2133	// 8 nodes, but we only care about the last 4.
				2134	for (unsigned i = 7; i >= 4; --i) {
				2135	unsigned Val = 0;
				2136	SDOperand Arg = N->getOperand(i);
				2137	if (Arg.getOpcode() != ISD::UNDEF)
				2138	Val = cast<ConstantSDNode>(Arg)->getValue();
				2139	Mask \|= (Val - 4);
				2140	if (i != 4)
				2141	Mask <<= 2;
				2142	}
				2143
				2144	return Mask;
				2145	}
				2146
				2147	/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
				2148	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
				2149	/// instructions.
				2150	unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
				2151	unsigned Mask = 0;
				2152	// 8 nodes, but we only care about the first 4.
				2153	for (int i = 3; i >= 0; --i) {
				2154	unsigned Val = 0;
				2155	SDOperand Arg = N->getOperand(i);
				2156	if (Arg.getOpcode() != ISD::UNDEF)
				2157	Val = cast<ConstantSDNode>(Arg)->getValue();
				2158	Mask \|= Val;
				2159	if (i != 0)
				2160	Mask <<= 2;
				2161	}
				2162
				2163	return Mask;
				2164	}
				2165
				2166	/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
				2167	/// specifies a 8 element shuffle that can be broken into a pair of
				2168	/// PSHUFHW and PSHUFLW.
				2169	static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
				2170	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2171
				2172	if (N->getNumOperands() != 8)
				2173	return false;
				2174
				2175	// Lower quadword shuffled.
				2176	for (unsigned i = 0; i != 4; ++i) {
				2177	SDOperand Arg = N->getOperand(i);
				2178	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2179	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2180	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2181	if (Val > 4)
				2182	return false;
				2183	}
				2184
				2185	// Upper quadword shuffled.
				2186	for (unsigned i = 4; i != 8; ++i) {
				2187	SDOperand Arg = N->getOperand(i);
				2188	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2189	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2190	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2191	if (Val < 4 \|\| Val > 7)
				2192	return false;
				2193	}
				2194
				2195	return true;
				2196	}
				2197
				2198	/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
				2199	/// values in ther permute mask.
				2200	static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
				2201	SDOperand &V2, SDOperand &Mask,
				2202	SelectionDAG &DAG) {
				2203	MVT::ValueType VT = Op.getValueType();
				2204	MVT::ValueType MaskVT = Mask.getValueType();
				2205	MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
				2206	unsigned NumElems = Mask.getNumOperands();
				2207	SmallVector<SDOperand, 8> MaskVec;
				2208
				2209	for (unsigned i = 0; i != NumElems; ++i) {
				2210	SDOperand Arg = Mask.getOperand(i);
				2211	if (Arg.getOpcode() == ISD::UNDEF) {
				2212	MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
				2213	continue;
				2214	}
				2215	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2216	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2217	if (Val < NumElems)
				2218	MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
				2219	else
				2220	MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
				2221	}
				2222
				2223	std::swap(V1, V2);
				2224	Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2225	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2226	}
				2227
				2228	/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
				2229	/// match movhlps. The lower half elements should come from upper half of
				2230	/// V1 (and in order), and the upper half elements should come from the upper
				2231	/// half of V2 (and in order).
				2232	static bool ShouldXformToMOVHLPS(SDNode *Mask) {
				2233	unsigned NumElems = Mask->getNumOperands();
				2234	if (NumElems != 4)
				2235	return false;
				2236	for (unsigned i = 0, e = 2; i != e; ++i)
				2237	if (!isUndefOrEqual(Mask->getOperand(i), i+2))
				2238	return false;
				2239	for (unsigned i = 2; i != 4; ++i)
				2240	if (!isUndefOrEqual(Mask->getOperand(i), i+4))
				2241	return false;
				2242	return true;
				2243	}
				2244
				2245	/// isScalarLoadToVector - Returns true if the node is a scalar load that
				2246	/// is promoted to a vector.
				2247	static inline bool isScalarLoadToVector(SDNode *N) {
				2248	if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
				2249	N = N->getOperand(0).Val;
				2250	return ISD::isNON_EXTLoad(N);
				2251	}
				2252	return false;
				2253	}
				2254
				2255	/// ShouldXformToMOVLP{S\|D} - Return true if the node should be transformed to
				2256	/// match movlp{s\|d}. The lower half elements should come from lower half of
				2257	/// V1 (and in order), and the upper half elements should come from the upper
				2258	/// half of V2 (and in order). And since V1 will become the source of the
				2259	/// MOVLP, it must be either a vector load or a scalar load to vector.
				2260	static bool ShouldXformToMOVLP(SDNode V1, SDNode V2, SDNode *Mask) {
				2261	if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
				2262	return false;
				2263	// Is V2 is a vector load, don't do this transformation. We will try to use
				2264	// load folding shufps op.
				2265	if (ISD::isNON_EXTLoad(V2))
				2266	return false;
				2267
				2268	unsigned NumElems = Mask->getNumOperands();
				2269	if (NumElems != 2 && NumElems != 4)
				2270	return false;
				2271	for (unsigned i = 0, e = NumElems/2; i != e; ++i)
				2272	if (!isUndefOrEqual(Mask->getOperand(i), i))
				2273	return false;
				2274	for (unsigned i = NumElems/2; i != NumElems; ++i)
				2275	if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
				2276	return false;
				2277	return true;
				2278	}
				2279
				2280	/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
				2281	/// all the same.
				2282	static bool isSplatVector(SDNode *N) {
				2283	if (N->getOpcode() != ISD::BUILD_VECTOR)
				2284	return false;
				2285
				2286	SDOperand SplatValue = N->getOperand(0);
				2287	for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
				2288	if (N->getOperand(i) != SplatValue)
				2289	return false;
				2290	return true;
				2291	}
				2292
				2293	/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
				2294	/// to an undef.
				2295	static bool isUndefShuffle(SDNode *N) {
				2296	if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
				2297	return false;
				2298
				2299	SDOperand V1 = N->getOperand(0);
				2300	SDOperand V2 = N->getOperand(1);
				2301	SDOperand Mask = N->getOperand(2);
				2302	unsigned NumElems = Mask.getNumOperands();
				2303	for (unsigned i = 0; i != NumElems; ++i) {
				2304	SDOperand Arg = Mask.getOperand(i);
				2305	if (Arg.getOpcode() != ISD::UNDEF) {
				2306	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2307	if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
				2308	return false;
				2309	else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
				2310	return false;
				2311	}
				2312	}
				2313	return true;
				2314	}
				2315
				2316	/// isZeroNode - Returns true if Elt is a constant zero or a floating point
				2317	/// constant +0.0.
				2318	static inline bool isZeroNode(SDOperand Elt) {
				2319	return ((isa<ConstantSDNode>(Elt) &&
				2320	cast<ConstantSDNode>(Elt)->getValue() == 0) \|\|
				2321	(isa<ConstantFPSDNode>(Elt) &&
Dale Johannesen	df8a831	2007-08-31 04:03:46 +0000	[diff] [blame]	2322	cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2323	}
				2324
				2325	/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
				2326	/// to an zero vector.
				2327	static bool isZeroShuffle(SDNode *N) {
				2328	if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
				2329	return false;
				2330
				2331	SDOperand V1 = N->getOperand(0);
				2332	SDOperand V2 = N->getOperand(1);
				2333	SDOperand Mask = N->getOperand(2);
				2334	unsigned NumElems = Mask.getNumOperands();
				2335	for (unsigned i = 0; i != NumElems; ++i) {
				2336	SDOperand Arg = Mask.getOperand(i);
				2337	if (Arg.getOpcode() != ISD::UNDEF) {
				2338	unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
				2339	if (Idx < NumElems) {
				2340	unsigned Opc = V1.Val->getOpcode();
				2341	if (Opc == ISD::UNDEF)
				2342	continue;
				2343	if (Opc != ISD::BUILD_VECTOR \|\|
				2344	!isZeroNode(V1.Val->getOperand(Idx)))
				2345	return false;
				2346	} else if (Idx >= NumElems) {
				2347	unsigned Opc = V2.Val->getOpcode();
				2348	if (Opc == ISD::UNDEF)
				2349	continue;
				2350	if (Opc != ISD::BUILD_VECTOR \|\|
				2351	!isZeroNode(V2.Val->getOperand(Idx - NumElems)))
				2352	return false;
				2353	}
				2354	}
				2355	}
				2356	return true;
				2357	}
				2358
				2359	/// getZeroVector - Returns a vector of specified type with all zero elements.
				2360	///
				2361	static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
				2362	assert(MVT::isVector(VT) && "Expected a vector type");
				2363	unsigned NumElems = MVT::getVectorNumElements(VT);
				2364	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				2365	bool isFP = MVT::isFloatingPoint(EVT);
				2366	SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
				2367	SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
				2368	return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
				2369	}
				2370
				2371	/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
				2372	/// that point to V2 points to its first element.
				2373	static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
				2374	assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
				2375
				2376	bool Changed = false;
				2377	SmallVector<SDOperand, 8> MaskVec;
				2378	unsigned NumElems = Mask.getNumOperands();
				2379	for (unsigned i = 0; i != NumElems; ++i) {
				2380	SDOperand Arg = Mask.getOperand(i);
				2381	if (Arg.getOpcode() != ISD::UNDEF) {
				2382	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2383	if (Val > NumElems) {
				2384	Arg = DAG.getConstant(NumElems, Arg.getValueType());
				2385	Changed = true;
				2386	}
				2387	}
				2388	MaskVec.push_back(Arg);
				2389	}
				2390
				2391	if (Changed)
				2392	Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
				2393	&MaskVec[0], MaskVec.size());
				2394	return Mask;
				2395	}
				2396
				2397	/// getMOVLMask - Returns a vector_shuffle mask for an movs{s\|d}, movd
				2398	/// operation of specified width.
				2399	static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
				2400	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2401	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2402
				2403	SmallVector<SDOperand, 8> MaskVec;
				2404	MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
				2405	for (unsigned i = 1; i != NumElems; ++i)
				2406	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2407	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2408	}
				2409
				2410	/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
				2411	/// of specified width.
				2412	static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
				2413	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2414	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2415	SmallVector<SDOperand, 8> MaskVec;
				2416	for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
				2417	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2418	MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
				2419	}
				2420	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2421	}
				2422
				2423	/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
				2424	/// of specified width.
				2425	static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
				2426	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2427	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2428	unsigned Half = NumElems/2;
				2429	SmallVector<SDOperand, 8> MaskVec;
				2430	for (unsigned i = 0; i != Half; ++i) {
				2431	MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
				2432	MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
				2433	}
				2434	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2435	}
				2436
				2437	/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
				2438	///
				2439	static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
				2440	SDOperand V1 = Op.getOperand(0);
				2441	SDOperand Mask = Op.getOperand(2);
				2442	MVT::ValueType VT = Op.getValueType();
				2443	unsigned NumElems = Mask.getNumOperands();
				2444	Mask = getUnpacklMask(NumElems, DAG);
				2445	while (NumElems != 4) {
				2446	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
				2447	NumElems >>= 1;
				2448	}
				2449	V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
				2450
				2451	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2452	Mask = getZeroVector(MaskVT, DAG);
				2453	SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
				2454	DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
				2455	return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
				2456	}
				2457
				2458	/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
				2459	/// vector of zero or undef vector.
				2460	static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
				2461	unsigned NumElems, unsigned Idx,
				2462	bool isZero, SelectionDAG &DAG) {
				2463	SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
				2464	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2465	MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
				2466	SDOperand Zero = DAG.getConstant(0, EVT);
				2467	SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
				2468	MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
				2469	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2470	&MaskVec[0], MaskVec.size());
				2471	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2472	}
				2473
				2474	/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
				2475	///
				2476	static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
				2477	unsigned NumNonZero, unsigned NumZero,
				2478	SelectionDAG &DAG, TargetLowering &TLI) {
				2479	if (NumNonZero > 8)
				2480	return SDOperand();
				2481
				2482	SDOperand V(0, 0);
				2483	bool First = true;
				2484	for (unsigned i = 0; i < 16; ++i) {
				2485	bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
				2486	if (ThisIsNonZero && First) {
				2487	if (NumZero)
				2488	V = getZeroVector(MVT::v8i16, DAG);
				2489	else
				2490	V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
				2491	First = false;
				2492	}
				2493
				2494	if ((i & 1) != 0) {
				2495	SDOperand ThisElt(0, 0), LastElt(0, 0);
				2496	bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
				2497	if (LastIsNonZero) {
				2498	LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
				2499	}
				2500	if (ThisIsNonZero) {
				2501	ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
				2502	ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
				2503	ThisElt, DAG.getConstant(8, MVT::i8));
				2504	if (LastIsNonZero)
				2505	ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
				2506	} else
				2507	ThisElt = LastElt;
				2508
				2509	if (ThisElt.Val)
				2510	V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
				2511	DAG.getConstant(i/2, TLI.getPointerTy()));
				2512	}
				2513	}
				2514
				2515	return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
				2516	}
				2517
				2518	/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
				2519	///
				2520	static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
				2521	unsigned NumNonZero, unsigned NumZero,
				2522	SelectionDAG &DAG, TargetLowering &TLI) {
				2523	if (NumNonZero > 4)
				2524	return SDOperand();
				2525
				2526	SDOperand V(0, 0);
				2527	bool First = true;
				2528	for (unsigned i = 0; i < 8; ++i) {
				2529	bool isNonZero = (NonZeros & (1 << i)) != 0;
				2530	if (isNonZero) {
				2531	if (First) {
				2532	if (NumZero)
				2533	V = getZeroVector(MVT::v8i16, DAG);
				2534	else
				2535	V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
				2536	First = false;
				2537	}
				2538	V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
				2539	DAG.getConstant(i, TLI.getPointerTy()));
				2540	}
				2541	}
				2542
				2543	return V;
				2544	}
				2545
				2546	SDOperand
				2547	X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				2548	// All zero's are handled with pxor.
				2549	if (ISD::isBuildVectorAllZeros(Op.Val))
				2550	return Op;
				2551
				2552	// All one's are handled with pcmpeqd.
				2553	if (ISD::isBuildVectorAllOnes(Op.Val))
				2554	return Op;
				2555
				2556	MVT::ValueType VT = Op.getValueType();
				2557	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				2558	unsigned EVTBits = MVT::getSizeInBits(EVT);
				2559
				2560	unsigned NumElems = Op.getNumOperands();
				2561	unsigned NumZero = 0;
				2562	unsigned NumNonZero = 0;
				2563	unsigned NonZeros = 0;
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2564	unsigned NumNonZeroImms = 0;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2565	std::set<SDOperand> Values;
				2566	for (unsigned i = 0; i < NumElems; ++i) {
				2567	SDOperand Elt = Op.getOperand(i);
				2568	if (Elt.getOpcode() != ISD::UNDEF) {
				2569	Values.insert(Elt);
				2570	if (isZeroNode(Elt))
				2571	NumZero++;
				2572	else {
				2573	NonZeros \|= (1 << i);
				2574	NumNonZero++;
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2575	if (Elt.getOpcode() == ISD::Constant \|\|
				2576	Elt.getOpcode() == ISD::ConstantFP)
				2577	NumNonZeroImms++;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2578	}
				2579	}
				2580	}
				2581
				2582	if (NumNonZero == 0) {
				2583	if (NumZero == 0)
				2584	// All undef vector. Return an UNDEF.
				2585	return DAG.getNode(ISD::UNDEF, VT);
				2586	else
				2587	// A mix of zero and undef. Return a zero vector.
				2588	return getZeroVector(VT, DAG);
				2589	}
				2590
				2591	// Splat is obviously ok. Let legalizer expand it to a shuffle.
				2592	if (Values.size() == 1)
				2593	return SDOperand();
				2594
				2595	// Special case for single non-zero element.
				2596	if (NumNonZero == 1) {
				2597	unsigned Idx = CountTrailingZeros_32(NonZeros);
				2598	SDOperand Item = Op.getOperand(Idx);
				2599	Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
				2600	if (Idx == 0)
				2601	// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
				2602	return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
				2603	NumZero > 0, DAG);
				2604
				2605	if (EVTBits == 32) {
				2606	// Turn it into a shuffle of zero and zero-extended scalar to vector.
				2607	Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
				2608	DAG);
				2609	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2610	MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
				2611	SmallVector<SDOperand, 8> MaskVec;
				2612	for (unsigned i = 0; i < NumElems; i++)
				2613	MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
				2614	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2615	&MaskVec[0], MaskVec.size());
				2616	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
				2617	DAG.getNode(ISD::UNDEF, VT), Mask);
				2618	}
				2619	}
				2620
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2621	// A vector full of immediates; various special cases are already
				2622	// handled, so this is best done with a single constant-pool load.
				2623	if (NumNonZero == NumNonZeroImms)
				2624	return SDOperand();
				2625
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2626	// Let legalizer expand 2-wide build_vectors.
				2627	if (EVTBits == 64)
				2628	return SDOperand();
				2629
				2630	// If element VT is < 32 bits, convert it to inserts into a zero vector.
				2631	if (EVTBits == 8 && NumElems == 16) {
				2632	SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
				2633	*this);
				2634	if (V.Val) return V;
				2635	}
				2636
				2637	if (EVTBits == 16 && NumElems == 8) {
				2638	SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
				2639	*this);
				2640	if (V.Val) return V;
				2641	}
				2642
				2643	// If element VT is == 32 bits, turn it into a number of shuffles.
				2644	SmallVector<SDOperand, 8> V;
				2645	V.resize(NumElems);
				2646	if (NumElems == 4 && NumZero > 0) {
				2647	for (unsigned i = 0; i < 4; ++i) {
				2648	bool isZero = !(NonZeros & (1 << i));
				2649	if (isZero)
				2650	V[i] = getZeroVector(VT, DAG);
				2651	else
				2652	V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
				2653	}
				2654
				2655	for (unsigned i = 0; i < 2; ++i) {
				2656	switch ((NonZeros & (0x3 << i2)) >> (i2)) {
				2657	default: break;
				2658	case 0:
				2659	V[i] = V[i*2]; // Must be a zero vector.
				2660	break;
				2661	case 1:
				2662	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2+1], V[i2],
				2663	getMOVLMask(NumElems, DAG));
				2664	break;
				2665	case 2:
				2666	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2], V[i2+1],
				2667	getMOVLMask(NumElems, DAG));
				2668	break;
				2669	case 3:
				2670	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2], V[i2+1],
				2671	getUnpacklMask(NumElems, DAG));
				2672	break;
				2673	}
				2674	}
				2675
				2676	// Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
				2677	// clears the upper bits.
				2678	// FIXME: we can do the same for v4f32 case when we know both parts of
				2679	// the lower half come from scalar_to_vector (loadf32). We should do
				2680	// that in post legalizer dag combiner with target specific hooks.
				2681	if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
				2682	return V[0];
				2683	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2684	MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
				2685	SmallVector<SDOperand, 8> MaskVec;
				2686	bool Reverse = (NonZeros & 0x3) == 2;
				2687	for (unsigned i = 0; i < 2; ++i)
				2688	if (Reverse)
				2689	MaskVec.push_back(DAG.getConstant(1-i, EVT));
				2690	else
				2691	MaskVec.push_back(DAG.getConstant(i, EVT));
				2692	Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
				2693	for (unsigned i = 0; i < 2; ++i)
				2694	if (Reverse)
				2695	MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
				2696	else
				2697	MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
				2698	SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2699	&MaskVec[0], MaskVec.size());
				2700	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
				2701	}
				2702
				2703	if (Values.size() > 2) {
				2704	// Expand into a number of unpckl*.
				2705	// e.g. for v4f32
				2706	// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
				2707	// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
				2708	// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
				2709	SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
				2710	for (unsigned i = 0; i < NumElems; ++i)
				2711	V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
				2712	NumElems >>= 1;
				2713	while (NumElems != 0) {
				2714	for (unsigned i = 0; i < NumElems; ++i)
				2715	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
				2716	UnpckMask);
				2717	NumElems >>= 1;
				2718	}
				2719	return V[0];
				2720	}
				2721
				2722	return SDOperand();
				2723	}
				2724
				2725	SDOperand
				2726	X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
				2727	SDOperand V1 = Op.getOperand(0);
				2728	SDOperand V2 = Op.getOperand(1);
				2729	SDOperand PermMask = Op.getOperand(2);
				2730	MVT::ValueType VT = Op.getValueType();
				2731	unsigned NumElems = PermMask.getNumOperands();
				2732	bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
				2733	bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
				2734	bool V1IsSplat = false;
				2735	bool V2IsSplat = false;
				2736
				2737	if (isUndefShuffle(Op.Val))
				2738	return DAG.getNode(ISD::UNDEF, VT);
				2739
				2740	if (isZeroShuffle(Op.Val))
				2741	return getZeroVector(VT, DAG);
				2742
				2743	if (isIdentityMask(PermMask.Val))
				2744	return V1;
				2745	else if (isIdentityMask(PermMask.Val, true))
				2746	return V2;
				2747
				2748	if (isSplatMask(PermMask.Val)) {
				2749	if (NumElems <= 4) return Op;
				2750	// Promote it to a v4i32 splat.
				2751	return PromoteSplat(Op, DAG);
				2752	}
				2753
				2754	if (X86::isMOVLMask(PermMask.Val))
				2755	return (V1IsUndef) ? V2 : Op;
				2756
				2757	if (X86::isMOVSHDUPMask(PermMask.Val) \|\|
				2758	X86::isMOVSLDUPMask(PermMask.Val) \|\|
				2759	X86::isMOVHLPSMask(PermMask.Val) \|\|
				2760	X86::isMOVHPMask(PermMask.Val) \|\|
				2761	X86::isMOVLPMask(PermMask.Val))
				2762	return Op;
				2763
				2764	if (ShouldXformToMOVHLPS(PermMask.Val) \|\|
				2765	ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
				2766	return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2767
				2768	bool Commuted = false;
				2769	V1IsSplat = isSplatVector(V1.Val);
				2770	V2IsSplat = isSplatVector(V2.Val);
				2771	if ((V1IsSplat \|\| V1IsUndef) && !(V2IsSplat \|\| V2IsUndef)) {
				2772	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2773	std::swap(V1IsSplat, V2IsSplat);
				2774	std::swap(V1IsUndef, V2IsUndef);
				2775	Commuted = true;
				2776	}
				2777
				2778	if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
				2779	if (V2IsUndef) return V1;
				2780	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2781	if (V2IsSplat) {
				2782	// V2 is a splat, so the mask may be malformed. That is, it may point
				2783	// to any V2 element. The instruction selectior won't like this. Get
				2784	// a corrected mask and commute to form a proper MOVS{S\|D}.
				2785	SDOperand NewMask = getMOVLMask(NumElems, DAG);
				2786	if (NewMask.Val != PermMask.Val)
				2787	Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2788	}
				2789	return Op;
				2790	}
				2791
				2792	if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) \|\|
				2793	X86::isUNPCKH_v_undef_Mask(PermMask.Val) \|\|
				2794	X86::isUNPCKLMask(PermMask.Val) \|\|
				2795	X86::isUNPCKHMask(PermMask.Val))
				2796	return Op;
				2797
				2798	if (V2IsSplat) {
				2799	// Normalize mask so all entries that point to V2 points to its first
				2800	// element then try to match unpck{h\|l} again. If match, return a
				2801	// new vector_shuffle with the corrected mask.
				2802	SDOperand NewMask = NormalizeMask(PermMask, DAG);
				2803	if (NewMask.Val != PermMask.Val) {
				2804	if (X86::isUNPCKLMask(PermMask.Val, true)) {
				2805	SDOperand NewMask = getUnpacklMask(NumElems, DAG);
				2806	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2807	} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
				2808	SDOperand NewMask = getUnpackhMask(NumElems, DAG);
				2809	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2810	}
				2811	}
				2812	}
				2813
				2814	// Normalize the node to match x86 shuffle ops if needed
				2815	if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
				2816	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2817
				2818	if (Commuted) {
				2819	// Commute is back and try unpck* again.
				2820	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2821	if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) \|\|
				2822	X86::isUNPCKH_v_undef_Mask(PermMask.Val) \|\|
				2823	X86::isUNPCKLMask(PermMask.Val) \|\|
				2824	X86::isUNPCKHMask(PermMask.Val))
				2825	return Op;
				2826	}
				2827
				2828	// If VT is integer, try PSHUF* first, then SHUFP*.
				2829	if (MVT::isInteger(VT)) {
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	2830	// MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
				2831	// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
				2832	if (((MVT::getSizeInBits(VT) != 64 \|\| NumElems == 4) &&
				2833	X86::isPSHUFDMask(PermMask.Val)) \|\|
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2834	X86::isPSHUFHWMask(PermMask.Val) \|\|
				2835	X86::isPSHUFLWMask(PermMask.Val)) {
				2836	if (V2.getOpcode() != ISD::UNDEF)
				2837	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
				2838	DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
				2839	return Op;
				2840	}
				2841
				2842	if (X86::isSHUFPMask(PermMask.Val) &&
				2843	MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
				2844	return Op;
				2845
				2846	// Handle v8i16 shuffle high / low shuffle node pair.
				2847	if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
				2848	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2849	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2850	SmallVector<SDOperand, 8> MaskVec;
				2851	for (unsigned i = 0; i != 4; ++i)
				2852	MaskVec.push_back(PermMask.getOperand(i));
				2853	for (unsigned i = 4; i != 8; ++i)
				2854	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2855	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2856	&MaskVec[0], MaskVec.size());
				2857	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2858	MaskVec.clear();
				2859	for (unsigned i = 0; i != 4; ++i)
				2860	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2861	for (unsigned i = 4; i != 8; ++i)
				2862	MaskVec.push_back(PermMask.getOperand(i));
				2863	Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
				2864	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2865	}
				2866	} else {
				2867	// Floating point cases in the other order.
				2868	if (X86::isSHUFPMask(PermMask.Val))
				2869	return Op;
				2870	if (X86::isPSHUFDMask(PermMask.Val) \|\|
				2871	X86::isPSHUFHWMask(PermMask.Val) \|\|
				2872	X86::isPSHUFLWMask(PermMask.Val)) {
				2873	if (V2.getOpcode() != ISD::UNDEF)
				2874	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
				2875	DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
				2876	return Op;
				2877	}
				2878	}
				2879
				2880	if (NumElems == 4 &&
				2881	// Don't do this for MMX.
				2882	MVT::getSizeInBits(VT) != 64) {
				2883	MVT::ValueType MaskVT = PermMask.getValueType();
				2884	MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
				2885	SmallVector<std::pair<int, int>, 8> Locs;
				2886	Locs.reserve(NumElems);
				2887	SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2888	SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2889	unsigned NumHi = 0;
				2890	unsigned NumLo = 0;
				2891	// If no more than two elements come from either vector. This can be
				2892	// implemented with two shuffles. First shuffle gather the elements.
				2893	// The second shuffle, which takes the first shuffle as both of its
				2894	// vector operands, put the elements into the right order.
				2895	for (unsigned i = 0; i != NumElems; ++i) {
				2896	SDOperand Elt = PermMask.getOperand(i);
				2897	if (Elt.getOpcode() == ISD::UNDEF) {
				2898	Locs[i] = std::make_pair(-1, -1);
				2899	} else {
				2900	unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
				2901	if (Val < NumElems) {
				2902	Locs[i] = std::make_pair(0, NumLo);
				2903	Mask1[NumLo] = Elt;
				2904	NumLo++;
				2905	} else {
				2906	Locs[i] = std::make_pair(1, NumHi);
				2907	if (2+NumHi < NumElems)
				2908	Mask1[2+NumHi] = Elt;
				2909	NumHi++;
				2910	}
				2911	}
				2912	}
				2913	if (NumLo <= 2 && NumHi <= 2) {
				2914	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2915	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2916	&Mask1[0], Mask1.size()));
				2917	for (unsigned i = 0; i != NumElems; ++i) {
				2918	if (Locs[i].first == -1)
				2919	continue;
				2920	else {
				2921	unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
				2922	Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
				2923	Mask2[i] = DAG.getConstant(Idx, MaskEVT);
				2924	}
				2925	}
				2926
				2927	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
				2928	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2929	&Mask2[0], Mask2.size()));
				2930	}
				2931
				2932	// Break it into (shuffle shuffle_hi, shuffle_lo).
				2933	Locs.clear();
				2934	SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2935	SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2936	SmallVector<SDOperand,8> *MaskPtr = &LoMask;
				2937	unsigned MaskIdx = 0;
				2938	unsigned LoIdx = 0;
				2939	unsigned HiIdx = NumElems/2;
				2940	for (unsigned i = 0; i != NumElems; ++i) {
				2941	if (i == NumElems/2) {
				2942	MaskPtr = &HiMask;
				2943	MaskIdx = 1;
				2944	LoIdx = 0;
				2945	HiIdx = NumElems/2;
				2946	}
				2947	SDOperand Elt = PermMask.getOperand(i);
				2948	if (Elt.getOpcode() == ISD::UNDEF) {
				2949	Locs[i] = std::make_pair(-1, -1);
				2950	} else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
				2951	Locs[i] = std::make_pair(MaskIdx, LoIdx);
				2952	(*MaskPtr)[LoIdx] = Elt;
				2953	LoIdx++;
				2954	} else {
				2955	Locs[i] = std::make_pair(MaskIdx, HiIdx);
				2956	(*MaskPtr)[HiIdx] = Elt;
				2957	HiIdx++;
				2958	}
				2959	}
				2960
				2961	SDOperand LoShuffle =
				2962	DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2963	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2964	&LoMask[0], LoMask.size()));
				2965	SDOperand HiShuffle =
				2966	DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2967	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2968	&HiMask[0], HiMask.size()));
				2969	SmallVector<SDOperand, 8> MaskOps;
				2970	for (unsigned i = 0; i != NumElems; ++i) {
				2971	if (Locs[i].first == -1) {
				2972	MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
				2973	} else {
				2974	unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
				2975	MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
				2976	}
				2977	}
				2978	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
				2979	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2980	&MaskOps[0], MaskOps.size()));
				2981	}
				2982
				2983	return SDOperand();
				2984	}
				2985
				2986	SDOperand
				2987	X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				2988	if (!isa<ConstantSDNode>(Op.getOperand(1)))
				2989	return SDOperand();
				2990
				2991	MVT::ValueType VT = Op.getValueType();
				2992	// TODO: handle v16i8.
				2993	if (MVT::getSizeInBits(VT) == 16) {
				2994	// Transform it so it match pextrw which produces a 32-bit result.
				2995	MVT::ValueType EVT = (MVT::ValueType)(VT+1);
				2996	SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
				2997	Op.getOperand(0), Op.getOperand(1));
				2998	SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
				2999	DAG.getValueType(VT));
				3000	return DAG.getNode(ISD::TRUNCATE, VT, Assert);
				3001	} else if (MVT::getSizeInBits(VT) == 32) {
				3002	SDOperand Vec = Op.getOperand(0);
				3003	unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3004	if (Idx == 0)
				3005	return Op;
				3006	// SHUFPS the element to the lowest double word, then movss.
				3007	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				3008	SmallVector<SDOperand, 8> IdxVec;
				3009	IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
				3010	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				3011	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				3012	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				3013	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3014	&IdxVec[0], IdxVec.size());
				3015	Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
				3016	Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
				3017	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
				3018	DAG.getConstant(0, getPointerTy()));
				3019	} else if (MVT::getSizeInBits(VT) == 64) {
				3020	SDOperand Vec = Op.getOperand(0);
				3021	unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3022	if (Idx == 0)
				3023	return Op;
				3024
				3025	// UNPCKHPD the element to the lowest double word, then movsd.
				3026	// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
				3027	// to a f64mem, the whole operation is folded into a single MOVHPDmr.
				3028	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				3029	SmallVector<SDOperand, 8> IdxVec;
				3030	IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
				3031	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				3032	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3033	&IdxVec[0], IdxVec.size());
				3034	Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
				3035	Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
				3036	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
				3037	DAG.getConstant(0, getPointerTy()));
				3038	}
				3039
				3040	return SDOperand();
				3041	}
				3042
				3043	SDOperand
				3044	X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				3045	// Transform it so it match pinsrw which expects a 16-bit value in a GR32
				3046	// as its second argument.
				3047	MVT::ValueType VT = Op.getValueType();
				3048	MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
				3049	SDOperand N0 = Op.getOperand(0);
				3050	SDOperand N1 = Op.getOperand(1);
				3051	SDOperand N2 = Op.getOperand(2);
				3052	if (MVT::getSizeInBits(BaseVT) == 16) {
				3053	if (N1.getValueType() != MVT::i32)
				3054	N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
				3055	if (N2.getValueType() != MVT::i32)
				3056	N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
				3057	return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
				3058	} else if (MVT::getSizeInBits(BaseVT) == 32) {
				3059	unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
				3060	if (Idx == 0) {
				3061	// Use a movss.
				3062	N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
				3063	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				3064	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				3065	SmallVector<SDOperand, 8> MaskVec;
				3066	MaskVec.push_back(DAG.getConstant(4, BaseVT));
				3067	for (unsigned i = 1; i <= 3; ++i)
				3068	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				3069	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
				3070	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3071	&MaskVec[0], MaskVec.size()));
				3072	} else {
				3073	// Use two pinsrw instructions to insert a 32 bit value.
				3074	Idx <<= 1;
				3075	if (MVT::isFloatingPoint(N1.getValueType())) {
Evan Cheng	1eea675	2007-07-31 06:21:44 +0000	[diff] [blame]	3076	N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
				3077	N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
				3078	N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
				3079	DAG.getConstant(0, getPointerTy()));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3080	}
				3081	N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
				3082	N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
				3083	DAG.getConstant(Idx, getPointerTy()));
				3084	N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
				3085	N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
				3086	DAG.getConstant(Idx+1, getPointerTy()));
				3087	return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
				3088	}
				3089	}
				3090
				3091	return SDOperand();
				3092	}
				3093
				3094	SDOperand
				3095	X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				3096	SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
				3097	return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
				3098	}
				3099
				3100	// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
				3101	// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
				3102	// one of the above mentioned nodes. It has to be wrapped because otherwise
				3103	// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
				3104	// be used to form addressing mode. These wrapped nodes will be selected
				3105	// into MOV32ri.
				3106	SDOperand
				3107	X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
				3108	ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
				3109	SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
				3110	getPointerTy(),
				3111	CP->getAlignment());
				3112	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3113	// With PIC, the address is actually $g + Offset.
				3114	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3115	!Subtarget->isPICStyleRIPRel()) {
				3116	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3117	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3118	Result);
				3119	}
				3120
				3121	return Result;
				3122	}
				3123
				3124	SDOperand
				3125	X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
				3126	GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
				3127	SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
				3128	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3129	// With PIC, the address is actually $g + Offset.
				3130	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3131	!Subtarget->isPICStyleRIPRel()) {
				3132	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3133	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3134	Result);
				3135	}
				3136
				3137	// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
				3138	// load the value at address GV, not the value of GV itself. This means that
				3139	// the GlobalAddress must be in the base or index register of the address, not
				3140	// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
				3141	// The same applies for external symbols during PIC codegen
				3142	if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
				3143	Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
				3144
				3145	return Result;
				3146	}
				3147
				3148	// Lower ISD::GlobalTLSAddress using the "general dynamic" model
				3149	static SDOperand
				3150	LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
				3151	const MVT::ValueType PtrVT) {
				3152	SDOperand InFlag;
				3153	SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
				3154	DAG.getNode(X86ISD::GlobalBaseReg,
				3155	PtrVT), InFlag);
				3156	InFlag = Chain.getValue(1);
				3157
				3158	// emit leal symbol@TLSGD(,%ebx,1), %eax
				3159	SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
				3160	SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
				3161	GA->getValueType(0),
				3162	GA->getOffset());
				3163	SDOperand Ops[] = { Chain, TGA, InFlag };
				3164	SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
				3165	InFlag = Result.getValue(2);
				3166	Chain = Result.getValue(1);
				3167
				3168	// call ___tls_get_addr. This function receives its argument in
				3169	// the register EAX.
				3170	Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
				3171	InFlag = Chain.getValue(1);
				3172
				3173	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				3174	SDOperand Ops1[] = { Chain,
				3175	DAG.getTargetExternalSymbol("___tls_get_addr",
				3176	PtrVT),
				3177	DAG.getRegister(X86::EAX, PtrVT),
				3178	DAG.getRegister(X86::EBX, PtrVT),
				3179	InFlag };
				3180	Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
				3181	InFlag = Chain.getValue(1);
				3182
				3183	return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
				3184	}
				3185
				3186	// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
				3187	// "local exec" model.
				3188	static SDOperand
				3189	LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
				3190	const MVT::ValueType PtrVT) {
				3191	// Get the Thread Pointer
				3192	SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
				3193	// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
				3194	// exec)
				3195	SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
				3196	GA->getValueType(0),
				3197	GA->getOffset());
				3198	SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
				3199
				3200	if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
				3201	Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
				3202
				3203	// The address of the thread local variable is the add of the thread
				3204	// pointer with the offset of the variable.
				3205	return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
				3206	}
				3207
				3208	SDOperand
				3209	X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
				3210	// TODO: implement the "local dynamic" model
				3211	// TODO: implement the "initial exec"model for pic executables
				3212	assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
				3213	"TLS not implemented for non-ELF and 64-bit targets");
				3214	GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
				3215	// If the relocation model is PIC, use the "General Dynamic" TLS Model,
				3216	// otherwise use the "Local Exec"TLS Model
				3217	if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
				3218	return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
				3219	else
				3220	return LowerToTLSExecModel(GA, DAG, getPointerTy());
				3221	}
				3222
				3223	SDOperand
				3224	X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
				3225	const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
				3226	SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
				3227	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3228	// With PIC, the address is actually $g + Offset.
				3229	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3230	!Subtarget->isPICStyleRIPRel()) {
				3231	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3232	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3233	Result);
				3234	}
				3235
				3236	return Result;
				3237	}
				3238
				3239	SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
				3240	JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
				3241	SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
				3242	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3243	// With PIC, the address is actually $g + Offset.
				3244	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3245	!Subtarget->isPICStyleRIPRel()) {
				3246	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3247	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3248	Result);
				3249	}
				3250
				3251	return Result;
				3252	}
				3253
				3254	SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
				3255	assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
				3256	"Not an i64 shift!");
				3257	bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
				3258	SDOperand ShOpLo = Op.getOperand(0);
				3259	SDOperand ShOpHi = Op.getOperand(1);
				3260	SDOperand ShAmt = Op.getOperand(2);
				3261	SDOperand Tmp1 = isSRA ?
				3262	DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
				3263	DAG.getConstant(0, MVT::i32);
				3264
				3265	SDOperand Tmp2, Tmp3;
				3266	if (Op.getOpcode() == ISD::SHL_PARTS) {
				3267	Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
				3268	Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
				3269	} else {
				3270	Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
				3271	Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
				3272	}
				3273
				3274	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3275	SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
				3276	DAG.getConstant(32, MVT::i8));
				3277	SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
				3278	SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
				3279
				3280	SDOperand Hi, Lo;
				3281	SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3282
				3283	VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
				3284	SmallVector<SDOperand, 4> Ops;
				3285	if (Op.getOpcode() == ISD::SHL_PARTS) {
				3286	Ops.push_back(Tmp2);
				3287	Ops.push_back(Tmp3);
				3288	Ops.push_back(CC);
				3289	Ops.push_back(InFlag);
				3290	Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3291	InFlag = Hi.getValue(1);
				3292
				3293	Ops.clear();
				3294	Ops.push_back(Tmp3);
				3295	Ops.push_back(Tmp1);
				3296	Ops.push_back(CC);
				3297	Ops.push_back(InFlag);
				3298	Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3299	} else {
				3300	Ops.push_back(Tmp2);
				3301	Ops.push_back(Tmp3);
				3302	Ops.push_back(CC);
				3303	Ops.push_back(InFlag);
				3304	Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3305	InFlag = Lo.getValue(1);
				3306
				3307	Ops.clear();
				3308	Ops.push_back(Tmp3);
				3309	Ops.push_back(Tmp1);
				3310	Ops.push_back(CC);
				3311	Ops.push_back(InFlag);
				3312	Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3313	}
				3314
				3315	VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
				3316	Ops.clear();
				3317	Ops.push_back(Lo);
				3318	Ops.push_back(Hi);
				3319	return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
				3320	}
				3321
				3322	SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
				3323	assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
				3324	Op.getOperand(0).getValueType() >= MVT::i16 &&
				3325	"Unknown SINT_TO_FP to lower!");
				3326
				3327	SDOperand Result;
				3328	MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
				3329	unsigned Size = MVT::getSizeInBits(SrcVT)/8;
				3330	MachineFunction &MF = DAG.getMachineFunction();
				3331	int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
				3332	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3333	SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
				3334	StackSlot, NULL, 0);
				3335
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3336	// These are really Legal; caller falls through into that case.
				3337	if (SrcVT==MVT::i32 && Op.getValueType() != MVT::f80 && X86ScalarSSE)
				3338	return Result;
Dale Johannesen	958b08b	2007-09-19 23:55:34 +0000	[diff] [blame]	3339	if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 &&
				3340	Subtarget->is64Bit())
				3341	return Result;
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3342
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3343	// Build the FILD
				3344	SDVTList Tys;
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3345	bool useSSE = X86ScalarSSE && Op.getValueType() != MVT::f80;
				3346	if (useSSE)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3347	Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
				3348	else
				3349	Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
				3350	SmallVector<SDOperand, 8> Ops;
				3351	Ops.push_back(Chain);
				3352	Ops.push_back(StackSlot);
				3353	Ops.push_back(DAG.getValueType(SrcVT));
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3354	Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3355	Tys, &Ops[0], Ops.size());
				3356
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3357	if (useSSE) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3358	Chain = Result.getValue(1);
				3359	SDOperand InFlag = Result.getValue(2);
				3360
				3361	// FIXME: Currently the FST is flagged to the FILD_FLAG. This
				3362	// shouldn't be necessary except that RFP cannot be live across
				3363	// multiple blocks. When stackifier is fixed, they can be uncoupled.
				3364	MachineFunction &MF = DAG.getMachineFunction();
				3365	int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
				3366	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3367	Tys = DAG.getVTList(MVT::Other);
				3368	SmallVector<SDOperand, 8> Ops;
				3369	Ops.push_back(Chain);
				3370	Ops.push_back(Result);
				3371	Ops.push_back(StackSlot);
				3372	Ops.push_back(DAG.getValueType(Op.getValueType()));
				3373	Ops.push_back(InFlag);
				3374	Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
				3375	Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
				3376	}
				3377
				3378	return Result;
				3379	}
				3380
				3381	SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
				3382	assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
				3383	"Unknown FP_TO_SINT to lower!");
				3384	// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
				3385	// stack slot.
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3386	SDOperand Result;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3387	MachineFunction &MF = DAG.getMachineFunction();
				3388	unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
				3389	int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
				3390	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3391
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3392	// These are really Legal.
				3393	if (Op.getValueType() == MVT::i32 && X86ScalarSSE &&
				3394	Op.getOperand(0).getValueType() != MVT::f80)
				3395	return Result;
Dale Johannesen	958b08b	2007-09-19 23:55:34 +0000	[diff] [blame]	3396	if (Subtarget->is64Bit() &&
				3397	Op.getValueType() == MVT::i64 &&
				3398	Op.getOperand(0).getValueType() != MVT::f80)
				3399	return Result;
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3400
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3401	unsigned Opc;
				3402	switch (Op.getValueType()) {
				3403	default: assert(0 && "Invalid FP_TO_SINT to lower!");
				3404	case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
				3405	case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
				3406	case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
				3407	}
				3408
				3409	SDOperand Chain = DAG.getEntryNode();
				3410	SDOperand Value = Op.getOperand(0);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3411	if (X86ScalarSSE && Op.getOperand(0).getValueType() != MVT::f80) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3412	assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
				3413	Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
				3414	SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
				3415	SDOperand Ops[] = {
				3416	Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
				3417	};
				3418	Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
				3419	Chain = Value.getValue(1);
				3420	SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
				3421	StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3422	}
				3423
				3424	// Build the FP_TO_INT*_IN_MEM
				3425	SDOperand Ops[] = { Chain, Value, StackSlot };
				3426	SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
				3427
				3428	// Load the result.
				3429	return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
				3430	}
				3431
				3432	SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
				3433	MVT::ValueType VT = Op.getValueType();
				3434	MVT::ValueType EltVT = VT;
				3435	if (MVT::isVector(VT))
				3436	EltVT = MVT::getVectorElementType(VT);
				3437	const Type *OpNTy = MVT::getTypeForValueType(EltVT);
				3438	std::vector<Constant*> CV;
				3439	if (EltVT == MVT::f64) {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3440	Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3441	CV.push_back(C);
				3442	CV.push_back(C);
				3443	} else {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3444	Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3445	CV.push_back(C);
				3446	CV.push_back(C);
				3447	CV.push_back(C);
				3448	CV.push_back(C);
				3449	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3450	Constant *C = ConstantVector::get(CV);
				3451	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3452	SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3453	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3454	return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
				3455	}
				3456
				3457	SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
				3458	MVT::ValueType VT = Op.getValueType();
				3459	MVT::ValueType EltVT = VT;
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3460	unsigned EltNum = 1;
				3461	if (MVT::isVector(VT)) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3462	EltVT = MVT::getVectorElementType(VT);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3463	EltNum = MVT::getVectorNumElements(VT);
				3464	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3465	const Type *OpNTy = MVT::getTypeForValueType(EltVT);
				3466	std::vector<Constant*> CV;
				3467	if (EltVT == MVT::f64) {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3468	Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63)));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3469	CV.push_back(C);
				3470	CV.push_back(C);
				3471	} else {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3472	Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31)));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3473	CV.push_back(C);
				3474	CV.push_back(C);
				3475	CV.push_back(C);
				3476	CV.push_back(C);
				3477	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3478	Constant *C = ConstantVector::get(CV);
				3479	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3480	SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3481	false, 16);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3482	if (MVT::isVector(VT)) {
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3483	return DAG.getNode(ISD::BIT_CONVERT, VT,
				3484	DAG.getNode(ISD::XOR, MVT::v2i64,
				3485	DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
				3486	DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
				3487	} else {
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3488	return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
				3489	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3490	}
				3491
				3492	SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
				3493	SDOperand Op0 = Op.getOperand(0);
				3494	SDOperand Op1 = Op.getOperand(1);
				3495	MVT::ValueType VT = Op.getValueType();
				3496	MVT::ValueType SrcVT = Op1.getValueType();
				3497	const Type *SrcTy = MVT::getTypeForValueType(SrcVT);
				3498
				3499	// If second operand is smaller, extend it first.
				3500	if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
				3501	Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
				3502	SrcVT = VT;
Dale Johannesen	b9de9f0	2007-09-06 18:13:44 +0000	[diff] [blame]	3503	SrcTy = MVT::getTypeForValueType(SrcVT);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3504	}
				3505
				3506	// First get the sign bit of second operand.
				3507	std::vector<Constant*> CV;
				3508	if (SrcVT == MVT::f64) {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3509	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63))));
				3510	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3511	} else {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3512	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31))));
				3513	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
				3514	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
				3515	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3516	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3517	Constant *C = ConstantVector::get(CV);
				3518	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3519	SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3520	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3521	SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
				3522
				3523	// Shift sign bit right or left if the two operands have different types.
				3524	if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
				3525	// Op0 is MVT::f32, Op1 is MVT::f64.
				3526	SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
				3527	SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
				3528	DAG.getConstant(32, MVT::i32));
				3529	SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
				3530	SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
				3531	DAG.getConstant(0, getPointerTy()));
				3532	}
				3533
				3534	// Clear first operand sign bit.
				3535	CV.clear();
				3536	if (VT == MVT::f64) {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3537	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63)))));
				3538	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3539	} else {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3540	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31)))));
				3541	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
				3542	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
				3543	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3544	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3545	C = ConstantVector::get(CV);
				3546	CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3547	SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3548	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3549	SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
				3550
				3551	// Or the value with the sign bit.
				3552	return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
				3553	}
				3554
				3555	SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
				3556	SDOperand Chain) {
				3557	assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
				3558	SDOperand Cond;
				3559	SDOperand Op0 = Op.getOperand(0);
				3560	SDOperand Op1 = Op.getOperand(1);
				3561	SDOperand CC = Op.getOperand(2);
				3562	ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
				3563	const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3564	const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
				3565	bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
				3566	unsigned X86CC;
				3567
				3568	if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
				3569	Op0, Op1, DAG)) {
				3570	SDOperand Ops1[] = { Chain, Op0, Op1 };
				3571	Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
				3572	SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
				3573	return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3574	}
				3575
				3576	assert(isFP && "Illegal integer SetCC!");
				3577
				3578	SDOperand COps[] = { Chain, Op0, Op1 };
				3579	Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
				3580
				3581	switch (SetCCOpcode) {
				3582	default: assert(false && "Illegal floating point SetCC!");
				3583	case ISD::SETOEQ: { // !PF & ZF
				3584	SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
				3585	SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
				3586	SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
				3587	Tmp1.getValue(1) };
				3588	SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3589	return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
				3590	}
				3591	case ISD::SETUNE: { // PF \| !ZF
				3592	SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
				3593	SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
				3594	SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
				3595	Tmp1.getValue(1) };
				3596	SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3597	return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
				3598	}
				3599	}
				3600	}
				3601
				3602	SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
				3603	bool addTest = true;
				3604	SDOperand Chain = DAG.getEntryNode();
				3605	SDOperand Cond = Op.getOperand(0);
				3606	SDOperand CC;
				3607	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3608
				3609	if (Cond.getOpcode() == ISD::SETCC)
				3610	Cond = LowerSETCC(Cond, DAG, Chain);
				3611
				3612	if (Cond.getOpcode() == X86ISD::SETCC) {
				3613	CC = Cond.getOperand(0);
				3614
				3615	// If condition flag is set by a X86ISD::CMP, then make a copy of it
				3616	// (since flag operand cannot be shared). Use it as the condition setting
				3617	// operand in place of the X86ISD::SETCC.
				3618	// If the X86ISD::SETCC has more than one use, then perhaps it's better
				3619	// to use a test instead of duplicating the X86ISD::CMP (for register
				3620	// pressure reason)?
				3621	SDOperand Cmp = Cond.getOperand(1);
				3622	unsigned Opc = Cmp.getOpcode();
				3623	bool IllegalFPCMov = !X86ScalarSSE &&
				3624	MVT::isFloatingPoint(Op.getValueType()) &&
				3625	!hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
				3626	if ((Opc == X86ISD::CMP \|\| Opc == X86ISD::COMI \|\| Opc == X86ISD::UCOMI) &&
				3627	!IllegalFPCMov) {
				3628	SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
				3629	Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
				3630	addTest = false;
				3631	}
				3632	}
				3633
				3634	if (addTest) {
				3635	CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3636	SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
				3637	Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
				3638	}
				3639
				3640	VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
				3641	SmallVector<SDOperand, 4> Ops;
				3642	// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
				3643	// condition is true.
				3644	Ops.push_back(Op.getOperand(2));
				3645	Ops.push_back(Op.getOperand(1));
				3646	Ops.push_back(CC);
				3647	Ops.push_back(Cond.getValue(1));
				3648	return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3649	}
				3650
				3651	SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
				3652	bool addTest = true;
				3653	SDOperand Chain = Op.getOperand(0);
				3654	SDOperand Cond = Op.getOperand(1);
				3655	SDOperand Dest = Op.getOperand(2);
				3656	SDOperand CC;
				3657	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3658
				3659	if (Cond.getOpcode() == ISD::SETCC)
				3660	Cond = LowerSETCC(Cond, DAG, Chain);
				3661
				3662	if (Cond.getOpcode() == X86ISD::SETCC) {
				3663	CC = Cond.getOperand(0);
				3664
				3665	// If condition flag is set by a X86ISD::CMP, then make a copy of it
				3666	// (since flag operand cannot be shared). Use it as the condition setting
				3667	// operand in place of the X86ISD::SETCC.
				3668	// If the X86ISD::SETCC has more than one use, then perhaps it's better
				3669	// to use a test instead of duplicating the X86ISD::CMP (for register
				3670	// pressure reason)?
				3671	SDOperand Cmp = Cond.getOperand(1);
				3672	unsigned Opc = Cmp.getOpcode();
				3673	if (Opc == X86ISD::CMP \|\| Opc == X86ISD::COMI \|\| Opc == X86ISD::UCOMI) {
				3674	SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
				3675	Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
				3676	addTest = false;
				3677	}
				3678	}
				3679
				3680	if (addTest) {
				3681	CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3682	SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
				3683	Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
				3684	}
				3685	return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
				3686	Cond, Op.getOperand(2), CC, Cond.getValue(1));
				3687	}
				3688
				3689	SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
				3690	unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3691
				3692	if (Subtarget->is64Bit())
				3693	return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
				3694	else
				3695	switch (CallingConv) {
				3696	default:
				3697	assert(0 && "Unsupported calling convention");
				3698	case CallingConv::Fast:
				3699	// TODO: Implement fastcc
				3700	// Falls through
				3701	case CallingConv::C:
				3702	case CallingConv::X86_StdCall:
				3703	return LowerCCCCallTo(Op, DAG, CallingConv);
				3704	case CallingConv::X86_FastCall:
				3705	return LowerFastCCCallTo(Op, DAG, CallingConv);
				3706	}
				3707	}
				3708
				3709
				3710	// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
				3711	// Calls to _alloca is needed to probe the stack when allocating more than 4k
				3712	// bytes in one go. Touching the stack at 4K increments is necessary to ensure
				3713	// that the guard pages used by the OS virtual memory manager are allocated in
				3714	// correct sequence.
				3715	SDOperand
				3716	X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
				3717	SelectionDAG &DAG) {
				3718	assert(Subtarget->isTargetCygMing() &&
				3719	"This should be used only on Cygwin/Mingw targets");
				3720
				3721	// Get the inputs.
				3722	SDOperand Chain = Op.getOperand(0);
				3723	SDOperand Size = Op.getOperand(1);
				3724	// FIXME: Ensure alignment here
				3725
				3726	SDOperand Flag;
				3727
				3728	MVT::ValueType IntPtr = getPointerTy();
				3729	MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
				3730
				3731	Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
				3732	Flag = Chain.getValue(1);
				3733
				3734	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				3735	SDOperand Ops[] = { Chain,
				3736	DAG.getTargetExternalSymbol("_alloca", IntPtr),
				3737	DAG.getRegister(X86::EAX, IntPtr),
				3738	Flag };
				3739	Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
				3740	Flag = Chain.getValue(1);
				3741
				3742	Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
				3743
				3744	std::vector<MVT::ValueType> Tys;
				3745	Tys.push_back(SPTy);
				3746	Tys.push_back(MVT::Other);
				3747	SDOperand Ops1[2] = { Chain.getValue(0), Chain };
				3748	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
				3749	}
				3750
				3751	SDOperand
				3752	X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
				3753	MachineFunction &MF = DAG.getMachineFunction();
				3754	const Function* Fn = MF.getFunction();
				3755	if (Fn->hasExternalLinkage() &&
				3756	Subtarget->isTargetCygMing() &&
				3757	Fn->getName() == "main")
				3758	MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
				3759
				3760	unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3761	if (Subtarget->is64Bit())
				3762	return LowerX86_64CCCArguments(Op, DAG);
				3763	else
				3764	switch(CC) {
				3765	default:
				3766	assert(0 && "Unsupported calling convention");
				3767	case CallingConv::Fast:
				3768	// TODO: implement fastcc.
				3769
				3770	// Falls through
				3771	case CallingConv::C:
				3772	return LowerCCCArguments(Op, DAG);
				3773	case CallingConv::X86_StdCall:
				3774	MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
				3775	return LowerCCCArguments(Op, DAG, true);
				3776	case CallingConv::X86_FastCall:
				3777	MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
				3778	return LowerFastCCArguments(Op, DAG);
				3779	}
				3780	}
				3781
				3782	SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
				3783	SDOperand InFlag(0, 0);
				3784	SDOperand Chain = Op.getOperand(0);
				3785	unsigned Align =
				3786	(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
				3787	if (Align == 0) Align = 1;
				3788
				3789	ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
Rafael Espindola	5d3e762	2007-08-27 10:18:20 +0000	[diff] [blame]	3790	// If not DWORD aligned or size is more than the threshold, call memset.
Rafael Espindola	b2e7a6b	2007-08-27 17:48:26 +0000	[diff] [blame]	3791	// The libc version is likely to be faster for these cases. It can use the
				3792	// address value and run time information about the CPU.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3793	if ((Align & 3) != 0 \|\|
Rafael Espindola	5d3e762	2007-08-27 10:18:20 +0000	[diff] [blame]	3794	(I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3795	MVT::ValueType IntPtr = getPointerTy();
				3796	const Type *IntPtrTy = getTargetData()->getIntPtrType();
				3797	TargetLowering::ArgListTy Args;
				3798	TargetLowering::ArgListEntry Entry;
				3799	Entry.Node = Op.getOperand(1);
				3800	Entry.Ty = IntPtrTy;
				3801	Args.push_back(Entry);
				3802	// Extend the unsigned i8 argument to be an int value for the call.
				3803	Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
				3804	Entry.Ty = IntPtrTy;
				3805	Args.push_back(Entry);
				3806	Entry.Node = Op.getOperand(3);
				3807	Args.push_back(Entry);
				3808	std::pair<SDOperand,SDOperand> CallResult =
				3809	LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
				3810	DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
				3811	return CallResult.second;
				3812	}
				3813
				3814	MVT::ValueType AVT;
				3815	SDOperand Count;
				3816	ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
				3817	unsigned BytesLeft = 0;
				3818	bool TwoRepStos = false;
				3819	if (ValC) {
				3820	unsigned ValReg;
				3821	uint64_t Val = ValC->getValue() & 255;
				3822
				3823	// If the value is a constant, then we can potentially use larger sets.
				3824	switch (Align & 3) {
				3825	case 2: // WORD aligned
				3826	AVT = MVT::i16;
				3827	ValReg = X86::AX;
				3828	Val = (Val << 8) \| Val;
				3829	break;
				3830	case 0: // DWORD aligned
				3831	AVT = MVT::i32;
				3832	ValReg = X86::EAX;
				3833	Val = (Val << 8) \| Val;
				3834	Val = (Val << 16) \| Val;
				3835	if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned
				3836	AVT = MVT::i64;
				3837	ValReg = X86::RAX;
				3838	Val = (Val << 32) \| Val;
				3839	}
				3840	break;
				3841	default: // Byte aligned
				3842	AVT = MVT::i8;
				3843	ValReg = X86::AL;
				3844	Count = Op.getOperand(3);
				3845	break;
				3846	}
				3847
				3848	if (AVT > MVT::i8) {
				3849	if (I) {
				3850	unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
				3851	Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
				3852	BytesLeft = I->getValue() % UBytes;
				3853	} else {
				3854	assert(AVT >= MVT::i32 &&
				3855	"Do not use rep;stos if not at least DWORD aligned");
				3856	Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
				3857	Op.getOperand(3), DAG.getConstant(2, MVT::i8));
				3858	TwoRepStos = true;
				3859	}
				3860	}
				3861
				3862	Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
				3863	InFlag);
				3864	InFlag = Chain.getValue(1);
				3865	} else {
				3866	AVT = MVT::i8;
				3867	Count = Op.getOperand(3);
				3868	Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
				3869	InFlag = Chain.getValue(1);
				3870	}
				3871
				3872	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
				3873	Count, InFlag);
				3874	InFlag = Chain.getValue(1);
				3875	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
				3876	Op.getOperand(1), InFlag);
				3877	InFlag = Chain.getValue(1);
				3878
				3879	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3880	SmallVector<SDOperand, 8> Ops;
				3881	Ops.push_back(Chain);
				3882	Ops.push_back(DAG.getValueType(AVT));
				3883	Ops.push_back(InFlag);
				3884	Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
				3885
				3886	if (TwoRepStos) {
				3887	InFlag = Chain.getValue(1);
				3888	Count = Op.getOperand(3);
				3889	MVT::ValueType CVT = Count.getValueType();
				3890	SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
				3891	DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
				3892	Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
				3893	Left, InFlag);
				3894	InFlag = Chain.getValue(1);
				3895	Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3896	Ops.clear();
				3897	Ops.push_back(Chain);
				3898	Ops.push_back(DAG.getValueType(MVT::i8));
				3899	Ops.push_back(InFlag);
				3900	Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
				3901	} else if (BytesLeft) {
				3902	// Issue stores for the last 1 - 7 bytes.
				3903	SDOperand Value;
				3904	unsigned Val = ValC->getValue() & 255;
				3905	unsigned Offset = I->getValue() - BytesLeft;
				3906	SDOperand DstAddr = Op.getOperand(1);
				3907	MVT::ValueType AddrVT = DstAddr.getValueType();
				3908	if (BytesLeft >= 4) {
				3909	Val = (Val << 8) \| Val;
				3910	Val = (Val << 16) \| Val;
				3911	Value = DAG.getConstant(Val, MVT::i32);
				3912	Chain = DAG.getStore(Chain, Value,
				3913	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3914	DAG.getConstant(Offset, AddrVT)),
				3915	NULL, 0);
				3916	BytesLeft -= 4;
				3917	Offset += 4;
				3918	}
				3919	if (BytesLeft >= 2) {
				3920	Value = DAG.getConstant((Val << 8) \| Val, MVT::i16);
				3921	Chain = DAG.getStore(Chain, Value,
				3922	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3923	DAG.getConstant(Offset, AddrVT)),
				3924	NULL, 0);
				3925	BytesLeft -= 2;
				3926	Offset += 2;
				3927	}
				3928	if (BytesLeft == 1) {
				3929	Value = DAG.getConstant(Val, MVT::i8);
				3930	Chain = DAG.getStore(Chain, Value,
				3931	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3932	DAG.getConstant(Offset, AddrVT)),
				3933	NULL, 0);
				3934	}
				3935	}
				3936
				3937	return Chain;
				3938	}
				3939
				3940	SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
				3941	SDOperand Chain = Op.getOperand(0);
				3942	unsigned Align =
				3943	(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
				3944	if (Align == 0) Align = 1;
				3945
				3946	ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
Rafael Espindola	5d3e762	2007-08-27 10:18:20 +0000	[diff] [blame]	3947	// If not DWORD aligned or size is more than the threshold, call memcpy.
Rafael Espindola	b2e7a6b	2007-08-27 17:48:26 +0000	[diff] [blame]	3948	// The libc version is likely to be faster for these cases. It can use the
				3949	// address value and run time information about the CPU.
				3950	// With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3951	if ((Align & 3) != 0 \|\|
Rafael Espindola	5d3e762	2007-08-27 10:18:20 +0000	[diff] [blame]	3952	(I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3953	MVT::ValueType IntPtr = getPointerTy();
				3954	TargetLowering::ArgListTy Args;
				3955	TargetLowering::ArgListEntry Entry;
				3956	Entry.Ty = getTargetData()->getIntPtrType();
				3957	Entry.Node = Op.getOperand(1); Args.push_back(Entry);
				3958	Entry.Node = Op.getOperand(2); Args.push_back(Entry);
				3959	Entry.Node = Op.getOperand(3); Args.push_back(Entry);
				3960	std::pair<SDOperand,SDOperand> CallResult =
				3961	LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
				3962	DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
				3963	return CallResult.second;
				3964	}
				3965
				3966	MVT::ValueType AVT;
				3967	SDOperand Count;
				3968	unsigned BytesLeft = 0;
				3969	bool TwoRepMovs = false;
				3970	switch (Align & 3) {
				3971	case 2: // WORD aligned
				3972	AVT = MVT::i16;
				3973	break;
				3974	case 0: // DWORD aligned
				3975	AVT = MVT::i32;
				3976	if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
				3977	AVT = MVT::i64;
				3978	break;
				3979	default: // Byte aligned
				3980	AVT = MVT::i8;
				3981	Count = Op.getOperand(3);
				3982	break;
				3983	}
				3984
				3985	if (AVT > MVT::i8) {
				3986	if (I) {
				3987	unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
				3988	Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
				3989	BytesLeft = I->getValue() % UBytes;
				3990	} else {
				3991	assert(AVT >= MVT::i32 &&
				3992	"Do not use rep;movs if not at least DWORD aligned");
				3993	Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
				3994	Op.getOperand(3), DAG.getConstant(2, MVT::i8));
				3995	TwoRepMovs = true;
				3996	}
				3997	}
				3998
				3999	SDOperand InFlag(0, 0);
				4000	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
				4001	Count, InFlag);
				4002	InFlag = Chain.getValue(1);
				4003	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
				4004	Op.getOperand(1), InFlag);
				4005	InFlag = Chain.getValue(1);
				4006	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
				4007	Op.getOperand(2), InFlag);
				4008	InFlag = Chain.getValue(1);
				4009
				4010	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				4011	SmallVector<SDOperand, 8> Ops;
				4012	Ops.push_back(Chain);
				4013	Ops.push_back(DAG.getValueType(AVT));
				4014	Ops.push_back(InFlag);
				4015	Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
				4016
				4017	if (TwoRepMovs) {
				4018	InFlag = Chain.getValue(1);
				4019	Count = Op.getOperand(3);
				4020	MVT::ValueType CVT = Count.getValueType();
				4021	SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
				4022	DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
				4023	Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
				4024	Left, InFlag);
				4025	InFlag = Chain.getValue(1);
				4026	Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				4027	Ops.clear();
				4028	Ops.push_back(Chain);
				4029	Ops.push_back(DAG.getValueType(MVT::i8));
				4030	Ops.push_back(InFlag);
				4031	Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
				4032	} else if (BytesLeft) {
				4033	// Issue loads and stores for the last 1 - 7 bytes.
				4034	unsigned Offset = I->getValue() - BytesLeft;
				4035	SDOperand DstAddr = Op.getOperand(1);
				4036	MVT::ValueType DstVT = DstAddr.getValueType();
				4037	SDOperand SrcAddr = Op.getOperand(2);
				4038	MVT::ValueType SrcVT = SrcAddr.getValueType();
				4039	SDOperand Value;
				4040	if (BytesLeft >= 4) {
				4041	Value = DAG.getLoad(MVT::i32, Chain,
				4042	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				4043	DAG.getConstant(Offset, SrcVT)),
				4044	NULL, 0);
				4045	Chain = Value.getValue(1);
				4046	Chain = DAG.getStore(Chain, Value,
				4047	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				4048	DAG.getConstant(Offset, DstVT)),
				4049	NULL, 0);
				4050	BytesLeft -= 4;
				4051	Offset += 4;
				4052	}
				4053	if (BytesLeft >= 2) {
				4054	Value = DAG.getLoad(MVT::i16, Chain,
				4055	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				4056	DAG.getConstant(Offset, SrcVT)),
				4057	NULL, 0);
				4058	Chain = Value.getValue(1);
				4059	Chain = DAG.getStore(Chain, Value,
				4060	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				4061	DAG.getConstant(Offset, DstVT)),
				4062	NULL, 0);
				4063	BytesLeft -= 2;
				4064	Offset += 2;
				4065	}
				4066
				4067	if (BytesLeft == 1) {
				4068	Value = DAG.getLoad(MVT::i8, Chain,
				4069	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				4070	DAG.getConstant(Offset, SrcVT)),
				4071	NULL, 0);
				4072	Chain = Value.getValue(1);
				4073	Chain = DAG.getStore(Chain, Value,
				4074	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				4075	DAG.getConstant(Offset, DstVT)),
				4076	NULL, 0);
				4077	}
				4078	}
				4079
				4080	return Chain;
				4081	}
				4082
				4083	SDOperand
				4084	X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
				4085	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				4086	SDOperand TheOp = Op.getOperand(0);
				4087	SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
				4088	if (Subtarget->is64Bit()) {
				4089	SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
				4090	SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
				4091	MVT::i64, Copy1.getValue(2));
				4092	SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
				4093	DAG.getConstant(32, MVT::i8));
				4094	SDOperand Ops[] = {
				4095	DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
				4096	};
				4097
				4098	Tys = DAG.getVTList(MVT::i64, MVT::Other);
				4099	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
				4100	}
				4101
				4102	SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
				4103	SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
				4104	MVT::i32, Copy1.getValue(2));
				4105	SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
				4106	Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
				4107	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
				4108	}
				4109
				4110	SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
				4111	SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
				4112
				4113	if (!Subtarget->is64Bit()) {
				4114	// vastart just stores the address of the VarArgsFrameIndex slot into the
				4115	// memory location argument.
				4116	SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
				4117	return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
				4118	SV->getOffset());
				4119	}
				4120
				4121	// __va_list_tag:
				4122	// gp_offset (0 - 6 * 8)
				4123	// fp_offset (48 - 48 + 8 * 16)
				4124	// overflow_arg_area (point to parameters coming in memory).
				4125	// reg_save_area
				4126	SmallVector<SDOperand, 8> MemOps;
				4127	SDOperand FIN = Op.getOperand(1);
				4128	// Store gp_offset
				4129	SDOperand Store = DAG.getStore(Op.getOperand(0),
				4130	DAG.getConstant(VarArgsGPOffset, MVT::i32),
				4131	FIN, SV->getValue(), SV->getOffset());
				4132	MemOps.push_back(Store);
				4133
				4134	// Store fp_offset
				4135	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4136	DAG.getConstant(4, getPointerTy()));
				4137	Store = DAG.getStore(Op.getOperand(0),
				4138	DAG.getConstant(VarArgsFPOffset, MVT::i32),
				4139	FIN, SV->getValue(), SV->getOffset());
				4140	MemOps.push_back(Store);
				4141
				4142	// Store ptr to overflow_arg_area
				4143	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4144	DAG.getConstant(4, getPointerTy()));
				4145	SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
				4146	Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
				4147	SV->getOffset());
				4148	MemOps.push_back(Store);
				4149
				4150	// Store ptr to reg_save_area.
				4151	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4152	DAG.getConstant(8, getPointerTy()));
				4153	SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
				4154	Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
				4155	SV->getOffset());
				4156	MemOps.push_back(Store);
				4157	return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
				4158	}
				4159
				4160	SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
				4161	// X86-64 va_list is a struct { i32, i32, i8, i8 }.
				4162	SDOperand Chain = Op.getOperand(0);
				4163	SDOperand DstPtr = Op.getOperand(1);
				4164	SDOperand SrcPtr = Op.getOperand(2);
				4165	SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
				4166	SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));
				4167
				4168	SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
				4169	SrcSV->getValue(), SrcSV->getOffset());
				4170	Chain = SrcPtr.getValue(1);
				4171	for (unsigned i = 0; i < 3; ++i) {
				4172	SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
				4173	SrcSV->getValue(), SrcSV->getOffset());
				4174	Chain = Val.getValue(1);
				4175	Chain = DAG.getStore(Chain, Val, DstPtr,
				4176	DstSV->getValue(), DstSV->getOffset());
				4177	if (i == 2)
				4178	break;
				4179	SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
				4180	DAG.getConstant(8, getPointerTy()));
				4181	DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
				4182	DAG.getConstant(8, getPointerTy()));
				4183	}
				4184	return Chain;
				4185	}
				4186
				4187	SDOperand
				4188	X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
				4189	unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
				4190	switch (IntNo) {
				4191	default: return SDOperand(); // Don't custom lower most intrinsics.
				4192	// Comparison intrinsics.
				4193	case Intrinsic::x86_sse_comieq_ss:
				4194	case Intrinsic::x86_sse_comilt_ss:
				4195	case Intrinsic::x86_sse_comile_ss:
				4196	case Intrinsic::x86_sse_comigt_ss:
				4197	case Intrinsic::x86_sse_comige_ss:
				4198	case Intrinsic::x86_sse_comineq_ss:
				4199	case Intrinsic::x86_sse_ucomieq_ss:
				4200	case Intrinsic::x86_sse_ucomilt_ss:
				4201	case Intrinsic::x86_sse_ucomile_ss:
				4202	case Intrinsic::x86_sse_ucomigt_ss:
				4203	case Intrinsic::x86_sse_ucomige_ss:
				4204	case Intrinsic::x86_sse_ucomineq_ss:
				4205	case Intrinsic::x86_sse2_comieq_sd:
				4206	case Intrinsic::x86_sse2_comilt_sd:
				4207	case Intrinsic::x86_sse2_comile_sd:
				4208	case Intrinsic::x86_sse2_comigt_sd:
				4209	case Intrinsic::x86_sse2_comige_sd:
				4210	case Intrinsic::x86_sse2_comineq_sd:
				4211	case Intrinsic::x86_sse2_ucomieq_sd:
				4212	case Intrinsic::x86_sse2_ucomilt_sd:
				4213	case Intrinsic::x86_sse2_ucomile_sd:
				4214	case Intrinsic::x86_sse2_ucomigt_sd:
				4215	case Intrinsic::x86_sse2_ucomige_sd:
				4216	case Intrinsic::x86_sse2_ucomineq_sd: {
				4217	unsigned Opc = 0;
				4218	ISD::CondCode CC = ISD::SETCC_INVALID;
				4219	switch (IntNo) {
				4220	default: break;
				4221	case Intrinsic::x86_sse_comieq_ss:
				4222	case Intrinsic::x86_sse2_comieq_sd:
				4223	Opc = X86ISD::COMI;
				4224	CC = ISD::SETEQ;
				4225	break;
				4226	case Intrinsic::x86_sse_comilt_ss:
				4227	case Intrinsic::x86_sse2_comilt_sd:
				4228	Opc = X86ISD::COMI;
				4229	CC = ISD::SETLT;
				4230	break;
				4231	case Intrinsic::x86_sse_comile_ss:
				4232	case Intrinsic::x86_sse2_comile_sd:
				4233	Opc = X86ISD::COMI;
				4234	CC = ISD::SETLE;
				4235	break;
				4236	case Intrinsic::x86_sse_comigt_ss:
				4237	case Intrinsic::x86_sse2_comigt_sd:
				4238	Opc = X86ISD::COMI;
				4239	CC = ISD::SETGT;
				4240	break;
				4241	case Intrinsic::x86_sse_comige_ss:
				4242	case Intrinsic::x86_sse2_comige_sd:
				4243	Opc = X86ISD::COMI;
				4244	CC = ISD::SETGE;
				4245	break;
				4246	case Intrinsic::x86_sse_comineq_ss:
				4247	case Intrinsic::x86_sse2_comineq_sd:
				4248	Opc = X86ISD::COMI;
				4249	CC = ISD::SETNE;
				4250	break;
				4251	case Intrinsic::x86_sse_ucomieq_ss:
				4252	case Intrinsic::x86_sse2_ucomieq_sd:
				4253	Opc = X86ISD::UCOMI;
				4254	CC = ISD::SETEQ;
				4255	break;
				4256	case Intrinsic::x86_sse_ucomilt_ss:
				4257	case Intrinsic::x86_sse2_ucomilt_sd:
				4258	Opc = X86ISD::UCOMI;
				4259	CC = ISD::SETLT;
				4260	break;
				4261	case Intrinsic::x86_sse_ucomile_ss:
				4262	case Intrinsic::x86_sse2_ucomile_sd:
				4263	Opc = X86ISD::UCOMI;
				4264	CC = ISD::SETLE;
				4265	break;
				4266	case Intrinsic::x86_sse_ucomigt_ss:
				4267	case Intrinsic::x86_sse2_ucomigt_sd:
				4268	Opc = X86ISD::UCOMI;
				4269	CC = ISD::SETGT;
				4270	break;
				4271	case Intrinsic::x86_sse_ucomige_ss:
				4272	case Intrinsic::x86_sse2_ucomige_sd:
				4273	Opc = X86ISD::UCOMI;
				4274	CC = ISD::SETGE;
				4275	break;
				4276	case Intrinsic::x86_sse_ucomineq_ss:
				4277	case Intrinsic::x86_sse2_ucomineq_sd:
				4278	Opc = X86ISD::UCOMI;
				4279	CC = ISD::SETNE;
				4280	break;
				4281	}
				4282
				4283	unsigned X86CC;
				4284	SDOperand LHS = Op.getOperand(1);
				4285	SDOperand RHS = Op.getOperand(2);
				4286	translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
				4287
				4288	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				4289	SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
				4290	SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
				4291	VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
				4292	SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
				4293	SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
				4294	return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
				4295	}
				4296	}
				4297	}
				4298
				4299	SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
				4300	// Depths > 0 not supported yet!
				4301	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				4302	return SDOperand();
				4303
				4304	// Just load the return address
				4305	SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
				4306	return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
				4307	}
				4308
				4309	SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
				4310	// Depths > 0 not supported yet!
				4311	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				4312	return SDOperand();
				4313
				4314	SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
				4315	return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
				4316	DAG.getConstant(4, getPointerTy()));
				4317	}
				4318
				4319	SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
				4320	SelectionDAG &DAG) {
				4321	// Is not yet supported on x86-64
				4322	if (Subtarget->is64Bit())
				4323	return SDOperand();
				4324
				4325	return DAG.getConstant(8, getPointerTy());
				4326	}
				4327
				4328	SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
				4329	{
				4330	assert(!Subtarget->is64Bit() &&
				4331	"Lowering of eh_return builtin is not supported yet on x86-64");
				4332
				4333	MachineFunction &MF = DAG.getMachineFunction();
				4334	SDOperand Chain = Op.getOperand(0);
				4335	SDOperand Offset = Op.getOperand(1);
				4336	SDOperand Handler = Op.getOperand(2);
				4337
				4338	SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF),
				4339	getPointerTy());
				4340
				4341	SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
				4342	DAG.getConstant(-4UL, getPointerTy()));
				4343	StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
				4344	Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
				4345	Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
				4346	MF.addLiveOut(X86::ECX);
				4347
				4348	return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
				4349	Chain, DAG.getRegister(X86::ECX, getPointerTy()));
				4350	}
				4351
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4352	SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
				4353	SelectionDAG &DAG) {
				4354	SDOperand Root = Op.getOperand(0);
				4355	SDOperand Trmp = Op.getOperand(1); // trampoline
				4356	SDOperand FPtr = Op.getOperand(2); // nested function
				4357	SDOperand Nest = Op.getOperand(3); // 'nest' parameter value
				4358
				4359	SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));
				4360
				4361	if (Subtarget->is64Bit()) {
				4362	return SDOperand(); // not yet supported
				4363	} else {
				4364	Function Func = (Function )
				4365	cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
				4366	unsigned CC = Func->getCallingConv();
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4367	unsigned NestReg;
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4368
				4369	switch (CC) {
				4370	default:
				4371	assert(0 && "Unsupported calling convention");
				4372	case CallingConv::C:
				4373	case CallingConv::Fast:
				4374	case CallingConv::X86_StdCall: {
				4375	// Pass 'nest' parameter in ECX.
				4376	// Must be kept in sync with X86CallingConv.td
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4377	NestReg = X86::ECX;
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4378
				4379	// Check that ECX wasn't needed by an 'inreg' parameter.
				4380	const FunctionType *FTy = Func->getFunctionType();
				4381	const ParamAttrsList *Attrs = FTy->getParamAttrs();
				4382
				4383	if (Attrs && !Func->isVarArg()) {
				4384	unsigned InRegCount = 0;
				4385	unsigned Idx = 1;
				4386
				4387	for (FunctionType::param_iterator I = FTy->param_begin(),
				4388	E = FTy->param_end(); I != E; ++I, ++Idx)
				4389	if (Attrs->paramHasAttr(Idx, ParamAttr::InReg))
				4390	// FIXME: should only count parameters that are lowered to integers.
				4391	InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;
				4392
				4393	if (InRegCount > 2) {
				4394	cerr << "Nest register in use - reduce number of inreg parameters!\n";
				4395	abort();
				4396	}
				4397	}
				4398	break;
				4399	}
				4400	case CallingConv::X86_FastCall:
				4401	// Pass 'nest' parameter in EAX.
				4402	// Must be kept in sync with X86CallingConv.td
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4403	NestReg = X86::EAX;
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4404	break;
				4405	}
				4406
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4407	const X86InstrInfo *TII =
				4408	((X86TargetMachine&)getTargetMachine()).getInstrInfo();
				4409
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4410	SDOperand OutChains[4];
				4411	SDOperand Addr, Disp;
				4412
				4413	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
				4414	Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
				4415
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4416	unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
				4417	unsigned char N86Reg = ((X86RegisterInfo&)RegInfo).getX86RegNum(NestReg);
				4418	OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri\|N86Reg, MVT::i8),
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4419	Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
				4420
				4421	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
				4422	OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
				4423	TrmpSV->getOffset() + 1, false, 1);
				4424
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4425	unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4426	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
				4427	OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
				4428	TrmpSV->getValue() + 5, TrmpSV->getOffset());
				4429
				4430	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
				4431	OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
				4432	TrmpSV->getOffset() + 6, false, 1);
				4433
Duncan Sands	7407a9f	2007-09-11 14:10:23 +0000	[diff] [blame]	4434	SDOperand Ops[] =
				4435	{ Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
				4436	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4437	}
				4438	}
				4439
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4440	/// LowerOperation - Provide custom lowering hooks for some operations.
				4441	///
				4442	SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
				4443	switch (Op.getOpcode()) {
				4444	default: assert(0 && "Should not custom lower this!");
				4445	case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
				4446	case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
				4447	case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
				4448	case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
				4449	case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
				4450	case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
				4451	case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
				4452	case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
				4453	case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
				4454	case ISD::SHL_PARTS:
				4455	case ISD::SRA_PARTS:
				4456	case ISD::SRL_PARTS: return LowerShift(Op, DAG);
				4457	case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
				4458	case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
				4459	case ISD::FABS: return LowerFABS(Op, DAG);
				4460	case ISD::FNEG: return LowerFNEG(Op, DAG);
				4461	case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
				4462	case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode());
				4463	case ISD::SELECT: return LowerSELECT(Op, DAG);
				4464	case ISD::BRCOND: return LowerBRCOND(Op, DAG);
				4465	case ISD::JumpTable: return LowerJumpTable(Op, DAG);
				4466	case ISD::CALL: return LowerCALL(Op, DAG);
				4467	case ISD::RET: return LowerRET(Op, DAG);
				4468	case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
				4469	case ISD::MEMSET: return LowerMEMSET(Op, DAG);
				4470	case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
				4471	case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
				4472	case ISD::VASTART: return LowerVASTART(Op, DAG);
				4473	case ISD::VACOPY: return LowerVACOPY(Op, DAG);
				4474	case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
				4475	case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
				4476	case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
				4477	case ISD::FRAME_TO_ARGS_OFFSET:
				4478	return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
				4479	case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
				4480	case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4481	case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4482	}
				4483	return SDOperand();
				4484	}
				4485
				4486	const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
				4487	switch (Opcode) {
				4488	default: return NULL;
				4489	case X86ISD::SHLD: return "X86ISD::SHLD";
				4490	case X86ISD::SHRD: return "X86ISD::SHRD";
				4491	case X86ISD::FAND: return "X86ISD::FAND";
				4492	case X86ISD::FOR: return "X86ISD::FOR";
				4493	case X86ISD::FXOR: return "X86ISD::FXOR";
				4494	case X86ISD::FSRL: return "X86ISD::FSRL";
				4495	case X86ISD::FILD: return "X86ISD::FILD";
				4496	case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
				4497	case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
				4498	case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
				4499	case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
				4500	case X86ISD::FLD: return "X86ISD::FLD";
				4501	case X86ISD::FST: return "X86ISD::FST";
				4502	case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
				4503	case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
				4504	case X86ISD::CALL: return "X86ISD::CALL";
				4505	case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
				4506	case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
				4507	case X86ISD::CMP: return "X86ISD::CMP";
				4508	case X86ISD::COMI: return "X86ISD::COMI";
				4509	case X86ISD::UCOMI: return "X86ISD::UCOMI";
				4510	case X86ISD::SETCC: return "X86ISD::SETCC";
				4511	case X86ISD::CMOV: return "X86ISD::CMOV";
				4512	case X86ISD::BRCOND: return "X86ISD::BRCOND";
				4513	case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
				4514	case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
				4515	case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4516	case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
				4517	case X86ISD::Wrapper: return "X86ISD::Wrapper";
				4518	case X86ISD::S2VEC: return "X86ISD::S2VEC";
				4519	case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
				4520	case X86ISD::PINSRW: return "X86ISD::PINSRW";
				4521	case X86ISD::FMAX: return "X86ISD::FMAX";
				4522	case X86ISD::FMIN: return "X86ISD::FMIN";
				4523	case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
				4524	case X86ISD::FRCP: return "X86ISD::FRCP";
				4525	case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
				4526	case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
				4527	case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
				4528	}
				4529	}
				4530
				4531	// isLegalAddressingMode - Return true if the addressing mode represented
				4532	// by AM is legal for this target, for a load/store of the specified type.
				4533	bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
				4534	const Type *Ty) const {
				4535	// X86 supports extremely general addressing modes.
				4536
				4537	// X86 allows a sign-extended 32-bit immediate field as a displacement.
				4538	if (AM.BaseOffs <= -(1LL << 32) \|\| AM.BaseOffs >= (1LL << 32)-1)
				4539	return false;
				4540
				4541	if (AM.BaseGV) {
Evan Cheng	6a1f3f1	2007-08-01 23:46:47 +0000	[diff] [blame]	4542	// We can only fold this if we don't need an extra load.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4543	if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
				4544	return false;
Evan Cheng	6a1f3f1	2007-08-01 23:46:47 +0000	[diff] [blame]	4545
				4546	// X86-64 only supports addr of globals in small code model.
				4547	if (Subtarget->is64Bit()) {
				4548	if (getTargetMachine().getCodeModel() != CodeModel::Small)
				4549	return false;
				4550	// If lower 4G is not available, then we must use rip-relative addressing.
				4551	if (AM.BaseOffs \|\| AM.Scale > 1)
				4552	return false;
				4553	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4554	}
				4555
				4556	switch (AM.Scale) {
				4557	case 0:
				4558	case 1:
				4559	case 2:
				4560	case 4:
				4561	case 8:
				4562	// These scales always work.
				4563	break;
				4564	case 3:
				4565	case 5:
				4566	case 9:
				4567	// These scales are formed with basereg+scalereg. Only accept if there is
				4568	// no basereg yet.
				4569	if (AM.HasBaseReg)
				4570	return false;
				4571	break;
				4572	default: // Other stuff never works.
				4573	return false;
				4574	}
				4575
				4576	return true;
				4577	}
				4578
				4579
				4580	/// isShuffleMaskLegal - Targets can use this to indicate that they only
				4581	/// support some VECTOR_SHUFFLE operations, those with specific masks.
				4582	/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
				4583	/// are assumed to be legal.
				4584	bool
				4585	X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
				4586	// Only do shuffles on 128-bit vector types for now.
				4587	if (MVT::getSizeInBits(VT) == 64) return false;
				4588	return (Mask.Val->getNumOperands() <= 4 \|\|
				4589	isIdentityMask(Mask.Val) \|\|
				4590	isIdentityMask(Mask.Val, true) \|\|
				4591	isSplatMask(Mask.Val) \|\|
				4592	isPSHUFHW_PSHUFLWMask(Mask.Val) \|\|
				4593	X86::isUNPCKLMask(Mask.Val) \|\|
				4594	X86::isUNPCKHMask(Mask.Val) \|\|
				4595	X86::isUNPCKL_v_undef_Mask(Mask.Val) \|\|
				4596	X86::isUNPCKH_v_undef_Mask(Mask.Val));
				4597	}
				4598
				4599	bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
				4600	MVT::ValueType EVT,
				4601	SelectionDAG &DAG) const {
				4602	unsigned NumElts = BVOps.size();
				4603	// Only do shuffles on 128-bit vector types for now.
				4604	if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
				4605	if (NumElts == 2) return true;
				4606	if (NumElts == 4) {
				4607	return (isMOVLMask(&BVOps[0], 4) \|\|
				4608	isCommutedMOVL(&BVOps[0], 4, true) \|\|
				4609	isSHUFPMask(&BVOps[0], 4) \|\|
				4610	isCommutedSHUFP(&BVOps[0], 4));
				4611	}
				4612	return false;
				4613	}
				4614
				4615	//===----------------------------------------------------------------------===//
				4616	// X86 Scheduler Hooks
				4617	//===----------------------------------------------------------------------===//
				4618
				4619	MachineBasicBlock *
				4620	X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
				4621	MachineBasicBlock *BB) {
				4622	const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
				4623	switch (MI->getOpcode()) {
				4624	default: assert(false && "Unexpected instr type to insert");
				4625	case X86::CMOV_FR32:
				4626	case X86::CMOV_FR64:
				4627	case X86::CMOV_V4F32:
				4628	case X86::CMOV_V2F64:
				4629	case X86::CMOV_V2I64: {
				4630	// To "insert" a SELECT_CC instruction, we actually have to insert the
				4631	// diamond control-flow pattern. The incoming instruction knows the
				4632	// destination vreg to set, the condition code register to branch on, the
				4633	// true/false values to select between, and a branch opcode to use.
				4634	const BasicBlock *LLVM_BB = BB->getBasicBlock();
				4635	ilist<MachineBasicBlock>::iterator It = BB;
				4636	++It;
				4637
				4638	// thisMBB:
				4639	// ...
				4640	// TrueVal = ...
				4641	// cmpTY ccX, r1, r2
				4642	// bCC copy1MBB
				4643	// fallthrough --> copy0MBB
				4644	MachineBasicBlock *thisMBB = BB;
				4645	MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
				4646	MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
				4647	unsigned Opc =
				4648	X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
				4649	BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
				4650	MachineFunction *F = BB->getParent();
				4651	F->getBasicBlockList().insert(It, copy0MBB);
				4652	F->getBasicBlockList().insert(It, sinkMBB);
				4653	// Update machine-CFG edges by first adding all successors of the current
				4654	// block to the new block which will contain the Phi node for the select.
				4655	for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
				4656	e = BB->succ_end(); i != e; ++i)
				4657	sinkMBB->addSuccessor(*i);
				4658	// Next, remove all successors of the current block, and add the true
				4659	// and fallthrough blocks as its successors.
				4660	while(!BB->succ_empty())
				4661	BB->removeSuccessor(BB->succ_begin());
				4662	BB->addSuccessor(copy0MBB);
				4663	BB->addSuccessor(sinkMBB);
				4664
				4665	// copy0MBB:
				4666	// %FalseValue = ...
				4667	// # fallthrough to sinkMBB
				4668	BB = copy0MBB;
				4669
				4670	// Update machine-CFG edges
				4671	BB->addSuccessor(sinkMBB);
				4672
				4673	// sinkMBB:
				4674	// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
				4675	// ...
				4676	BB = sinkMBB;
				4677	BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
				4678	.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
				4679	.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
				4680
				4681	delete MI; // The pseudo instruction is gone now.
				4682	return BB;
				4683	}
				4684
				4685	case X86::FP32_TO_INT16_IN_MEM:
				4686	case X86::FP32_TO_INT32_IN_MEM:
				4687	case X86::FP32_TO_INT64_IN_MEM:
				4688	case X86::FP64_TO_INT16_IN_MEM:
				4689	case X86::FP64_TO_INT32_IN_MEM:
Dale Johannesen	6d0e36a	2007-08-07 01:17:37 +0000	[diff] [blame]	4690	case X86::FP64_TO_INT64_IN_MEM:
				4691	case X86::FP80_TO_INT16_IN_MEM:
				4692	case X86::FP80_TO_INT32_IN_MEM:
				4693	case X86::FP80_TO_INT64_IN_MEM: {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4694	// Change the floating point control register to use "round towards zero"
				4695	// mode when truncating to an integer value.
				4696	MachineFunction *F = BB->getParent();
				4697	int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
				4698	addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
				4699
				4700	// Load the old value of the high byte of the control word...
				4701	unsigned OldCW =
				4702	F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
				4703	addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
				4704
				4705	// Set the high part to be round to zero...
				4706	addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
				4707	.addImm(0xC7F);
				4708
				4709	// Reload the modified control word now...
				4710	addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
				4711
				4712	// Restore the memory image of control word to original value
				4713	addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
				4714	.addReg(OldCW);
				4715
				4716	// Get the X86 opcode to use.
				4717	unsigned Opc;
				4718	switch (MI->getOpcode()) {
				4719	default: assert(0 && "illegal opcode!");
				4720	case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
				4721	case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
				4722	case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
				4723	case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
				4724	case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
				4725	case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
Dale Johannesen	6d0e36a	2007-08-07 01:17:37 +0000	[diff] [blame]	4726	case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
				4727	case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
				4728	case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4729	}
				4730
				4731	X86AddressMode AM;
				4732	MachineOperand &Op = MI->getOperand(0);
				4733	if (Op.isRegister()) {
				4734	AM.BaseType = X86AddressMode::RegBase;
				4735	AM.Base.Reg = Op.getReg();
				4736	} else {
				4737	AM.BaseType = X86AddressMode::FrameIndexBase;
				4738	AM.Base.FrameIndex = Op.getFrameIndex();
				4739	}
				4740	Op = MI->getOperand(1);
				4741	if (Op.isImmediate())
				4742	AM.Scale = Op.getImm();
				4743	Op = MI->getOperand(2);
				4744	if (Op.isImmediate())
				4745	AM.IndexReg = Op.getImm();
				4746	Op = MI->getOperand(3);
				4747	if (Op.isGlobalAddress()) {
				4748	AM.GV = Op.getGlobal();
				4749	} else {
				4750	AM.Disp = Op.getImm();
				4751	}
				4752	addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
				4753	.addReg(MI->getOperand(4).getReg());
				4754
				4755	// Reload the original control word now.
				4756	addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
				4757
				4758	delete MI; // The pseudo instruction is gone now.
				4759	return BB;
				4760	}
				4761	}
				4762	}
				4763
				4764	//===----------------------------------------------------------------------===//
				4765	// X86 Optimization Hooks
				4766	//===----------------------------------------------------------------------===//
				4767
				4768	void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
				4769	uint64_t Mask,
				4770	uint64_t &KnownZero,
				4771	uint64_t &KnownOne,
				4772	const SelectionDAG &DAG,
				4773	unsigned Depth) const {
				4774	unsigned Opc = Op.getOpcode();
				4775	assert((Opc >= ISD::BUILTIN_OP_END \|\|
				4776	Opc == ISD::INTRINSIC_WO_CHAIN \|\|
				4777	Opc == ISD::INTRINSIC_W_CHAIN \|\|
				4778	Opc == ISD::INTRINSIC_VOID) &&
				4779	"Should use MaskedValueIsZero if you don't know whether Op"
				4780	" is a target node!");
				4781
				4782	KnownZero = KnownOne = 0; // Don't know anything.
				4783	switch (Opc) {
				4784	default: break;
				4785	case X86ISD::SETCC:
				4786	KnownZero \|= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
				4787	break;
				4788	}
				4789	}
				4790
				4791	/// getShuffleScalarElt - Returns the scalar element that will make up the ith
				4792	/// element of the result of the vector shuffle.
				4793	static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
				4794	MVT::ValueType VT = N->getValueType(0);
				4795	SDOperand PermMask = N->getOperand(2);
				4796	unsigned NumElems = PermMask.getNumOperands();
				4797	SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
				4798	i %= NumElems;
				4799	if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
				4800	return (i == 0)
				4801	? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
				4802	} else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
				4803	SDOperand Idx = PermMask.getOperand(i);
				4804	if (Idx.getOpcode() == ISD::UNDEF)
				4805	return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
				4806	return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
				4807	}
				4808	return SDOperand();
				4809	}
				4810
				4811	/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
				4812	/// node is a GlobalAddress + an offset.
				4813	static bool isGAPlusOffset(SDNode N, GlobalValue &GA, int64_t &Offset) {
				4814	unsigned Opc = N->getOpcode();
				4815	if (Opc == X86ISD::Wrapper) {
				4816	if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
				4817	GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
				4818	return true;
				4819	}
				4820	} else if (Opc == ISD::ADD) {
				4821	SDOperand N1 = N->getOperand(0);
				4822	SDOperand N2 = N->getOperand(1);
				4823	if (isGAPlusOffset(N1.Val, GA, Offset)) {
				4824	ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
				4825	if (V) {
				4826	Offset += V->getSignExtended();
				4827	return true;
				4828	}
				4829	} else if (isGAPlusOffset(N2.Val, GA, Offset)) {
				4830	ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
				4831	if (V) {
				4832	Offset += V->getSignExtended();
				4833	return true;
				4834	}
				4835	}
				4836	}
				4837	return false;
				4838	}
				4839
				4840	/// isConsecutiveLoad - Returns true if N is loading from an address of Base
				4841	/// + Dist * Size.
				4842	static bool isConsecutiveLoad(SDNode N, SDNode Base, int Dist, int Size,
				4843	MachineFrameInfo *MFI) {
				4844	if (N->getOperand(0).Val != Base->getOperand(0).Val)
				4845	return false;
				4846
				4847	SDOperand Loc = N->getOperand(1);
				4848	SDOperand BaseLoc = Base->getOperand(1);
				4849	if (Loc.getOpcode() == ISD::FrameIndex) {
				4850	if (BaseLoc.getOpcode() != ISD::FrameIndex)
				4851	return false;
Dan Gohman	53491e9	2007-07-23 20:24:29 +0000	[diff] [blame]	4852	int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
				4853	int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4854	int FS = MFI->getObjectSize(FI);
				4855	int BFS = MFI->getObjectSize(BFI);
				4856	if (FS != BFS \|\| FS != Size) return false;
				4857	return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
				4858	} else {
				4859	GlobalValue *GV1 = NULL;
				4860	GlobalValue *GV2 = NULL;
				4861	int64_t Offset1 = 0;
				4862	int64_t Offset2 = 0;
				4863	bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
				4864	bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
				4865	if (isGA1 && isGA2 && GV1 == GV2)
				4866	return Offset1 == (Offset2 + Dist*Size);
				4867	}
				4868
				4869	return false;
				4870	}
				4871
				4872	static bool isBaseAlignment16(SDNode Base, MachineFrameInfo MFI,
				4873	const X86Subtarget *Subtarget) {
				4874	GlobalValue *GV;
				4875	int64_t Offset;
				4876	if (isGAPlusOffset(Base, GV, Offset))
				4877	return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
				4878	else {
				4879	assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
Dan Gohman	53491e9	2007-07-23 20:24:29 +0000	[diff] [blame]	4880	int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4881	if (BFI < 0)
				4882	// Fixed objects do not specify alignment, however the offsets are known.
				4883	return ((Subtarget->getStackAlignment() % 16) == 0 &&
				4884	(MFI->getObjectOffset(BFI) % 16) == 0);
				4885	else
				4886	return MFI->getObjectAlignment(BFI) >= 16;
				4887	}
				4888	return false;
				4889	}
				4890
				4891
				4892	/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
				4893	/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
				4894	/// if the load addresses are consecutive, non-overlapping, and in the right
				4895	/// order.
				4896	static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
				4897	const X86Subtarget *Subtarget) {
				4898	MachineFunction &MF = DAG.getMachineFunction();
				4899	MachineFrameInfo *MFI = MF.getFrameInfo();
				4900	MVT::ValueType VT = N->getValueType(0);
				4901	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				4902	SDOperand PermMask = N->getOperand(2);
				4903	int NumElems = (int)PermMask.getNumOperands();
				4904	SDNode *Base = NULL;
				4905	for (int i = 0; i < NumElems; ++i) {
				4906	SDOperand Idx = PermMask.getOperand(i);
				4907	if (Idx.getOpcode() == ISD::UNDEF) {
				4908	if (!Base) return SDOperand();
				4909	} else {
				4910	SDOperand Arg =
				4911	getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
				4912	if (!Arg.Val \|\| !ISD::isNON_EXTLoad(Arg.Val))
				4913	return SDOperand();
				4914	if (!Base)
				4915	Base = Arg.Val;
				4916	else if (!isConsecutiveLoad(Arg.Val, Base,
				4917	i, MVT::getSizeInBits(EVT)/8,MFI))
				4918	return SDOperand();
				4919	}
				4920	}
				4921
				4922	bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4923	LoadSDNode *LD = cast<LoadSDNode>(Base);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4924	if (isAlign16) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4925	return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4926	LD->getSrcValueOffset(), LD->isVolatile());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4927	} else {
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4928	return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
				4929	LD->getSrcValueOffset(), LD->isVolatile(),
				4930	LD->getAlignment());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4931	}
				4932	}
				4933
				4934	/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
				4935	static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
				4936	const X86Subtarget *Subtarget) {
				4937	SDOperand Cond = N->getOperand(0);
				4938
				4939	// If we have SSE[12] support, try to form min/max nodes.
				4940	if (Subtarget->hasSSE2() &&
				4941	(N->getValueType(0) == MVT::f32 \|\| N->getValueType(0) == MVT::f64)) {
				4942	if (Cond.getOpcode() == ISD::SETCC) {
				4943	// Get the LHS/RHS of the select.
				4944	SDOperand LHS = N->getOperand(1);
				4945	SDOperand RHS = N->getOperand(2);
				4946	ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
				4947
				4948	unsigned Opcode = 0;
				4949	if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
				4950	switch (CC) {
				4951	default: break;
				4952	case ISD::SETOLE: // (X <= Y) ? X : Y -> min
				4953	case ISD::SETULE:
				4954	case ISD::SETLE:
				4955	if (!UnsafeFPMath) break;
				4956	// FALL THROUGH.
				4957	case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
				4958	case ISD::SETLT:
				4959	Opcode = X86ISD::FMIN;
				4960	break;
				4961
				4962	case ISD::SETOGT: // (X > Y) ? X : Y -> max
				4963	case ISD::SETUGT:
				4964	case ISD::SETGT:
				4965	if (!UnsafeFPMath) break;
				4966	// FALL THROUGH.
				4967	case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
				4968	case ISD::SETGE:
				4969	Opcode = X86ISD::FMAX;
				4970	break;
				4971	}
				4972	} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
				4973	switch (CC) {
				4974	default: break;
				4975	case ISD::SETOGT: // (X > Y) ? Y : X -> min
				4976	case ISD::SETUGT:
				4977	case ISD::SETGT:
				4978	if (!UnsafeFPMath) break;
				4979	// FALL THROUGH.
				4980	case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
				4981	case ISD::SETGE:
				4982	Opcode = X86ISD::FMIN;
				4983	break;
				4984
				4985	case ISD::SETOLE: // (X <= Y) ? Y : X -> max
				4986	case ISD::SETULE:
				4987	case ISD::SETLE:
				4988	if (!UnsafeFPMath) break;
				4989	// FALL THROUGH.
				4990	case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
				4991	case ISD::SETLT:
				4992	Opcode = X86ISD::FMAX;
				4993	break;
				4994	}
				4995	}
				4996
				4997	if (Opcode)
				4998	return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
				4999	}
				5000
				5001	}
				5002
				5003	return SDOperand();
				5004	}
				5005
				5006
				5007	SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
				5008	DAGCombinerInfo &DCI) const {
				5009	SelectionDAG &DAG = DCI.DAG;
				5010	switch (N->getOpcode()) {
				5011	default: break;
				5012	case ISD::VECTOR_SHUFFLE:
				5013	return PerformShuffleCombine(N, DAG, Subtarget);
				5014	case ISD::SELECT:
				5015	return PerformSELECTCombine(N, DAG, Subtarget);
				5016	}
				5017
				5018	return SDOperand();
				5019	}
				5020
				5021	//===----------------------------------------------------------------------===//
				5022	// X86 Inline Assembly Support
				5023	//===----------------------------------------------------------------------===//
				5024
				5025	/// getConstraintType - Given a constraint letter, return the type of
				5026	/// constraint it is for this target.
				5027	X86TargetLowering::ConstraintType
				5028	X86TargetLowering::getConstraintType(const std::string &Constraint) const {
				5029	if (Constraint.size() == 1) {
				5030	switch (Constraint[0]) {
				5031	case 'A':
				5032	case 'r':
				5033	case 'R':
				5034	case 'l':
				5035	case 'q':
				5036	case 'Q':
				5037	case 'x':
				5038	case 'Y':
				5039	return C_RegisterClass;
				5040	default:
				5041	break;
				5042	}
				5043	}
				5044	return TargetLowering::getConstraintType(Constraint);
				5045	}
				5046
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5047	/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
				5048	/// vector. If it is invalid, don't add anything to Ops.
				5049	void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
				5050	char Constraint,
				5051	std::vector<SDOperand>&Ops,
				5052	SelectionDAG &DAG) {
				5053	SDOperand Result(0, 0);
				5054
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5055	switch (Constraint) {
				5056	default: break;
				5057	case 'I':
				5058	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5059	if (C->getValue() <= 31) {
				5060	Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
				5061	break;
				5062	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5063	}
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5064	return;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5065	case 'N':
				5066	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5067	if (C->getValue() <= 255) {
				5068	Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
				5069	break;
				5070	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5071	}
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5072	return;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5073	case 'i': {
				5074	// Literal immediates are always ok.
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5075	if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
				5076	Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
				5077	break;
				5078	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5079
				5080	// If we are in non-pic codegen mode, we allow the address of a global (with
				5081	// an optional displacement) to be used with 'i'.
				5082	GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
				5083	int64_t Offset = 0;
				5084
				5085	// Match either (GA) or (GA+C)
				5086	if (GA) {
				5087	Offset = GA->getOffset();
				5088	} else if (Op.getOpcode() == ISD::ADD) {
				5089	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
				5090	GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
				5091	if (C && GA) {
				5092	Offset = GA->getOffset()+C->getValue();
				5093	} else {
				5094	C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
				5095	GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
				5096	if (C && GA)
				5097	Offset = GA->getOffset()+C->getValue();
				5098	else
				5099	C = 0, GA = 0;
				5100	}
				5101	}
				5102
				5103	if (GA) {
				5104	// If addressing this global requires a load (e.g. in PIC mode), we can't
				5105	// match.
				5106	if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
				5107	false))
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5108	return;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5109
				5110	Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
				5111	Offset);
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5112	Result = Op;
				5113	break;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5114	}
				5115
				5116	// Otherwise, not valid for this mode.
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5117	return;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5118	}
				5119	}
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5120
				5121	if (Result.Val) {
				5122	Ops.push_back(Result);
				5123	return;
				5124	}
				5125	return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5126	}
				5127
				5128	std::vector<unsigned> X86TargetLowering::
				5129	getRegClassForInlineAsmConstraint(const std::string &Constraint,
				5130	MVT::ValueType VT) const {
				5131	if (Constraint.size() == 1) {
				5132	// FIXME: not handling fp-stack yet!
				5133	switch (Constraint[0]) { // GCC X86 Constraint Letters
				5134	default: break; // Unknown constraint letter
				5135	case 'A': // EAX/EDX
				5136	if (VT == MVT::i32 \|\| VT == MVT::i64)
				5137	return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
				5138	break;
				5139	case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
				5140	case 'Q': // Q_REGS
				5141	if (VT == MVT::i32)
				5142	return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
				5143	else if (VT == MVT::i16)
				5144	return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
				5145	else if (VT == MVT::i8)
Evan Cheng	f85c10f	2007-08-13 23:27:11 +0000	[diff] [blame]	5146	return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5147	break;
				5148	}
				5149	}
				5150
				5151	return std::vector<unsigned>();
				5152	}
				5153
				5154	std::pair<unsigned, const TargetRegisterClass*>
				5155	X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
				5156	MVT::ValueType VT) const {
				5157	// First, see if this is a constraint that directly corresponds to an LLVM
				5158	// register class.
				5159	if (Constraint.size() == 1) {
				5160	// GCC Constraint Letters
				5161	switch (Constraint[0]) {
				5162	default: break;
				5163	case 'r': // GENERAL_REGS
				5164	case 'R': // LEGACY_REGS
				5165	case 'l': // INDEX_REGS
				5166	if (VT == MVT::i64 && Subtarget->is64Bit())
				5167	return std::make_pair(0U, X86::GR64RegisterClass);
				5168	if (VT == MVT::i32)
				5169	return std::make_pair(0U, X86::GR32RegisterClass);
				5170	else if (VT == MVT::i16)
				5171	return std::make_pair(0U, X86::GR16RegisterClass);
				5172	else if (VT == MVT::i8)
				5173	return std::make_pair(0U, X86::GR8RegisterClass);
				5174	break;
				5175	case 'y': // MMX_REGS if MMX allowed.
				5176	if (!Subtarget->hasMMX()) break;
				5177	return std::make_pair(0U, X86::VR64RegisterClass);
				5178	break;
				5179	case 'Y': // SSE_REGS if SSE2 allowed
				5180	if (!Subtarget->hasSSE2()) break;
				5181	// FALL THROUGH.
				5182	case 'x': // SSE_REGS if SSE1 allowed
				5183	if (!Subtarget->hasSSE1()) break;
				5184
				5185	switch (VT) {
				5186	default: break;
				5187	// Scalar SSE types.
				5188	case MVT::f32:
				5189	case MVT::i32:
				5190	return std::make_pair(0U, X86::FR32RegisterClass);
				5191	case MVT::f64:
				5192	case MVT::i64:
				5193	return std::make_pair(0U, X86::FR64RegisterClass);
				5194	// Vector types.
				5195	case MVT::v16i8:
				5196	case MVT::v8i16:
				5197	case MVT::v4i32:
				5198	case MVT::v2i64:
				5199	case MVT::v4f32:
				5200	case MVT::v2f64:
				5201	return std::make_pair(0U, X86::VR128RegisterClass);
				5202	}
				5203	break;
				5204	}
				5205	}
				5206
				5207	// Use the default implementation in TargetLowering to convert the register
				5208	// constraint into a member of a register class.
				5209	std::pair<unsigned, const TargetRegisterClass*> Res;
				5210	Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
				5211
				5212	// Not found as a standard register?
				5213	if (Res.second == 0) {
				5214	// GCC calls "st(0)" just plain "st".
				5215	if (StringsEqualNoCase("{st}", Constraint)) {
				5216	Res.first = X86::ST0;
				5217	Res.second = X86::RSTRegisterClass;
				5218	}
				5219
				5220	return Res;
				5221	}
				5222
				5223	// Otherwise, check to see if this is a register class of the wrong value
				5224	// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
				5225	// turn into {ax},{dx}.
				5226	if (Res.second->hasType(VT))
				5227	return Res; // Correct type already, nothing to do.
				5228
				5229	// All of the single-register GCC register classes map their values onto
				5230	// 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
				5231	// really want an 8-bit or 32-bit register, map to the appropriate register
				5232	// class and return the appropriate register.
				5233	if (Res.second != X86::GR16RegisterClass)
				5234	return Res;
				5235
				5236	if (VT == MVT::i8) {
				5237	unsigned DestReg = 0;
				5238	switch (Res.first) {
				5239	default: break;
				5240	case X86::AX: DestReg = X86::AL; break;
				5241	case X86::DX: DestReg = X86::DL; break;
				5242	case X86::CX: DestReg = X86::CL; break;
				5243	case X86::BX: DestReg = X86::BL; break;
				5244	}
				5245	if (DestReg) {
				5246	Res.first = DestReg;
				5247	Res.second = Res.second = X86::GR8RegisterClass;
				5248	}
				5249	} else if (VT == MVT::i32) {
				5250	unsigned DestReg = 0;
				5251	switch (Res.first) {
				5252	default: break;
				5253	case X86::AX: DestReg = X86::EAX; break;
				5254	case X86::DX: DestReg = X86::EDX; break;
				5255	case X86::CX: DestReg = X86::ECX; break;
				5256	case X86::BX: DestReg = X86::EBX; break;
				5257	case X86::SI: DestReg = X86::ESI; break;
				5258	case X86::DI: DestReg = X86::EDI; break;
				5259	case X86::BP: DestReg = X86::EBP; break;
				5260	case X86::SP: DestReg = X86::ESP; break;
				5261	}
				5262	if (DestReg) {
				5263	Res.first = DestReg;
				5264	Res.second = Res.second = X86::GR32RegisterClass;
				5265	}
				5266	} else if (VT == MVT::i64) {
				5267	unsigned DestReg = 0;
				5268	switch (Res.first) {
				5269	default: break;
				5270	case X86::AX: DestReg = X86::RAX; break;
				5271	case X86::DX: DestReg = X86::RDX; break;
				5272	case X86::CX: DestReg = X86::RCX; break;
				5273	case X86::BX: DestReg = X86::RBX; break;
				5274	case X86::SI: DestReg = X86::RSI; break;
				5275	case X86::DI: DestReg = X86::RDI; break;
				5276	case X86::BP: DestReg = X86::RBP; break;
				5277	case X86::SP: DestReg = X86::RSP; break;
				5278	}
				5279	if (DestReg) {
				5280	Res.first = DestReg;
				5281	Res.second = Res.second = X86::GR64RegisterClass;
				5282	}
				5283	}
				5284
				5285	return Res;
				5286	}