Blame - lib/Target/X86/X86ISelLowering.cpp - platform/external/llvm

blob: ab7a7edbfc7d33ad64ecaafb0d599adf850c389b [file] [log] [blame]

Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1	//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file defines the interfaces that X86 uses to lower LLVM code into a
				11	// selection DAG.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "X86.h"
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	16	#include "X86CodeEmitter.h"
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	17	#include "X86InstrBuilder.h"
				18	#include "X86ISelLowering.h"
				19	#include "X86MachineFunctionInfo.h"
				20	#include "X86TargetMachine.h"
				21	#include "llvm/CallingConv.h"
				22	#include "llvm/Constants.h"
				23	#include "llvm/DerivedTypes.h"
				24	#include "llvm/GlobalVariable.h"
				25	#include "llvm/Function.h"
				26	#include "llvm/Intrinsics.h"
				27	#include "llvm/ADT/VectorExtras.h"
				28	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				29	#include "llvm/CodeGen/CallingConvLower.h"
				30	#include "llvm/CodeGen/MachineFrameInfo.h"
				31	#include "llvm/CodeGen/MachineFunction.h"
				32	#include "llvm/CodeGen/MachineInstrBuilder.h"
				33	#include "llvm/CodeGen/SelectionDAG.h"
				34	#include "llvm/CodeGen/SSARegMap.h"
				35	#include "llvm/Support/MathExtras.h"
				36	#include "llvm/Target/TargetOptions.h"
				37	#include "llvm/ADT/StringExtras.h"
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	38	#include "llvm/ParameterAttributes.h"
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	39	using namespace llvm;
				40
				41	X86TargetLowering::X86TargetLowering(TargetMachine &TM)
				42	: TargetLowering(TM) {
				43	Subtarget = &TM.getSubtarget<X86Subtarget>();
				44	X86ScalarSSE = Subtarget->hasSSE2();
				45	X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
				46
				47	RegInfo = TM.getRegisterInfo();
				48
				49	// Set up the TargetLowering object.
				50
				51	// X86 is weird, it always uses i8 for shift amounts and setcc results.
				52	setShiftAmountType(MVT::i8);
				53	setSetCCResultType(MVT::i8);
				54	setSetCCResultContents(ZeroOrOneSetCCResult);
				55	setSchedulingPreference(SchedulingForRegPressure);
				56	setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
				57	setStackPointerRegisterToSaveRestore(X86StackPtr);
				58
				59	if (Subtarget->isTargetDarwin()) {
				60	// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
				61	setUseUnderscoreSetJmp(false);
				62	setUseUnderscoreLongJmp(false);
				63	} else if (Subtarget->isTargetMingw()) {
				64	// MS runtime is weird: it exports _setjmp, but longjmp!
				65	setUseUnderscoreSetJmp(true);
				66	setUseUnderscoreLongJmp(false);
				67	} else {
				68	setUseUnderscoreSetJmp(true);
				69	setUseUnderscoreLongJmp(true);
				70	}
				71
				72	// Set up the register classes.
				73	addRegisterClass(MVT::i8, X86::GR8RegisterClass);
				74	addRegisterClass(MVT::i16, X86::GR16RegisterClass);
				75	addRegisterClass(MVT::i32, X86::GR32RegisterClass);
				76	if (Subtarget->is64Bit())
				77	addRegisterClass(MVT::i64, X86::GR64RegisterClass);
				78
				79	setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
				80
				81	// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
				82	// operation.
				83	setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
				84	setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
				85	setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
				86
				87	if (Subtarget->is64Bit()) {
				88	setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
				89	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
				90	} else {
				91	if (X86ScalarSSE)
				92	// If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
				93	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
				94	else
				95	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
				96	}
				97
				98	// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
				99	// this operation.
				100	setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
				101	setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
				102	// SSE has no i16 to fp conversion, only i32
				103	if (X86ScalarSSE)
				104	setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
				105	else {
				106	setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
				107	setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
				108	}
				109
				110	if (!Subtarget->is64Bit()) {
				111	// Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
				112	setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
				113	setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
				114	}
				115
				116	// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
				117	// this operation.
				118	setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
				119	setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
				120
				121	if (X86ScalarSSE) {
				122	setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
				123	} else {
				124	setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
				125	setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
				126	}
				127
				128	// Handle FP_TO_UINT by promoting the destination to a larger signed
				129	// conversion.
				130	setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
				131	setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
				132	setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
				133
				134	if (Subtarget->is64Bit()) {
				135	setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
				136	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
				137	} else {
				138	if (X86ScalarSSE && !Subtarget->hasSSE3())
				139	// Expand FP_TO_UINT into a select.
				140	// FIXME: We would like to use a Custom expander here eventually to do
				141	// the optimal thing for SSE vs. the default expansion in the legalizer.
				142	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
				143	else
				144	// With SSE3 we can use fisttpll to convert to a signed i64.
				145	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
				146	}
				147
				148	// TODO: when we have SSE, these could be more efficient, by using movd/movq.
				149	if (!X86ScalarSSE) {
				150	setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
				151	setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
				152	}
				153
				154	setOperationAction(ISD::BR_JT , MVT::Other, Expand);
				155	setOperationAction(ISD::BRCOND , MVT::Other, Custom);
				156	setOperationAction(ISD::BR_CC , MVT::Other, Expand);
				157	setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
				158	setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
				159	if (Subtarget->is64Bit())
Christopher Lamb	0a7c866	2007-08-10 21:48:46 +0000	[diff] [blame]	160	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
				161	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
				162	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	163	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
				164	setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
				165	setOperationAction(ISD::FREM , MVT::f64 , Expand);
				166
				167	setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
				168	setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
				169	setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
				170	setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
				171	setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
				172	setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
				173	setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
				174	setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
				175	setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
				176	if (Subtarget->is64Bit()) {
				177	setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
				178	setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
				179	setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
				180	}
				181
				182	setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
				183	setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
				184
				185	// These should be promoted to a larger select which is supported.
				186	setOperationAction(ISD::SELECT , MVT::i1 , Promote);
				187	setOperationAction(ISD::SELECT , MVT::i8 , Promote);
				188	// X86 wants to expand cmov itself.
				189	setOperationAction(ISD::SELECT , MVT::i16 , Custom);
				190	setOperationAction(ISD::SELECT , MVT::i32 , Custom);
				191	setOperationAction(ISD::SELECT , MVT::f32 , Custom);
				192	setOperationAction(ISD::SELECT , MVT::f64 , Custom);
				193	setOperationAction(ISD::SETCC , MVT::i8 , Custom);
				194	setOperationAction(ISD::SETCC , MVT::i16 , Custom);
				195	setOperationAction(ISD::SETCC , MVT::i32 , Custom);
				196	setOperationAction(ISD::SETCC , MVT::f32 , Custom);
				197	setOperationAction(ISD::SETCC , MVT::f64 , Custom);
				198	if (Subtarget->is64Bit()) {
				199	setOperationAction(ISD::SELECT , MVT::i64 , Custom);
				200	setOperationAction(ISD::SETCC , MVT::i64 , Custom);
				201	}
				202	// X86 ret instruction may pop stack.
				203	setOperationAction(ISD::RET , MVT::Other, Custom);
				204	if (!Subtarget->is64Bit())
				205	setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
				206
				207	// Darwin ABI issue.
				208	setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
				209	setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
				210	setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
				211	setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
				212	setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
				213	if (Subtarget->is64Bit()) {
				214	setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
				215	setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
				216	setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
				217	setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
				218	}
				219	// 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
				220	setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
				221	setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
				222	setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
				223	// X86 wants to expand memset / memcpy itself.
				224	setOperationAction(ISD::MEMSET , MVT::Other, Custom);
				225	setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
				226
				227	// We don't have line number support yet.
				228	setOperationAction(ISD::LOCATION, MVT::Other, Expand);
				229	setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
				230	// FIXME - use subtarget debug flags
				231	if (!Subtarget->isTargetDarwin() &&
				232	!Subtarget->isTargetELF() &&
				233	!Subtarget->isTargetCygMing())
				234	setOperationAction(ISD::LABEL, MVT::Other, Expand);
				235
				236	setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
				237	setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
				238	setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
				239	setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
				240	if (Subtarget->is64Bit()) {
				241	// FIXME: Verify
				242	setExceptionPointerRegister(X86::RAX);
				243	setExceptionSelectorRegister(X86::RDX);
				244	} else {
				245	setExceptionPointerRegister(X86::EAX);
				246	setExceptionSelectorRegister(X86::EDX);
				247	}
				248
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	249	setOperationAction(ISD::ADJUST_TRAMP, MVT::i32, Expand);
				250	setOperationAction(ISD::ADJUST_TRAMP, MVT::i64, Expand);
				251	setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
				252
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	253	// VASTART needs to be custom lowered to use the VarArgsFrameIndex
				254	setOperationAction(ISD::VASTART , MVT::Other, Custom);
				255	setOperationAction(ISD::VAARG , MVT::Other, Expand);
				256	setOperationAction(ISD::VAEND , MVT::Other, Expand);
				257	if (Subtarget->is64Bit())
				258	setOperationAction(ISD::VACOPY , MVT::Other, Custom);
				259	else
				260	setOperationAction(ISD::VACOPY , MVT::Other, Expand);
				261
				262	setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
				263	setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
				264	if (Subtarget->is64Bit())
				265	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
				266	if (Subtarget->isTargetCygMing())
				267	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
				268	else
				269	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
				270
				271	if (X86ScalarSSE) {
				272	// Set up the FP register classes.
				273	addRegisterClass(MVT::f32, X86::FR32RegisterClass);
				274	addRegisterClass(MVT::f64, X86::FR64RegisterClass);
				275
				276	// Use ANDPD to simulate FABS.
				277	setOperationAction(ISD::FABS , MVT::f64, Custom);
				278	setOperationAction(ISD::FABS , MVT::f32, Custom);
				279
				280	// Use XORP to simulate FNEG.
				281	setOperationAction(ISD::FNEG , MVT::f64, Custom);
				282	setOperationAction(ISD::FNEG , MVT::f32, Custom);
				283
				284	// Use ANDPD and ORPD to simulate FCOPYSIGN.
				285	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
				286	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
				287
				288	// We don't support sin/cos/fmod
				289	setOperationAction(ISD::FSIN , MVT::f64, Expand);
				290	setOperationAction(ISD::FCOS , MVT::f64, Expand);
				291	setOperationAction(ISD::FREM , MVT::f64, Expand);
				292	setOperationAction(ISD::FSIN , MVT::f32, Expand);
				293	setOperationAction(ISD::FCOS , MVT::f32, Expand);
				294	setOperationAction(ISD::FREM , MVT::f32, Expand);
				295
				296	// Expand FP immediates into loads from the stack, except for the special
				297	// cases we handle.
				298	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				299	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
				300	addLegalFPImmediate(+0.0); // xorps / xorpd
Dale Johannesen	8f83a6b	2007-08-09 01:04:01 +0000	[diff] [blame]	301
				302	// Conversions to long double (in X87) go through memory.
				303	setConvertAction(MVT::f32, MVT::f80, Expand);
				304	setConvertAction(MVT::f64, MVT::f80, Expand);
				305
				306	// Conversions from long double (in X87) go through memory.
				307	setConvertAction(MVT::f80, MVT::f32, Expand);
				308	setConvertAction(MVT::f80, MVT::f64, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	309	} else {
				310	// Set up the FP register classes.
				311	addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
				312	addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
				313
				314	setOperationAction(ISD::UNDEF, MVT::f64, Expand);
				315	setOperationAction(ISD::UNDEF, MVT::f32, Expand);
				316	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
				317	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
Dale Johannesen	8f83a6b	2007-08-09 01:04:01 +0000	[diff] [blame]	318
				319	// Floating truncations need to go through memory.
				320	setConvertAction(MVT::f80, MVT::f32, Expand);
				321	setConvertAction(MVT::f64, MVT::f32, Expand);
				322	setConvertAction(MVT::f80, MVT::f64, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	323
				324	if (!UnsafeFPMath) {
				325	setOperationAction(ISD::FSIN , MVT::f64 , Expand);
				326	setOperationAction(ISD::FCOS , MVT::f64 , Expand);
				327	}
				328
				329	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				330	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
				331	addLegalFPImmediate(+0.0); // FLD0
				332	addLegalFPImmediate(+1.0); // FLD1
				333	addLegalFPImmediate(-0.0); // FLD0/FCHS
				334	addLegalFPImmediate(-1.0); // FLD1/FCHS
				335	}
				336
Dale Johannesen	4ab00bd	2007-08-05 18:49:15 +0000	[diff] [blame]	337	// Long double always uses X87.
				338	addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
				339
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	340	// First set operation action for all vector types to expand. Then we
				341	// will selectively turn on ones that can be effectively codegen'd.
				342	for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
				343	VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
				344	setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
				345	setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
				346	setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
				347	setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
				348	setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
				349	setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
				350	setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
				351	setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
				352	setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
				353	setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
				354	setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
				355	setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
				356	setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
				357	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
				358	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				359	setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				360	setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
				361	setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
				362	setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
				363	setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
				364	setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
				365	setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
				366	setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
				367	}
				368
				369	if (Subtarget->hasMMX()) {
				370	addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
				371	addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
				372	addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
				373	addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
				374
				375	// FIXME: add MMX packed arithmetics
				376
				377	setOperationAction(ISD::ADD, MVT::v8i8, Legal);
				378	setOperationAction(ISD::ADD, MVT::v4i16, Legal);
				379	setOperationAction(ISD::ADD, MVT::v2i32, Legal);
				380	setOperationAction(ISD::ADD, MVT::v1i64, Legal);
				381
				382	setOperationAction(ISD::SUB, MVT::v8i8, Legal);
				383	setOperationAction(ISD::SUB, MVT::v4i16, Legal);
				384	setOperationAction(ISD::SUB, MVT::v2i32, Legal);
				385
				386	setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
				387	setOperationAction(ISD::MUL, MVT::v4i16, Legal);
				388
				389	setOperationAction(ISD::AND, MVT::v8i8, Promote);
				390	AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
				391	setOperationAction(ISD::AND, MVT::v4i16, Promote);
				392	AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
				393	setOperationAction(ISD::AND, MVT::v2i32, Promote);
				394	AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
				395	setOperationAction(ISD::AND, MVT::v1i64, Legal);
				396
				397	setOperationAction(ISD::OR, MVT::v8i8, Promote);
				398	AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
				399	setOperationAction(ISD::OR, MVT::v4i16, Promote);
				400	AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
				401	setOperationAction(ISD::OR, MVT::v2i32, Promote);
				402	AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
				403	setOperationAction(ISD::OR, MVT::v1i64, Legal);
				404
				405	setOperationAction(ISD::XOR, MVT::v8i8, Promote);
				406	AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
				407	setOperationAction(ISD::XOR, MVT::v4i16, Promote);
				408	AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
				409	setOperationAction(ISD::XOR, MVT::v2i32, Promote);
				410	AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
				411	setOperationAction(ISD::XOR, MVT::v1i64, Legal);
				412
				413	setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
				414	AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
				415	setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
				416	AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
				417	setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
				418	AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
				419	setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
				420
				421	setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
				422	setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
				423	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
				424	setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
				425
				426	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
				427	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
				428	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
				429	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
				430
				431	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
				432	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
				433	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
				434	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
				435	}
				436
				437	if (Subtarget->hasSSE1()) {
				438	addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
				439
				440	setOperationAction(ISD::FADD, MVT::v4f32, Legal);
				441	setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
				442	setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
				443	setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
				444	setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
				445	setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	446	setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
				447	setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
				448	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
				449	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
				450	setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
				451	}
				452
				453	if (Subtarget->hasSSE2()) {
				454	addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
				455	addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
				456	addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
				457	addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
				458	addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
				459
				460	setOperationAction(ISD::ADD, MVT::v16i8, Legal);
				461	setOperationAction(ISD::ADD, MVT::v8i16, Legal);
				462	setOperationAction(ISD::ADD, MVT::v4i32, Legal);
				463	setOperationAction(ISD::ADD, MVT::v2i64, Legal);
				464	setOperationAction(ISD::SUB, MVT::v16i8, Legal);
				465	setOperationAction(ISD::SUB, MVT::v8i16, Legal);
				466	setOperationAction(ISD::SUB, MVT::v4i32, Legal);
				467	setOperationAction(ISD::SUB, MVT::v2i64, Legal);
				468	setOperationAction(ISD::MUL, MVT::v8i16, Legal);
				469	setOperationAction(ISD::FADD, MVT::v2f64, Legal);
				470	setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
				471	setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
				472	setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
				473	setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
				474	setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	475
				476	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
				477	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
				478	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
				479	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
				480	// Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
				481	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
				482
				483	// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
				484	for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
				485	setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
				486	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
				487	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
				488	}
				489	setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
				490	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
				491	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
				492	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
				493	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
				494	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
				495
				496	// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
				497	for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
				498	setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
				499	AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
				500	setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
				501	AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
				502	setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
				503	AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
				504	setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
				505	AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
				506	setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
				507	AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
				508	}
				509
				510	// Custom lower v2i64 and v2f64 selects.
				511	setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
				512	setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
				513	setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
				514	setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
				515	}
				516
				517	// We want to custom lower some of our intrinsics.
				518	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
				519
				520	// We have target-specific dag combine patterns for the following nodes:
				521	setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
				522	setTargetDAGCombine(ISD::SELECT);
				523
				524	computeRegisterProperties();
				525
				526	// FIXME: These should be based on subtarget info. Plus, the values should
				527	// be smaller when we are in optimizing for size mode.
				528	maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
				529	maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
				530	maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
				531	allowUnalignedMemoryAccesses = true; // x86 supports it!
				532	}
				533
				534
				535	//===----------------------------------------------------------------------===//
				536	// Return Value Calling Convention Implementation
				537	//===----------------------------------------------------------------------===//
				538
				539	#include "X86GenCallingConv.inc"
				540
				541	/// LowerRET - Lower an ISD::RET node.
				542	SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
				543	assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
				544
				545	SmallVector<CCValAssign, 16> RVLocs;
				546	unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
				547	bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
				548	CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
				549	CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
				550
				551
				552	// If this is the first return lowered for this function, add the regs to the
				553	// liveout set for the function.
				554	if (DAG.getMachineFunction().liveout_empty()) {
				555	for (unsigned i = 0; i != RVLocs.size(); ++i)
				556	if (RVLocs[i].isRegLoc())
				557	DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
				558	}
				559
				560	SDOperand Chain = Op.getOperand(0);
				561	SDOperand Flag;
				562
				563	// Copy the result values into the output registers.
				564	if (RVLocs.size() != 1 \|\| !RVLocs[0].isRegLoc() \|\|
				565	RVLocs[0].getLocReg() != X86::ST0) {
				566	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				567	CCValAssign &VA = RVLocs[i];
				568	assert(VA.isRegLoc() && "Can only return in registers!");
				569	Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
				570	Flag);
				571	Flag = Chain.getValue(1);
				572	}
				573	} else {
				574	// We need to handle a destination of ST0 specially, because it isn't really
				575	// a register.
				576	SDOperand Value = Op.getOperand(1);
				577
				578	// If this is an FP return with ScalarSSE, we need to move the value from
				579	// an XMM register onto the fp-stack.
				580	if (X86ScalarSSE) {
				581	SDOperand MemLoc;
				582
				583	// If this is a load into a scalarsse value, don't store the loaded value
				584	// back to the stack, only to reload it: just replace the scalar-sse load.
				585	if (ISD::isNON_EXTLoad(Value.Val) &&
				586	(Chain == Value.getValue(1) \|\| Chain == Value.getOperand(0))) {
				587	Chain = Value.getOperand(0);
				588	MemLoc = Value.getOperand(1);
				589	} else {
				590	// Spill the value to memory and reload it into top of stack.
				591	unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
				592	MachineFunction &MF = DAG.getMachineFunction();
				593	int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
				594	MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
				595	Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
				596	}
				597	SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
				598	SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
				599	Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
				600	Chain = Value.getValue(1);
				601	}
				602
				603	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				604	SDOperand Ops[] = { Chain, Value };
				605	Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
				606	Flag = Chain.getValue(1);
				607	}
				608
				609	SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
				610	if (Flag.Val)
				611	return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
				612	else
				613	return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
				614	}
				615
				616
				617	/// LowerCallResult - Lower the result values of an ISD::CALL into the
				618	/// appropriate copies out of appropriate physical registers. This assumes that
				619	/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
				620	/// being lowered. The returns a SDNode with the same number of values as the
				621	/// ISD::CALL.
				622	SDNode *X86TargetLowering::
				623	LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
				624	unsigned CallingConv, SelectionDAG &DAG) {
				625
				626	// Assign locations to each value returned by this call.
				627	SmallVector<CCValAssign, 16> RVLocs;
				628	bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
				629	CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
				630	CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
				631
				632
				633	SmallVector<SDOperand, 8> ResultVals;
				634
				635	// Copy all of the result registers out of their specified physreg.
				636	if (RVLocs.size() != 1 \|\| RVLocs[0].getLocReg() != X86::ST0) {
				637	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				638	Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
				639	RVLocs[i].getValVT(), InFlag).getValue(1);
				640	InFlag = Chain.getValue(2);
				641	ResultVals.push_back(Chain.getValue(0));
				642	}
				643	} else {
				644	// Copies from the FP stack are special, as ST0 isn't a valid register
				645	// before the fp stackifier runs.
				646
				647	// Copy ST0 into an RFP register with FP_GET_RESULT.
				648	SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
				649	SDOperand GROps[] = { Chain, InFlag };
				650	SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
				651	Chain = RetVal.getValue(1);
				652	InFlag = RetVal.getValue(2);
				653
				654	// If we are using ScalarSSE, store ST(0) to the stack and reload it into
				655	// an XMM register.
				656	if (X86ScalarSSE) {
				657	// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
				658	// shouldn't be necessary except that RFP cannot be live across
				659	// multiple blocks. When stackifier is fixed, they can be uncoupled.
				660	MachineFunction &MF = DAG.getMachineFunction();
				661	int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
				662	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				663	SDOperand Ops[] = {
				664	Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
				665	};
				666	Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
				667	RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
				668	Chain = RetVal.getValue(1);
				669	}
				670	ResultVals.push_back(RetVal);
				671	}
				672
				673	// Merge everything together with a MERGE_VALUES node.
				674	ResultVals.push_back(Chain);
				675	return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
				676	&ResultVals[0], ResultVals.size()).Val;
				677	}
				678
				679
				680	//===----------------------------------------------------------------------===//
				681	// C & StdCall Calling Convention implementation
				682	//===----------------------------------------------------------------------===//
// The StdCall calling convention is the standard for many Windows API
// routines. It differs from the C calling convention only slightly: the
// callee cleans up the stack rather than the caller, and symbols are also
// decorated in a special way. It doesn't support any vector arguments.
				687
				688	/// AddLiveIn - This helper function adds the specified physical register to the
				689	/// MachineFunction as a live in value. It also creates a corresponding virtual
				690	/// register for it.
				691	static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
				692	const TargetRegisterClass *RC) {
				693	assert(RC->contains(PReg) && "Not the correct regclass!");
				694	unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
				695	MF.addLiveIn(PReg, VReg);
				696	return VReg;
				697	}
				698
				699	SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
				700	bool isStdCall) {
				701	unsigned NumArgs = Op.Val->getNumValues() - 1;
				702	MachineFunction &MF = DAG.getMachineFunction();
				703	MachineFrameInfo *MFI = MF.getFrameInfo();
				704	SDOperand Root = Op.getOperand(0);
				705	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				706
				707	// Assign locations to all of the incoming arguments.
				708	SmallVector<CCValAssign, 16> ArgLocs;
				709	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				710	getTargetMachine(), ArgLocs);
				711	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
				712
				713	SmallVector<SDOperand, 8> ArgValues;
				714	unsigned LastVal = ~0U;
				715	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				716	CCValAssign &VA = ArgLocs[i];
				717	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				718	// places.
				719	assert(VA.getValNo() != LastVal &&
				720	"Don't support value assigned to multiple locs yet");
				721	LastVal = VA.getValNo();
				722
				723	if (VA.isRegLoc()) {
				724	MVT::ValueType RegVT = VA.getLocVT();
				725	TargetRegisterClass *RC;
				726	if (RegVT == MVT::i32)
				727	RC = X86::GR32RegisterClass;
				728	else {
				729	assert(MVT::isVector(RegVT));
				730	RC = X86::VR128RegisterClass;
				731	}
				732
				733	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				734	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				735
				736	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				737	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				738	// right size.
				739	if (VA.getLocInfo() == CCValAssign::SExt)
				740	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				741	DAG.getValueType(VA.getValVT()));
				742	else if (VA.getLocInfo() == CCValAssign::ZExt)
				743	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				744	DAG.getValueType(VA.getValVT()));
				745
				746	if (VA.getLocInfo() != CCValAssign::Full)
				747	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				748
				749	ArgValues.push_back(ArgValue);
				750	} else {
				751	assert(VA.isMemLoc());
				752
				753	// Create the nodes corresponding to a load from this parameter slot.
				754	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				755	VA.getLocMemOffset());
				756	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
				757	ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
				758	}
				759	}
				760
				761	unsigned StackSize = CCInfo.getNextStackOffset();
				762
				763	ArgValues.push_back(Root);
				764
				765	// If the function takes variable number of arguments, make a frame index for
				766	// the start of the first vararg value... for expansion of llvm.va_start.
				767	if (isVarArg)
				768	VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
				769
				770	if (isStdCall && !isVarArg) {
				771	BytesToPopOnReturn = StackSize; // Callee pops everything..
				772	BytesCallerReserves = 0;
				773	} else {
				774	BytesToPopOnReturn = 0; // Callee pops nothing.
				775
				776	// If this is an sret function, the return should pop the hidden pointer.
				777	if (NumArgs &&
				778	(cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
				779	ISD::ParamFlags::StructReturn))
				780	BytesToPopOnReturn = 4;
				781
				782	BytesCallerReserves = StackSize;
				783	}
				784
				785	RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
				786	ReturnAddrIndex = 0; // No return address slot generated yet.
				787
				788	MF.getInfo<X86MachineFunctionInfo>()
				789	->setBytesToPopOnReturn(BytesToPopOnReturn);
				790
				791	// Return the new list of results.
				792	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				793	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				794	}
				795
				796	SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
				797	unsigned CC) {
				798	SDOperand Chain = Op.getOperand(0);
				799	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				800	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				801	SDOperand Callee = Op.getOperand(4);
				802	unsigned NumOps = (Op.getNumOperands() - 5) / 2;
				803
				804	// Analyze operands of the call, assigning locations to each operand.
				805	SmallVector<CCValAssign, 16> ArgLocs;
				806	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				807	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
				808
				809	// Get a count of how many bytes are to be pushed on the stack.
				810	unsigned NumBytes = CCInfo.getNextStackOffset();
				811
				812	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				813
				814	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				815	SmallVector<SDOperand, 8> MemOpChains;
				816
				817	SDOperand StackPtr;
				818
				819	// Walk the register/memloc assignments, inserting copies/loads.
				820	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				821	CCValAssign &VA = ArgLocs[i];
				822	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				823
				824	// Promote the value if needed.
				825	switch (VA.getLocInfo()) {
				826	default: assert(0 && "Unknown loc info!");
				827	case CCValAssign::Full: break;
				828	case CCValAssign::SExt:
				829	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				830	break;
				831	case CCValAssign::ZExt:
				832	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				833	break;
				834	case CCValAssign::AExt:
				835	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				836	break;
				837	}
				838
				839	if (VA.isRegLoc()) {
				840	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				841	} else {
				842	assert(VA.isMemLoc());
				843	if (StackPtr.Val == 0)
				844	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
				845	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				846	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				847	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				848	}
				849	}
				850
				851	// If the first argument is an sret pointer, remember it.
				852	bool isSRet = NumOps &&
				853	(cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
				854	ISD::ParamFlags::StructReturn);
				855
				856	if (!MemOpChains.empty())
				857	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				858	&MemOpChains[0], MemOpChains.size());
				859
				860	// Build a sequence of copy-to-reg nodes chained together with token chain
				861	// and flag operands which copy the outgoing args into registers.
				862	SDOperand InFlag;
				863	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				864	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				865	InFlag);
				866	InFlag = Chain.getValue(1);
				867	}
				868
				869	// ELF / PIC requires GOT in the EBX register before function calls via PLT
				870	// GOT pointer.
				871	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				872	Subtarget->isPICStyleGOT()) {
				873	Chain = DAG.getCopyToReg(Chain, X86::EBX,
				874	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				875	InFlag);
				876	InFlag = Chain.getValue(1);
				877	}
				878
				879	// If the callee is a GlobalAddress node (quite common, every direct call is)
				880	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				881	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				882	// We should use extra load for direct calls to dllimported functions in
				883	// non-JIT mode.
				884	if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				885	getTargetMachine(), true))
				886	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				887	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				888	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				889
				890	// Returns a chain & a flag for retval copy to use.
				891	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				892	SmallVector<SDOperand, 8> Ops;
				893	Ops.push_back(Chain);
				894	Ops.push_back(Callee);
				895
				896	// Add argument registers to the end of the list so that they are known live
				897	// into the call.
				898	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				899	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				900	RegsToPass[i].second.getValueType()));
				901
				902	// Add an implicit use GOT pointer in EBX.
				903	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				904	Subtarget->isPICStyleGOT())
				905	Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
				906
				907	if (InFlag.Val)
				908	Ops.push_back(InFlag);
				909
				910	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				911	NodeTys, &Ops[0], Ops.size());
				912	InFlag = Chain.getValue(1);
				913
				914	// Create the CALLSEQ_END node.
				915	unsigned NumBytesForCalleeToPush = 0;
				916
				917	if (CC == CallingConv::X86_StdCall) {
				918	if (isVarArg)
				919	NumBytesForCalleeToPush = isSRet ? 4 : 0;
				920	else
				921	NumBytesForCalleeToPush = NumBytes;
				922	} else {
				923	// If this is is a call to a struct-return function, the callee
				924	// pops the hidden struct pointer, so we have to push it back.
				925	// This is common for Darwin/X86, Linux & Mingw32 targets.
				926	NumBytesForCalleeToPush = isSRet ? 4 : 0;
				927	}
				928
				929	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				930	Ops.clear();
				931	Ops.push_back(Chain);
				932	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				933	Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
				934	Ops.push_back(InFlag);
				935	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				936	InFlag = Chain.getValue(1);
				937
				938	// Handle result values, copying them out of physregs into vregs that we
				939	// return.
				940	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				941	}
				942
				943
				944	//===----------------------------------------------------------------------===//
				945	// FastCall Calling Convention implementation
				946	//===----------------------------------------------------------------------===//
				947	//
				948	// The X86 'fastcall' calling convention passes up to two integer arguments in
				949	// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
				950	// and requires that the callee pop its arguments off the stack (allowing proper
				951	// tail calls), and has the same return value conventions as C calling convs.
				952	//
				953	// This calling convention always arranges for the callee pop value to be 8n+4
				954	// bytes, which is needed for tail recursion elimination and stack alignment
				955	// reasons.
				956	SDOperand
				957	X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
				958	MachineFunction &MF = DAG.getMachineFunction();
				959	MachineFrameInfo *MFI = MF.getFrameInfo();
				960	SDOperand Root = Op.getOperand(0);
				961	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				962
				963	// Assign locations to all of the incoming arguments.
				964	SmallVector<CCValAssign, 16> ArgLocs;
				965	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				966	getTargetMachine(), ArgLocs);
				967	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);
				968
				969	SmallVector<SDOperand, 8> ArgValues;
				970	unsigned LastVal = ~0U;
				971	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				972	CCValAssign &VA = ArgLocs[i];
				973	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				974	// places.
				975	assert(VA.getValNo() != LastVal &&
				976	"Don't support value assigned to multiple locs yet");
				977	LastVal = VA.getValNo();
				978
				979	if (VA.isRegLoc()) {
				980	MVT::ValueType RegVT = VA.getLocVT();
				981	TargetRegisterClass *RC;
				982	if (RegVT == MVT::i32)
				983	RC = X86::GR32RegisterClass;
				984	else {
				985	assert(MVT::isVector(RegVT));
				986	RC = X86::VR128RegisterClass;
				987	}
				988
				989	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				990	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				991
				992	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				993	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				994	// right size.
				995	if (VA.getLocInfo() == CCValAssign::SExt)
				996	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				997	DAG.getValueType(VA.getValVT()));
				998	else if (VA.getLocInfo() == CCValAssign::ZExt)
				999	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				1000	DAG.getValueType(VA.getValVT()));
				1001
				1002	if (VA.getLocInfo() != CCValAssign::Full)
				1003	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				1004
				1005	ArgValues.push_back(ArgValue);
				1006	} else {
				1007	assert(VA.isMemLoc());
				1008
				1009	// Create the nodes corresponding to a load from this parameter slot.
				1010	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				1011	VA.getLocMemOffset());
				1012	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
				1013	ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
				1014	}
				1015	}
				1016
				1017	ArgValues.push_back(Root);
				1018
				1019	unsigned StackSize = CCInfo.getNextStackOffset();
				1020
				1021	if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
				1022	// Make sure the instruction takes 8n+4 bytes to make sure the start of the
				1023	// arguments and the arguments after the retaddr has been pushed are aligned.
				1024	if ((StackSize & 7) == 0)
				1025	StackSize += 4;
				1026	}
				1027
				1028	VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
				1029	RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
				1030	ReturnAddrIndex = 0; // No return address slot generated yet.
				1031	BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
				1032	BytesCallerReserves = 0;
				1033
				1034	MF.getInfo<X86MachineFunctionInfo>()
				1035	->setBytesToPopOnReturn(BytesToPopOnReturn);
				1036
				1037	// Return the new list of results.
				1038	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				1039	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				1040	}
				1041
				1042	SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
				1043	unsigned CC) {
				1044	SDOperand Chain = Op.getOperand(0);
				1045	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1046	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1047	SDOperand Callee = Op.getOperand(4);
				1048
				1049	// Analyze operands of the call, assigning locations to each operand.
				1050	SmallVector<CCValAssign, 16> ArgLocs;
				1051	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				1052	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);
				1053
				1054	// Get a count of how many bytes are to be pushed on the stack.
				1055	unsigned NumBytes = CCInfo.getNextStackOffset();
				1056
				1057	if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
				1058	// Make sure the instruction takes 8n+4 bytes to make sure the start of the
				1059	// arguments and the arguments after the retaddr has been pushed are aligned.
				1060	if ((NumBytes & 7) == 0)
				1061	NumBytes += 4;
				1062	}
				1063
				1064	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				1065
				1066	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				1067	SmallVector<SDOperand, 8> MemOpChains;
				1068
				1069	SDOperand StackPtr;
				1070
				1071	// Walk the register/memloc assignments, inserting copies/loads.
				1072	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1073	CCValAssign &VA = ArgLocs[i];
				1074	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				1075
				1076	// Promote the value if needed.
				1077	switch (VA.getLocInfo()) {
				1078	default: assert(0 && "Unknown loc info!");
				1079	case CCValAssign::Full: break;
				1080	case CCValAssign::SExt:
				1081	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				1082	break;
				1083	case CCValAssign::ZExt:
				1084	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				1085	break;
				1086	case CCValAssign::AExt:
				1087	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				1088	break;
				1089	}
				1090
				1091	if (VA.isRegLoc()) {
				1092	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1093	} else {
				1094	assert(VA.isMemLoc());
				1095	if (StackPtr.Val == 0)
				1096	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
				1097	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				1098	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				1099	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1100	}
				1101	}
				1102
				1103	if (!MemOpChains.empty())
				1104	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1105	&MemOpChains[0], MemOpChains.size());
				1106
				1107	// Build a sequence of copy-to-reg nodes chained together with token chain
				1108	// and flag operands which copy the outgoing args into registers.
				1109	SDOperand InFlag;
				1110	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1111	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1112	InFlag);
				1113	InFlag = Chain.getValue(1);
				1114	}
				1115
				1116	// If the callee is a GlobalAddress node (quite common, every direct call is)
				1117	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				1118	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1119	// We should use extra load for direct calls to dllimported functions in
				1120	// non-JIT mode.
				1121	if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				1122	getTargetMachine(), true))
				1123	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				1124	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1125	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				1126
				1127	// ELF / PIC requires GOT in the EBX register before function calls via PLT
				1128	// GOT pointer.
				1129	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				1130	Subtarget->isPICStyleGOT()) {
				1131	Chain = DAG.getCopyToReg(Chain, X86::EBX,
				1132	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				1133	InFlag);
				1134	InFlag = Chain.getValue(1);
				1135	}
				1136
				1137	// Returns a chain & a flag for retval copy to use.
				1138	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1139	SmallVector<SDOperand, 8> Ops;
				1140	Ops.push_back(Chain);
				1141	Ops.push_back(Callee);
				1142
				1143	// Add argument registers to the end of the list so that they are known live
				1144	// into the call.
				1145	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1146	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1147	RegsToPass[i].second.getValueType()));
				1148
				1149	// Add an implicit use GOT pointer in EBX.
				1150	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				1151	Subtarget->isPICStyleGOT())
				1152	Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
				1153
				1154	if (InFlag.Val)
				1155	Ops.push_back(InFlag);
				1156
				1157	// FIXME: Do not generate X86ISD::TAILCALL for now.
				1158	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				1159	NodeTys, &Ops[0], Ops.size());
				1160	InFlag = Chain.getValue(1);
				1161
				1162	// Returns a flag for retval copy to use.
				1163	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1164	Ops.clear();
				1165	Ops.push_back(Chain);
				1166	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1167	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1168	Ops.push_back(InFlag);
				1169	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1170	InFlag = Chain.getValue(1);
				1171
				1172	// Handle result values, copying them out of physregs into vregs that we
				1173	// return.
				1174	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1175	}
				1176
				1177
				1178	//===----------------------------------------------------------------------===//
				1179	// X86-64 C Calling Convention implementation
				1180	//===----------------------------------------------------------------------===//
				1181
				1182	SDOperand
				1183	X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
				1184	MachineFunction &MF = DAG.getMachineFunction();
				1185	MachineFrameInfo *MFI = MF.getFrameInfo();
				1186	SDOperand Root = Op.getOperand(0);
				1187	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1188
				1189	static const unsigned GPR64ArgRegs[] = {
				1190	X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
				1191	};
				1192	static const unsigned XMMArgRegs[] = {
				1193	X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
				1194	X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
				1195	};
				1196
				1197
				1198	// Assign locations to all of the incoming arguments.
				1199	SmallVector<CCValAssign, 16> ArgLocs;
				1200	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				1201	getTargetMachine(), ArgLocs);
				1202	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
				1203
				1204	SmallVector<SDOperand, 8> ArgValues;
				1205	unsigned LastVal = ~0U;
				1206	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1207	CCValAssign &VA = ArgLocs[i];
				1208	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				1209	// places.
				1210	assert(VA.getValNo() != LastVal &&
				1211	"Don't support value assigned to multiple locs yet");
				1212	LastVal = VA.getValNo();
				1213
				1214	if (VA.isRegLoc()) {
				1215	MVT::ValueType RegVT = VA.getLocVT();
				1216	TargetRegisterClass *RC;
				1217	if (RegVT == MVT::i32)
				1218	RC = X86::GR32RegisterClass;
				1219	else if (RegVT == MVT::i64)
				1220	RC = X86::GR64RegisterClass;
				1221	else if (RegVT == MVT::f32)
				1222	RC = X86::FR32RegisterClass;
				1223	else if (RegVT == MVT::f64)
				1224	RC = X86::FR64RegisterClass;
				1225	else {
				1226	assert(MVT::isVector(RegVT));
				1227	if (MVT::getSizeInBits(RegVT) == 64) {
				1228	RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
				1229	RegVT = MVT::i64;
				1230	} else
				1231	RC = X86::VR128RegisterClass;
				1232	}
				1233
				1234	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				1235	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				1236
				1237	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				1238	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				1239	// right size.
				1240	if (VA.getLocInfo() == CCValAssign::SExt)
				1241	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				1242	DAG.getValueType(VA.getValVT()));
				1243	else if (VA.getLocInfo() == CCValAssign::ZExt)
				1244	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				1245	DAG.getValueType(VA.getValVT()));
				1246
				1247	if (VA.getLocInfo() != CCValAssign::Full)
				1248	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				1249
				1250	// Handle MMX values passed in GPRs.
				1251	if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
				1252	MVT::getSizeInBits(RegVT) == 64)
				1253	ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
				1254
				1255	ArgValues.push_back(ArgValue);
				1256	} else {
				1257	assert(VA.isMemLoc());
				1258
				1259	// Create the nodes corresponding to a load from this parameter slot.
				1260	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				1261	VA.getLocMemOffset());
				1262	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
Rafael Espindola	e4e4d3e	2007-08-10 14:44:42 +0000	[diff] [blame]	1263
				1264	unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
				1265	if (Flags & ISD::ParamFlags::ByVal)
				1266	ArgValues.push_back(FIN);
				1267	else
				1268	ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1269	}
				1270	}
				1271
				1272	unsigned StackSize = CCInfo.getNextStackOffset();
				1273
				1274	// If the function takes variable number of arguments, make a frame index for
				1275	// the start of the first vararg value... for expansion of llvm.va_start.
				1276	if (isVarArg) {
				1277	unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
				1278	unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
				1279
				1280	// For X86-64, if there are vararg parameters that are passed via
				1281	// registers, then we must store them to their spots on the stack so they
				1282	// may be loaded by deferencing the result of va_next.
				1283	VarArgsGPOffset = NumIntRegs * 8;
				1284	VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
				1285	VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
				1286	RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
				1287
				1288	// Store the integer parameter registers.
				1289	SmallVector<SDOperand, 8> MemOps;
				1290	SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
				1291	SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
				1292	DAG.getConstant(VarArgsGPOffset, getPointerTy()));
				1293	for (; NumIntRegs != 6; ++NumIntRegs) {
				1294	unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
				1295	X86::GR64RegisterClass);
				1296	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
				1297	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1298	MemOps.push_back(Store);
				1299	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				1300	DAG.getConstant(8, getPointerTy()));
				1301	}
				1302
				1303	// Now store the XMM (fp + vector) parameter registers.
				1304	FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
				1305	DAG.getConstant(VarArgsFPOffset, getPointerTy()));
				1306	for (; NumXMMRegs != 8; ++NumXMMRegs) {
				1307	unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
				1308	X86::VR128RegisterClass);
				1309	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
				1310	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1311	MemOps.push_back(Store);
				1312	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				1313	DAG.getConstant(16, getPointerTy()));
				1314	}
				1315	if (!MemOps.empty())
				1316	Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1317	&MemOps[0], MemOps.size());
				1318	}
				1319
				1320	ArgValues.push_back(Root);
				1321
				1322	ReturnAddrIndex = 0; // No return address slot generated yet.
				1323	BytesToPopOnReturn = 0; // Callee pops nothing.
				1324	BytesCallerReserves = StackSize;
				1325
				1326	// Return the new list of results.
				1327	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				1328	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				1329	}
				1330
				1331	SDOperand
				1332	X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
				1333	unsigned CC) {
				1334	SDOperand Chain = Op.getOperand(0);
				1335	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1336	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1337	SDOperand Callee = Op.getOperand(4);
				1338
				1339	// Analyze operands of the call, assigning locations to each operand.
				1340	SmallVector<CCValAssign, 16> ArgLocs;
				1341	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				1342	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
				1343
				1344	// Get a count of how many bytes are to be pushed on the stack.
				1345	unsigned NumBytes = CCInfo.getNextStackOffset();
				1346	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				1347
				1348	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				1349	SmallVector<SDOperand, 8> MemOpChains;
				1350
				1351	SDOperand StackPtr;
				1352
				1353	// Walk the register/memloc assignments, inserting copies/loads.
				1354	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1355	CCValAssign &VA = ArgLocs[i];
				1356	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				1357
				1358	// Promote the value if needed.
				1359	switch (VA.getLocInfo()) {
				1360	default: assert(0 && "Unknown loc info!");
				1361	case CCValAssign::Full: break;
				1362	case CCValAssign::SExt:
				1363	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				1364	break;
				1365	case CCValAssign::ZExt:
				1366	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				1367	break;
				1368	case CCValAssign::AExt:
				1369	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				1370	break;
				1371	}
				1372
				1373	if (VA.isRegLoc()) {
				1374	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1375	} else {
				1376	assert(VA.isMemLoc());
				1377	if (StackPtr.Val == 0)
				1378	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
				1379	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				1380	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				1381	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1382	}
				1383	}
				1384
				1385	if (!MemOpChains.empty())
				1386	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1387	&MemOpChains[0], MemOpChains.size());
				1388
				1389	// Build a sequence of copy-to-reg nodes chained together with token chain
				1390	// and flag operands which copy the outgoing args into registers.
				1391	SDOperand InFlag;
				1392	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1393	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1394	InFlag);
				1395	InFlag = Chain.getValue(1);
				1396	}
				1397
				1398	if (isVarArg) {
				1399	// From AMD64 ABI document:
				1400	// For calls that may call functions that use varargs or stdargs
				1401	// (prototype-less calls or calls to functions containing ellipsis (...) in
				1402	// the declaration) %al is used as hidden argument to specify the number
				1403	// of SSE registers used. The contents of %al do not need to match exactly
				1404	// the number of registers, but must be an ubound on the number of SSE
				1405	// registers used and is in the range 0 - 8 inclusive.
				1406
				1407	// Count the number of XMM registers allocated.
				1408	static const unsigned XMMArgRegs[] = {
				1409	X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
				1410	X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
				1411	};
				1412	unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
				1413
				1414	Chain = DAG.getCopyToReg(Chain, X86::AL,
				1415	DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
				1416	InFlag = Chain.getValue(1);
				1417	}
				1418
				1419	// If the callee is a GlobalAddress node (quite common, every direct call is)
				1420	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				1421	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1422	// We should use extra load for direct calls to dllimported functions in
				1423	// non-JIT mode.
				1424	if (getTargetMachine().getCodeModel() != CodeModel::Large
				1425	&& !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				1426	getTargetMachine(), true))
				1427	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				1428	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1429	if (getTargetMachine().getCodeModel() != CodeModel::Large)
				1430	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				1431
				1432	// Returns a chain & a flag for retval copy to use.
				1433	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1434	SmallVector<SDOperand, 8> Ops;
				1435	Ops.push_back(Chain);
				1436	Ops.push_back(Callee);
				1437
				1438	// Add argument registers to the end of the list so that they are known live
				1439	// into the call.
				1440	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1441	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1442	RegsToPass[i].second.getValueType()));
				1443
				1444	if (InFlag.Val)
				1445	Ops.push_back(InFlag);
				1446
				1447	// FIXME: Do not generate X86ISD::TAILCALL for now.
				1448	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				1449	NodeTys, &Ops[0], Ops.size());
				1450	InFlag = Chain.getValue(1);
				1451
				1452	// Returns a flag for retval copy to use.
				1453	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1454	Ops.clear();
				1455	Ops.push_back(Chain);
				1456	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1457	Ops.push_back(DAG.getConstant(0, getPointerTy()));
				1458	Ops.push_back(InFlag);
				1459	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1460	InFlag = Chain.getValue(1);
				1461
				1462	// Handle result values, copying them out of physregs into vregs that we
				1463	// return.
				1464	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1465	}
				1466
				1467
				1468	//===----------------------------------------------------------------------===//
				1469	// Other Lowering Hooks
				1470	//===----------------------------------------------------------------------===//
				1471
				1472
				1473	SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
				1474	if (ReturnAddrIndex == 0) {
				1475	// Set up a frame object for the return address.
				1476	MachineFunction &MF = DAG.getMachineFunction();
				1477	if (Subtarget->is64Bit())
				1478	ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
				1479	else
				1480	ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
				1481	}
				1482
				1483	return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
				1484	}
				1485
				1486
				1487
				1488	/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
				1489	/// specific condition code. It returns a false if it cannot do a direct
				1490	/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
				1491	/// needed.
				1492	static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
				1493	unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
				1494	SelectionDAG &DAG) {
				1495	X86CC = X86::COND_INVALID;
				1496	if (!isFP) {
				1497	if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
				1498	if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
				1499	// X > -1 -> X == 0, jump !sign.
				1500	RHS = DAG.getConstant(0, RHS.getValueType());
				1501	X86CC = X86::COND_NS;
				1502	return true;
				1503	} else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
				1504	// X < 0 -> X == 0, jump on sign.
				1505	X86CC = X86::COND_S;
				1506	return true;
				1507	}
				1508	}
				1509
				1510	switch (SetCCOpcode) {
				1511	default: break;
				1512	case ISD::SETEQ: X86CC = X86::COND_E; break;
				1513	case ISD::SETGT: X86CC = X86::COND_G; break;
				1514	case ISD::SETGE: X86CC = X86::COND_GE; break;
				1515	case ISD::SETLT: X86CC = X86::COND_L; break;
				1516	case ISD::SETLE: X86CC = X86::COND_LE; break;
				1517	case ISD::SETNE: X86CC = X86::COND_NE; break;
				1518	case ISD::SETULT: X86CC = X86::COND_B; break;
				1519	case ISD::SETUGT: X86CC = X86::COND_A; break;
				1520	case ISD::SETULE: X86CC = X86::COND_BE; break;
				1521	case ISD::SETUGE: X86CC = X86::COND_AE; break;
				1522	}
				1523	} else {
				1524	// On a floating point condition, the flags are set as follows:
				1525	// ZF PF CF op
				1526	// 0 \| 0 \| 0 \| X > Y
				1527	// 0 \| 0 \| 1 \| X < Y
				1528	// 1 \| 0 \| 0 \| X == Y
				1529	// 1 \| 1 \| 1 \| unordered
				1530	bool Flip = false;
				1531	switch (SetCCOpcode) {
				1532	default: break;
				1533	case ISD::SETUEQ:
				1534	case ISD::SETEQ: X86CC = X86::COND_E; break;
				1535	case ISD::SETOLT: Flip = true; // Fallthrough
				1536	case ISD::SETOGT:
				1537	case ISD::SETGT: X86CC = X86::COND_A; break;
				1538	case ISD::SETOLE: Flip = true; // Fallthrough
				1539	case ISD::SETOGE:
				1540	case ISD::SETGE: X86CC = X86::COND_AE; break;
				1541	case ISD::SETUGT: Flip = true; // Fallthrough
				1542	case ISD::SETULT:
				1543	case ISD::SETLT: X86CC = X86::COND_B; break;
				1544	case ISD::SETUGE: Flip = true; // Fallthrough
				1545	case ISD::SETULE:
				1546	case ISD::SETLE: X86CC = X86::COND_BE; break;
				1547	case ISD::SETONE:
				1548	case ISD::SETNE: X86CC = X86::COND_NE; break;
				1549	case ISD::SETUO: X86CC = X86::COND_P; break;
				1550	case ISD::SETO: X86CC = X86::COND_NP; break;
				1551	}
				1552	if (Flip)
				1553	std::swap(LHS, RHS);
				1554	}
				1555
				1556	return X86CC != X86::COND_INVALID;
				1557	}
				1558
				1559	/// hasFPCMov - is there a floating point cmov for the specific X86 condition
				1560	/// code. Current x86 isa includes the following FP cmov instructions:
				1561	/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
				1562	static bool hasFPCMov(unsigned X86CC) {
				1563	switch (X86CC) {
				1564	default:
				1565	return false;
				1566	case X86::COND_B:
				1567	case X86::COND_BE:
				1568	case X86::COND_E:
				1569	case X86::COND_P:
				1570	case X86::COND_A:
				1571	case X86::COND_AE:
				1572	case X86::COND_NE:
				1573	case X86::COND_NP:
				1574	return true;
				1575	}
				1576	}
				1577
				1578	/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
				1579	/// true if Op is undef or if its value falls within the specified range (L, H].
				1580	static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
				1581	if (Op.getOpcode() == ISD::UNDEF)
				1582	return true;
				1583
				1584	unsigned Val = cast<ConstantSDNode>(Op)->getValue();
				1585	return (Val >= Low && Val < Hi);
				1586	}
				1587
				1588	/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
				1589	/// true if Op is undef or if its value equal to the specified value.
				1590	static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
				1591	if (Op.getOpcode() == ISD::UNDEF)
				1592	return true;
				1593	return cast<ConstantSDNode>(Op)->getValue() == Val;
				1594	}
				1595
				1596	/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
				1597	/// specifies a shuffle of elements that is suitable for input to PSHUFD.
				1598	bool X86::isPSHUFDMask(SDNode *N) {
				1599	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1600
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	1601	if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1602	return false;
				1603
				1604	// Check if the value doesn't reference the second vector.
				1605	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
				1606	SDOperand Arg = N->getOperand(i);
				1607	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1608	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	1609	if (cast<ConstantSDNode>(Arg)->getValue() >= e)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1610	return false;
				1611	}
				1612
				1613	return true;
				1614	}
				1615
				1616	/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
				1617	/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
				1618	bool X86::isPSHUFHWMask(SDNode *N) {
				1619	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1620
				1621	if (N->getNumOperands() != 8)
				1622	return false;
				1623
				1624	// Lower quadword copied in order.
				1625	for (unsigned i = 0; i != 4; ++i) {
				1626	SDOperand Arg = N->getOperand(i);
				1627	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1628	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1629	if (cast<ConstantSDNode>(Arg)->getValue() != i)
				1630	return false;
				1631	}
				1632
				1633	// Upper quadword shuffled.
				1634	for (unsigned i = 4; i != 8; ++i) {
				1635	SDOperand Arg = N->getOperand(i);
				1636	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1637	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1638	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1639	if (Val < 4 \|\| Val > 7)
				1640	return false;
				1641	}
				1642
				1643	return true;
				1644	}
				1645
				1646	/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
				1647	/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
				1648	bool X86::isPSHUFLWMask(SDNode *N) {
				1649	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1650
				1651	if (N->getNumOperands() != 8)
				1652	return false;
				1653
				1654	// Upper quadword copied in order.
				1655	for (unsigned i = 4; i != 8; ++i)
				1656	if (!isUndefOrEqual(N->getOperand(i), i))
				1657	return false;
				1658
				1659	// Lower quadword shuffled.
				1660	for (unsigned i = 0; i != 4; ++i)
				1661	if (!isUndefOrInRange(N->getOperand(i), 0, 4))
				1662	return false;
				1663
				1664	return true;
				1665	}
				1666
				1667	/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
				1668	/// specifies a shuffle of elements that is suitable for input to SHUFP*.
				1669	static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
				1670	if (NumElems != 2 && NumElems != 4) return false;
				1671
				1672	unsigned Half = NumElems / 2;
				1673	for (unsigned i = 0; i < Half; ++i)
				1674	if (!isUndefOrInRange(Elems[i], 0, NumElems))
				1675	return false;
				1676	for (unsigned i = Half; i < NumElems; ++i)
				1677	if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
				1678	return false;
				1679
				1680	return true;
				1681	}
				1682
				1683	bool X86::isSHUFPMask(SDNode *N) {
				1684	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1685	return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
				1686	}
				1687
				1688	/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
				1689	/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
				1690	/// half elements to come from vector 1 (which would equal the dest.) and
				1691	/// the upper half to come from vector 2.
				1692	static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
				1693	if (NumOps != 2 && NumOps != 4) return false;
				1694
				1695	unsigned Half = NumOps / 2;
				1696	for (unsigned i = 0; i < Half; ++i)
				1697	if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
				1698	return false;
				1699	for (unsigned i = Half; i < NumOps; ++i)
				1700	if (!isUndefOrInRange(Ops[i], 0, NumOps))
				1701	return false;
				1702	return true;
				1703	}
				1704
				1705	static bool isCommutedSHUFP(SDNode *N) {
				1706	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1707	return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
				1708	}
				1709
				1710	/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
				1711	/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
				1712	bool X86::isMOVHLPSMask(SDNode *N) {
				1713	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1714
				1715	if (N->getNumOperands() != 4)
				1716	return false;
				1717
				1718	// Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
				1719	return isUndefOrEqual(N->getOperand(0), 6) &&
				1720	isUndefOrEqual(N->getOperand(1), 7) &&
				1721	isUndefOrEqual(N->getOperand(2), 2) &&
				1722	isUndefOrEqual(N->getOperand(3), 3);
				1723	}
				1724
				1725	/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
				1726	/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
				1727	/// <2, 3, 2, 3>
				1728	bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
				1729	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1730
				1731	if (N->getNumOperands() != 4)
				1732	return false;
				1733
				1734	// Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
				1735	return isUndefOrEqual(N->getOperand(0), 2) &&
				1736	isUndefOrEqual(N->getOperand(1), 3) &&
				1737	isUndefOrEqual(N->getOperand(2), 2) &&
				1738	isUndefOrEqual(N->getOperand(3), 3);
				1739	}
				1740
				1741	/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
				1742	/// specifies a shuffle of elements that is suitable for input to MOVLP{S\|D}.
				1743	bool X86::isMOVLPMask(SDNode *N) {
				1744	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1745
				1746	unsigned NumElems = N->getNumOperands();
				1747	if (NumElems != 2 && NumElems != 4)
				1748	return false;
				1749
				1750	for (unsigned i = 0; i < NumElems/2; ++i)
				1751	if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
				1752	return false;
				1753
				1754	for (unsigned i = NumElems/2; i < NumElems; ++i)
				1755	if (!isUndefOrEqual(N->getOperand(i), i))
				1756	return false;
				1757
				1758	return true;
				1759	}
				1760
				1761	/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
				1762	/// specifies a shuffle of elements that is suitable for input to MOVHP{S\|D}
				1763	/// and MOVLHPS.
				1764	bool X86::isMOVHPMask(SDNode *N) {
				1765	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1766
				1767	unsigned NumElems = N->getNumOperands();
				1768	if (NumElems != 2 && NumElems != 4)
				1769	return false;
				1770
				1771	for (unsigned i = 0; i < NumElems/2; ++i)
				1772	if (!isUndefOrEqual(N->getOperand(i), i))
				1773	return false;
				1774
				1775	for (unsigned i = 0; i < NumElems/2; ++i) {
				1776	SDOperand Arg = N->getOperand(i + NumElems/2);
				1777	if (!isUndefOrEqual(Arg, i + NumElems))
				1778	return false;
				1779	}
				1780
				1781	return true;
				1782	}
				1783
				1784	/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
				1785	/// specifies a shuffle of elements that is suitable for input to UNPCKL.
				1786	bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
				1787	bool V2IsSplat = false) {
				1788	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1789	return false;
				1790
				1791	for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
				1792	SDOperand BitI = Elts[i];
				1793	SDOperand BitI1 = Elts[i+1];
				1794	if (!isUndefOrEqual(BitI, j))
				1795	return false;
				1796	if (V2IsSplat) {
				1797	if (isUndefOrEqual(BitI1, NumElts))
				1798	return false;
				1799	} else {
				1800	if (!isUndefOrEqual(BitI1, j + NumElts))
				1801	return false;
				1802	}
				1803	}
				1804
				1805	return true;
				1806	}
				1807
				1808	bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
				1809	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1810	return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
				1811	}
				1812
				1813	/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
				1814	/// specifies a shuffle of elements that is suitable for input to UNPCKH.
				1815	bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
				1816	bool V2IsSplat = false) {
				1817	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1818	return false;
				1819
				1820	for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
				1821	SDOperand BitI = Elts[i];
				1822	SDOperand BitI1 = Elts[i+1];
				1823	if (!isUndefOrEqual(BitI, j + NumElts/2))
				1824	return false;
				1825	if (V2IsSplat) {
				1826	if (isUndefOrEqual(BitI1, NumElts))
				1827	return false;
				1828	} else {
				1829	if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
				1830	return false;
				1831	}
				1832	}
				1833
				1834	return true;
				1835	}
				1836
				1837	bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
				1838	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1839	return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
				1840	}
				1841
				1842	/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
				1843	/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
				1844	/// <0, 0, 1, 1>
				1845	bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
				1846	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1847
				1848	unsigned NumElems = N->getNumOperands();
				1849	if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
				1850	return false;
				1851
				1852	for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
				1853	SDOperand BitI = N->getOperand(i);
				1854	SDOperand BitI1 = N->getOperand(i+1);
				1855
				1856	if (!isUndefOrEqual(BitI, j))
				1857	return false;
				1858	if (!isUndefOrEqual(BitI1, j))
				1859	return false;
				1860	}
				1861
				1862	return true;
				1863	}
				1864
				1865	/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
				1866	/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
				1867	/// <2, 2, 3, 3>
				1868	bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
				1869	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1870
				1871	unsigned NumElems = N->getNumOperands();
				1872	if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
				1873	return false;
				1874
				1875	for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
				1876	SDOperand BitI = N->getOperand(i);
				1877	SDOperand BitI1 = N->getOperand(i + 1);
				1878
				1879	if (!isUndefOrEqual(BitI, j))
				1880	return false;
				1881	if (!isUndefOrEqual(BitI1, j))
				1882	return false;
				1883	}
				1884
				1885	return true;
				1886	}
				1887
				1888	/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
				1889	/// specifies a shuffle of elements that is suitable for input to MOVSS,
				1890	/// MOVSD, and MOVD, i.e. setting the lowest element.
				1891	static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
				1892	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1893	return false;
				1894
				1895	if (!isUndefOrEqual(Elts[0], NumElts))
				1896	return false;
				1897
				1898	for (unsigned i = 1; i < NumElts; ++i) {
				1899	if (!isUndefOrEqual(Elts[i], i))
				1900	return false;
				1901	}
				1902
				1903	return true;
				1904	}
				1905
				1906	bool X86::isMOVLMask(SDNode *N) {
				1907	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1908	return ::isMOVLMask(N->op_begin(), N->getNumOperands());
				1909	}
				1910
				1911	/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
				1912	/// of what x86 movss want. X86 movs requires the lowest element to be lowest
				1913	/// element of vector 2 and the other elements to come from vector 1 in order.
				1914	static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
				1915	bool V2IsSplat = false,
				1916	bool V2IsUndef = false) {
				1917	if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
				1918	return false;
				1919
				1920	if (!isUndefOrEqual(Ops[0], 0))
				1921	return false;
				1922
				1923	for (unsigned i = 1; i < NumOps; ++i) {
				1924	SDOperand Arg = Ops[i];
				1925	if (!(isUndefOrEqual(Arg, i+NumOps) \|\|
				1926	(V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) \|\|
				1927	(V2IsSplat && isUndefOrEqual(Arg, NumOps))))
				1928	return false;
				1929	}
				1930
				1931	return true;
				1932	}
				1933
				1934	static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
				1935	bool V2IsUndef = false) {
				1936	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1937	return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
				1938	V2IsSplat, V2IsUndef);
				1939	}
				1940
				1941	/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
				1942	/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
				1943	bool X86::isMOVSHDUPMask(SDNode *N) {
				1944	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1945
				1946	if (N->getNumOperands() != 4)
				1947	return false;
				1948
				1949	// Expect 1, 1, 3, 3
				1950	for (unsigned i = 0; i < 2; ++i) {
				1951	SDOperand Arg = N->getOperand(i);
				1952	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1953	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1954	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1955	if (Val != 1) return false;
				1956	}
				1957
				1958	bool HasHi = false;
				1959	for (unsigned i = 2; i < 4; ++i) {
				1960	SDOperand Arg = N->getOperand(i);
				1961	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1962	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1963	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1964	if (Val != 3) return false;
				1965	HasHi = true;
				1966	}
				1967
				1968	// Don't use movshdup if it can be done with a shufps.
				1969	return HasHi;
				1970	}
				1971
				1972	/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
				1973	/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
				1974	bool X86::isMOVSLDUPMask(SDNode *N) {
				1975	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1976
				1977	if (N->getNumOperands() != 4)
				1978	return false;
				1979
				1980	// Expect 0, 0, 2, 2
				1981	for (unsigned i = 0; i < 2; ++i) {
				1982	SDOperand Arg = N->getOperand(i);
				1983	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1984	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1985	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1986	if (Val != 0) return false;
				1987	}
				1988
				1989	bool HasHi = false;
				1990	for (unsigned i = 2; i < 4; ++i) {
				1991	SDOperand Arg = N->getOperand(i);
				1992	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1993	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1994	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1995	if (Val != 2) return false;
				1996	HasHi = true;
				1997	}
				1998
				1999	// Don't use movshdup if it can be done with a shufps.
				2000	return HasHi;
				2001	}
				2002
				2003	/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
				2004	/// specifies a identity operation on the LHS or RHS.
				2005	static bool isIdentityMask(SDNode *N, bool RHS = false) {
				2006	unsigned NumElems = N->getNumOperands();
				2007	for (unsigned i = 0; i < NumElems; ++i)
				2008	if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
				2009	return false;
				2010	return true;
				2011	}
				2012
				2013	/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
				2014	/// a splat of a single element.
				2015	static bool isSplatMask(SDNode *N) {
				2016	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2017
				2018	// This is a splat operation if each element of the permute is the same, and
				2019	// if the value doesn't reference the second vector.
				2020	unsigned NumElems = N->getNumOperands();
				2021	SDOperand ElementBase;
				2022	unsigned i = 0;
				2023	for (; i != NumElems; ++i) {
				2024	SDOperand Elt = N->getOperand(i);
				2025	if (isa<ConstantSDNode>(Elt)) {
				2026	ElementBase = Elt;
				2027	break;
				2028	}
				2029	}
				2030
				2031	if (!ElementBase.Val)
				2032	return false;
				2033
				2034	for (; i != NumElems; ++i) {
				2035	SDOperand Arg = N->getOperand(i);
				2036	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2037	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2038	if (Arg != ElementBase) return false;
				2039	}
				2040
				2041	// Make sure it is a splat of the first vector operand.
				2042	return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
				2043	}
				2044
				2045	/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
				2046	/// a splat of a single element and it's a 2 or 4 element mask.
				2047	bool X86::isSplatMask(SDNode *N) {
				2048	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2049
				2050	// We can only splat 64-bit, and 32-bit quantities with a single instruction.
				2051	if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
				2052	return false;
				2053	return ::isSplatMask(N);
				2054	}
				2055
				2056	/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
				2057	/// specifies a splat of zero element.
				2058	bool X86::isSplatLoMask(SDNode *N) {
				2059	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2060
				2061	for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
				2062	if (!isUndefOrEqual(N->getOperand(i), 0))
				2063	return false;
				2064	return true;
				2065	}
				2066
				2067	/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
				2068	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
				2069	/// instructions.
				2070	unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
				2071	unsigned NumOperands = N->getNumOperands();
				2072	unsigned Shift = (NumOperands == 4) ? 2 : 1;
				2073	unsigned Mask = 0;
				2074	for (unsigned i = 0; i < NumOperands; ++i) {
				2075	unsigned Val = 0;
				2076	SDOperand Arg = N->getOperand(NumOperands-i-1);
				2077	if (Arg.getOpcode() != ISD::UNDEF)
				2078	Val = cast<ConstantSDNode>(Arg)->getValue();
				2079	if (Val >= NumOperands) Val -= NumOperands;
				2080	Mask \|= Val;
				2081	if (i != NumOperands - 1)
				2082	Mask <<= Shift;
				2083	}
				2084
				2085	return Mask;
				2086	}
				2087
				2088	/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
				2089	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
				2090	/// instructions.
				2091	unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
				2092	unsigned Mask = 0;
				2093	// 8 nodes, but we only care about the last 4.
				2094	for (unsigned i = 7; i >= 4; --i) {
				2095	unsigned Val = 0;
				2096	SDOperand Arg = N->getOperand(i);
				2097	if (Arg.getOpcode() != ISD::UNDEF)
				2098	Val = cast<ConstantSDNode>(Arg)->getValue();
				2099	Mask \|= (Val - 4);
				2100	if (i != 4)
				2101	Mask <<= 2;
				2102	}
				2103
				2104	return Mask;
				2105	}
				2106
				2107	/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
				2108	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
				2109	/// instructions.
				2110	unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
				2111	unsigned Mask = 0;
				2112	// 8 nodes, but we only care about the first 4.
				2113	for (int i = 3; i >= 0; --i) {
				2114	unsigned Val = 0;
				2115	SDOperand Arg = N->getOperand(i);
				2116	if (Arg.getOpcode() != ISD::UNDEF)
				2117	Val = cast<ConstantSDNode>(Arg)->getValue();
				2118	Mask \|= Val;
				2119	if (i != 0)
				2120	Mask <<= 2;
				2121	}
				2122
				2123	return Mask;
				2124	}
				2125
				2126	/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
				2127	/// specifies a 8 element shuffle that can be broken into a pair of
				2128	/// PSHUFHW and PSHUFLW.
				2129	static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
				2130	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2131
				2132	if (N->getNumOperands() != 8)
				2133	return false;
				2134
				2135	// Lower quadword shuffled.
				2136	for (unsigned i = 0; i != 4; ++i) {
				2137	SDOperand Arg = N->getOperand(i);
				2138	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2139	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2140	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2141	if (Val > 4)
				2142	return false;
				2143	}
				2144
				2145	// Upper quadword shuffled.
				2146	for (unsigned i = 4; i != 8; ++i) {
				2147	SDOperand Arg = N->getOperand(i);
				2148	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2149	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2150	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2151	if (Val < 4 \|\| Val > 7)
				2152	return false;
				2153	}
				2154
				2155	return true;
				2156	}
				2157
				2158	/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
				2159	/// values in ther permute mask.
				2160	static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
				2161	SDOperand &V2, SDOperand &Mask,
				2162	SelectionDAG &DAG) {
				2163	MVT::ValueType VT = Op.getValueType();
				2164	MVT::ValueType MaskVT = Mask.getValueType();
				2165	MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
				2166	unsigned NumElems = Mask.getNumOperands();
				2167	SmallVector<SDOperand, 8> MaskVec;
				2168
				2169	for (unsigned i = 0; i != NumElems; ++i) {
				2170	SDOperand Arg = Mask.getOperand(i);
				2171	if (Arg.getOpcode() == ISD::UNDEF) {
				2172	MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
				2173	continue;
				2174	}
				2175	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2176	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2177	if (Val < NumElems)
				2178	MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
				2179	else
				2180	MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
				2181	}
				2182
				2183	std::swap(V1, V2);
				2184	Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2185	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2186	}
				2187
				2188	/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
				2189	/// match movhlps. The lower half elements should come from upper half of
				2190	/// V1 (and in order), and the upper half elements should come from the upper
				2191	/// half of V2 (and in order).
				2192	static bool ShouldXformToMOVHLPS(SDNode *Mask) {
				2193	unsigned NumElems = Mask->getNumOperands();
				2194	if (NumElems != 4)
				2195	return false;
				2196	for (unsigned i = 0, e = 2; i != e; ++i)
				2197	if (!isUndefOrEqual(Mask->getOperand(i), i+2))
				2198	return false;
				2199	for (unsigned i = 2; i != 4; ++i)
				2200	if (!isUndefOrEqual(Mask->getOperand(i), i+4))
				2201	return false;
				2202	return true;
				2203	}
				2204
				2205	/// isScalarLoadToVector - Returns true if the node is a scalar load that
				2206	/// is promoted to a vector.
				2207	static inline bool isScalarLoadToVector(SDNode *N) {
				2208	if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
				2209	N = N->getOperand(0).Val;
				2210	return ISD::isNON_EXTLoad(N);
				2211	}
				2212	return false;
				2213	}
				2214
				2215	/// ShouldXformToMOVLP{S\|D} - Return true if the node should be transformed to
				2216	/// match movlp{s\|d}. The lower half elements should come from lower half of
				2217	/// V1 (and in order), and the upper half elements should come from the upper
				2218	/// half of V2 (and in order). And since V1 will become the source of the
				2219	/// MOVLP, it must be either a vector load or a scalar load to vector.
				2220	static bool ShouldXformToMOVLP(SDNode V1, SDNode V2, SDNode *Mask) {
				2221	if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
				2222	return false;
				2223	// Is V2 is a vector load, don't do this transformation. We will try to use
				2224	// load folding shufps op.
				2225	if (ISD::isNON_EXTLoad(V2))
				2226	return false;
				2227
				2228	unsigned NumElems = Mask->getNumOperands();
				2229	if (NumElems != 2 && NumElems != 4)
				2230	return false;
				2231	for (unsigned i = 0, e = NumElems/2; i != e; ++i)
				2232	if (!isUndefOrEqual(Mask->getOperand(i), i))
				2233	return false;
				2234	for (unsigned i = NumElems/2; i != NumElems; ++i)
				2235	if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
				2236	return false;
				2237	return true;
				2238	}
				2239
				2240	/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
				2241	/// all the same.
				2242	static bool isSplatVector(SDNode *N) {
				2243	if (N->getOpcode() != ISD::BUILD_VECTOR)
				2244	return false;
				2245
				2246	SDOperand SplatValue = N->getOperand(0);
				2247	for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
				2248	if (N->getOperand(i) != SplatValue)
				2249	return false;
				2250	return true;
				2251	}
				2252
				2253	/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
				2254	/// to an undef.
				2255	static bool isUndefShuffle(SDNode *N) {
				2256	if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
				2257	return false;
				2258
				2259	SDOperand V1 = N->getOperand(0);
				2260	SDOperand V2 = N->getOperand(1);
				2261	SDOperand Mask = N->getOperand(2);
				2262	unsigned NumElems = Mask.getNumOperands();
				2263	for (unsigned i = 0; i != NumElems; ++i) {
				2264	SDOperand Arg = Mask.getOperand(i);
				2265	if (Arg.getOpcode() != ISD::UNDEF) {
				2266	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2267	if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
				2268	return false;
				2269	else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
				2270	return false;
				2271	}
				2272	}
				2273	return true;
				2274	}
				2275
				2276	/// isZeroNode - Returns true if Elt is a constant zero or a floating point
				2277	/// constant +0.0.
				2278	static inline bool isZeroNode(SDOperand Elt) {
				2279	return ((isa<ConstantSDNode>(Elt) &&
				2280	cast<ConstantSDNode>(Elt)->getValue() == 0) \|\|
				2281	(isa<ConstantFPSDNode>(Elt) &&
				2282	cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
				2283	}
				2284
				2285	/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
				2286	/// to an zero vector.
				2287	static bool isZeroShuffle(SDNode *N) {
				2288	if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
				2289	return false;
				2290
				2291	SDOperand V1 = N->getOperand(0);
				2292	SDOperand V2 = N->getOperand(1);
				2293	SDOperand Mask = N->getOperand(2);
				2294	unsigned NumElems = Mask.getNumOperands();
				2295	for (unsigned i = 0; i != NumElems; ++i) {
				2296	SDOperand Arg = Mask.getOperand(i);
				2297	if (Arg.getOpcode() != ISD::UNDEF) {
				2298	unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
				2299	if (Idx < NumElems) {
				2300	unsigned Opc = V1.Val->getOpcode();
				2301	if (Opc == ISD::UNDEF)
				2302	continue;
				2303	if (Opc != ISD::BUILD_VECTOR \|\|
				2304	!isZeroNode(V1.Val->getOperand(Idx)))
				2305	return false;
				2306	} else if (Idx >= NumElems) {
				2307	unsigned Opc = V2.Val->getOpcode();
				2308	if (Opc == ISD::UNDEF)
				2309	continue;
				2310	if (Opc != ISD::BUILD_VECTOR \|\|
				2311	!isZeroNode(V2.Val->getOperand(Idx - NumElems)))
				2312	return false;
				2313	}
				2314	}
				2315	}
				2316	return true;
				2317	}
				2318
				2319	/// getZeroVector - Returns a vector of specified type with all zero elements.
				2320	///
				2321	static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
				2322	assert(MVT::isVector(VT) && "Expected a vector type");
				2323	unsigned NumElems = MVT::getVectorNumElements(VT);
				2324	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				2325	bool isFP = MVT::isFloatingPoint(EVT);
				2326	SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
				2327	SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
				2328	return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
				2329	}
				2330
				2331	/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
				2332	/// that point to V2 points to its first element.
				2333	static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
				2334	assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
				2335
				2336	bool Changed = false;
				2337	SmallVector<SDOperand, 8> MaskVec;
				2338	unsigned NumElems = Mask.getNumOperands();
				2339	for (unsigned i = 0; i != NumElems; ++i) {
				2340	SDOperand Arg = Mask.getOperand(i);
				2341	if (Arg.getOpcode() != ISD::UNDEF) {
				2342	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2343	if (Val > NumElems) {
				2344	Arg = DAG.getConstant(NumElems, Arg.getValueType());
				2345	Changed = true;
				2346	}
				2347	}
				2348	MaskVec.push_back(Arg);
				2349	}
				2350
				2351	if (Changed)
				2352	Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
				2353	&MaskVec[0], MaskVec.size());
				2354	return Mask;
				2355	}
				2356
				2357	/// getMOVLMask - Returns a vector_shuffle mask for an movs{s\|d}, movd
				2358	/// operation of specified width.
				2359	static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
				2360	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2361	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2362
				2363	SmallVector<SDOperand, 8> MaskVec;
				2364	MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
				2365	for (unsigned i = 1; i != NumElems; ++i)
				2366	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2367	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2368	}
				2369
				2370	/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
				2371	/// of specified width.
				2372	static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
				2373	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2374	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2375	SmallVector<SDOperand, 8> MaskVec;
				2376	for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
				2377	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2378	MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
				2379	}
				2380	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2381	}
				2382
				2383	/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
				2384	/// of specified width.
				2385	static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
				2386	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2387	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2388	unsigned Half = NumElems/2;
				2389	SmallVector<SDOperand, 8> MaskVec;
				2390	for (unsigned i = 0; i != Half; ++i) {
				2391	MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
				2392	MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
				2393	}
				2394	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2395	}
				2396
				2397	/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
				2398	///
				2399	static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
				2400	SDOperand V1 = Op.getOperand(0);
				2401	SDOperand Mask = Op.getOperand(2);
				2402	MVT::ValueType VT = Op.getValueType();
				2403	unsigned NumElems = Mask.getNumOperands();
				2404	Mask = getUnpacklMask(NumElems, DAG);
				2405	while (NumElems != 4) {
				2406	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
				2407	NumElems >>= 1;
				2408	}
				2409	V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
				2410
				2411	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2412	Mask = getZeroVector(MaskVT, DAG);
				2413	SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
				2414	DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
				2415	return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
				2416	}
				2417
				2418	/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
				2419	/// vector of zero or undef vector.
				2420	static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
				2421	unsigned NumElems, unsigned Idx,
				2422	bool isZero, SelectionDAG &DAG) {
				2423	SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
				2424	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2425	MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
				2426	SDOperand Zero = DAG.getConstant(0, EVT);
				2427	SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
				2428	MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
				2429	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2430	&MaskVec[0], MaskVec.size());
				2431	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2432	}
				2433
				2434	/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
				2435	///
				2436	static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
				2437	unsigned NumNonZero, unsigned NumZero,
				2438	SelectionDAG &DAG, TargetLowering &TLI) {
				2439	if (NumNonZero > 8)
				2440	return SDOperand();
				2441
				2442	SDOperand V(0, 0);
				2443	bool First = true;
				2444	for (unsigned i = 0; i < 16; ++i) {
				2445	bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
				2446	if (ThisIsNonZero && First) {
				2447	if (NumZero)
				2448	V = getZeroVector(MVT::v8i16, DAG);
				2449	else
				2450	V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
				2451	First = false;
				2452	}
				2453
				2454	if ((i & 1) != 0) {
				2455	SDOperand ThisElt(0, 0), LastElt(0, 0);
				2456	bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
				2457	if (LastIsNonZero) {
				2458	LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
				2459	}
				2460	if (ThisIsNonZero) {
				2461	ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
				2462	ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
				2463	ThisElt, DAG.getConstant(8, MVT::i8));
				2464	if (LastIsNonZero)
				2465	ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
				2466	} else
				2467	ThisElt = LastElt;
				2468
				2469	if (ThisElt.Val)
				2470	V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
				2471	DAG.getConstant(i/2, TLI.getPointerTy()));
				2472	}
				2473	}
				2474
				2475	return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
				2476	}
				2477
				2478	/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
				2479	///
				2480	static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
				2481	unsigned NumNonZero, unsigned NumZero,
				2482	SelectionDAG &DAG, TargetLowering &TLI) {
				2483	if (NumNonZero > 4)
				2484	return SDOperand();
				2485
				2486	SDOperand V(0, 0);
				2487	bool First = true;
				2488	for (unsigned i = 0; i < 8; ++i) {
				2489	bool isNonZero = (NonZeros & (1 << i)) != 0;
				2490	if (isNonZero) {
				2491	if (First) {
				2492	if (NumZero)
				2493	V = getZeroVector(MVT::v8i16, DAG);
				2494	else
				2495	V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
				2496	First = false;
				2497	}
				2498	V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
				2499	DAG.getConstant(i, TLI.getPointerTy()));
				2500	}
				2501	}
				2502
				2503	return V;
				2504	}
				2505
				2506	SDOperand
				2507	X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				2508	// All zero's are handled with pxor.
				2509	if (ISD::isBuildVectorAllZeros(Op.Val))
				2510	return Op;
				2511
				2512	// All one's are handled with pcmpeqd.
				2513	if (ISD::isBuildVectorAllOnes(Op.Val))
				2514	return Op;
				2515
				2516	MVT::ValueType VT = Op.getValueType();
				2517	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				2518	unsigned EVTBits = MVT::getSizeInBits(EVT);
				2519
				2520	unsigned NumElems = Op.getNumOperands();
				2521	unsigned NumZero = 0;
				2522	unsigned NumNonZero = 0;
				2523	unsigned NonZeros = 0;
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2524	unsigned NumNonZeroImms = 0;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2525	std::set<SDOperand> Values;
				2526	for (unsigned i = 0; i < NumElems; ++i) {
				2527	SDOperand Elt = Op.getOperand(i);
				2528	if (Elt.getOpcode() != ISD::UNDEF) {
				2529	Values.insert(Elt);
				2530	if (isZeroNode(Elt))
				2531	NumZero++;
				2532	else {
				2533	NonZeros \|= (1 << i);
				2534	NumNonZero++;
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2535	if (Elt.getOpcode() == ISD::Constant \|\|
				2536	Elt.getOpcode() == ISD::ConstantFP)
				2537	NumNonZeroImms++;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2538	}
				2539	}
				2540	}
				2541
				2542	if (NumNonZero == 0) {
				2543	if (NumZero == 0)
				2544	// All undef vector. Return an UNDEF.
				2545	return DAG.getNode(ISD::UNDEF, VT);
				2546	else
				2547	// A mix of zero and undef. Return a zero vector.
				2548	return getZeroVector(VT, DAG);
				2549	}
				2550
				2551	// Splat is obviously ok. Let legalizer expand it to a shuffle.
				2552	if (Values.size() == 1)
				2553	return SDOperand();
				2554
				2555	// Special case for single non-zero element.
				2556	if (NumNonZero == 1) {
				2557	unsigned Idx = CountTrailingZeros_32(NonZeros);
				2558	SDOperand Item = Op.getOperand(Idx);
				2559	Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
				2560	if (Idx == 0)
				2561	// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
				2562	return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
				2563	NumZero > 0, DAG);
				2564
				2565	if (EVTBits == 32) {
				2566	// Turn it into a shuffle of zero and zero-extended scalar to vector.
				2567	Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
				2568	DAG);
				2569	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2570	MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
				2571	SmallVector<SDOperand, 8> MaskVec;
				2572	for (unsigned i = 0; i < NumElems; i++)
				2573	MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
				2574	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2575	&MaskVec[0], MaskVec.size());
				2576	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
				2577	DAG.getNode(ISD::UNDEF, VT), Mask);
				2578	}
				2579	}
				2580
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2581	// A vector full of immediates; various special cases are already
				2582	// handled, so this is best done with a single constant-pool load.
				2583	if (NumNonZero == NumNonZeroImms)
				2584	return SDOperand();
				2585
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2586	// Let legalizer expand 2-wide build_vectors.
				2587	if (EVTBits == 64)
				2588	return SDOperand();
				2589
				2590	// If element VT is < 32 bits, convert it to inserts into a zero vector.
				2591	if (EVTBits == 8 && NumElems == 16) {
				2592	SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
				2593	*this);
				2594	if (V.Val) return V;
				2595	}
				2596
				2597	if (EVTBits == 16 && NumElems == 8) {
				2598	SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
				2599	*this);
				2600	if (V.Val) return V;
				2601	}
				2602
				2603	// If element VT is == 32 bits, turn it into a number of shuffles.
				2604	SmallVector<SDOperand, 8> V;
				2605	V.resize(NumElems);
				2606	if (NumElems == 4 && NumZero > 0) {
				2607	for (unsigned i = 0; i < 4; ++i) {
				2608	bool isZero = !(NonZeros & (1 << i));
				2609	if (isZero)
				2610	V[i] = getZeroVector(VT, DAG);
				2611	else
				2612	V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
				2613	}
				2614
				2615	for (unsigned i = 0; i < 2; ++i) {
				2616	switch ((NonZeros & (0x3 << i2)) >> (i2)) {
				2617	default: break;
				2618	case 0:
				2619	V[i] = V[i*2]; // Must be a zero vector.
				2620	break;
				2621	case 1:
				2622	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2+1], V[i2],
				2623	getMOVLMask(NumElems, DAG));
				2624	break;
				2625	case 2:
				2626	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2], V[i2+1],
				2627	getMOVLMask(NumElems, DAG));
				2628	break;
				2629	case 3:
				2630	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2], V[i2+1],
				2631	getUnpacklMask(NumElems, DAG));
				2632	break;
				2633	}
				2634	}
				2635
				2636	// Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
				2637	// clears the upper bits.
				2638	// FIXME: we can do the same for v4f32 case when we know both parts of
				2639	// the lower half come from scalar_to_vector (loadf32). We should do
				2640	// that in post legalizer dag combiner with target specific hooks.
				2641	if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
				2642	return V[0];
				2643	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2644	MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
				2645	SmallVector<SDOperand, 8> MaskVec;
				2646	bool Reverse = (NonZeros & 0x3) == 2;
				2647	for (unsigned i = 0; i < 2; ++i)
				2648	if (Reverse)
				2649	MaskVec.push_back(DAG.getConstant(1-i, EVT));
				2650	else
				2651	MaskVec.push_back(DAG.getConstant(i, EVT));
				2652	Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
				2653	for (unsigned i = 0; i < 2; ++i)
				2654	if (Reverse)
				2655	MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
				2656	else
				2657	MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
				2658	SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2659	&MaskVec[0], MaskVec.size());
				2660	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
				2661	}
				2662
				2663	if (Values.size() > 2) {
				2664	// Expand into a number of unpckl*.
				2665	// e.g. for v4f32
				2666	// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
				2667	// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
				2668	// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
				2669	SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
				2670	for (unsigned i = 0; i < NumElems; ++i)
				2671	V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
				2672	NumElems >>= 1;
				2673	while (NumElems != 0) {
				2674	for (unsigned i = 0; i < NumElems; ++i)
				2675	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
				2676	UnpckMask);
				2677	NumElems >>= 1;
				2678	}
				2679	return V[0];
				2680	}
				2681
				2682	return SDOperand();
				2683	}
				2684
				2685	SDOperand
				2686	X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
				2687	SDOperand V1 = Op.getOperand(0);
				2688	SDOperand V2 = Op.getOperand(1);
				2689	SDOperand PermMask = Op.getOperand(2);
				2690	MVT::ValueType VT = Op.getValueType();
				2691	unsigned NumElems = PermMask.getNumOperands();
				2692	bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
				2693	bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
				2694	bool V1IsSplat = false;
				2695	bool V2IsSplat = false;
				2696
				2697	if (isUndefShuffle(Op.Val))
				2698	return DAG.getNode(ISD::UNDEF, VT);
				2699
				2700	if (isZeroShuffle(Op.Val))
				2701	return getZeroVector(VT, DAG);
				2702
				2703	if (isIdentityMask(PermMask.Val))
				2704	return V1;
				2705	else if (isIdentityMask(PermMask.Val, true))
				2706	return V2;
				2707
				2708	if (isSplatMask(PermMask.Val)) {
				2709	if (NumElems <= 4) return Op;
				2710	// Promote it to a v4i32 splat.
				2711	return PromoteSplat(Op, DAG);
				2712	}
				2713
				2714	if (X86::isMOVLMask(PermMask.Val))
				2715	return (V1IsUndef) ? V2 : Op;
				2716
				2717	if (X86::isMOVSHDUPMask(PermMask.Val) \|\|
				2718	X86::isMOVSLDUPMask(PermMask.Val) \|\|
				2719	X86::isMOVHLPSMask(PermMask.Val) \|\|
				2720	X86::isMOVHPMask(PermMask.Val) \|\|
				2721	X86::isMOVLPMask(PermMask.Val))
				2722	return Op;
				2723
				2724	if (ShouldXformToMOVHLPS(PermMask.Val) \|\|
				2725	ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
				2726	return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2727
				2728	bool Commuted = false;
				2729	V1IsSplat = isSplatVector(V1.Val);
				2730	V2IsSplat = isSplatVector(V2.Val);
				2731	if ((V1IsSplat \|\| V1IsUndef) && !(V2IsSplat \|\| V2IsUndef)) {
				2732	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2733	std::swap(V1IsSplat, V2IsSplat);
				2734	std::swap(V1IsUndef, V2IsUndef);
				2735	Commuted = true;
				2736	}
				2737
				2738	if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
				2739	if (V2IsUndef) return V1;
				2740	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2741	if (V2IsSplat) {
				2742	// V2 is a splat, so the mask may be malformed. That is, it may point
				2743	// to any V2 element. The instruction selectior won't like this. Get
				2744	// a corrected mask and commute to form a proper MOVS{S\|D}.
				2745	SDOperand NewMask = getMOVLMask(NumElems, DAG);
				2746	if (NewMask.Val != PermMask.Val)
				2747	Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2748	}
				2749	return Op;
				2750	}
				2751
				2752	if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) \|\|
				2753	X86::isUNPCKH_v_undef_Mask(PermMask.Val) \|\|
				2754	X86::isUNPCKLMask(PermMask.Val) \|\|
				2755	X86::isUNPCKHMask(PermMask.Val))
				2756	return Op;
				2757
				2758	if (V2IsSplat) {
				2759	// Normalize mask so all entries that point to V2 points to its first
				2760	// element then try to match unpck{h\|l} again. If match, return a
				2761	// new vector_shuffle with the corrected mask.
				2762	SDOperand NewMask = NormalizeMask(PermMask, DAG);
				2763	if (NewMask.Val != PermMask.Val) {
				2764	if (X86::isUNPCKLMask(PermMask.Val, true)) {
				2765	SDOperand NewMask = getUnpacklMask(NumElems, DAG);
				2766	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2767	} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
				2768	SDOperand NewMask = getUnpackhMask(NumElems, DAG);
				2769	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2770	}
				2771	}
				2772	}
				2773
				2774	// Normalize the node to match x86 shuffle ops if needed
				2775	if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
				2776	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2777
				2778	if (Commuted) {
				2779	// Commute is back and try unpck* again.
				2780	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2781	if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) \|\|
				2782	X86::isUNPCKH_v_undef_Mask(PermMask.Val) \|\|
				2783	X86::isUNPCKLMask(PermMask.Val) \|\|
				2784	X86::isUNPCKHMask(PermMask.Val))
				2785	return Op;
				2786	}
				2787
				2788	// If VT is integer, try PSHUF* first, then SHUFP*.
				2789	if (MVT::isInteger(VT)) {
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	2790	// MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
				2791	// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
				2792	if (((MVT::getSizeInBits(VT) != 64 \|\| NumElems == 4) &&
				2793	X86::isPSHUFDMask(PermMask.Val)) \|\|
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2794	X86::isPSHUFHWMask(PermMask.Val) \|\|
				2795	X86::isPSHUFLWMask(PermMask.Val)) {
				2796	if (V2.getOpcode() != ISD::UNDEF)
				2797	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
				2798	DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
				2799	return Op;
				2800	}
				2801
				2802	if (X86::isSHUFPMask(PermMask.Val) &&
				2803	MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
				2804	return Op;
				2805
				2806	// Handle v8i16 shuffle high / low shuffle node pair.
				2807	if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
				2808	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2809	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2810	SmallVector<SDOperand, 8> MaskVec;
				2811	for (unsigned i = 0; i != 4; ++i)
				2812	MaskVec.push_back(PermMask.getOperand(i));
				2813	for (unsigned i = 4; i != 8; ++i)
				2814	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2815	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2816	&MaskVec[0], MaskVec.size());
				2817	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2818	MaskVec.clear();
				2819	for (unsigned i = 0; i != 4; ++i)
				2820	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2821	for (unsigned i = 4; i != 8; ++i)
				2822	MaskVec.push_back(PermMask.getOperand(i));
				2823	Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
				2824	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2825	}
				2826	} else {
				2827	// Floating point cases in the other order.
				2828	if (X86::isSHUFPMask(PermMask.Val))
				2829	return Op;
				2830	if (X86::isPSHUFDMask(PermMask.Val) \|\|
				2831	X86::isPSHUFHWMask(PermMask.Val) \|\|
				2832	X86::isPSHUFLWMask(PermMask.Val)) {
				2833	if (V2.getOpcode() != ISD::UNDEF)
				2834	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
				2835	DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
				2836	return Op;
				2837	}
				2838	}
				2839
				2840	if (NumElems == 4 &&
				2841	// Don't do this for MMX.
				2842	MVT::getSizeInBits(VT) != 64) {
				2843	MVT::ValueType MaskVT = PermMask.getValueType();
				2844	MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
				2845	SmallVector<std::pair<int, int>, 8> Locs;
				2846	Locs.reserve(NumElems);
				2847	SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2848	SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2849	unsigned NumHi = 0;
				2850	unsigned NumLo = 0;
				2851	// If no more than two elements come from either vector. This can be
				2852	// implemented with two shuffles. First shuffle gather the elements.
				2853	// The second shuffle, which takes the first shuffle as both of its
				2854	// vector operands, put the elements into the right order.
				2855	for (unsigned i = 0; i != NumElems; ++i) {
				2856	SDOperand Elt = PermMask.getOperand(i);
				2857	if (Elt.getOpcode() == ISD::UNDEF) {
				2858	Locs[i] = std::make_pair(-1, -1);
				2859	} else {
				2860	unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
				2861	if (Val < NumElems) {
				2862	Locs[i] = std::make_pair(0, NumLo);
				2863	Mask1[NumLo] = Elt;
				2864	NumLo++;
				2865	} else {
				2866	Locs[i] = std::make_pair(1, NumHi);
				2867	if (2+NumHi < NumElems)
				2868	Mask1[2+NumHi] = Elt;
				2869	NumHi++;
				2870	}
				2871	}
				2872	}
				2873	if (NumLo <= 2 && NumHi <= 2) {
				2874	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2875	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2876	&Mask1[0], Mask1.size()));
				2877	for (unsigned i = 0; i != NumElems; ++i) {
				2878	if (Locs[i].first == -1)
				2879	continue;
				2880	else {
				2881	unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
				2882	Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
				2883	Mask2[i] = DAG.getConstant(Idx, MaskEVT);
				2884	}
				2885	}
				2886
				2887	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
				2888	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2889	&Mask2[0], Mask2.size()));
				2890	}
				2891
				2892	// Break it into (shuffle shuffle_hi, shuffle_lo).
				2893	Locs.clear();
				2894	SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2895	SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2896	SmallVector<SDOperand,8> *MaskPtr = &LoMask;
				2897	unsigned MaskIdx = 0;
				2898	unsigned LoIdx = 0;
				2899	unsigned HiIdx = NumElems/2;
				2900	for (unsigned i = 0; i != NumElems; ++i) {
				2901	if (i == NumElems/2) {
				2902	MaskPtr = &HiMask;
				2903	MaskIdx = 1;
				2904	LoIdx = 0;
				2905	HiIdx = NumElems/2;
				2906	}
				2907	SDOperand Elt = PermMask.getOperand(i);
				2908	if (Elt.getOpcode() == ISD::UNDEF) {
				2909	Locs[i] = std::make_pair(-1, -1);
				2910	} else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
				2911	Locs[i] = std::make_pair(MaskIdx, LoIdx);
				2912	(*MaskPtr)[LoIdx] = Elt;
				2913	LoIdx++;
				2914	} else {
				2915	Locs[i] = std::make_pair(MaskIdx, HiIdx);
				2916	(*MaskPtr)[HiIdx] = Elt;
				2917	HiIdx++;
				2918	}
				2919	}
				2920
				2921	SDOperand LoShuffle =
				2922	DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2923	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2924	&LoMask[0], LoMask.size()));
				2925	SDOperand HiShuffle =
				2926	DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2927	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2928	&HiMask[0], HiMask.size()));
				2929	SmallVector<SDOperand, 8> MaskOps;
				2930	for (unsigned i = 0; i != NumElems; ++i) {
				2931	if (Locs[i].first == -1) {
				2932	MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
				2933	} else {
				2934	unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
				2935	MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
				2936	}
				2937	}
				2938	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
				2939	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2940	&MaskOps[0], MaskOps.size()));
				2941	}
				2942
				2943	return SDOperand();
				2944	}
				2945
				2946	SDOperand
				2947	X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				2948	if (!isa<ConstantSDNode>(Op.getOperand(1)))
				2949	return SDOperand();
				2950
				2951	MVT::ValueType VT = Op.getValueType();
				2952	// TODO: handle v16i8.
				2953	if (MVT::getSizeInBits(VT) == 16) {
				2954	// Transform it so it match pextrw which produces a 32-bit result.
				2955	MVT::ValueType EVT = (MVT::ValueType)(VT+1);
				2956	SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
				2957	Op.getOperand(0), Op.getOperand(1));
				2958	SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
				2959	DAG.getValueType(VT));
				2960	return DAG.getNode(ISD::TRUNCATE, VT, Assert);
				2961	} else if (MVT::getSizeInBits(VT) == 32) {
				2962	SDOperand Vec = Op.getOperand(0);
				2963	unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				2964	if (Idx == 0)
				2965	return Op;
				2966	// SHUFPS the element to the lowest double word, then movss.
				2967	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2968	SmallVector<SDOperand, 8> IdxVec;
				2969	IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
				2970	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2971	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2972	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2973	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2974	&IdxVec[0], IdxVec.size());
				2975	Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
				2976	Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
				2977	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
				2978	DAG.getConstant(0, getPointerTy()));
				2979	} else if (MVT::getSizeInBits(VT) == 64) {
				2980	SDOperand Vec = Op.getOperand(0);
				2981	unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				2982	if (Idx == 0)
				2983	return Op;
				2984
				2985	// UNPCKHPD the element to the lowest double word, then movsd.
				2986	// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
				2987	// to a f64mem, the whole operation is folded into a single MOVHPDmr.
				2988	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2989	SmallVector<SDOperand, 8> IdxVec;
				2990	IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
				2991	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2992	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2993	&IdxVec[0], IdxVec.size());
				2994	Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
				2995	Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
				2996	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
				2997	DAG.getConstant(0, getPointerTy()));
				2998	}
				2999
				3000	return SDOperand();
				3001	}
				3002
				3003	SDOperand
				3004	X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				3005	// Transform it so it match pinsrw which expects a 16-bit value in a GR32
				3006	// as its second argument.
				3007	MVT::ValueType VT = Op.getValueType();
				3008	MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
				3009	SDOperand N0 = Op.getOperand(0);
				3010	SDOperand N1 = Op.getOperand(1);
				3011	SDOperand N2 = Op.getOperand(2);
				3012	if (MVT::getSizeInBits(BaseVT) == 16) {
				3013	if (N1.getValueType() != MVT::i32)
				3014	N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
				3015	if (N2.getValueType() != MVT::i32)
				3016	N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
				3017	return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
				3018	} else if (MVT::getSizeInBits(BaseVT) == 32) {
				3019	unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
				3020	if (Idx == 0) {
				3021	// Use a movss.
				3022	N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
				3023	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				3024	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				3025	SmallVector<SDOperand, 8> MaskVec;
				3026	MaskVec.push_back(DAG.getConstant(4, BaseVT));
				3027	for (unsigned i = 1; i <= 3; ++i)
				3028	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				3029	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
				3030	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3031	&MaskVec[0], MaskVec.size()));
				3032	} else {
				3033	// Use two pinsrw instructions to insert a 32 bit value.
				3034	Idx <<= 1;
				3035	if (MVT::isFloatingPoint(N1.getValueType())) {
Evan Cheng	1eea675	2007-07-31 06:21:44 +0000	[diff] [blame]	3036	N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
				3037	N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
				3038	N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
				3039	DAG.getConstant(0, getPointerTy()));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3040	}
				3041	N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
				3042	N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
				3043	DAG.getConstant(Idx, getPointerTy()));
				3044	N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
				3045	N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
				3046	DAG.getConstant(Idx+1, getPointerTy()));
				3047	return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
				3048	}
				3049	}
				3050
				3051	return SDOperand();
				3052	}
				3053
				3054	SDOperand
				3055	X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				3056	SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
				3057	return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
				3058	}
				3059
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterparts wrapped in an X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form an addressing mode. These wrapped nodes will be selected
// into MOV32ri.
				3066	SDOperand
				3067	X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
				3068	ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
				3069	SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
				3070	getPointerTy(),
				3071	CP->getAlignment());
				3072	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3073	// With PIC, the address is actually $g + Offset.
				3074	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3075	!Subtarget->isPICStyleRIPRel()) {
				3076	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3077	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3078	Result);
				3079	}
				3080
				3081	return Result;
				3082	}
				3083
				3084	SDOperand
				3085	X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
				3086	GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
				3087	SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
				3088	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3089	// With PIC, the address is actually $g + Offset.
				3090	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3091	!Subtarget->isPICStyleRIPRel()) {
				3092	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3093	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3094	Result);
				3095	}
				3096
				3097	// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
				3098	// load the value at address GV, not the value of GV itself. This means that
				3099	// the GlobalAddress must be in the base or index register of the address, not
				3100	// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
				3101	// The same applies for external symbols during PIC codegen
				3102	if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
				3103	Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
				3104
				3105	return Result;
				3106	}
				3107
				3108	// Lower ISD::GlobalTLSAddress using the "general dynamic" model
				3109	static SDOperand
				3110	LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
				3111	const MVT::ValueType PtrVT) {
				3112	SDOperand InFlag;
				3113	SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
				3114	DAG.getNode(X86ISD::GlobalBaseReg,
				3115	PtrVT), InFlag);
				3116	InFlag = Chain.getValue(1);
				3117
				3118	// emit leal symbol@TLSGD(,%ebx,1), %eax
				3119	SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
				3120	SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
				3121	GA->getValueType(0),
				3122	GA->getOffset());
				3123	SDOperand Ops[] = { Chain, TGA, InFlag };
				3124	SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
				3125	InFlag = Result.getValue(2);
				3126	Chain = Result.getValue(1);
				3127
				3128	// call ___tls_get_addr. This function receives its argument in
				3129	// the register EAX.
				3130	Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
				3131	InFlag = Chain.getValue(1);
				3132
				3133	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				3134	SDOperand Ops1[] = { Chain,
				3135	DAG.getTargetExternalSymbol("___tls_get_addr",
				3136	PtrVT),
				3137	DAG.getRegister(X86::EAX, PtrVT),
				3138	DAG.getRegister(X86::EBX, PtrVT),
				3139	InFlag };
				3140	Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
				3141	InFlag = Chain.getValue(1);
				3142
				3143	return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
				3144	}
				3145
				3146	// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
				3147	// "local exec" model.
				3148	static SDOperand
				3149	LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
				3150	const MVT::ValueType PtrVT) {
				3151	// Get the Thread Pointer
				3152	SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
				3153	// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
				3154	// exec)
				3155	SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
				3156	GA->getValueType(0),
				3157	GA->getOffset());
				3158	SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
				3159
				3160	if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
				3161	Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
				3162
				3163	// The address of the thread local variable is the add of the thread
				3164	// pointer with the offset of the variable.
				3165	return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
				3166	}
				3167
				3168	SDOperand
				3169	X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
				3170	// TODO: implement the "local dynamic" model
				3171	// TODO: implement the "initial exec"model for pic executables
				3172	assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
				3173	"TLS not implemented for non-ELF and 64-bit targets");
				3174	GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
				3175	// If the relocation model is PIC, use the "General Dynamic" TLS Model,
				3176	// otherwise use the "Local Exec"TLS Model
				3177	if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
				3178	return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
				3179	else
				3180	return LowerToTLSExecModel(GA, DAG, getPointerTy());
				3181	}
				3182
				3183	SDOperand
				3184	X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
				3185	const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
				3186	SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
				3187	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3188	// With PIC, the address is actually $g + Offset.
				3189	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3190	!Subtarget->isPICStyleRIPRel()) {
				3191	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3192	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3193	Result);
				3194	}
				3195
				3196	return Result;
				3197	}
				3198
				3199	SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
				3200	JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
				3201	SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
				3202	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3203	// With PIC, the address is actually $g + Offset.
				3204	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3205	!Subtarget->isPICStyleRIPRel()) {
				3206	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3207	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3208	Result);
				3209	}
				3210
				3211	return Result;
				3212	}
				3213
				3214	SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
				3215	assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
				3216	"Not an i64 shift!");
				3217	bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
				3218	SDOperand ShOpLo = Op.getOperand(0);
				3219	SDOperand ShOpHi = Op.getOperand(1);
				3220	SDOperand ShAmt = Op.getOperand(2);
				3221	SDOperand Tmp1 = isSRA ?
				3222	DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
				3223	DAG.getConstant(0, MVT::i32);
				3224
				3225	SDOperand Tmp2, Tmp3;
				3226	if (Op.getOpcode() == ISD::SHL_PARTS) {
				3227	Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
				3228	Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
				3229	} else {
				3230	Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
				3231	Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
				3232	}
				3233
				3234	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3235	SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
				3236	DAG.getConstant(32, MVT::i8));
				3237	SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
				3238	SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
				3239
				3240	SDOperand Hi, Lo;
				3241	SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3242
				3243	VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
				3244	SmallVector<SDOperand, 4> Ops;
				3245	if (Op.getOpcode() == ISD::SHL_PARTS) {
				3246	Ops.push_back(Tmp2);
				3247	Ops.push_back(Tmp3);
				3248	Ops.push_back(CC);
				3249	Ops.push_back(InFlag);
				3250	Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3251	InFlag = Hi.getValue(1);
				3252
				3253	Ops.clear();
				3254	Ops.push_back(Tmp3);
				3255	Ops.push_back(Tmp1);
				3256	Ops.push_back(CC);
				3257	Ops.push_back(InFlag);
				3258	Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3259	} else {
				3260	Ops.push_back(Tmp2);
				3261	Ops.push_back(Tmp3);
				3262	Ops.push_back(CC);
				3263	Ops.push_back(InFlag);
				3264	Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3265	InFlag = Lo.getValue(1);
				3266
				3267	Ops.clear();
				3268	Ops.push_back(Tmp3);
				3269	Ops.push_back(Tmp1);
				3270	Ops.push_back(CC);
				3271	Ops.push_back(InFlag);
				3272	Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3273	}
				3274
				3275	VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
				3276	Ops.clear();
				3277	Ops.push_back(Lo);
				3278	Ops.push_back(Hi);
				3279	return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
				3280	}
				3281
				3282	SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
				3283	assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
				3284	Op.getOperand(0).getValueType() >= MVT::i16 &&
				3285	"Unknown SINT_TO_FP to lower!");
				3286
				3287	SDOperand Result;
				3288	MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
				3289	unsigned Size = MVT::getSizeInBits(SrcVT)/8;
				3290	MachineFunction &MF = DAG.getMachineFunction();
				3291	int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
				3292	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3293	SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
				3294	StackSlot, NULL, 0);
				3295
				3296	// Build the FILD
				3297	SDVTList Tys;
				3298	if (X86ScalarSSE)
				3299	Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
				3300	else
				3301	Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
				3302	SmallVector<SDOperand, 8> Ops;
				3303	Ops.push_back(Chain);
				3304	Ops.push_back(StackSlot);
				3305	Ops.push_back(DAG.getValueType(SrcVT));
				3306	Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
				3307	Tys, &Ops[0], Ops.size());
				3308
				3309	if (X86ScalarSSE) {
				3310	Chain = Result.getValue(1);
				3311	SDOperand InFlag = Result.getValue(2);
				3312
				3313	// FIXME: Currently the FST is flagged to the FILD_FLAG. This
				3314	// shouldn't be necessary except that RFP cannot be live across
				3315	// multiple blocks. When stackifier is fixed, they can be uncoupled.
				3316	MachineFunction &MF = DAG.getMachineFunction();
				3317	int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
				3318	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3319	Tys = DAG.getVTList(MVT::Other);
				3320	SmallVector<SDOperand, 8> Ops;
				3321	Ops.push_back(Chain);
				3322	Ops.push_back(Result);
				3323	Ops.push_back(StackSlot);
				3324	Ops.push_back(DAG.getValueType(Op.getValueType()));
				3325	Ops.push_back(InFlag);
				3326	Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
				3327	Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
				3328	}
				3329
				3330	return Result;
				3331	}
				3332
				3333	SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
				3334	assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
				3335	"Unknown FP_TO_SINT to lower!");
				3336	// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
				3337	// stack slot.
				3338	MachineFunction &MF = DAG.getMachineFunction();
				3339	unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
				3340	int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
				3341	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3342
				3343	unsigned Opc;
				3344	switch (Op.getValueType()) {
				3345	default: assert(0 && "Invalid FP_TO_SINT to lower!");
				3346	case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
				3347	case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
				3348	case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
				3349	}
				3350
				3351	SDOperand Chain = DAG.getEntryNode();
				3352	SDOperand Value = Op.getOperand(0);
				3353	if (X86ScalarSSE) {
				3354	assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
				3355	Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
				3356	SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
				3357	SDOperand Ops[] = {
				3358	Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
				3359	};
				3360	Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
				3361	Chain = Value.getValue(1);
				3362	SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
				3363	StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3364	}
				3365
				3366	// Build the FP_TO_INT*_IN_MEM
				3367	SDOperand Ops[] = { Chain, Value, StackSlot };
				3368	SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
				3369
				3370	// Load the result.
				3371	return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
				3372	}
				3373
				3374	SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
				3375	MVT::ValueType VT = Op.getValueType();
				3376	MVT::ValueType EltVT = VT;
				3377	if (MVT::isVector(VT))
				3378	EltVT = MVT::getVectorElementType(VT);
				3379	const Type *OpNTy = MVT::getTypeForValueType(EltVT);
				3380	std::vector<Constant*> CV;
				3381	if (EltVT == MVT::f64) {
				3382	Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)));
				3383	CV.push_back(C);
				3384	CV.push_back(C);
				3385	} else {
				3386	Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)));
				3387	CV.push_back(C);
				3388	CV.push_back(C);
				3389	CV.push_back(C);
				3390	CV.push_back(C);
				3391	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3392	Constant *C = ConstantVector::get(CV);
				3393	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3394	SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3395	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3396	return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
				3397	}
				3398
				3399	SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
				3400	MVT::ValueType VT = Op.getValueType();
				3401	MVT::ValueType EltVT = VT;
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3402	unsigned EltNum = 1;
				3403	if (MVT::isVector(VT)) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3404	EltVT = MVT::getVectorElementType(VT);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3405	EltNum = MVT::getVectorNumElements(VT);
				3406	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3407	const Type *OpNTy = MVT::getTypeForValueType(EltVT);
				3408	std::vector<Constant*> CV;
				3409	if (EltVT == MVT::f64) {
				3410	Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63));
				3411	CV.push_back(C);
				3412	CV.push_back(C);
				3413	} else {
				3414	Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31));
				3415	CV.push_back(C);
				3416	CV.push_back(C);
				3417	CV.push_back(C);
				3418	CV.push_back(C);
				3419	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3420	Constant *C = ConstantVector::get(CV);
				3421	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3422	SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3423	false, 16);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3424	if (MVT::isVector(VT)) {
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3425	return DAG.getNode(ISD::BIT_CONVERT, VT,
				3426	DAG.getNode(ISD::XOR, MVT::v2i64,
				3427	DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
				3428	DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
				3429	} else {
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3430	return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
				3431	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3432	}
				3433
				3434	SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
				3435	SDOperand Op0 = Op.getOperand(0);
				3436	SDOperand Op1 = Op.getOperand(1);
				3437	MVT::ValueType VT = Op.getValueType();
				3438	MVT::ValueType SrcVT = Op1.getValueType();
				3439	const Type *SrcTy = MVT::getTypeForValueType(SrcVT);
				3440
				3441	// If second operand is smaller, extend it first.
				3442	if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
				3443	Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
				3444	SrcVT = VT;
				3445	}
				3446
				3447	// First get the sign bit of second operand.
				3448	std::vector<Constant*> CV;
				3449	if (SrcVT == MVT::f64) {
				3450	CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63)));
				3451	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3452	} else {
				3453	CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31)));
				3454	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3455	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3456	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3457	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3458	Constant *C = ConstantVector::get(CV);
				3459	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3460	SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3461	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3462	SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
				3463
				3464	// Shift sign bit right or left if the two operands have different types.
				3465	if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
				3466	// Op0 is MVT::f32, Op1 is MVT::f64.
				3467	SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
				3468	SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
				3469	DAG.getConstant(32, MVT::i32));
				3470	SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
				3471	SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
				3472	DAG.getConstant(0, getPointerTy()));
				3473	}
				3474
				3475	// Clear first operand sign bit.
				3476	CV.clear();
				3477	if (VT == MVT::f64) {
				3478	CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63))));
				3479	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3480	} else {
				3481	CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31))));
				3482	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3483	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3484	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3485	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3486	C = ConstantVector::get(CV);
				3487	CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3488	SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3489	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3490	SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
				3491
				3492	// Or the value with the sign bit.
				3493	return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
				3494	}
				3495
				3496	SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
				3497	SDOperand Chain) {
				3498	assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
				3499	SDOperand Cond;
				3500	SDOperand Op0 = Op.getOperand(0);
				3501	SDOperand Op1 = Op.getOperand(1);
				3502	SDOperand CC = Op.getOperand(2);
				3503	ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
				3504	const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3505	const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
				3506	bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
				3507	unsigned X86CC;
				3508
				3509	if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
				3510	Op0, Op1, DAG)) {
				3511	SDOperand Ops1[] = { Chain, Op0, Op1 };
				3512	Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
				3513	SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
				3514	return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3515	}
				3516
				3517	assert(isFP && "Illegal integer SetCC!");
				3518
				3519	SDOperand COps[] = { Chain, Op0, Op1 };
				3520	Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
				3521
				3522	switch (SetCCOpcode) {
				3523	default: assert(false && "Illegal floating point SetCC!");
				3524	case ISD::SETOEQ: { // !PF & ZF
				3525	SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
				3526	SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
				3527	SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
				3528	Tmp1.getValue(1) };
				3529	SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3530	return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
				3531	}
				3532	case ISD::SETUNE: { // PF \| !ZF
				3533	SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
				3534	SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
				3535	SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
				3536	Tmp1.getValue(1) };
				3537	SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3538	return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
				3539	}
				3540	}
				3541	}
				3542
				3543	SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
				3544	bool addTest = true;
				3545	SDOperand Chain = DAG.getEntryNode();
				3546	SDOperand Cond = Op.getOperand(0);
				3547	SDOperand CC;
				3548	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3549
				3550	if (Cond.getOpcode() == ISD::SETCC)
				3551	Cond = LowerSETCC(Cond, DAG, Chain);
				3552
				3553	if (Cond.getOpcode() == X86ISD::SETCC) {
				3554	CC = Cond.getOperand(0);
				3555
				3556	// If condition flag is set by a X86ISD::CMP, then make a copy of it
				3557	// (since flag operand cannot be shared). Use it as the condition setting
				3558	// operand in place of the X86ISD::SETCC.
				3559	// If the X86ISD::SETCC has more than one use, then perhaps it's better
				3560	// to use a test instead of duplicating the X86ISD::CMP (for register
				3561	// pressure reason)?
				3562	SDOperand Cmp = Cond.getOperand(1);
				3563	unsigned Opc = Cmp.getOpcode();
				3564	bool IllegalFPCMov = !X86ScalarSSE &&
				3565	MVT::isFloatingPoint(Op.getValueType()) &&
				3566	!hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
				3567	if ((Opc == X86ISD::CMP \|\| Opc == X86ISD::COMI \|\| Opc == X86ISD::UCOMI) &&
				3568	!IllegalFPCMov) {
				3569	SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
				3570	Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
				3571	addTest = false;
				3572	}
				3573	}
				3574
				3575	if (addTest) {
				3576	CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3577	SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
				3578	Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
				3579	}
				3580
				3581	VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
				3582	SmallVector<SDOperand, 4> Ops;
				3583	// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
				3584	// condition is true.
				3585	Ops.push_back(Op.getOperand(2));
				3586	Ops.push_back(Op.getOperand(1));
				3587	Ops.push_back(CC);
				3588	Ops.push_back(Cond.getValue(1));
				3589	return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3590	}
				3591
				3592	SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
				3593	bool addTest = true;
				3594	SDOperand Chain = Op.getOperand(0);
				3595	SDOperand Cond = Op.getOperand(1);
				3596	SDOperand Dest = Op.getOperand(2);
				3597	SDOperand CC;
				3598	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3599
				3600	if (Cond.getOpcode() == ISD::SETCC)
				3601	Cond = LowerSETCC(Cond, DAG, Chain);
				3602
				3603	if (Cond.getOpcode() == X86ISD::SETCC) {
				3604	CC = Cond.getOperand(0);
				3605
				3606	// If condition flag is set by a X86ISD::CMP, then make a copy of it
				3607	// (since flag operand cannot be shared). Use it as the condition setting
				3608	// operand in place of the X86ISD::SETCC.
				3609	// If the X86ISD::SETCC has more than one use, then perhaps it's better
				3610	// to use a test instead of duplicating the X86ISD::CMP (for register
				3611	// pressure reason)?
				3612	SDOperand Cmp = Cond.getOperand(1);
				3613	unsigned Opc = Cmp.getOpcode();
				3614	if (Opc == X86ISD::CMP \|\| Opc == X86ISD::COMI \|\| Opc == X86ISD::UCOMI) {
				3615	SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
				3616	Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
				3617	addTest = false;
				3618	}
				3619	}
				3620
				3621	if (addTest) {
				3622	CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3623	SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
				3624	Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
				3625	}
				3626	return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
				3627	Cond, Op.getOperand(2), CC, Cond.getValue(1));
				3628	}
				3629
				3630	SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
				3631	unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3632
				3633	if (Subtarget->is64Bit())
				3634	return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
				3635	else
				3636	switch (CallingConv) {
				3637	default:
				3638	assert(0 && "Unsupported calling convention");
				3639	case CallingConv::Fast:
				3640	// TODO: Implement fastcc
				3641	// Falls through
				3642	case CallingConv::C:
				3643	case CallingConv::X86_StdCall:
				3644	return LowerCCCCallTo(Op, DAG, CallingConv);
				3645	case CallingConv::X86_FastCall:
				3646	return LowerFastCCCallTo(Op, DAG, CallingConv);
				3647	}
				3648	}
				3649
				3650
				3651	// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
				3652	// Calls to _alloca is needed to probe the stack when allocating more than 4k
				3653	// bytes in one go. Touching the stack at 4K increments is necessary to ensure
				3654	// that the guard pages used by the OS virtual memory manager are allocated in
				3655	// correct sequence.
				3656	SDOperand
				3657	X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
				3658	SelectionDAG &DAG) {
				3659	assert(Subtarget->isTargetCygMing() &&
				3660	"This should be used only on Cygwin/Mingw targets");
				3661
				3662	// Get the inputs.
				3663	SDOperand Chain = Op.getOperand(0);
				3664	SDOperand Size = Op.getOperand(1);
				3665	// FIXME: Ensure alignment here
				3666
				3667	SDOperand Flag;
				3668
				3669	MVT::ValueType IntPtr = getPointerTy();
				3670	MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
				3671
				3672	Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
				3673	Flag = Chain.getValue(1);
				3674
				3675	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				3676	SDOperand Ops[] = { Chain,
				3677	DAG.getTargetExternalSymbol("_alloca", IntPtr),
				3678	DAG.getRegister(X86::EAX, IntPtr),
				3679	Flag };
				3680	Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
				3681	Flag = Chain.getValue(1);
				3682
				3683	Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
				3684
				3685	std::vector<MVT::ValueType> Tys;
				3686	Tys.push_back(SPTy);
				3687	Tys.push_back(MVT::Other);
				3688	SDOperand Ops1[2] = { Chain.getValue(0), Chain };
				3689	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
				3690	}
				3691
				3692	SDOperand
				3693	X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
				3694	MachineFunction &MF = DAG.getMachineFunction();
				3695	const Function* Fn = MF.getFunction();
				3696	if (Fn->hasExternalLinkage() &&
				3697	Subtarget->isTargetCygMing() &&
				3698	Fn->getName() == "main")
				3699	MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
				3700
				3701	unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3702	if (Subtarget->is64Bit())
				3703	return LowerX86_64CCCArguments(Op, DAG);
				3704	else
				3705	switch(CC) {
				3706	default:
				3707	assert(0 && "Unsupported calling convention");
				3708	case CallingConv::Fast:
				3709	// TODO: implement fastcc.
				3710
				3711	// Falls through
				3712	case CallingConv::C:
				3713	return LowerCCCArguments(Op, DAG);
				3714	case CallingConv::X86_StdCall:
				3715	MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
				3716	return LowerCCCArguments(Op, DAG, true);
				3717	case CallingConv::X86_FastCall:
				3718	MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
				3719	return LowerFastCCArguments(Op, DAG);
				3720	}
				3721	}
				3722
				3723	SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
				3724	SDOperand InFlag(0, 0);
				3725	SDOperand Chain = Op.getOperand(0);
				3726	unsigned Align =
				3727	(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
				3728	if (Align == 0) Align = 1;
				3729
				3730	ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
				3731	// If not DWORD aligned, call memset if size is less than the threshold.
				3732	// It knows how to align to the right boundary first.
				3733	if ((Align & 3) != 0 \|\|
				3734	(I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
				3735	MVT::ValueType IntPtr = getPointerTy();
				3736	const Type *IntPtrTy = getTargetData()->getIntPtrType();
				3737	TargetLowering::ArgListTy Args;
				3738	TargetLowering::ArgListEntry Entry;
				3739	Entry.Node = Op.getOperand(1);
				3740	Entry.Ty = IntPtrTy;
				3741	Args.push_back(Entry);
				3742	// Extend the unsigned i8 argument to be an int value for the call.
				3743	Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
				3744	Entry.Ty = IntPtrTy;
				3745	Args.push_back(Entry);
				3746	Entry.Node = Op.getOperand(3);
				3747	Args.push_back(Entry);
				3748	std::pair<SDOperand,SDOperand> CallResult =
				3749	LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
				3750	DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
				3751	return CallResult.second;
				3752	}
				3753
				3754	MVT::ValueType AVT;
				3755	SDOperand Count;
				3756	ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
				3757	unsigned BytesLeft = 0;
				3758	bool TwoRepStos = false;
				3759	if (ValC) {
				3760	unsigned ValReg;
				3761	uint64_t Val = ValC->getValue() & 255;
				3762
				3763	// If the value is a constant, then we can potentially use larger sets.
				3764	switch (Align & 3) {
				3765	case 2: // WORD aligned
				3766	AVT = MVT::i16;
				3767	ValReg = X86::AX;
				3768	Val = (Val << 8) \| Val;
				3769	break;
				3770	case 0: // DWORD aligned
				3771	AVT = MVT::i32;
				3772	ValReg = X86::EAX;
				3773	Val = (Val << 8) \| Val;
				3774	Val = (Val << 16) \| Val;
				3775	if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned
				3776	AVT = MVT::i64;
				3777	ValReg = X86::RAX;
				3778	Val = (Val << 32) \| Val;
				3779	}
				3780	break;
				3781	default: // Byte aligned
				3782	AVT = MVT::i8;
				3783	ValReg = X86::AL;
				3784	Count = Op.getOperand(3);
				3785	break;
				3786	}
				3787
				3788	if (AVT > MVT::i8) {
				3789	if (I) {
				3790	unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
				3791	Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
				3792	BytesLeft = I->getValue() % UBytes;
				3793	} else {
				3794	assert(AVT >= MVT::i32 &&
				3795	"Do not use rep;stos if not at least DWORD aligned");
				3796	Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
				3797	Op.getOperand(3), DAG.getConstant(2, MVT::i8));
				3798	TwoRepStos = true;
				3799	}
				3800	}
				3801
				3802	Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
				3803	InFlag);
				3804	InFlag = Chain.getValue(1);
				3805	} else {
				3806	AVT = MVT::i8;
				3807	Count = Op.getOperand(3);
				3808	Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
				3809	InFlag = Chain.getValue(1);
				3810	}
				3811
				3812	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
				3813	Count, InFlag);
				3814	InFlag = Chain.getValue(1);
				3815	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
				3816	Op.getOperand(1), InFlag);
				3817	InFlag = Chain.getValue(1);
				3818
				3819	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3820	SmallVector<SDOperand, 8> Ops;
				3821	Ops.push_back(Chain);
				3822	Ops.push_back(DAG.getValueType(AVT));
				3823	Ops.push_back(InFlag);
				3824	Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
				3825
				3826	if (TwoRepStos) {
				3827	InFlag = Chain.getValue(1);
				3828	Count = Op.getOperand(3);
				3829	MVT::ValueType CVT = Count.getValueType();
				3830	SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
				3831	DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
				3832	Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
				3833	Left, InFlag);
				3834	InFlag = Chain.getValue(1);
				3835	Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3836	Ops.clear();
				3837	Ops.push_back(Chain);
				3838	Ops.push_back(DAG.getValueType(MVT::i8));
				3839	Ops.push_back(InFlag);
				3840	Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
				3841	} else if (BytesLeft) {
				3842	// Issue stores for the last 1 - 7 bytes.
				3843	SDOperand Value;
				3844	unsigned Val = ValC->getValue() & 255;
				3845	unsigned Offset = I->getValue() - BytesLeft;
				3846	SDOperand DstAddr = Op.getOperand(1);
				3847	MVT::ValueType AddrVT = DstAddr.getValueType();
				3848	if (BytesLeft >= 4) {
				3849	Val = (Val << 8) \| Val;
				3850	Val = (Val << 16) \| Val;
				3851	Value = DAG.getConstant(Val, MVT::i32);
				3852	Chain = DAG.getStore(Chain, Value,
				3853	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3854	DAG.getConstant(Offset, AddrVT)),
				3855	NULL, 0);
				3856	BytesLeft -= 4;
				3857	Offset += 4;
				3858	}
				3859	if (BytesLeft >= 2) {
				3860	Value = DAG.getConstant((Val << 8) \| Val, MVT::i16);
				3861	Chain = DAG.getStore(Chain, Value,
				3862	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3863	DAG.getConstant(Offset, AddrVT)),
				3864	NULL, 0);
				3865	BytesLeft -= 2;
				3866	Offset += 2;
				3867	}
				3868	if (BytesLeft == 1) {
				3869	Value = DAG.getConstant(Val, MVT::i8);
				3870	Chain = DAG.getStore(Chain, Value,
				3871	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3872	DAG.getConstant(Offset, AddrVT)),
				3873	NULL, 0);
				3874	}
				3875	}
				3876
				3877	return Chain;
				3878	}
				3879
				3880	SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
				3881	SDOperand Chain = Op.getOperand(0);
				3882	unsigned Align =
				3883	(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
				3884	if (Align == 0) Align = 1;
				3885
				3886	ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
				3887	// If not DWORD aligned, call memcpy if size is less than the threshold.
				3888	// It knows how to align to the right boundary first.
				3889	if ((Align & 3) != 0 \|\|
				3890	(I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
				3891	MVT::ValueType IntPtr = getPointerTy();
				3892	TargetLowering::ArgListTy Args;
				3893	TargetLowering::ArgListEntry Entry;
				3894	Entry.Ty = getTargetData()->getIntPtrType();
				3895	Entry.Node = Op.getOperand(1); Args.push_back(Entry);
				3896	Entry.Node = Op.getOperand(2); Args.push_back(Entry);
				3897	Entry.Node = Op.getOperand(3); Args.push_back(Entry);
				3898	std::pair<SDOperand,SDOperand> CallResult =
				3899	LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
				3900	DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
				3901	return CallResult.second;
				3902	}
				3903
				3904	MVT::ValueType AVT;
				3905	SDOperand Count;
				3906	unsigned BytesLeft = 0;
				3907	bool TwoRepMovs = false;
				3908	switch (Align & 3) {
				3909	case 2: // WORD aligned
				3910	AVT = MVT::i16;
				3911	break;
				3912	case 0: // DWORD aligned
				3913	AVT = MVT::i32;
				3914	if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
				3915	AVT = MVT::i64;
				3916	break;
				3917	default: // Byte aligned
				3918	AVT = MVT::i8;
				3919	Count = Op.getOperand(3);
				3920	break;
				3921	}
				3922
				3923	if (AVT > MVT::i8) {
				3924	if (I) {
				3925	unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
				3926	Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
				3927	BytesLeft = I->getValue() % UBytes;
				3928	} else {
				3929	assert(AVT >= MVT::i32 &&
				3930	"Do not use rep;movs if not at least DWORD aligned");
				3931	Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
				3932	Op.getOperand(3), DAG.getConstant(2, MVT::i8));
				3933	TwoRepMovs = true;
				3934	}
				3935	}
				3936
				3937	SDOperand InFlag(0, 0);
				3938	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
				3939	Count, InFlag);
				3940	InFlag = Chain.getValue(1);
				3941	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
				3942	Op.getOperand(1), InFlag);
				3943	InFlag = Chain.getValue(1);
				3944	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
				3945	Op.getOperand(2), InFlag);
				3946	InFlag = Chain.getValue(1);
				3947
				3948	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3949	SmallVector<SDOperand, 8> Ops;
				3950	Ops.push_back(Chain);
				3951	Ops.push_back(DAG.getValueType(AVT));
				3952	Ops.push_back(InFlag);
				3953	Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
				3954
				3955	if (TwoRepMovs) {
				3956	InFlag = Chain.getValue(1);
				3957	Count = Op.getOperand(3);
				3958	MVT::ValueType CVT = Count.getValueType();
				3959	SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
				3960	DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
				3961	Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
				3962	Left, InFlag);
				3963	InFlag = Chain.getValue(1);
				3964	Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3965	Ops.clear();
				3966	Ops.push_back(Chain);
				3967	Ops.push_back(DAG.getValueType(MVT::i8));
				3968	Ops.push_back(InFlag);
				3969	Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
				3970	} else if (BytesLeft) {
				3971	// Issue loads and stores for the last 1 - 7 bytes.
				3972	unsigned Offset = I->getValue() - BytesLeft;
				3973	SDOperand DstAddr = Op.getOperand(1);
				3974	MVT::ValueType DstVT = DstAddr.getValueType();
				3975	SDOperand SrcAddr = Op.getOperand(2);
				3976	MVT::ValueType SrcVT = SrcAddr.getValueType();
				3977	SDOperand Value;
				3978	if (BytesLeft >= 4) {
				3979	Value = DAG.getLoad(MVT::i32, Chain,
				3980	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				3981	DAG.getConstant(Offset, SrcVT)),
				3982	NULL, 0);
				3983	Chain = Value.getValue(1);
				3984	Chain = DAG.getStore(Chain, Value,
				3985	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				3986	DAG.getConstant(Offset, DstVT)),
				3987	NULL, 0);
				3988	BytesLeft -= 4;
				3989	Offset += 4;
				3990	}
				3991	if (BytesLeft >= 2) {
				3992	Value = DAG.getLoad(MVT::i16, Chain,
				3993	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				3994	DAG.getConstant(Offset, SrcVT)),
				3995	NULL, 0);
				3996	Chain = Value.getValue(1);
				3997	Chain = DAG.getStore(Chain, Value,
				3998	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				3999	DAG.getConstant(Offset, DstVT)),
				4000	NULL, 0);
				4001	BytesLeft -= 2;
				4002	Offset += 2;
				4003	}
				4004
				4005	if (BytesLeft == 1) {
				4006	Value = DAG.getLoad(MVT::i8, Chain,
				4007	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				4008	DAG.getConstant(Offset, SrcVT)),
				4009	NULL, 0);
				4010	Chain = Value.getValue(1);
				4011	Chain = DAG.getStore(Chain, Value,
				4012	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				4013	DAG.getConstant(Offset, DstVT)),
				4014	NULL, 0);
				4015	}
				4016	}
				4017
				4018	return Chain;
				4019	}
				4020
				4021	SDOperand
				4022	X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
				4023	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				4024	SDOperand TheOp = Op.getOperand(0);
				4025	SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
				4026	if (Subtarget->is64Bit()) {
				4027	SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
				4028	SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
				4029	MVT::i64, Copy1.getValue(2));
				4030	SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
				4031	DAG.getConstant(32, MVT::i8));
				4032	SDOperand Ops[] = {
				4033	DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
				4034	};
				4035
				4036	Tys = DAG.getVTList(MVT::i64, MVT::Other);
				4037	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
				4038	}
				4039
				4040	SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
				4041	SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
				4042	MVT::i32, Copy1.getValue(2));
				4043	SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
				4044	Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
				4045	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
				4046	}
				4047
				4048	SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
				4049	SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
				4050
				4051	if (!Subtarget->is64Bit()) {
				4052	// vastart just stores the address of the VarArgsFrameIndex slot into the
				4053	// memory location argument.
				4054	SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
				4055	return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
				4056	SV->getOffset());
				4057	}
				4058
				4059	// __va_list_tag:
				4060	// gp_offset (0 - 6 * 8)
				4061	// fp_offset (48 - 48 + 8 * 16)
				4062	// overflow_arg_area (point to parameters coming in memory).
				4063	// reg_save_area
				4064	SmallVector<SDOperand, 8> MemOps;
				4065	SDOperand FIN = Op.getOperand(1);
				4066	// Store gp_offset
				4067	SDOperand Store = DAG.getStore(Op.getOperand(0),
				4068	DAG.getConstant(VarArgsGPOffset, MVT::i32),
				4069	FIN, SV->getValue(), SV->getOffset());
				4070	MemOps.push_back(Store);
				4071
				4072	// Store fp_offset
				4073	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4074	DAG.getConstant(4, getPointerTy()));
				4075	Store = DAG.getStore(Op.getOperand(0),
				4076	DAG.getConstant(VarArgsFPOffset, MVT::i32),
				4077	FIN, SV->getValue(), SV->getOffset());
				4078	MemOps.push_back(Store);
				4079
				4080	// Store ptr to overflow_arg_area
				4081	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4082	DAG.getConstant(4, getPointerTy()));
				4083	SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
				4084	Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
				4085	SV->getOffset());
				4086	MemOps.push_back(Store);
				4087
				4088	// Store ptr to reg_save_area.
				4089	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4090	DAG.getConstant(8, getPointerTy()));
				4091	SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
				4092	Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
				4093	SV->getOffset());
				4094	MemOps.push_back(Store);
				4095	return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
				4096	}
				4097
				4098	SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
				4099	// X86-64 va_list is a struct { i32, i32, i8, i8 }.
				4100	SDOperand Chain = Op.getOperand(0);
				4101	SDOperand DstPtr = Op.getOperand(1);
				4102	SDOperand SrcPtr = Op.getOperand(2);
				4103	SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
				4104	SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));
				4105
				4106	SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
				4107	SrcSV->getValue(), SrcSV->getOffset());
				4108	Chain = SrcPtr.getValue(1);
				4109	for (unsigned i = 0; i < 3; ++i) {
				4110	SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
				4111	SrcSV->getValue(), SrcSV->getOffset());
				4112	Chain = Val.getValue(1);
				4113	Chain = DAG.getStore(Chain, Val, DstPtr,
				4114	DstSV->getValue(), DstSV->getOffset());
				4115	if (i == 2)
				4116	break;
				4117	SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
				4118	DAG.getConstant(8, getPointerTy()));
				4119	DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
				4120	DAG.getConstant(8, getPointerTy()));
				4121	}
				4122	return Chain;
				4123	}
				4124
				4125	SDOperand
				4126	X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
				4127	unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
				4128	switch (IntNo) {
				4129	default: return SDOperand(); // Don't custom lower most intrinsics.
				4130	// Comparison intrinsics.
				4131	case Intrinsic::x86_sse_comieq_ss:
				4132	case Intrinsic::x86_sse_comilt_ss:
				4133	case Intrinsic::x86_sse_comile_ss:
				4134	case Intrinsic::x86_sse_comigt_ss:
				4135	case Intrinsic::x86_sse_comige_ss:
				4136	case Intrinsic::x86_sse_comineq_ss:
				4137	case Intrinsic::x86_sse_ucomieq_ss:
				4138	case Intrinsic::x86_sse_ucomilt_ss:
				4139	case Intrinsic::x86_sse_ucomile_ss:
				4140	case Intrinsic::x86_sse_ucomigt_ss:
				4141	case Intrinsic::x86_sse_ucomige_ss:
				4142	case Intrinsic::x86_sse_ucomineq_ss:
				4143	case Intrinsic::x86_sse2_comieq_sd:
				4144	case Intrinsic::x86_sse2_comilt_sd:
				4145	case Intrinsic::x86_sse2_comile_sd:
				4146	case Intrinsic::x86_sse2_comigt_sd:
				4147	case Intrinsic::x86_sse2_comige_sd:
				4148	case Intrinsic::x86_sse2_comineq_sd:
				4149	case Intrinsic::x86_sse2_ucomieq_sd:
				4150	case Intrinsic::x86_sse2_ucomilt_sd:
				4151	case Intrinsic::x86_sse2_ucomile_sd:
				4152	case Intrinsic::x86_sse2_ucomigt_sd:
				4153	case Intrinsic::x86_sse2_ucomige_sd:
				4154	case Intrinsic::x86_sse2_ucomineq_sd: {
				4155	unsigned Opc = 0;
				4156	ISD::CondCode CC = ISD::SETCC_INVALID;
				4157	switch (IntNo) {
				4158	default: break;
				4159	case Intrinsic::x86_sse_comieq_ss:
				4160	case Intrinsic::x86_sse2_comieq_sd:
				4161	Opc = X86ISD::COMI;
				4162	CC = ISD::SETEQ;
				4163	break;
				4164	case Intrinsic::x86_sse_comilt_ss:
				4165	case Intrinsic::x86_sse2_comilt_sd:
				4166	Opc = X86ISD::COMI;
				4167	CC = ISD::SETLT;
				4168	break;
				4169	case Intrinsic::x86_sse_comile_ss:
				4170	case Intrinsic::x86_sse2_comile_sd:
				4171	Opc = X86ISD::COMI;
				4172	CC = ISD::SETLE;
				4173	break;
				4174	case Intrinsic::x86_sse_comigt_ss:
				4175	case Intrinsic::x86_sse2_comigt_sd:
				4176	Opc = X86ISD::COMI;
				4177	CC = ISD::SETGT;
				4178	break;
				4179	case Intrinsic::x86_sse_comige_ss:
				4180	case Intrinsic::x86_sse2_comige_sd:
				4181	Opc = X86ISD::COMI;
				4182	CC = ISD::SETGE;
				4183	break;
				4184	case Intrinsic::x86_sse_comineq_ss:
				4185	case Intrinsic::x86_sse2_comineq_sd:
				4186	Opc = X86ISD::COMI;
				4187	CC = ISD::SETNE;
				4188	break;
				4189	case Intrinsic::x86_sse_ucomieq_ss:
				4190	case Intrinsic::x86_sse2_ucomieq_sd:
				4191	Opc = X86ISD::UCOMI;
				4192	CC = ISD::SETEQ;
				4193	break;
				4194	case Intrinsic::x86_sse_ucomilt_ss:
				4195	case Intrinsic::x86_sse2_ucomilt_sd:
				4196	Opc = X86ISD::UCOMI;
				4197	CC = ISD::SETLT;
				4198	break;
				4199	case Intrinsic::x86_sse_ucomile_ss:
				4200	case Intrinsic::x86_sse2_ucomile_sd:
				4201	Opc = X86ISD::UCOMI;
				4202	CC = ISD::SETLE;
				4203	break;
				4204	case Intrinsic::x86_sse_ucomigt_ss:
				4205	case Intrinsic::x86_sse2_ucomigt_sd:
				4206	Opc = X86ISD::UCOMI;
				4207	CC = ISD::SETGT;
				4208	break;
				4209	case Intrinsic::x86_sse_ucomige_ss:
				4210	case Intrinsic::x86_sse2_ucomige_sd:
				4211	Opc = X86ISD::UCOMI;
				4212	CC = ISD::SETGE;
				4213	break;
				4214	case Intrinsic::x86_sse_ucomineq_ss:
				4215	case Intrinsic::x86_sse2_ucomineq_sd:
				4216	Opc = X86ISD::UCOMI;
				4217	CC = ISD::SETNE;
				4218	break;
				4219	}
				4220
				4221	unsigned X86CC;
				4222	SDOperand LHS = Op.getOperand(1);
				4223	SDOperand RHS = Op.getOperand(2);
				4224	translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
				4225
				4226	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				4227	SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
				4228	SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
				4229	VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
				4230	SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
				4231	SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
				4232	return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
				4233	}
				4234	}
				4235	}
				4236
				4237	SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
				4238	// Depths > 0 not supported yet!
				4239	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				4240	return SDOperand();
				4241
				4242	// Just load the return address
				4243	SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
				4244	return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
				4245	}
				4246
				4247	SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
				4248	// Depths > 0 not supported yet!
				4249	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				4250	return SDOperand();
				4251
				4252	SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
				4253	return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
				4254	DAG.getConstant(4, getPointerTy()));
				4255	}
				4256
				4257	SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
				4258	SelectionDAG &DAG) {
				4259	// Is not yet supported on x86-64
				4260	if (Subtarget->is64Bit())
				4261	return SDOperand();
				4262
				4263	return DAG.getConstant(8, getPointerTy());
				4264	}
				4265
				4266	SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
				4267	{
				4268	assert(!Subtarget->is64Bit() &&
				4269	"Lowering of eh_return builtin is not supported yet on x86-64");
				4270
				4271	MachineFunction &MF = DAG.getMachineFunction();
				4272	SDOperand Chain = Op.getOperand(0);
				4273	SDOperand Offset = Op.getOperand(1);
				4274	SDOperand Handler = Op.getOperand(2);
				4275
				4276	SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF),
				4277	getPointerTy());
				4278
				4279	SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
				4280	DAG.getConstant(-4UL, getPointerTy()));
				4281	StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
				4282	Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
				4283	Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
				4284	MF.addLiveOut(X86::ECX);
				4285
				4286	return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
				4287	Chain, DAG.getRegister(X86::ECX, getPointerTy()));
				4288	}
				4289
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4290	SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
				4291	SelectionDAG &DAG) {
				4292	SDOperand Root = Op.getOperand(0);
				4293	SDOperand Trmp = Op.getOperand(1); // trampoline
				4294	SDOperand FPtr = Op.getOperand(2); // nested function
				4295	SDOperand Nest = Op.getOperand(3); // 'nest' parameter value
				4296
				4297	SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));
				4298
				4299	if (Subtarget->is64Bit()) {
				4300	return SDOperand(); // not yet supported
				4301	} else {
				4302	Function Func = (Function )
				4303	cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
				4304	unsigned CC = Func->getCallingConv();
				4305	unsigned char NestReg;
				4306
				4307	switch (CC) {
				4308	default:
				4309	assert(0 && "Unsupported calling convention");
				4310	case CallingConv::C:
				4311	case CallingConv::Fast:
				4312	case CallingConv::X86_StdCall: {
				4313	// Pass 'nest' parameter in ECX.
				4314	// Must be kept in sync with X86CallingConv.td
				4315	NestReg = N86::ECX;
				4316
				4317	// Check that ECX wasn't needed by an 'inreg' parameter.
				4318	const FunctionType *FTy = Func->getFunctionType();
				4319	const ParamAttrsList *Attrs = FTy->getParamAttrs();
				4320
				4321	if (Attrs && !Func->isVarArg()) {
				4322	unsigned InRegCount = 0;
				4323	unsigned Idx = 1;
				4324
				4325	for (FunctionType::param_iterator I = FTy->param_begin(),
				4326	E = FTy->param_end(); I != E; ++I, ++Idx)
				4327	if (Attrs->paramHasAttr(Idx, ParamAttr::InReg))
				4328	// FIXME: should only count parameters that are lowered to integers.
				4329	InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;
				4330
				4331	if (InRegCount > 2) {
				4332	cerr << "Nest register in use - reduce number of inreg parameters!\n";
				4333	abort();
				4334	}
				4335	}
				4336	break;
				4337	}
				4338	case CallingConv::X86_FastCall:
				4339	// Pass 'nest' parameter in EAX.
				4340	// Must be kept in sync with X86CallingConv.td
				4341	NestReg = N86::EAX;
				4342	break;
				4343	}
				4344
				4345	SDOperand OutChains[4];
				4346	SDOperand Addr, Disp;
				4347
				4348	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
				4349	Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
				4350
				4351	const unsigned char MOV32ri = 0xB8;
				4352	const unsigned char JMP = 0xE9;
				4353
				4354	OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri\|NestReg, MVT::i8),
				4355	Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
				4356
				4357	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
				4358	OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
				4359	TrmpSV->getOffset() + 1, false, 1);
				4360
				4361	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
				4362	OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
				4363	TrmpSV->getValue() + 5, TrmpSV->getOffset());
				4364
				4365	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
				4366	OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
				4367	TrmpSV->getOffset() + 6, false, 1);
				4368
				4369	return DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4);
				4370	}
				4371	}
				4372
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4373	/// LowerOperation - Provide custom lowering hooks for some operations.
				4374	///
				4375	SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
				4376	switch (Op.getOpcode()) {
				4377	default: assert(0 && "Should not custom lower this!");
				4378	case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
				4379	case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
				4380	case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
				4381	case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
				4382	case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
				4383	case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
				4384	case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
				4385	case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
				4386	case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
				4387	case ISD::SHL_PARTS:
				4388	case ISD::SRA_PARTS:
				4389	case ISD::SRL_PARTS: return LowerShift(Op, DAG);
				4390	case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
				4391	case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
				4392	case ISD::FABS: return LowerFABS(Op, DAG);
				4393	case ISD::FNEG: return LowerFNEG(Op, DAG);
				4394	case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
				4395	case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode());
				4396	case ISD::SELECT: return LowerSELECT(Op, DAG);
				4397	case ISD::BRCOND: return LowerBRCOND(Op, DAG);
				4398	case ISD::JumpTable: return LowerJumpTable(Op, DAG);
				4399	case ISD::CALL: return LowerCALL(Op, DAG);
				4400	case ISD::RET: return LowerRET(Op, DAG);
				4401	case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
				4402	case ISD::MEMSET: return LowerMEMSET(Op, DAG);
				4403	case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
				4404	case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
				4405	case ISD::VASTART: return LowerVASTART(Op, DAG);
				4406	case ISD::VACOPY: return LowerVACOPY(Op, DAG);
				4407	case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
				4408	case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
				4409	case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
				4410	case ISD::FRAME_TO_ARGS_OFFSET:
				4411	return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
				4412	case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
				4413	case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4414	case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4415	}
				4416	return SDOperand();
				4417	}
				4418
				4419	const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
				4420	switch (Opcode) {
				4421	default: return NULL;
				4422	case X86ISD::SHLD: return "X86ISD::SHLD";
				4423	case X86ISD::SHRD: return "X86ISD::SHRD";
				4424	case X86ISD::FAND: return "X86ISD::FAND";
				4425	case X86ISD::FOR: return "X86ISD::FOR";
				4426	case X86ISD::FXOR: return "X86ISD::FXOR";
				4427	case X86ISD::FSRL: return "X86ISD::FSRL";
				4428	case X86ISD::FILD: return "X86ISD::FILD";
				4429	case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
				4430	case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
				4431	case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
				4432	case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
				4433	case X86ISD::FLD: return "X86ISD::FLD";
				4434	case X86ISD::FST: return "X86ISD::FST";
				4435	case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
				4436	case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
				4437	case X86ISD::CALL: return "X86ISD::CALL";
				4438	case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
				4439	case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
				4440	case X86ISD::CMP: return "X86ISD::CMP";
				4441	case X86ISD::COMI: return "X86ISD::COMI";
				4442	case X86ISD::UCOMI: return "X86ISD::UCOMI";
				4443	case X86ISD::SETCC: return "X86ISD::SETCC";
				4444	case X86ISD::CMOV: return "X86ISD::CMOV";
				4445	case X86ISD::BRCOND: return "X86ISD::BRCOND";
				4446	case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
				4447	case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
				4448	case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4449	case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
				4450	case X86ISD::Wrapper: return "X86ISD::Wrapper";
				4451	case X86ISD::S2VEC: return "X86ISD::S2VEC";
				4452	case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
				4453	case X86ISD::PINSRW: return "X86ISD::PINSRW";
				4454	case X86ISD::FMAX: return "X86ISD::FMAX";
				4455	case X86ISD::FMIN: return "X86ISD::FMIN";
				4456	case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
				4457	case X86ISD::FRCP: return "X86ISD::FRCP";
				4458	case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
				4459	case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
				4460	case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
				4461	}
				4462	}
				4463
				4464	// isLegalAddressingMode - Return true if the addressing mode represented
				4465	// by AM is legal for this target, for a load/store of the specified type.
				4466	bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
				4467	const Type *Ty) const {
				4468	// X86 supports extremely general addressing modes.
				4469
				4470	// X86 allows a sign-extended 32-bit immediate field as a displacement.
				4471	if (AM.BaseOffs <= -(1LL << 32) \|\| AM.BaseOffs >= (1LL << 32)-1)
				4472	return false;
				4473
				4474	if (AM.BaseGV) {
Evan Cheng	6a1f3f1	2007-08-01 23:46:47 +0000	[diff] [blame]	4475	// We can only fold this if we don't need an extra load.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4476	if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
				4477	return false;
Evan Cheng	6a1f3f1	2007-08-01 23:46:47 +0000	[diff] [blame]	4478
				4479	// X86-64 only supports addr of globals in small code model.
				4480	if (Subtarget->is64Bit()) {
				4481	if (getTargetMachine().getCodeModel() != CodeModel::Small)
				4482	return false;
				4483	// If lower 4G is not available, then we must use rip-relative addressing.
				4484	if (AM.BaseOffs \|\| AM.Scale > 1)
				4485	return false;
				4486	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4487	}
				4488
				4489	switch (AM.Scale) {
				4490	case 0:
				4491	case 1:
				4492	case 2:
				4493	case 4:
				4494	case 8:
				4495	// These scales always work.
				4496	break;
				4497	case 3:
				4498	case 5:
				4499	case 9:
				4500	// These scales are formed with basereg+scalereg. Only accept if there is
				4501	// no basereg yet.
				4502	if (AM.HasBaseReg)
				4503	return false;
				4504	break;
				4505	default: // Other stuff never works.
				4506	return false;
				4507	}
				4508
				4509	return true;
				4510	}
				4511
				4512
				4513	/// isShuffleMaskLegal - Targets can use this to indicate that they only
				4514	/// support some VECTOR_SHUFFLE operations, those with specific masks.
				4515	/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
				4516	/// are assumed to be legal.
				4517	bool
				4518	X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
				4519	// Only do shuffles on 128-bit vector types for now.
				4520	if (MVT::getSizeInBits(VT) == 64) return false;
				4521	return (Mask.Val->getNumOperands() <= 4 \|\|
				4522	isIdentityMask(Mask.Val) \|\|
				4523	isIdentityMask(Mask.Val, true) \|\|
				4524	isSplatMask(Mask.Val) \|\|
				4525	isPSHUFHW_PSHUFLWMask(Mask.Val) \|\|
				4526	X86::isUNPCKLMask(Mask.Val) \|\|
				4527	X86::isUNPCKHMask(Mask.Val) \|\|
				4528	X86::isUNPCKL_v_undef_Mask(Mask.Val) \|\|
				4529	X86::isUNPCKH_v_undef_Mask(Mask.Val));
				4530	}
				4531
				4532	bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
				4533	MVT::ValueType EVT,
				4534	SelectionDAG &DAG) const {
				4535	unsigned NumElts = BVOps.size();
				4536	// Only do shuffles on 128-bit vector types for now.
				4537	if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
				4538	if (NumElts == 2) return true;
				4539	if (NumElts == 4) {
				4540	return (isMOVLMask(&BVOps[0], 4) \|\|
				4541	isCommutedMOVL(&BVOps[0], 4, true) \|\|
				4542	isSHUFPMask(&BVOps[0], 4) \|\|
				4543	isCommutedSHUFP(&BVOps[0], 4));
				4544	}
				4545	return false;
				4546	}
				4547
				4548	//===----------------------------------------------------------------------===//
				4549	// X86 Scheduler Hooks
				4550	//===----------------------------------------------------------------------===//
				4551
				4552	MachineBasicBlock *
				4553	X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
				4554	MachineBasicBlock *BB) {
				4555	const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
				4556	switch (MI->getOpcode()) {
				4557	default: assert(false && "Unexpected instr type to insert");
				4558	case X86::CMOV_FR32:
				4559	case X86::CMOV_FR64:
				4560	case X86::CMOV_V4F32:
				4561	case X86::CMOV_V2F64:
				4562	case X86::CMOV_V2I64: {
				4563	// To "insert" a SELECT_CC instruction, we actually have to insert the
				4564	// diamond control-flow pattern. The incoming instruction knows the
				4565	// destination vreg to set, the condition code register to branch on, the
				4566	// true/false values to select between, and a branch opcode to use.
				4567	const BasicBlock *LLVM_BB = BB->getBasicBlock();
				4568	ilist<MachineBasicBlock>::iterator It = BB;
				4569	++It;
				4570
				4571	// thisMBB:
				4572	// ...
				4573	// TrueVal = ...
				4574	// cmpTY ccX, r1, r2
				4575	// bCC copy1MBB
				4576	// fallthrough --> copy0MBB
				4577	MachineBasicBlock *thisMBB = BB;
				4578	MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
				4579	MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
				4580	unsigned Opc =
				4581	X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
				4582	BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
				4583	MachineFunction *F = BB->getParent();
				4584	F->getBasicBlockList().insert(It, copy0MBB);
				4585	F->getBasicBlockList().insert(It, sinkMBB);
				4586	// Update machine-CFG edges by first adding all successors of the current
				4587	// block to the new block which will contain the Phi node for the select.
				4588	for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
				4589	e = BB->succ_end(); i != e; ++i)
				4590	sinkMBB->addSuccessor(*i);
				4591	// Next, remove all successors of the current block, and add the true
				4592	// and fallthrough blocks as its successors.
				4593	while(!BB->succ_empty())
				4594	BB->removeSuccessor(BB->succ_begin());
				4595	BB->addSuccessor(copy0MBB);
				4596	BB->addSuccessor(sinkMBB);
				4597
				4598	// copy0MBB:
				4599	// %FalseValue = ...
				4600	// # fallthrough to sinkMBB
				4601	BB = copy0MBB;
				4602
				4603	// Update machine-CFG edges
				4604	BB->addSuccessor(sinkMBB);
				4605
				4606	// sinkMBB:
				4607	// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
				4608	// ...
				4609	BB = sinkMBB;
				4610	BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
				4611	.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
				4612	.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
				4613
				4614	delete MI; // The pseudo instruction is gone now.
				4615	return BB;
				4616	}
				4617
				4618	case X86::FP32_TO_INT16_IN_MEM:
				4619	case X86::FP32_TO_INT32_IN_MEM:
				4620	case X86::FP32_TO_INT64_IN_MEM:
				4621	case X86::FP64_TO_INT16_IN_MEM:
				4622	case X86::FP64_TO_INT32_IN_MEM:
Dale Johannesen	6d0e36a	2007-08-07 01:17:37 +0000	[diff] [blame]	4623	case X86::FP64_TO_INT64_IN_MEM:
				4624	case X86::FP80_TO_INT16_IN_MEM:
				4625	case X86::FP80_TO_INT32_IN_MEM:
				4626	case X86::FP80_TO_INT64_IN_MEM: {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4627	// Change the floating point control register to use "round towards zero"
				4628	// mode when truncating to an integer value.
				4629	MachineFunction *F = BB->getParent();
				4630	int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
				4631	addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
				4632
				4633	// Load the old value of the high byte of the control word...
				4634	unsigned OldCW =
				4635	F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
				4636	addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
				4637
				4638	// Set the high part to be round to zero...
				4639	addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
				4640	.addImm(0xC7F);
				4641
				4642	// Reload the modified control word now...
				4643	addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
				4644
				4645	// Restore the memory image of control word to original value
				4646	addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
				4647	.addReg(OldCW);
				4648
				4649	// Get the X86 opcode to use.
				4650	unsigned Opc;
				4651	switch (MI->getOpcode()) {
				4652	default: assert(0 && "illegal opcode!");
				4653	case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
				4654	case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
				4655	case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
				4656	case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
				4657	case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
				4658	case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
Dale Johannesen	6d0e36a	2007-08-07 01:17:37 +0000	[diff] [blame]	4659	case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
				4660	case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
				4661	case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4662	}
				4663
				4664	X86AddressMode AM;
				4665	MachineOperand &Op = MI->getOperand(0);
				4666	if (Op.isRegister()) {
				4667	AM.BaseType = X86AddressMode::RegBase;
				4668	AM.Base.Reg = Op.getReg();
				4669	} else {
				4670	AM.BaseType = X86AddressMode::FrameIndexBase;
				4671	AM.Base.FrameIndex = Op.getFrameIndex();
				4672	}
				4673	Op = MI->getOperand(1);
				4674	if (Op.isImmediate())
				4675	AM.Scale = Op.getImm();
				4676	Op = MI->getOperand(2);
				4677	if (Op.isImmediate())
				4678	AM.IndexReg = Op.getImm();
				4679	Op = MI->getOperand(3);
				4680	if (Op.isGlobalAddress()) {
				4681	AM.GV = Op.getGlobal();
				4682	} else {
				4683	AM.Disp = Op.getImm();
				4684	}
				4685	addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
				4686	.addReg(MI->getOperand(4).getReg());
				4687
				4688	// Reload the original control word now.
				4689	addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
				4690
				4691	delete MI; // The pseudo instruction is gone now.
				4692	return BB;
				4693	}
				4694	}
				4695	}
				4696
				4697	//===----------------------------------------------------------------------===//
				4698	// X86 Optimization Hooks
				4699	//===----------------------------------------------------------------------===//
				4700
				4701	void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
				4702	uint64_t Mask,
				4703	uint64_t &KnownZero,
				4704	uint64_t &KnownOne,
				4705	const SelectionDAG &DAG,
				4706	unsigned Depth) const {
				4707	unsigned Opc = Op.getOpcode();
				4708	assert((Opc >= ISD::BUILTIN_OP_END \|\|
				4709	Opc == ISD::INTRINSIC_WO_CHAIN \|\|
				4710	Opc == ISD::INTRINSIC_W_CHAIN \|\|
				4711	Opc == ISD::INTRINSIC_VOID) &&
				4712	"Should use MaskedValueIsZero if you don't know whether Op"
				4713	" is a target node!");
				4714
				4715	KnownZero = KnownOne = 0; // Don't know anything.
				4716	switch (Opc) {
				4717	default: break;
				4718	case X86ISD::SETCC:
				4719	KnownZero \|= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
				4720	break;
				4721	}
				4722	}
				4723
				4724	/// getShuffleScalarElt - Returns the scalar element that will make up the ith
				4725	/// element of the result of the vector shuffle.
				4726	static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
				4727	MVT::ValueType VT = N->getValueType(0);
				4728	SDOperand PermMask = N->getOperand(2);
				4729	unsigned NumElems = PermMask.getNumOperands();
				4730	SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
				4731	i %= NumElems;
				4732	if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
				4733	return (i == 0)
				4734	? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
				4735	} else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
				4736	SDOperand Idx = PermMask.getOperand(i);
				4737	if (Idx.getOpcode() == ISD::UNDEF)
				4738	return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
				4739	return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
				4740	}
				4741	return SDOperand();
				4742	}
				4743
				4744	/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
				4745	/// node is a GlobalAddress + an offset.
				4746	static bool isGAPlusOffset(SDNode N, GlobalValue &GA, int64_t &Offset) {
				4747	unsigned Opc = N->getOpcode();
				4748	if (Opc == X86ISD::Wrapper) {
				4749	if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
				4750	GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
				4751	return true;
				4752	}
				4753	} else if (Opc == ISD::ADD) {
				4754	SDOperand N1 = N->getOperand(0);
				4755	SDOperand N2 = N->getOperand(1);
				4756	if (isGAPlusOffset(N1.Val, GA, Offset)) {
				4757	ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
				4758	if (V) {
				4759	Offset += V->getSignExtended();
				4760	return true;
				4761	}
				4762	} else if (isGAPlusOffset(N2.Val, GA, Offset)) {
				4763	ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
				4764	if (V) {
				4765	Offset += V->getSignExtended();
				4766	return true;
				4767	}
				4768	}
				4769	}
				4770	return false;
				4771	}
				4772
				4773	/// isConsecutiveLoad - Returns true if N is loading from an address of Base
				4774	/// + Dist * Size.
				4775	static bool isConsecutiveLoad(SDNode N, SDNode Base, int Dist, int Size,
				4776	MachineFrameInfo *MFI) {
				4777	if (N->getOperand(0).Val != Base->getOperand(0).Val)
				4778	return false;
				4779
				4780	SDOperand Loc = N->getOperand(1);
				4781	SDOperand BaseLoc = Base->getOperand(1);
				4782	if (Loc.getOpcode() == ISD::FrameIndex) {
				4783	if (BaseLoc.getOpcode() != ISD::FrameIndex)
				4784	return false;
Dan Gohman	53491e9	2007-07-23 20:24:29 +0000	[diff] [blame]	4785	int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
				4786	int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4787	int FS = MFI->getObjectSize(FI);
				4788	int BFS = MFI->getObjectSize(BFI);
				4789	if (FS != BFS \|\| FS != Size) return false;
				4790	return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
				4791	} else {
				4792	GlobalValue *GV1 = NULL;
				4793	GlobalValue *GV2 = NULL;
				4794	int64_t Offset1 = 0;
				4795	int64_t Offset2 = 0;
				4796	bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
				4797	bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
				4798	if (isGA1 && isGA2 && GV1 == GV2)
				4799	return Offset1 == (Offset2 + Dist*Size);
				4800	}
				4801
				4802	return false;
				4803	}
				4804
				4805	static bool isBaseAlignment16(SDNode Base, MachineFrameInfo MFI,
				4806	const X86Subtarget *Subtarget) {
				4807	GlobalValue *GV;
				4808	int64_t Offset;
				4809	if (isGAPlusOffset(Base, GV, Offset))
				4810	return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
				4811	else {
				4812	assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
Dan Gohman	53491e9	2007-07-23 20:24:29 +0000	[diff] [blame]	4813	int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4814	if (BFI < 0)
				4815	// Fixed objects do not specify alignment, however the offsets are known.
				4816	return ((Subtarget->getStackAlignment() % 16) == 0 &&
				4817	(MFI->getObjectOffset(BFI) % 16) == 0);
				4818	else
				4819	return MFI->getObjectAlignment(BFI) >= 16;
				4820	}
				4821	return false;
				4822	}
				4823
				4824
				4825	/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
				4826	/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
				4827	/// if the load addresses are consecutive, non-overlapping, and in the right
				4828	/// order.
				4829	static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
				4830	const X86Subtarget *Subtarget) {
				4831	MachineFunction &MF = DAG.getMachineFunction();
				4832	MachineFrameInfo *MFI = MF.getFrameInfo();
				4833	MVT::ValueType VT = N->getValueType(0);
				4834	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				4835	SDOperand PermMask = N->getOperand(2);
				4836	int NumElems = (int)PermMask.getNumOperands();
				4837	SDNode *Base = NULL;
				4838	for (int i = 0; i < NumElems; ++i) {
				4839	SDOperand Idx = PermMask.getOperand(i);
				4840	if (Idx.getOpcode() == ISD::UNDEF) {
				4841	if (!Base) return SDOperand();
				4842	} else {
				4843	SDOperand Arg =
				4844	getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
				4845	if (!Arg.Val \|\| !ISD::isNON_EXTLoad(Arg.Val))
				4846	return SDOperand();
				4847	if (!Base)
				4848	Base = Arg.Val;
				4849	else if (!isConsecutiveLoad(Arg.Val, Base,
				4850	i, MVT::getSizeInBits(EVT)/8,MFI))
				4851	return SDOperand();
				4852	}
				4853	}
				4854
				4855	bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4856	LoadSDNode *LD = cast<LoadSDNode>(Base);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4857	if (isAlign16) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4858	return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4859	LD->getSrcValueOffset(), LD->isVolatile());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4860	} else {
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4861	return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
				4862	LD->getSrcValueOffset(), LD->isVolatile(),
				4863	LD->getAlignment());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4864	}
				4865	}
				4866
				4867	/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
				4868	static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
				4869	const X86Subtarget *Subtarget) {
				4870	SDOperand Cond = N->getOperand(0);
				4871
				4872	// If we have SSE[12] support, try to form min/max nodes.
				4873	if (Subtarget->hasSSE2() &&
				4874	(N->getValueType(0) == MVT::f32 \|\| N->getValueType(0) == MVT::f64)) {
				4875	if (Cond.getOpcode() == ISD::SETCC) {
				4876	// Get the LHS/RHS of the select.
				4877	SDOperand LHS = N->getOperand(1);
				4878	SDOperand RHS = N->getOperand(2);
				4879	ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
				4880
				4881	unsigned Opcode = 0;
				4882	if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
				4883	switch (CC) {
				4884	default: break;
				4885	case ISD::SETOLE: // (X <= Y) ? X : Y -> min
				4886	case ISD::SETULE:
				4887	case ISD::SETLE:
				4888	if (!UnsafeFPMath) break;
				4889	// FALL THROUGH.
				4890	case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
				4891	case ISD::SETLT:
				4892	Opcode = X86ISD::FMIN;
				4893	break;
				4894
				4895	case ISD::SETOGT: // (X > Y) ? X : Y -> max
				4896	case ISD::SETUGT:
				4897	case ISD::SETGT:
				4898	if (!UnsafeFPMath) break;
				4899	// FALL THROUGH.
				4900	case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
				4901	case ISD::SETGE:
				4902	Opcode = X86ISD::FMAX;
				4903	break;
				4904	}
				4905	} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
				4906	switch (CC) {
				4907	default: break;
				4908	case ISD::SETOGT: // (X > Y) ? Y : X -> min
				4909	case ISD::SETUGT:
				4910	case ISD::SETGT:
				4911	if (!UnsafeFPMath) break;
				4912	// FALL THROUGH.
				4913	case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
				4914	case ISD::SETGE:
				4915	Opcode = X86ISD::FMIN;
				4916	break;
				4917
				4918	case ISD::SETOLE: // (X <= Y) ? Y : X -> max
				4919	case ISD::SETULE:
				4920	case ISD::SETLE:
				4921	if (!UnsafeFPMath) break;
				4922	// FALL THROUGH.
				4923	case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
				4924	case ISD::SETLT:
				4925	Opcode = X86ISD::FMAX;
				4926	break;
				4927	}
				4928	}
				4929
				4930	if (Opcode)
				4931	return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
				4932	}
				4933
				4934	}
				4935
				4936	return SDOperand();
				4937	}
				4938
				4939
				4940	SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
				4941	DAGCombinerInfo &DCI) const {
				4942	SelectionDAG &DAG = DCI.DAG;
				4943	switch (N->getOpcode()) {
				4944	default: break;
				4945	case ISD::VECTOR_SHUFFLE:
				4946	return PerformShuffleCombine(N, DAG, Subtarget);
				4947	case ISD::SELECT:
				4948	return PerformSELECTCombine(N, DAG, Subtarget);
				4949	}
				4950
				4951	return SDOperand();
				4952	}
				4953
				4954	//===----------------------------------------------------------------------===//
				4955	// X86 Inline Assembly Support
				4956	//===----------------------------------------------------------------------===//
				4957
				4958	/// getConstraintType - Given a constraint letter, return the type of
				4959	/// constraint it is for this target.
				4960	X86TargetLowering::ConstraintType
				4961	X86TargetLowering::getConstraintType(const std::string &Constraint) const {
				4962	if (Constraint.size() == 1) {
				4963	switch (Constraint[0]) {
				4964	case 'A':
				4965	case 'r':
				4966	case 'R':
				4967	case 'l':
				4968	case 'q':
				4969	case 'Q':
				4970	case 'x':
				4971	case 'Y':
				4972	return C_RegisterClass;
				4973	default:
				4974	break;
				4975	}
				4976	}
				4977	return TargetLowering::getConstraintType(Constraint);
				4978	}
				4979
				4980	/// isOperandValidForConstraint - Return the specified operand (possibly
				4981	/// modified) if the specified SDOperand is valid for the specified target
				4982	/// constraint letter, otherwise return null.
				4983	SDOperand X86TargetLowering::
				4984	isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
				4985	switch (Constraint) {
				4986	default: break;
				4987	case 'I':
				4988	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
				4989	if (C->getValue() <= 31)
				4990	return DAG.getTargetConstant(C->getValue(), Op.getValueType());
				4991	}
				4992	return SDOperand(0,0);
				4993	case 'N':
				4994	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
				4995	if (C->getValue() <= 255)
				4996	return DAG.getTargetConstant(C->getValue(), Op.getValueType());
				4997	}
				4998	return SDOperand(0,0);
				4999	case 'i': {
				5000	// Literal immediates are always ok.
				5001	if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op))
				5002	return DAG.getTargetConstant(CST->getValue(), Op.getValueType());
				5003
				5004	// If we are in non-pic codegen mode, we allow the address of a global (with
				5005	// an optional displacement) to be used with 'i'.
				5006	GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
				5007	int64_t Offset = 0;
				5008
				5009	// Match either (GA) or (GA+C)
				5010	if (GA) {
				5011	Offset = GA->getOffset();
				5012	} else if (Op.getOpcode() == ISD::ADD) {
				5013	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
				5014	GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
				5015	if (C && GA) {
				5016	Offset = GA->getOffset()+C->getValue();
				5017	} else {
				5018	C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
				5019	GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
				5020	if (C && GA)
				5021	Offset = GA->getOffset()+C->getValue();
				5022	else
				5023	C = 0, GA = 0;
				5024	}
				5025	}
				5026
				5027	if (GA) {
				5028	// If addressing this global requires a load (e.g. in PIC mode), we can't
				5029	// match.
				5030	if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
				5031	false))
				5032	return SDOperand(0, 0);
				5033
				5034	Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
				5035	Offset);
				5036	return Op;
				5037	}
				5038
				5039	// Otherwise, not valid for this mode.
				5040	return SDOperand(0, 0);
				5041	}
				5042	}
				5043	return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
				5044	}
				5045
				5046	std::vector<unsigned> X86TargetLowering::
				5047	getRegClassForInlineAsmConstraint(const std::string &Constraint,
				5048	MVT::ValueType VT) const {
				5049	if (Constraint.size() == 1) {
				5050	// FIXME: not handling fp-stack yet!
				5051	switch (Constraint[0]) { // GCC X86 Constraint Letters
				5052	default: break; // Unknown constraint letter
				5053	case 'A': // EAX/EDX
				5054	if (VT == MVT::i32 \|\| VT == MVT::i64)
				5055	return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
				5056	break;
				5057	case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
				5058	case 'Q': // Q_REGS
				5059	if (VT == MVT::i32)
				5060	return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
				5061	else if (VT == MVT::i16)
				5062	return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
				5063	else if (VT == MVT::i8)
Evan Cheng	f85c10f	2007-08-13 23:27:11 +0000	[diff] [blame^]	5064	return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5065	break;
				5066	}
				5067	}
				5068
				5069	return std::vector<unsigned>();
				5070	}
				5071
				5072	std::pair<unsigned, const TargetRegisterClass*>
				5073	X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
				5074	MVT::ValueType VT) const {
				5075	// First, see if this is a constraint that directly corresponds to an LLVM
				5076	// register class.
				5077	if (Constraint.size() == 1) {
				5078	// GCC Constraint Letters
				5079	switch (Constraint[0]) {
				5080	default: break;
				5081	case 'r': // GENERAL_REGS
				5082	case 'R': // LEGACY_REGS
				5083	case 'l': // INDEX_REGS
				5084	if (VT == MVT::i64 && Subtarget->is64Bit())
				5085	return std::make_pair(0U, X86::GR64RegisterClass);
				5086	if (VT == MVT::i32)
				5087	return std::make_pair(0U, X86::GR32RegisterClass);
				5088	else if (VT == MVT::i16)
				5089	return std::make_pair(0U, X86::GR16RegisterClass);
				5090	else if (VT == MVT::i8)
				5091	return std::make_pair(0U, X86::GR8RegisterClass);
				5092	break;
				5093	case 'y': // MMX_REGS if MMX allowed.
				5094	if (!Subtarget->hasMMX()) break;
				5095	return std::make_pair(0U, X86::VR64RegisterClass);
				5096	break;
				5097	case 'Y': // SSE_REGS if SSE2 allowed
				5098	if (!Subtarget->hasSSE2()) break;
				5099	// FALL THROUGH.
				5100	case 'x': // SSE_REGS if SSE1 allowed
				5101	if (!Subtarget->hasSSE1()) break;
				5102
				5103	switch (VT) {
				5104	default: break;
				5105	// Scalar SSE types.
				5106	case MVT::f32:
				5107	case MVT::i32:
				5108	return std::make_pair(0U, X86::FR32RegisterClass);
				5109	case MVT::f64:
				5110	case MVT::i64:
				5111	return std::make_pair(0U, X86::FR64RegisterClass);
				5112	// Vector types.
				5113	case MVT::v16i8:
				5114	case MVT::v8i16:
				5115	case MVT::v4i32:
				5116	case MVT::v2i64:
				5117	case MVT::v4f32:
				5118	case MVT::v2f64:
				5119	return std::make_pair(0U, X86::VR128RegisterClass);
				5120	}
				5121	break;
				5122	}
				5123	}
				5124
				5125	// Use the default implementation in TargetLowering to convert the register
				5126	// constraint into a member of a register class.
				5127	std::pair<unsigned, const TargetRegisterClass*> Res;
				5128	Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
				5129
				5130	// Not found as a standard register?
				5131	if (Res.second == 0) {
				5132	// GCC calls "st(0)" just plain "st".
				5133	if (StringsEqualNoCase("{st}", Constraint)) {
				5134	Res.first = X86::ST0;
				5135	Res.second = X86::RSTRegisterClass;
				5136	}
				5137
				5138	return Res;
				5139	}
				5140
				5141	// Otherwise, check to see if this is a register class of the wrong value
				5142	// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
				5143	// turn into {ax},{dx}.
				5144	if (Res.second->hasType(VT))
				5145	return Res; // Correct type already, nothing to do.
				5146
				5147	// All of the single-register GCC register classes map their values onto
				5148	// 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
				5149	// really want an 8-bit or 32-bit register, map to the appropriate register
				5150	// class and return the appropriate register.
				5151	if (Res.second != X86::GR16RegisterClass)
				5152	return Res;
				5153
				5154	if (VT == MVT::i8) {
				5155	unsigned DestReg = 0;
				5156	switch (Res.first) {
				5157	default: break;
				5158	case X86::AX: DestReg = X86::AL; break;
				5159	case X86::DX: DestReg = X86::DL; break;
				5160	case X86::CX: DestReg = X86::CL; break;
				5161	case X86::BX: DestReg = X86::BL; break;
				5162	}
				5163	if (DestReg) {
				5164	Res.first = DestReg;
				5165	Res.second = Res.second = X86::GR8RegisterClass;
				5166	}
				5167	} else if (VT == MVT::i32) {
				5168	unsigned DestReg = 0;
				5169	switch (Res.first) {
				5170	default: break;
				5171	case X86::AX: DestReg = X86::EAX; break;
				5172	case X86::DX: DestReg = X86::EDX; break;
				5173	case X86::CX: DestReg = X86::ECX; break;
				5174	case X86::BX: DestReg = X86::EBX; break;
				5175	case X86::SI: DestReg = X86::ESI; break;
				5176	case X86::DI: DestReg = X86::EDI; break;
				5177	case X86::BP: DestReg = X86::EBP; break;
				5178	case X86::SP: DestReg = X86::ESP; break;
				5179	}
				5180	if (DestReg) {
				5181	Res.first = DestReg;
				5182	Res.second = Res.second = X86::GR32RegisterClass;
				5183	}
				5184	} else if (VT == MVT::i64) {
				5185	unsigned DestReg = 0;
				5186	switch (Res.first) {
				5187	default: break;
				5188	case X86::AX: DestReg = X86::RAX; break;
				5189	case X86::DX: DestReg = X86::RDX; break;
				5190	case X86::CX: DestReg = X86::RCX; break;
				5191	case X86::BX: DestReg = X86::RBX; break;
				5192	case X86::SI: DestReg = X86::RSI; break;
				5193	case X86::DI: DestReg = X86::RDI; break;
				5194	case X86::BP: DestReg = X86::RBP; break;
				5195	case X86::SP: DestReg = X86::RSP; break;
				5196	}
				5197	if (DestReg) {
				5198	Res.first = DestReg;
				5199	Res.second = Res.second = X86::GR64RegisterClass;
				5200	}
				5201	}
				5202
				5203	return Res;
				5204	}