Blame - lib/Target/X86/X86ISelLowering.cpp - fp2-dev/platform/external/llvm

blob: 23f9e9500c2dde606a1cc11adde7835c8aaf3e29 [file] [log] [blame]

Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1	//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file defines the interfaces that X86 uses to lower LLVM code into a
				11	// selection DAG.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "X86.h"
				16	#include "X86InstrBuilder.h"
				17	#include "X86ISelLowering.h"
				18	#include "X86MachineFunctionInfo.h"
				19	#include "X86TargetMachine.h"
				20	#include "llvm/CallingConv.h"
				21	#include "llvm/Constants.h"
				22	#include "llvm/DerivedTypes.h"
				23	#include "llvm/GlobalVariable.h"
				24	#include "llvm/Function.h"
				25	#include "llvm/Intrinsics.h"
				26	#include "llvm/ADT/VectorExtras.h"
				27	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				28	#include "llvm/CodeGen/CallingConvLower.h"
				29	#include "llvm/CodeGen/MachineFrameInfo.h"
				30	#include "llvm/CodeGen/MachineFunction.h"
				31	#include "llvm/CodeGen/MachineInstrBuilder.h"
				32	#include "llvm/CodeGen/SelectionDAG.h"
				33	#include "llvm/CodeGen/SSARegMap.h"
				34	#include "llvm/Support/MathExtras.h"
				35	#include "llvm/Target/TargetOptions.h"
				36	#include "llvm/ADT/StringExtras.h"
				37	using namespace llvm;
				38
				39	X86TargetLowering::X86TargetLowering(TargetMachine &TM)
				40	: TargetLowering(TM) {
					// Configures instruction-selection lowering for X86: registers the
					// register class used for each supported value type and records, per
					// (operation, value type) pair, whether it is Legal, Promote, Expand or
					// Custom, keyed off the subtarget's 64-bit / SSE / MMX feature queries.
					// Statement order matters: a later setOperationAction for the same pair
					// follows an earlier one (e.g. the all-vector Expand loop below comes
					// before the per-type vector settings).
				41	Subtarget = &TM.getSubtarget<X86Subtarget>();
					// Scalar FP uses the SSE register classes (FR32/FR64) only when SSE2 is
					// present; otherwise the RFP32/RFP64 classes are registered below.
				42	X86ScalarSSE = Subtarget->hasSSE2();
				43	X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
				44
				45	RegInfo = TM.getRegisterInfo();
				46
				47	// Set up the TargetLowering object.
				48
				49	// X86 is weird, it always uses i8 for shift amounts and setcc results.
				50	setShiftAmountType(MVT::i8);
				51	setSetCCResultType(MVT::i8);
				52	setSetCCResultContents(ZeroOrOneSetCCResult);
				53	setSchedulingPreference(SchedulingForRegPressure);
				54	setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
				55	setStackPointerRegisterToSaveRestore(X86StackPtr);
				56
				57	if (Subtarget->isTargetDarwin()) {
				58	// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
				59	setUseUnderscoreSetJmp(false);
				60	setUseUnderscoreLongJmp(false);
				61	} else if (Subtarget->isTargetMingw()) {
				62	// MS runtime is weird: it exports _setjmp, but longjmp!
				63	setUseUnderscoreSetJmp(true);
				64	setUseUnderscoreLongJmp(false);
				65	} else {
				66	setUseUnderscoreSetJmp(true);
				67	setUseUnderscoreLongJmp(true);
				68	}
				69
				70	// Set up the register classes.
				71	addRegisterClass(MVT::i8, X86::GR8RegisterClass);
				72	addRegisterClass(MVT::i16, X86::GR16RegisterClass);
				73	addRegisterClass(MVT::i32, X86::GR32RegisterClass);
				74	if (Subtarget->is64Bit())
				75	addRegisterClass(MVT::i64, X86::GR64RegisterClass);
				76
				77	setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
				78
				79	// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
				80	// operation.
				81	setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
				82	setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
				83	setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
				84
				85	if (Subtarget->is64Bit()) {
				86	setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
				87	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
				88	} else {
				89	if (X86ScalarSSE)
				90	// If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
				91	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
				92	else
				93	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
				94	}
				95
				96	// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
				97	// this operation.
				98	setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
				99	setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
				100	// SSE has no i16 to fp conversion, only i32
				101	if (X86ScalarSSE)
				102	setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
				103	else {
				104	setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
				105	setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
				106	}
				107
				108	if (!Subtarget->is64Bit()) {
				109	// Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
				110	setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
				111	setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
				112	}
				113
				114	// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
				115	// this operation.
				116	setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
				117	setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
				118
				119	if (X86ScalarSSE) {
				120	setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
				121	} else {
				122	setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
				123	setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
				124	}
				125
				126	// Handle FP_TO_UINT by promoting the destination to a larger signed
				127	// conversion.
				128	setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
				129	setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
				130	setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
				131
				132	if (Subtarget->is64Bit()) {
				133	setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
				134	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
				135	} else {
				136	if (X86ScalarSSE && !Subtarget->hasSSE3())
				137	// Expand FP_TO_UINT into a select.
				138	// FIXME: We would like to use a Custom expander here eventually to do
				139	// the optimal thing for SSE vs. the default expansion in the legalizer.
				140	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
				141	else
				142	// With SSE3 we can use fisttpll to convert to a signed i64.
				143	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
				144	}
				145
				146	// TODO: when we have SSE, these could be more efficient, by using movd/movq.
				147	if (!X86ScalarSSE) {
				148	setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
				149	setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
				150	}
				151
				152	setOperationAction(ISD::BR_JT , MVT::Other, Expand);
				153	setOperationAction(ISD::BRCOND , MVT::Other, Custom);
				154	setOperationAction(ISD::BR_CC , MVT::Other, Expand);
				155	setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
				156	setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
					// Only the i32 SIGN_EXTEND_INREG setting is conditional on 64-bit mode;
					// the i16/i8/i1 settings that follow are unconditional.
				157	if (Subtarget->is64Bit())
				158	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
				159	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
				160	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
				161	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
				162	setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
				163	setOperationAction(ISD::FREM , MVT::f64 , Expand);
				164
					// Bit-counting operations are expanded for every integer width.
				165	setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
				166	setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
				167	setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
				168	setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
				169	setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
				170	setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
				171	setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
				172	setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
				173	setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
				174	if (Subtarget->is64Bit()) {
				175	setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
				176	setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
				177	setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
				178	}
				179
				180	setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
				181	setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
				182
				183	// These should be promoted to a larger select which is supported.
				184	setOperationAction(ISD::SELECT , MVT::i1 , Promote);
				185	setOperationAction(ISD::SELECT , MVT::i8 , Promote);
				186	// X86 wants to expand cmov itself.
				187	setOperationAction(ISD::SELECT , MVT::i16 , Custom);
				188	setOperationAction(ISD::SELECT , MVT::i32 , Custom);
				189	setOperationAction(ISD::SELECT , MVT::f32 , Custom);
				190	setOperationAction(ISD::SELECT , MVT::f64 , Custom);
				191	setOperationAction(ISD::SETCC , MVT::i8 , Custom);
				192	setOperationAction(ISD::SETCC , MVT::i16 , Custom);
				193	setOperationAction(ISD::SETCC , MVT::i32 , Custom);
				194	setOperationAction(ISD::SETCC , MVT::f32 , Custom);
				195	setOperationAction(ISD::SETCC , MVT::f64 , Custom);
				196	if (Subtarget->is64Bit()) {
				197	setOperationAction(ISD::SELECT , MVT::i64 , Custom);
				198	setOperationAction(ISD::SETCC , MVT::i64 , Custom);
				199	}
				200	// X86 ret instruction may pop stack.
				201	setOperationAction(ISD::RET , MVT::Other, Custom);
				202	if (!Subtarget->is64Bit())
				203	setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
				204
				205	// Darwin ABI issue.
				206	setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
				207	setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
				208	setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
				209	setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
				210	setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
				211	if (Subtarget->is64Bit()) {
				212	setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
				213	setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
				214	setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
				215	setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
				216	}
				217	// 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
				218	setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
				219	setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
				220	setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
				221	// X86 wants to expand memset / memcpy itself.
				222	setOperationAction(ISD::MEMSET , MVT::Other, Custom);
				223	setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
				224
				225	// We don't have line number support yet.
				226	setOperationAction(ISD::LOCATION, MVT::Other, Expand);
				227	setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
				228	// FIXME - use subtarget debug flags
				229	if (!Subtarget->isTargetDarwin() &&
				230	!Subtarget->isTargetELF() &&
				231	!Subtarget->isTargetCygMing())
				232	setOperationAction(ISD::LABEL, MVT::Other, Expand);
				233
					// Exception address/selector nodes are expanded for both pointer
					// widths; the registers that carry them depend on 64-bit mode.
				234	setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
				235	setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
				236	setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
				237	setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
				238	if (Subtarget->is64Bit()) {
				239	// FIXME: Verify
				240	setExceptionPointerRegister(X86::RAX);
				241	setExceptionSelectorRegister(X86::RDX);
				242	} else {
				243	setExceptionPointerRegister(X86::EAX);
				244	setExceptionSelectorRegister(X86::EDX);
				245	}
				246
				247	// VASTART needs to be custom lowered to use the VarArgsFrameIndex
				248	setOperationAction(ISD::VASTART , MVT::Other, Custom);
				249	setOperationAction(ISD::VAARG , MVT::Other, Expand);
				250	setOperationAction(ISD::VAEND , MVT::Other, Expand);
				251	if (Subtarget->is64Bit())
				252	setOperationAction(ISD::VACOPY , MVT::Other, Custom);
				253	else
				254	setOperationAction(ISD::VACOPY , MVT::Other, Expand);
				255
				256	setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
				257	setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
				258	if (Subtarget->is64Bit())
				259	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
				260	if (Subtarget->isTargetCygMing())
				261	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
				262	else
				263	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
				264
				265	if (X86ScalarSSE) {
				266	// Set up the FP register classes.
				267	addRegisterClass(MVT::f32, X86::FR32RegisterClass);
				268	addRegisterClass(MVT::f64, X86::FR64RegisterClass);
				269
				270	// Use ANDPD to simulate FABS.
				271	setOperationAction(ISD::FABS , MVT::f64, Custom);
				272	setOperationAction(ISD::FABS , MVT::f32, Custom);
				273
				274	// Use XORP to simulate FNEG.
				275	setOperationAction(ISD::FNEG , MVT::f64, Custom);
				276	setOperationAction(ISD::FNEG , MVT::f32, Custom);
				277
				278	// Use ANDPD and ORPD to simulate FCOPYSIGN.
				279	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
				280	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
				281
				282	// We don't support sin/cos/fmod
				283	setOperationAction(ISD::FSIN , MVT::f64, Expand);
				284	setOperationAction(ISD::FCOS , MVT::f64, Expand);
					// NOTE(review): FREM/f64 was already set to Expand unconditionally
					// earlier in this constructor; this repeat looks redundant.
				285	setOperationAction(ISD::FREM , MVT::f64, Expand);
				286	setOperationAction(ISD::FSIN , MVT::f32, Expand);
				287	setOperationAction(ISD::FCOS , MVT::f32, Expand);
				288	setOperationAction(ISD::FREM , MVT::f32, Expand);
				289
				290	// Expand FP immediates into loads from the stack, except for the special
				291	// cases we handle.
				292	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				293	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
				294	addLegalFPImmediate(+0.0); // xorps / xorpd
				295	} else {
				296	// Set up the FP register classes.
				297	addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
				298	addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
				299
				300	setOperationAction(ISD::UNDEF, MVT::f64, Expand);
				301	setOperationAction(ISD::UNDEF, MVT::f32, Expand);
				302	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
				303	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
				304	setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
				305
					// FSIN/FCOS on f64 are only expanded when unsafe FP math is off.
				306	if (!UnsafeFPMath) {
				307	setOperationAction(ISD::FSIN , MVT::f64 , Expand);
				308	setOperationAction(ISD::FCOS , MVT::f64 , Expand);
				309	}
				310
				311	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				312	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
				313	addLegalFPImmediate(+0.0); // FLD0
				314	addLegalFPImmediate(+1.0); // FLD1
				315	addLegalFPImmediate(-0.0); // FLD0/FCHS
				316	addLegalFPImmediate(-1.0); // FLD1/FCHS
				317	}
				318
				319	// First set operation action for all vector types to expand. Then we
				320	// will selectively turn on ones that can be effectively codegen'd.
				321	for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
				322	VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
				323	setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
				324	setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
				325	setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
				326	setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
				327	setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
				328	setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
				329	setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
				330	setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
				331	setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
				332	setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
				333	setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
				334	setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
				335	setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
				336	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
				337	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				338	setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				339	setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
				340	setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
				341	setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
				342	setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
				343	setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
				344	setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
				345	setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
				346	}
				347
					// MMX: all four 64-bit vector types share the VR64 register class.
				348	if (Subtarget->hasMMX()) {
				349	addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
				350	addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
				351	addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
				352	addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
				353
				354	// FIXME: add MMX packed arithmetics
				355
				356	setOperationAction(ISD::ADD, MVT::v8i8, Legal);
				357	setOperationAction(ISD::ADD, MVT::v4i16, Legal);
				358	setOperationAction(ISD::ADD, MVT::v2i32, Legal);
				359	setOperationAction(ISD::ADD, MVT::v1i64, Legal);
				360
				361	setOperationAction(ISD::SUB, MVT::v8i8, Legal);
				362	setOperationAction(ISD::SUB, MVT::v4i16, Legal);
				363	setOperationAction(ISD::SUB, MVT::v2i32, Legal);
				364
				365	setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
				366	setOperationAction(ISD::MUL, MVT::v4i16, Legal);
				367
					// AND/OR/XOR/LOAD on the narrower-element MMX types are promoted to
					// v1i64, where the operation is Legal.
				368	setOperationAction(ISD::AND, MVT::v8i8, Promote);
				369	AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
				370	setOperationAction(ISD::AND, MVT::v4i16, Promote);
				371	AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
				372	setOperationAction(ISD::AND, MVT::v2i32, Promote);
				373	AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
				374	setOperationAction(ISD::AND, MVT::v1i64, Legal);
				375
				376	setOperationAction(ISD::OR, MVT::v8i8, Promote);
				377	AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
				378	setOperationAction(ISD::OR, MVT::v4i16, Promote);
				379	AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
				380	setOperationAction(ISD::OR, MVT::v2i32, Promote);
				381	AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
				382	setOperationAction(ISD::OR, MVT::v1i64, Legal);
				383
				384	setOperationAction(ISD::XOR, MVT::v8i8, Promote);
				385	AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
				386	setOperationAction(ISD::XOR, MVT::v4i16, Promote);
				387	AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
				388	setOperationAction(ISD::XOR, MVT::v2i32, Promote);
				389	AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
				390	setOperationAction(ISD::XOR, MVT::v1i64, Legal);
				391
				392	setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
				393	AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
				394	setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
				395	AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
				396	setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
				397	AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
				398	setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
				399
				400	setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
				401	setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
				402	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
				403	setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
				404
				405	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
				406	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
				407	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
				408	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
				409
				410	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
				411	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
				412	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
				413	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
				414	}
				415
					// SSE1: v4f32 lives in the VR128 register class.
				416	if (Subtarget->hasSSE1()) {
				417	addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
				418
				419	setOperationAction(ISD::FADD, MVT::v4f32, Legal);
				420	setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
				421	setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
				422	setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
				423	setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
				424	setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	425	setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
				426	setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
				427	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
				428	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
				429	setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
				430	}
				431
					// SSE2: the remaining 128-bit vector types also use VR128.
				432	if (Subtarget->hasSSE2()) {
				433	addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
				434	addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
				435	addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
				436	addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
				437	addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
				438
				439	setOperationAction(ISD::ADD, MVT::v16i8, Legal);
				440	setOperationAction(ISD::ADD, MVT::v8i16, Legal);
				441	setOperationAction(ISD::ADD, MVT::v4i32, Legal);
				442	setOperationAction(ISD::ADD, MVT::v2i64, Legal);
				443	setOperationAction(ISD::SUB, MVT::v16i8, Legal);
				444	setOperationAction(ISD::SUB, MVT::v8i16, Legal);
				445	setOperationAction(ISD::SUB, MVT::v4i32, Legal);
				446	setOperationAction(ISD::SUB, MVT::v2i64, Legal);
				447	setOperationAction(ISD::MUL, MVT::v8i16, Legal);
				448	setOperationAction(ISD::FADD, MVT::v2f64, Legal);
				449	setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
				450	setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
				451	setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
				452	setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
				453	setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	454
				455	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
				456	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
				457	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
				458	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
				459	// Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
				460	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
				461
				462	// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
					// NOTE(review): this walks every value type from v16i8 up to, but not
					// including, v2i64, so it depends on the MVT enum ordering; presumably
					// only v8i16 and v4i32 lie in between — confirm against the enum.
					// v2i64 and v2f64 are handled explicitly after the loop.
				463	for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
				464	setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
				465	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
				466	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
				467	}
				468	setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
				469	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
				470	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
				471	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
				472	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
				473	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
				474
				475	// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
				476	for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
				477	setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
				478	AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
				479	setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
				480	AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
				481	setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
				482	AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
				483	setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
				484	AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
				485	setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
				486	AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
				487	}
				488
				489	// v2f64 and v2i64 loads are legal; custom lower v2i64 and v2f64 selects.
				490	setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
				491	setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
				492	setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
				493	setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
				494	}
				495
				496	// We want to custom lower some of our intrinsics.
				497	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
				498
				499	// We have target-specific dag combine patterns for the following nodes:
				500	setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
				501	setTargetDAGCombine(ISD::SELECT);
				502
					// Called only after every register class above has been added.
				503	computeRegisterProperties();
				504
				505	// FIXME: These should be based on subtarget info. Plus, the values should
				506	// be smaller when we are in optimizing for size mode.
				507	maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
				508	maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
				509	maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
				510	allowUnalignedMemoryAccesses = true; // x86 supports it!
				511	}
				512
				513
				514	//===----------------------------------------------------------------------===//
				515	// Return Value Calling Convention Implementation
				516	//===----------------------------------------------------------------------===//
				517
				518	#include "X86GenCallingConv.inc"
				519
				520	/// LowerRET - Lower an ISD::RET node.
				521	SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
				522	assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
				523
				524	SmallVector<CCValAssign, 16> RVLocs;
				525	unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
				526	bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
				527	CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
				528	CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
				529
				530
				531	// If this is the first return lowered for this function, add the regs to the
				532	// liveout set for the function.
				533	if (DAG.getMachineFunction().liveout_empty()) {
				534	for (unsigned i = 0; i != RVLocs.size(); ++i)
				535	if (RVLocs[i].isRegLoc())
				536	DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
				537	}
				538
				539	SDOperand Chain = Op.getOperand(0);
				540	SDOperand Flag;
				541
				542	// Copy the result values into the output registers.
				543	if (RVLocs.size() != 1 \|\| !RVLocs[0].isRegLoc() \|\|
				544	RVLocs[0].getLocReg() != X86::ST0) {
				545	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				546	CCValAssign &VA = RVLocs[i];
				547	assert(VA.isRegLoc() && "Can only return in registers!");
				548	Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
				549	Flag);
				550	Flag = Chain.getValue(1);
				551	}
				552	} else {
				553	// We need to handle a destination of ST0 specially, because it isn't really
				554	// a register.
				555	SDOperand Value = Op.getOperand(1);
				556
				557	// If this is an FP return with ScalarSSE, we need to move the value from
				558	// an XMM register onto the fp-stack.
				559	if (X86ScalarSSE) {
				560	SDOperand MemLoc;
				561
				562	// If this is a load into a scalarsse value, don't store the loaded value
				563	// back to the stack, only to reload it: just replace the scalar-sse load.
				564	if (ISD::isNON_EXTLoad(Value.Val) &&
				565	(Chain == Value.getValue(1) \|\| Chain == Value.getOperand(0))) {
				566	Chain = Value.getOperand(0);
				567	MemLoc = Value.getOperand(1);
				568	} else {
				569	// Spill the value to memory and reload it into top of stack.
				570	unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
				571	MachineFunction &MF = DAG.getMachineFunction();
				572	int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
				573	MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
				574	Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
				575	}
				576	SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
				577	SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
				578	Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
				579	Chain = Value.getValue(1);
				580	}
				581
				582	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				583	SDOperand Ops[] = { Chain, Value };
				584	Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
				585	Flag = Chain.getValue(1);
				586	}
				587
				588	SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
				589	if (Flag.Val)
				590	return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
				591	else
				592	return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
				593	}
				594
				595
				596	/// LowerCallResult - Lower the result values of an ISD::CALL into the
				597	/// appropriate copies out of appropriate physical registers. This assumes that
				598	/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
				599	/// being lowered. The returns a SDNode with the same number of values as the
				600	/// ISD::CALL.
				601	SDNode *X86TargetLowering::
				602	LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
				603	unsigned CallingConv, SelectionDAG &DAG) {
				604
				605	// Assign locations to each value returned by this call.
				606	SmallVector<CCValAssign, 16> RVLocs;
				607	bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
				608	CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
				609	CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
				610
				611
				612	SmallVector<SDOperand, 8> ResultVals;
				613
				614	// Copy all of the result registers out of their specified physreg.
				615	if (RVLocs.size() != 1 \|\| RVLocs[0].getLocReg() != X86::ST0) {
				616	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				617	Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
				618	RVLocs[i].getValVT(), InFlag).getValue(1);
				619	InFlag = Chain.getValue(2);
				620	ResultVals.push_back(Chain.getValue(0));
				621	}
				622	} else {
				623	// Copies from the FP stack are special, as ST0 isn't a valid register
				624	// before the fp stackifier runs.
				625
				626	// Copy ST0 into an RFP register with FP_GET_RESULT.
				627	SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
				628	SDOperand GROps[] = { Chain, InFlag };
				629	SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
				630	Chain = RetVal.getValue(1);
				631	InFlag = RetVal.getValue(2);
				632
				633	// If we are using ScalarSSE, store ST(0) to the stack and reload it into
				634	// an XMM register.
				635	if (X86ScalarSSE) {
				636	// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
				637	// shouldn't be necessary except that RFP cannot be live across
				638	// multiple blocks. When stackifier is fixed, they can be uncoupled.
				639	MachineFunction &MF = DAG.getMachineFunction();
				640	int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
				641	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				642	SDOperand Ops[] = {
				643	Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
				644	};
				645	Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
				646	RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
				647	Chain = RetVal.getValue(1);
				648	}
				649	ResultVals.push_back(RetVal);
				650	}
				651
				652	// Merge everything together with a MERGE_VALUES node.
				653	ResultVals.push_back(Chain);
				654	return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
				655	&ResultVals[0], ResultVals.size()).Val;
				656	}
				657
				658
				659	//===----------------------------------------------------------------------===//
				660	// C & StdCall Calling Convention implementation
				661	//===----------------------------------------------------------------------===//
// The StdCall calling convention is the standard for many Windows API
// routines. It differs from the C calling convention only slightly: the
// callee, not the caller, cleans up the stack. Symbols are also decorated
// in a special way :) It doesn't support any vector arguments.
				666
				667	/// AddLiveIn - This helper function adds the specified physical register to the
				668	/// MachineFunction as a live in value. It also creates a corresponding virtual
				669	/// register for it.
				670	static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
				671	const TargetRegisterClass *RC) {
				672	assert(RC->contains(PReg) && "Not the correct regclass!");
				673	unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
				674	MF.addLiveIn(PReg, VReg);
				675	return VReg;
				676	}
				677
				678	SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
				679	bool isStdCall) {
				680	unsigned NumArgs = Op.Val->getNumValues() - 1;
				681	MachineFunction &MF = DAG.getMachineFunction();
				682	MachineFrameInfo *MFI = MF.getFrameInfo();
				683	SDOperand Root = Op.getOperand(0);
				684	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				685
				686	// Assign locations to all of the incoming arguments.
				687	SmallVector<CCValAssign, 16> ArgLocs;
				688	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				689	getTargetMachine(), ArgLocs);
				690	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
				691
				692	SmallVector<SDOperand, 8> ArgValues;
				693	unsigned LastVal = ~0U;
				694	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				695	CCValAssign &VA = ArgLocs[i];
				696	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				697	// places.
				698	assert(VA.getValNo() != LastVal &&
				699	"Don't support value assigned to multiple locs yet");
				700	LastVal = VA.getValNo();
				701
				702	if (VA.isRegLoc()) {
				703	MVT::ValueType RegVT = VA.getLocVT();
				704	TargetRegisterClass *RC;
				705	if (RegVT == MVT::i32)
				706	RC = X86::GR32RegisterClass;
				707	else {
				708	assert(MVT::isVector(RegVT));
				709	RC = X86::VR128RegisterClass;
				710	}
				711
				712	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				713	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				714
				715	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				716	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				717	// right size.
				718	if (VA.getLocInfo() == CCValAssign::SExt)
				719	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				720	DAG.getValueType(VA.getValVT()));
				721	else if (VA.getLocInfo() == CCValAssign::ZExt)
				722	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				723	DAG.getValueType(VA.getValVT()));
				724
				725	if (VA.getLocInfo() != CCValAssign::Full)
				726	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				727
				728	ArgValues.push_back(ArgValue);
				729	} else {
				730	assert(VA.isMemLoc());
				731
				732	// Create the nodes corresponding to a load from this parameter slot.
				733	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				734	VA.getLocMemOffset());
				735	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
				736	ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
				737	}
				738	}
				739
				740	unsigned StackSize = CCInfo.getNextStackOffset();
				741
				742	ArgValues.push_back(Root);
				743
				744	// If the function takes variable number of arguments, make a frame index for
				745	// the start of the first vararg value... for expansion of llvm.va_start.
				746	if (isVarArg)
				747	VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
				748
				749	if (isStdCall && !isVarArg) {
				750	BytesToPopOnReturn = StackSize; // Callee pops everything..
				751	BytesCallerReserves = 0;
				752	} else {
				753	BytesToPopOnReturn = 0; // Callee pops nothing.
				754
				755	// If this is an sret function, the return should pop the hidden pointer.
				756	if (NumArgs &&
				757	(cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
				758	ISD::ParamFlags::StructReturn))
				759	BytesToPopOnReturn = 4;
				760
				761	BytesCallerReserves = StackSize;
				762	}
				763
				764	RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
				765	ReturnAddrIndex = 0; // No return address slot generated yet.
				766
				767	MF.getInfo<X86MachineFunctionInfo>()
				768	->setBytesToPopOnReturn(BytesToPopOnReturn);
				769
				770	// Return the new list of results.
				771	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				772	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				773	}
				774
				775	SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
				776	unsigned CC) {
				777	SDOperand Chain = Op.getOperand(0);
				778	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				779	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				780	SDOperand Callee = Op.getOperand(4);
				781	unsigned NumOps = (Op.getNumOperands() - 5) / 2;
				782
				783	// Analyze operands of the call, assigning locations to each operand.
				784	SmallVector<CCValAssign, 16> ArgLocs;
				785	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				786	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
				787
				788	// Get a count of how many bytes are to be pushed on the stack.
				789	unsigned NumBytes = CCInfo.getNextStackOffset();
				790
				791	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				792
				793	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				794	SmallVector<SDOperand, 8> MemOpChains;
				795
				796	SDOperand StackPtr;
				797
				798	// Walk the register/memloc assignments, inserting copies/loads.
				799	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				800	CCValAssign &VA = ArgLocs[i];
				801	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				802
				803	// Promote the value if needed.
				804	switch (VA.getLocInfo()) {
				805	default: assert(0 && "Unknown loc info!");
				806	case CCValAssign::Full: break;
				807	case CCValAssign::SExt:
				808	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				809	break;
				810	case CCValAssign::ZExt:
				811	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				812	break;
				813	case CCValAssign::AExt:
				814	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				815	break;
				816	}
				817
				818	if (VA.isRegLoc()) {
				819	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				820	} else {
				821	assert(VA.isMemLoc());
				822	if (StackPtr.Val == 0)
				823	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
				824	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				825	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				826	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				827	}
				828	}
				829
				830	// If the first argument is an sret pointer, remember it.
				831	bool isSRet = NumOps &&
				832	(cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
				833	ISD::ParamFlags::StructReturn);
				834
				835	if (!MemOpChains.empty())
				836	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				837	&MemOpChains[0], MemOpChains.size());
				838
				839	// Build a sequence of copy-to-reg nodes chained together with token chain
				840	// and flag operands which copy the outgoing args into registers.
				841	SDOperand InFlag;
				842	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				843	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				844	InFlag);
				845	InFlag = Chain.getValue(1);
				846	}
				847
				848	// ELF / PIC requires GOT in the EBX register before function calls via PLT
				849	// GOT pointer.
				850	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				851	Subtarget->isPICStyleGOT()) {
				852	Chain = DAG.getCopyToReg(Chain, X86::EBX,
				853	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				854	InFlag);
				855	InFlag = Chain.getValue(1);
				856	}
				857
				858	// If the callee is a GlobalAddress node (quite common, every direct call is)
				859	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				860	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				861	// We should use extra load for direct calls to dllimported functions in
				862	// non-JIT mode.
				863	if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				864	getTargetMachine(), true))
				865	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				866	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				867	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				868
				869	// Returns a chain & a flag for retval copy to use.
				870	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				871	SmallVector<SDOperand, 8> Ops;
				872	Ops.push_back(Chain);
				873	Ops.push_back(Callee);
				874
				875	// Add argument registers to the end of the list so that they are known live
				876	// into the call.
				877	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				878	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				879	RegsToPass[i].second.getValueType()));
				880
				881	// Add an implicit use GOT pointer in EBX.
				882	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				883	Subtarget->isPICStyleGOT())
				884	Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
				885
				886	if (InFlag.Val)
				887	Ops.push_back(InFlag);
				888
				889	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				890	NodeTys, &Ops[0], Ops.size());
				891	InFlag = Chain.getValue(1);
				892
				893	// Create the CALLSEQ_END node.
				894	unsigned NumBytesForCalleeToPush = 0;
				895
				896	if (CC == CallingConv::X86_StdCall) {
				897	if (isVarArg)
				898	NumBytesForCalleeToPush = isSRet ? 4 : 0;
				899	else
				900	NumBytesForCalleeToPush = NumBytes;
				901	} else {
				902	// If this is is a call to a struct-return function, the callee
				903	// pops the hidden struct pointer, so we have to push it back.
				904	// This is common for Darwin/X86, Linux & Mingw32 targets.
				905	NumBytesForCalleeToPush = isSRet ? 4 : 0;
				906	}
				907
				908	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				909	Ops.clear();
				910	Ops.push_back(Chain);
				911	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				912	Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
				913	Ops.push_back(InFlag);
				914	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				915	InFlag = Chain.getValue(1);
				916
				917	// Handle result values, copying them out of physregs into vregs that we
				918	// return.
				919	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				920	}
				921
				922
				923	//===----------------------------------------------------------------------===//
				924	// FastCall Calling Convention implementation
				925	//===----------------------------------------------------------------------===//
				926	//
				927	// The X86 'fastcall' calling convention passes up to two integer arguments in
				928	// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
				929	// and requires that the callee pop its arguments off the stack (allowing proper
				930	// tail calls), and has the same return value conventions as C calling convs.
				931	//
// Except on Cygwin/MinGW and Windows targets, this calling convention arranges
// for the callee pop value to be 8n+4 bytes, which is needed for tail
// recursion elimination and stack alignment reasons.
				935	SDOperand
				936	X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
				937	MachineFunction &MF = DAG.getMachineFunction();
				938	MachineFrameInfo *MFI = MF.getFrameInfo();
				939	SDOperand Root = Op.getOperand(0);
				940	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				941
				942	// Assign locations to all of the incoming arguments.
				943	SmallVector<CCValAssign, 16> ArgLocs;
				944	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				945	getTargetMachine(), ArgLocs);
				946	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);
				947
				948	SmallVector<SDOperand, 8> ArgValues;
				949	unsigned LastVal = ~0U;
				950	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				951	CCValAssign &VA = ArgLocs[i];
				952	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				953	// places.
				954	assert(VA.getValNo() != LastVal &&
				955	"Don't support value assigned to multiple locs yet");
				956	LastVal = VA.getValNo();
				957
				958	if (VA.isRegLoc()) {
				959	MVT::ValueType RegVT = VA.getLocVT();
				960	TargetRegisterClass *RC;
				961	if (RegVT == MVT::i32)
				962	RC = X86::GR32RegisterClass;
				963	else {
				964	assert(MVT::isVector(RegVT));
				965	RC = X86::VR128RegisterClass;
				966	}
				967
				968	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				969	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				970
				971	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				972	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				973	// right size.
				974	if (VA.getLocInfo() == CCValAssign::SExt)
				975	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				976	DAG.getValueType(VA.getValVT()));
				977	else if (VA.getLocInfo() == CCValAssign::ZExt)
				978	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				979	DAG.getValueType(VA.getValVT()));
				980
				981	if (VA.getLocInfo() != CCValAssign::Full)
				982	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				983
				984	ArgValues.push_back(ArgValue);
				985	} else {
				986	assert(VA.isMemLoc());
				987
				988	// Create the nodes corresponding to a load from this parameter slot.
				989	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				990	VA.getLocMemOffset());
				991	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
				992	ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
				993	}
				994	}
				995
				996	ArgValues.push_back(Root);
				997
				998	unsigned StackSize = CCInfo.getNextStackOffset();
				999
				1000	if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
				1001	// Make sure the instruction takes 8n+4 bytes to make sure the start of the
				1002	// arguments and the arguments after the retaddr has been pushed are aligned.
				1003	if ((StackSize & 7) == 0)
				1004	StackSize += 4;
				1005	}
				1006
				1007	VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
				1008	RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
				1009	ReturnAddrIndex = 0; // No return address slot generated yet.
				1010	BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
				1011	BytesCallerReserves = 0;
				1012
				1013	MF.getInfo<X86MachineFunctionInfo>()
				1014	->setBytesToPopOnReturn(BytesToPopOnReturn);
				1015
				1016	// Return the new list of results.
				1017	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				1018	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				1019	}
				1020
				1021	SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
				1022	unsigned CC) {
				1023	SDOperand Chain = Op.getOperand(0);
				1024	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1025	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1026	SDOperand Callee = Op.getOperand(4);
				1027
				1028	// Analyze operands of the call, assigning locations to each operand.
				1029	SmallVector<CCValAssign, 16> ArgLocs;
				1030	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				1031	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);
				1032
				1033	// Get a count of how many bytes are to be pushed on the stack.
				1034	unsigned NumBytes = CCInfo.getNextStackOffset();
				1035
				1036	if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
				1037	// Make sure the instruction takes 8n+4 bytes to make sure the start of the
				1038	// arguments and the arguments after the retaddr has been pushed are aligned.
				1039	if ((NumBytes & 7) == 0)
				1040	NumBytes += 4;
				1041	}
				1042
				1043	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				1044
				1045	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				1046	SmallVector<SDOperand, 8> MemOpChains;
				1047
				1048	SDOperand StackPtr;
				1049
				1050	// Walk the register/memloc assignments, inserting copies/loads.
				1051	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1052	CCValAssign &VA = ArgLocs[i];
				1053	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				1054
				1055	// Promote the value if needed.
				1056	switch (VA.getLocInfo()) {
				1057	default: assert(0 && "Unknown loc info!");
				1058	case CCValAssign::Full: break;
				1059	case CCValAssign::SExt:
				1060	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				1061	break;
				1062	case CCValAssign::ZExt:
				1063	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				1064	break;
				1065	case CCValAssign::AExt:
				1066	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				1067	break;
				1068	}
				1069
				1070	if (VA.isRegLoc()) {
				1071	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1072	} else {
				1073	assert(VA.isMemLoc());
				1074	if (StackPtr.Val == 0)
				1075	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
				1076	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				1077	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				1078	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1079	}
				1080	}
				1081
				1082	if (!MemOpChains.empty())
				1083	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1084	&MemOpChains[0], MemOpChains.size());
				1085
				1086	// Build a sequence of copy-to-reg nodes chained together with token chain
				1087	// and flag operands which copy the outgoing args into registers.
				1088	SDOperand InFlag;
				1089	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1090	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1091	InFlag);
				1092	InFlag = Chain.getValue(1);
				1093	}
				1094
				1095	// If the callee is a GlobalAddress node (quite common, every direct call is)
				1096	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				1097	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1098	// We should use extra load for direct calls to dllimported functions in
				1099	// non-JIT mode.
				1100	if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				1101	getTargetMachine(), true))
				1102	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				1103	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1104	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				1105
				1106	// ELF / PIC requires GOT in the EBX register before function calls via PLT
				1107	// GOT pointer.
				1108	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				1109	Subtarget->isPICStyleGOT()) {
				1110	Chain = DAG.getCopyToReg(Chain, X86::EBX,
				1111	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				1112	InFlag);
				1113	InFlag = Chain.getValue(1);
				1114	}
				1115
				1116	// Returns a chain & a flag for retval copy to use.
				1117	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1118	SmallVector<SDOperand, 8> Ops;
				1119	Ops.push_back(Chain);
				1120	Ops.push_back(Callee);
				1121
				1122	// Add argument registers to the end of the list so that they are known live
				1123	// into the call.
				1124	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1125	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1126	RegsToPass[i].second.getValueType()));
				1127
				1128	// Add an implicit use GOT pointer in EBX.
				1129	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				1130	Subtarget->isPICStyleGOT())
				1131	Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
				1132
				1133	if (InFlag.Val)
				1134	Ops.push_back(InFlag);
				1135
				1136	// FIXME: Do not generate X86ISD::TAILCALL for now.
				1137	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				1138	NodeTys, &Ops[0], Ops.size());
				1139	InFlag = Chain.getValue(1);
				1140
				1141	// Returns a flag for retval copy to use.
				1142	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1143	Ops.clear();
				1144	Ops.push_back(Chain);
				1145	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1146	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1147	Ops.push_back(InFlag);
				1148	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1149	InFlag = Chain.getValue(1);
				1150
				1151	// Handle result values, copying them out of physregs into vregs that we
				1152	// return.
				1153	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1154	}
				1155
				1156
				1157	//===----------------------------------------------------------------------===//
				1158	// X86-64 C Calling Convention implementation
				1159	//===----------------------------------------------------------------------===//
				1160
				1161	SDOperand
				1162	X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
				1163	MachineFunction &MF = DAG.getMachineFunction();
				1164	MachineFrameInfo *MFI = MF.getFrameInfo();
				1165	SDOperand Root = Op.getOperand(0);
				1166	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1167
				1168	static const unsigned GPR64ArgRegs[] = {
				1169	X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
				1170	};
				1171	static const unsigned XMMArgRegs[] = {
				1172	X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
				1173	X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
				1174	};
				1175
				1176
				1177	// Assign locations to all of the incoming arguments.
				1178	SmallVector<CCValAssign, 16> ArgLocs;
				1179	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				1180	getTargetMachine(), ArgLocs);
				1181	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
				1182
				1183	SmallVector<SDOperand, 8> ArgValues;
				1184	unsigned LastVal = ~0U;
				1185	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1186	CCValAssign &VA = ArgLocs[i];
				1187	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				1188	// places.
				1189	assert(VA.getValNo() != LastVal &&
				1190	"Don't support value assigned to multiple locs yet");
				1191	LastVal = VA.getValNo();
				1192
				1193	if (VA.isRegLoc()) {
				1194	MVT::ValueType RegVT = VA.getLocVT();
				1195	TargetRegisterClass *RC;
				1196	if (RegVT == MVT::i32)
				1197	RC = X86::GR32RegisterClass;
				1198	else if (RegVT == MVT::i64)
				1199	RC = X86::GR64RegisterClass;
				1200	else if (RegVT == MVT::f32)
				1201	RC = X86::FR32RegisterClass;
				1202	else if (RegVT == MVT::f64)
				1203	RC = X86::FR64RegisterClass;
				1204	else {
				1205	assert(MVT::isVector(RegVT));
				1206	if (MVT::getSizeInBits(RegVT) == 64) {
				1207	RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
				1208	RegVT = MVT::i64;
				1209	} else
				1210	RC = X86::VR128RegisterClass;
				1211	}
				1212
				1213	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				1214	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				1215
				1216	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				1217	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				1218	// right size.
				1219	if (VA.getLocInfo() == CCValAssign::SExt)
				1220	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				1221	DAG.getValueType(VA.getValVT()));
				1222	else if (VA.getLocInfo() == CCValAssign::ZExt)
				1223	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				1224	DAG.getValueType(VA.getValVT()));
				1225
				1226	if (VA.getLocInfo() != CCValAssign::Full)
				1227	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				1228
				1229	// Handle MMX values passed in GPRs.
				1230	if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
				1231	MVT::getSizeInBits(RegVT) == 64)
				1232	ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
				1233
				1234	ArgValues.push_back(ArgValue);
				1235	} else {
				1236	assert(VA.isMemLoc());
				1237
				1238	// Create the nodes corresponding to a load from this parameter slot.
				1239	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				1240	VA.getLocMemOffset());
				1241	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
				1242	ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
				1243	}
				1244	}
				1245
				1246	unsigned StackSize = CCInfo.getNextStackOffset();
				1247
				1248	// If the function takes variable number of arguments, make a frame index for
				1249	// the start of the first vararg value... for expansion of llvm.va_start.
				1250	if (isVarArg) {
				1251	unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
				1252	unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
				1253
				1254	// For X86-64, if there are vararg parameters that are passed via
				1255	// registers, then we must store them to their spots on the stack so they
				1256	// may be loaded by deferencing the result of va_next.
				1257	VarArgsGPOffset = NumIntRegs * 8;
				1258	VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
				1259	VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
				1260	RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
				1261
				1262	// Store the integer parameter registers.
				1263	SmallVector<SDOperand, 8> MemOps;
				1264	SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
				1265	SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
				1266	DAG.getConstant(VarArgsGPOffset, getPointerTy()));
				1267	for (; NumIntRegs != 6; ++NumIntRegs) {
				1268	unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
				1269	X86::GR64RegisterClass);
				1270	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
				1271	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1272	MemOps.push_back(Store);
				1273	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				1274	DAG.getConstant(8, getPointerTy()));
				1275	}
				1276
				1277	// Now store the XMM (fp + vector) parameter registers.
				1278	FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
				1279	DAG.getConstant(VarArgsFPOffset, getPointerTy()));
				1280	for (; NumXMMRegs != 8; ++NumXMMRegs) {
				1281	unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
				1282	X86::VR128RegisterClass);
				1283	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
				1284	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1285	MemOps.push_back(Store);
				1286	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				1287	DAG.getConstant(16, getPointerTy()));
				1288	}
				1289	if (!MemOps.empty())
				1290	Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1291	&MemOps[0], MemOps.size());
				1292	}
				1293
				1294	ArgValues.push_back(Root);
				1295
				1296	ReturnAddrIndex = 0; // No return address slot generated yet.
				1297	BytesToPopOnReturn = 0; // Callee pops nothing.
				1298	BytesCallerReserves = StackSize;
				1299
				1300	// Return the new list of results.
				1301	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				1302	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				1303	}
				1304
				1305	SDOperand
				1306	X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
				1307	unsigned CC) {
				1308	SDOperand Chain = Op.getOperand(0);
				1309	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1310	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1311	SDOperand Callee = Op.getOperand(4);
				1312
				1313	// Analyze operands of the call, assigning locations to each operand.
				1314	SmallVector<CCValAssign, 16> ArgLocs;
				1315	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				1316	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
				1317
				1318	// Get a count of how many bytes are to be pushed on the stack.
				1319	unsigned NumBytes = CCInfo.getNextStackOffset();
				1320	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				1321
				1322	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				1323	SmallVector<SDOperand, 8> MemOpChains;
				1324
				1325	SDOperand StackPtr;
				1326
				1327	// Walk the register/memloc assignments, inserting copies/loads.
				1328	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1329	CCValAssign &VA = ArgLocs[i];
				1330	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				1331
				1332	// Promote the value if needed.
				1333	switch (VA.getLocInfo()) {
				1334	default: assert(0 && "Unknown loc info!");
				1335	case CCValAssign::Full: break;
				1336	case CCValAssign::SExt:
				1337	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				1338	break;
				1339	case CCValAssign::ZExt:
				1340	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				1341	break;
				1342	case CCValAssign::AExt:
				1343	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				1344	break;
				1345	}
				1346
				1347	if (VA.isRegLoc()) {
				1348	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1349	} else {
				1350	assert(VA.isMemLoc());
				1351	if (StackPtr.Val == 0)
				1352	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
				1353	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				1354	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				1355	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1356	}
				1357	}
				1358
				1359	if (!MemOpChains.empty())
				1360	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1361	&MemOpChains[0], MemOpChains.size());
				1362
				1363	// Build a sequence of copy-to-reg nodes chained together with token chain
				1364	// and flag operands which copy the outgoing args into registers.
				1365	SDOperand InFlag;
				1366	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1367	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1368	InFlag);
				1369	InFlag = Chain.getValue(1);
				1370	}
				1371
				1372	if (isVarArg) {
				1373	// From AMD64 ABI document:
				1374	// For calls that may call functions that use varargs or stdargs
				1375	// (prototype-less calls or calls to functions containing ellipsis (...) in
				1376	// the declaration) %al is used as hidden argument to specify the number
				1377	// of SSE registers used. The contents of %al do not need to match exactly
				1378	// the number of registers, but must be an ubound on the number of SSE
				1379	// registers used and is in the range 0 - 8 inclusive.
				1380
				1381	// Count the number of XMM registers allocated.
				1382	static const unsigned XMMArgRegs[] = {
				1383	X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
				1384	X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
				1385	};
				1386	unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
				1387
				1388	Chain = DAG.getCopyToReg(Chain, X86::AL,
				1389	DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
				1390	InFlag = Chain.getValue(1);
				1391	}
				1392
				1393	// If the callee is a GlobalAddress node (quite common, every direct call is)
				1394	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				1395	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1396	// We should use extra load for direct calls to dllimported functions in
				1397	// non-JIT mode.
				1398	if (getTargetMachine().getCodeModel() != CodeModel::Large
				1399	&& !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				1400	getTargetMachine(), true))
				1401	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				1402	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1403	if (getTargetMachine().getCodeModel() != CodeModel::Large)
				1404	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				1405
				1406	// Returns a chain & a flag for retval copy to use.
				1407	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1408	SmallVector<SDOperand, 8> Ops;
				1409	Ops.push_back(Chain);
				1410	Ops.push_back(Callee);
				1411
				1412	// Add argument registers to the end of the list so that they are known live
				1413	// into the call.
				1414	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1415	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1416	RegsToPass[i].second.getValueType()));
				1417
				1418	if (InFlag.Val)
				1419	Ops.push_back(InFlag);
				1420
				1421	// FIXME: Do not generate X86ISD::TAILCALL for now.
				1422	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				1423	NodeTys, &Ops[0], Ops.size());
				1424	InFlag = Chain.getValue(1);
				1425
				1426	// Returns a flag for retval copy to use.
				1427	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1428	Ops.clear();
				1429	Ops.push_back(Chain);
				1430	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1431	Ops.push_back(DAG.getConstant(0, getPointerTy()));
				1432	Ops.push_back(InFlag);
				1433	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1434	InFlag = Chain.getValue(1);
				1435
				1436	// Handle result values, copying them out of physregs into vregs that we
				1437	// return.
				1438	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1439	}
				1440
				1441
				1442	//===----------------------------------------------------------------------===//
				1443	// Other Lowering Hooks
				1444	//===----------------------------------------------------------------------===//
				1445
				1446
				1447	SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
				1448	if (ReturnAddrIndex == 0) {
				1449	// Set up a frame object for the return address.
				1450	MachineFunction &MF = DAG.getMachineFunction();
				1451	if (Subtarget->is64Bit())
				1452	ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
				1453	else
				1454	ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
				1455	}
				1456
				1457	return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
				1458	}
				1459
				1460
				1461
				1462	/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
				1463	/// specific condition code. It returns a false if it cannot do a direct
				1464	/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
				1465	/// needed.
				1466	static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
				1467	unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
				1468	SelectionDAG &DAG) {
				1469	X86CC = X86::COND_INVALID;
				1470	if (!isFP) {
				1471	if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
				1472	if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
				1473	// X > -1 -> X == 0, jump !sign.
				1474	RHS = DAG.getConstant(0, RHS.getValueType());
				1475	X86CC = X86::COND_NS;
				1476	return true;
				1477	} else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
				1478	// X < 0 -> X == 0, jump on sign.
				1479	X86CC = X86::COND_S;
				1480	return true;
				1481	}
				1482	}
				1483
				1484	switch (SetCCOpcode) {
				1485	default: break;
				1486	case ISD::SETEQ: X86CC = X86::COND_E; break;
				1487	case ISD::SETGT: X86CC = X86::COND_G; break;
				1488	case ISD::SETGE: X86CC = X86::COND_GE; break;
				1489	case ISD::SETLT: X86CC = X86::COND_L; break;
				1490	case ISD::SETLE: X86CC = X86::COND_LE; break;
				1491	case ISD::SETNE: X86CC = X86::COND_NE; break;
				1492	case ISD::SETULT: X86CC = X86::COND_B; break;
				1493	case ISD::SETUGT: X86CC = X86::COND_A; break;
				1494	case ISD::SETULE: X86CC = X86::COND_BE; break;
				1495	case ISD::SETUGE: X86CC = X86::COND_AE; break;
				1496	}
				1497	} else {
				1498	// On a floating point condition, the flags are set as follows:
				1499	// ZF PF CF op
				1500	// 0 \| 0 \| 0 \| X > Y
				1501	// 0 \| 0 \| 1 \| X < Y
				1502	// 1 \| 0 \| 0 \| X == Y
				1503	// 1 \| 1 \| 1 \| unordered
				1504	bool Flip = false;
				1505	switch (SetCCOpcode) {
				1506	default: break;
				1507	case ISD::SETUEQ:
				1508	case ISD::SETEQ: X86CC = X86::COND_E; break;
				1509	case ISD::SETOLT: Flip = true; // Fallthrough
				1510	case ISD::SETOGT:
				1511	case ISD::SETGT: X86CC = X86::COND_A; break;
				1512	case ISD::SETOLE: Flip = true; // Fallthrough
				1513	case ISD::SETOGE:
				1514	case ISD::SETGE: X86CC = X86::COND_AE; break;
				1515	case ISD::SETUGT: Flip = true; // Fallthrough
				1516	case ISD::SETULT:
				1517	case ISD::SETLT: X86CC = X86::COND_B; break;
				1518	case ISD::SETUGE: Flip = true; // Fallthrough
				1519	case ISD::SETULE:
				1520	case ISD::SETLE: X86CC = X86::COND_BE; break;
				1521	case ISD::SETONE:
				1522	case ISD::SETNE: X86CC = X86::COND_NE; break;
				1523	case ISD::SETUO: X86CC = X86::COND_P; break;
				1524	case ISD::SETO: X86CC = X86::COND_NP; break;
				1525	}
				1526	if (Flip)
				1527	std::swap(LHS, RHS);
				1528	}
				1529
				1530	return X86CC != X86::COND_INVALID;
				1531	}
				1532
				1533	/// hasFPCMov - is there a floating point cmov for the specific X86 condition
				1534	/// code. Current x86 isa includes the following FP cmov instructions:
				1535	/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
				1536	static bool hasFPCMov(unsigned X86CC) {
				1537	switch (X86CC) {
				1538	default:
				1539	return false;
				1540	case X86::COND_B:
				1541	case X86::COND_BE:
				1542	case X86::COND_E:
				1543	case X86::COND_P:
				1544	case X86::COND_A:
				1545	case X86::COND_AE:
				1546	case X86::COND_NE:
				1547	case X86::COND_NP:
				1548	return true;
				1549	}
				1550	}
				1551
				1552	/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
				1553	/// true if Op is undef or if its value falls within the specified range (L, H].
				1554	static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
				1555	if (Op.getOpcode() == ISD::UNDEF)
				1556	return true;
				1557
				1558	unsigned Val = cast<ConstantSDNode>(Op)->getValue();
				1559	return (Val >= Low && Val < Hi);
				1560	}
				1561
				1562	/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
				1563	/// true if Op is undef or if its value equal to the specified value.
				1564	static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
				1565	if (Op.getOpcode() == ISD::UNDEF)
				1566	return true;
				1567	return cast<ConstantSDNode>(Op)->getValue() == Val;
				1568	}
				1569
				1570	/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
				1571	/// specifies a shuffle of elements that is suitable for input to PSHUFD.
				1572	bool X86::isPSHUFDMask(SDNode *N) {
				1573	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1574
				1575	if (N->getNumOperands() != 4)
				1576	return false;
				1577
				1578	// Check if the value doesn't reference the second vector.
				1579	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
				1580	SDOperand Arg = N->getOperand(i);
				1581	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1582	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1583	if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
				1584	return false;
				1585	}
				1586
				1587	return true;
				1588	}
				1589
				1590	/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
				1591	/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
				1592	bool X86::isPSHUFHWMask(SDNode *N) {
				1593	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1594
				1595	if (N->getNumOperands() != 8)
				1596	return false;
				1597
				1598	// Lower quadword copied in order.
				1599	for (unsigned i = 0; i != 4; ++i) {
				1600	SDOperand Arg = N->getOperand(i);
				1601	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1602	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1603	if (cast<ConstantSDNode>(Arg)->getValue() != i)
				1604	return false;
				1605	}
				1606
				1607	// Upper quadword shuffled.
				1608	for (unsigned i = 4; i != 8; ++i) {
				1609	SDOperand Arg = N->getOperand(i);
				1610	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1611	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1612	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1613	if (Val < 4 \|\| Val > 7)
				1614	return false;
				1615	}
				1616
				1617	return true;
				1618	}
				1619
				1620	/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
				1621	/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
				1622	bool X86::isPSHUFLWMask(SDNode *N) {
				1623	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1624
				1625	if (N->getNumOperands() != 8)
				1626	return false;
				1627
				1628	// Upper quadword copied in order.
				1629	for (unsigned i = 4; i != 8; ++i)
				1630	if (!isUndefOrEqual(N->getOperand(i), i))
				1631	return false;
				1632
				1633	// Lower quadword shuffled.
				1634	for (unsigned i = 0; i != 4; ++i)
				1635	if (!isUndefOrInRange(N->getOperand(i), 0, 4))
				1636	return false;
				1637
				1638	return true;
				1639	}
				1640
				1641	/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
				1642	/// specifies a shuffle of elements that is suitable for input to SHUFP*.
				1643	static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
				1644	if (NumElems != 2 && NumElems != 4) return false;
				1645
				1646	unsigned Half = NumElems / 2;
				1647	for (unsigned i = 0; i < Half; ++i)
				1648	if (!isUndefOrInRange(Elems[i], 0, NumElems))
				1649	return false;
				1650	for (unsigned i = Half; i < NumElems; ++i)
				1651	if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
				1652	return false;
				1653
				1654	return true;
				1655	}
				1656
				1657	bool X86::isSHUFPMask(SDNode *N) {
				1658	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1659	return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
				1660	}
				1661
				1662	/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
				1663	/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
				1664	/// half elements to come from vector 1 (which would equal the dest.) and
				1665	/// the upper half to come from vector 2.
				1666	static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
				1667	if (NumOps != 2 && NumOps != 4) return false;
				1668
				1669	unsigned Half = NumOps / 2;
				1670	for (unsigned i = 0; i < Half; ++i)
				1671	if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
				1672	return false;
				1673	for (unsigned i = Half; i < NumOps; ++i)
				1674	if (!isUndefOrInRange(Ops[i], 0, NumOps))
				1675	return false;
				1676	return true;
				1677	}
				1678
				1679	static bool isCommutedSHUFP(SDNode *N) {
				1680	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1681	return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
				1682	}
				1683
				1684	/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
				1685	/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
				1686	bool X86::isMOVHLPSMask(SDNode *N) {
				1687	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1688
				1689	if (N->getNumOperands() != 4)
				1690	return false;
				1691
				1692	// Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
				1693	return isUndefOrEqual(N->getOperand(0), 6) &&
				1694	isUndefOrEqual(N->getOperand(1), 7) &&
				1695	isUndefOrEqual(N->getOperand(2), 2) &&
				1696	isUndefOrEqual(N->getOperand(3), 3);
				1697	}
				1698
				1699	/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
				1700	/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
				1701	/// <2, 3, 2, 3>
				1702	bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
				1703	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1704
				1705	if (N->getNumOperands() != 4)
				1706	return false;
				1707
				1708	// Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
				1709	return isUndefOrEqual(N->getOperand(0), 2) &&
				1710	isUndefOrEqual(N->getOperand(1), 3) &&
				1711	isUndefOrEqual(N->getOperand(2), 2) &&
				1712	isUndefOrEqual(N->getOperand(3), 3);
				1713	}
				1714
				1715	/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
				1716	/// specifies a shuffle of elements that is suitable for input to MOVLP{S\|D}.
				1717	bool X86::isMOVLPMask(SDNode *N) {
				1718	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1719
				1720	unsigned NumElems = N->getNumOperands();
				1721	if (NumElems != 2 && NumElems != 4)
				1722	return false;
				1723
				1724	for (unsigned i = 0; i < NumElems/2; ++i)
				1725	if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
				1726	return false;
				1727
				1728	for (unsigned i = NumElems/2; i < NumElems; ++i)
				1729	if (!isUndefOrEqual(N->getOperand(i), i))
				1730	return false;
				1731
				1732	return true;
				1733	}
				1734
				1735	/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
				1736	/// specifies a shuffle of elements that is suitable for input to MOVHP{S\|D}
				1737	/// and MOVLHPS.
				1738	bool X86::isMOVHPMask(SDNode *N) {
				1739	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1740
				1741	unsigned NumElems = N->getNumOperands();
				1742	if (NumElems != 2 && NumElems != 4)
				1743	return false;
				1744
				1745	for (unsigned i = 0; i < NumElems/2; ++i)
				1746	if (!isUndefOrEqual(N->getOperand(i), i))
				1747	return false;
				1748
				1749	for (unsigned i = 0; i < NumElems/2; ++i) {
				1750	SDOperand Arg = N->getOperand(i + NumElems/2);
				1751	if (!isUndefOrEqual(Arg, i + NumElems))
				1752	return false;
				1753	}
				1754
				1755	return true;
				1756	}
				1757
				1758	/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
				1759	/// specifies a shuffle of elements that is suitable for input to UNPCKL.
				1760	bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
				1761	bool V2IsSplat = false) {
				1762	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1763	return false;
				1764
				1765	for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
				1766	SDOperand BitI = Elts[i];
				1767	SDOperand BitI1 = Elts[i+1];
				1768	if (!isUndefOrEqual(BitI, j))
				1769	return false;
				1770	if (V2IsSplat) {
				1771	if (isUndefOrEqual(BitI1, NumElts))
				1772	return false;
				1773	} else {
				1774	if (!isUndefOrEqual(BitI1, j + NumElts))
				1775	return false;
				1776	}
				1777	}
				1778
				1779	return true;
				1780	}
				1781
				1782	bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
				1783	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1784	return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
				1785	}
				1786
				1787	/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
				1788	/// specifies a shuffle of elements that is suitable for input to UNPCKH.
				1789	bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
				1790	bool V2IsSplat = false) {
				1791	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1792	return false;
				1793
				1794	for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
				1795	SDOperand BitI = Elts[i];
				1796	SDOperand BitI1 = Elts[i+1];
				1797	if (!isUndefOrEqual(BitI, j + NumElts/2))
				1798	return false;
				1799	if (V2IsSplat) {
				1800	if (isUndefOrEqual(BitI1, NumElts))
				1801	return false;
				1802	} else {
				1803	if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
				1804	return false;
				1805	}
				1806	}
				1807
				1808	return true;
				1809	}
				1810
				1811	bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
				1812	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1813	return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
				1814	}
				1815
				1816	/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
				1817	/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
				1818	/// <0, 0, 1, 1>
				1819	bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
				1820	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1821
				1822	unsigned NumElems = N->getNumOperands();
				1823	if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
				1824	return false;
				1825
				1826	for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
				1827	SDOperand BitI = N->getOperand(i);
				1828	SDOperand BitI1 = N->getOperand(i+1);
				1829
				1830	if (!isUndefOrEqual(BitI, j))
				1831	return false;
				1832	if (!isUndefOrEqual(BitI1, j))
				1833	return false;
				1834	}
				1835
				1836	return true;
				1837	}
				1838
				1839	/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
				1840	/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
				1841	/// <2, 2, 3, 3>
				1842	bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
				1843	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1844
				1845	unsigned NumElems = N->getNumOperands();
				1846	if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
				1847	return false;
				1848
				1849	for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
				1850	SDOperand BitI = N->getOperand(i);
				1851	SDOperand BitI1 = N->getOperand(i + 1);
				1852
				1853	if (!isUndefOrEqual(BitI, j))
				1854	return false;
				1855	if (!isUndefOrEqual(BitI1, j))
				1856	return false;
				1857	}
				1858
				1859	return true;
				1860	}
				1861
				1862	/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
				1863	/// specifies a shuffle of elements that is suitable for input to MOVSS,
				1864	/// MOVSD, and MOVD, i.e. setting the lowest element.
				1865	static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
				1866	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1867	return false;
				1868
				1869	if (!isUndefOrEqual(Elts[0], NumElts))
				1870	return false;
				1871
				1872	for (unsigned i = 1; i < NumElts; ++i) {
				1873	if (!isUndefOrEqual(Elts[i], i))
				1874	return false;
				1875	}
				1876
				1877	return true;
				1878	}
				1879
				1880	bool X86::isMOVLMask(SDNode *N) {
				1881	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1882	return ::isMOVLMask(N->op_begin(), N->getNumOperands());
				1883	}
				1884
				1885	/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
				1886	/// of what x86 movss want. X86 movs requires the lowest element to be lowest
				1887	/// element of vector 2 and the other elements to come from vector 1 in order.
				1888	static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
				1889	bool V2IsSplat = false,
				1890	bool V2IsUndef = false) {
				1891	if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
				1892	return false;
				1893
				1894	if (!isUndefOrEqual(Ops[0], 0))
				1895	return false;
				1896
				1897	for (unsigned i = 1; i < NumOps; ++i) {
				1898	SDOperand Arg = Ops[i];
				1899	if (!(isUndefOrEqual(Arg, i+NumOps) \|\|
				1900	(V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) \|\|
				1901	(V2IsSplat && isUndefOrEqual(Arg, NumOps))))
				1902	return false;
				1903	}
				1904
				1905	return true;
				1906	}
				1907
				1908	static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
				1909	bool V2IsUndef = false) {
				1910	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1911	return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
				1912	V2IsSplat, V2IsUndef);
				1913	}
				1914
				1915	/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
				1916	/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
				1917	bool X86::isMOVSHDUPMask(SDNode *N) {
				1918	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1919
				1920	if (N->getNumOperands() != 4)
				1921	return false;
				1922
				1923	// Expect 1, 1, 3, 3
				1924	for (unsigned i = 0; i < 2; ++i) {
				1925	SDOperand Arg = N->getOperand(i);
				1926	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1927	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1928	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1929	if (Val != 1) return false;
				1930	}
				1931
				1932	bool HasHi = false;
				1933	for (unsigned i = 2; i < 4; ++i) {
				1934	SDOperand Arg = N->getOperand(i);
				1935	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1936	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1937	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1938	if (Val != 3) return false;
				1939	HasHi = true;
				1940	}
				1941
				1942	// Don't use movshdup if it can be done with a shufps.
				1943	return HasHi;
				1944	}
				1945
				1946	/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
				1947	/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
				1948	bool X86::isMOVSLDUPMask(SDNode *N) {
				1949	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1950
				1951	if (N->getNumOperands() != 4)
				1952	return false;
				1953
				1954	// Expect 0, 0, 2, 2
				1955	for (unsigned i = 0; i < 2; ++i) {
				1956	SDOperand Arg = N->getOperand(i);
				1957	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1958	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1959	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1960	if (Val != 0) return false;
				1961	}
				1962
				1963	bool HasHi = false;
				1964	for (unsigned i = 2; i < 4; ++i) {
				1965	SDOperand Arg = N->getOperand(i);
				1966	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1967	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1968	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1969	if (Val != 2) return false;
				1970	HasHi = true;
				1971	}
				1972
				1973	// Don't use movshdup if it can be done with a shufps.
				1974	return HasHi;
				1975	}
				1976
				1977	/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
				1978	/// specifies a identity operation on the LHS or RHS.
				1979	static bool isIdentityMask(SDNode *N, bool RHS = false) {
				1980	unsigned NumElems = N->getNumOperands();
				1981	for (unsigned i = 0; i < NumElems; ++i)
				1982	if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
				1983	return false;
				1984	return true;
				1985	}
				1986
				1987	/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
				1988	/// a splat of a single element.
				1989	static bool isSplatMask(SDNode *N) {
				1990	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1991
				1992	// This is a splat operation if each element of the permute is the same, and
				1993	// if the value doesn't reference the second vector.
				1994	unsigned NumElems = N->getNumOperands();
				1995	SDOperand ElementBase;
				1996	unsigned i = 0;
				1997	for (; i != NumElems; ++i) {
				1998	SDOperand Elt = N->getOperand(i);
				1999	if (isa<ConstantSDNode>(Elt)) {
				2000	ElementBase = Elt;
				2001	break;
				2002	}
				2003	}
				2004
				2005	if (!ElementBase.Val)
				2006	return false;
				2007
				2008	for (; i != NumElems; ++i) {
				2009	SDOperand Arg = N->getOperand(i);
				2010	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2011	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2012	if (Arg != ElementBase) return false;
				2013	}
				2014
				2015	// Make sure it is a splat of the first vector operand.
				2016	return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
				2017	}
				2018
				2019	/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
				2020	/// a splat of a single element and it's a 2 or 4 element mask.
				2021	bool X86::isSplatMask(SDNode *N) {
				2022	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2023
				2024	// We can only splat 64-bit, and 32-bit quantities with a single instruction.
				2025	if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
				2026	return false;
				2027	return ::isSplatMask(N);
				2028	}
				2029
				2030	/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
				2031	/// specifies a splat of zero element.
				2032	bool X86::isSplatLoMask(SDNode *N) {
				2033	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2034
				2035	for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
				2036	if (!isUndefOrEqual(N->getOperand(i), 0))
				2037	return false;
				2038	return true;
				2039	}
				2040
				2041	/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
				2042	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
				2043	/// instructions.
				2044	unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
				2045	unsigned NumOperands = N->getNumOperands();
				2046	unsigned Shift = (NumOperands == 4) ? 2 : 1;
				2047	unsigned Mask = 0;
				2048	for (unsigned i = 0; i < NumOperands; ++i) {
				2049	unsigned Val = 0;
				2050	SDOperand Arg = N->getOperand(NumOperands-i-1);
				2051	if (Arg.getOpcode() != ISD::UNDEF)
				2052	Val = cast<ConstantSDNode>(Arg)->getValue();
				2053	if (Val >= NumOperands) Val -= NumOperands;
				2054	Mask \|= Val;
				2055	if (i != NumOperands - 1)
				2056	Mask <<= Shift;
				2057	}
				2058
				2059	return Mask;
				2060	}
				2061
				2062	/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
				2063	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
				2064	/// instructions.
				2065	unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
				2066	unsigned Mask = 0;
				2067	// 8 nodes, but we only care about the last 4.
				2068	for (unsigned i = 7; i >= 4; --i) {
				2069	unsigned Val = 0;
				2070	SDOperand Arg = N->getOperand(i);
				2071	if (Arg.getOpcode() != ISD::UNDEF)
				2072	Val = cast<ConstantSDNode>(Arg)->getValue();
				2073	Mask \|= (Val - 4);
				2074	if (i != 4)
				2075	Mask <<= 2;
				2076	}
				2077
				2078	return Mask;
				2079	}
				2080
				2081	/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
				2082	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
				2083	/// instructions.
				2084	unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
				2085	unsigned Mask = 0;
				2086	// 8 nodes, but we only care about the first 4.
				2087	for (int i = 3; i >= 0; --i) {
				2088	unsigned Val = 0;
				2089	SDOperand Arg = N->getOperand(i);
				2090	if (Arg.getOpcode() != ISD::UNDEF)
				2091	Val = cast<ConstantSDNode>(Arg)->getValue();
				2092	Mask \|= Val;
				2093	if (i != 0)
				2094	Mask <<= 2;
				2095	}
				2096
				2097	return Mask;
				2098	}
				2099
				2100	/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
				2101	/// specifies a 8 element shuffle that can be broken into a pair of
				2102	/// PSHUFHW and PSHUFLW.
				2103	static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
				2104	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2105
				2106	if (N->getNumOperands() != 8)
				2107	return false;
				2108
				2109	// Lower quadword shuffled.
				2110	for (unsigned i = 0; i != 4; ++i) {
				2111	SDOperand Arg = N->getOperand(i);
				2112	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2113	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2114	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2115	if (Val > 4)
				2116	return false;
				2117	}
				2118
				2119	// Upper quadword shuffled.
				2120	for (unsigned i = 4; i != 8; ++i) {
				2121	SDOperand Arg = N->getOperand(i);
				2122	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2123	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2124	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2125	if (Val < 4 \|\| Val > 7)
				2126	return false;
				2127	}
				2128
				2129	return true;
				2130	}
				2131
				2132	/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
				2133	/// values in ther permute mask.
				2134	static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
				2135	SDOperand &V2, SDOperand &Mask,
				2136	SelectionDAG &DAG) {
				2137	MVT::ValueType VT = Op.getValueType();
				2138	MVT::ValueType MaskVT = Mask.getValueType();
				2139	MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
				2140	unsigned NumElems = Mask.getNumOperands();
				2141	SmallVector<SDOperand, 8> MaskVec;
				2142
				2143	for (unsigned i = 0; i != NumElems; ++i) {
				2144	SDOperand Arg = Mask.getOperand(i);
				2145	if (Arg.getOpcode() == ISD::UNDEF) {
				2146	MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
				2147	continue;
				2148	}
				2149	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2150	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2151	if (Val < NumElems)
				2152	MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
				2153	else
				2154	MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
				2155	}
				2156
				2157	std::swap(V1, V2);
				2158	Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2159	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2160	}
				2161
				2162	/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
				2163	/// match movhlps. The lower half elements should come from upper half of
				2164	/// V1 (and in order), and the upper half elements should come from the upper
				2165	/// half of V2 (and in order).
				2166	static bool ShouldXformToMOVHLPS(SDNode *Mask) {
				2167	unsigned NumElems = Mask->getNumOperands();
				2168	if (NumElems != 4)
				2169	return false;
				2170	for (unsigned i = 0, e = 2; i != e; ++i)
				2171	if (!isUndefOrEqual(Mask->getOperand(i), i+2))
				2172	return false;
				2173	for (unsigned i = 2; i != 4; ++i)
				2174	if (!isUndefOrEqual(Mask->getOperand(i), i+4))
				2175	return false;
				2176	return true;
				2177	}
				2178
				2179	/// isScalarLoadToVector - Returns true if the node is a scalar load that
				2180	/// is promoted to a vector.
				2181	static inline bool isScalarLoadToVector(SDNode *N) {
				2182	if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
				2183	N = N->getOperand(0).Val;
				2184	return ISD::isNON_EXTLoad(N);
				2185	}
				2186	return false;
				2187	}
				2188
				2189	/// ShouldXformToMOVLP{S\|D} - Return true if the node should be transformed to
				2190	/// match movlp{s\|d}. The lower half elements should come from lower half of
				2191	/// V1 (and in order), and the upper half elements should come from the upper
				2192	/// half of V2 (and in order). And since V1 will become the source of the
				2193	/// MOVLP, it must be either a vector load or a scalar load to vector.
				2194	static bool ShouldXformToMOVLP(SDNode V1, SDNode V2, SDNode *Mask) {
				2195	if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
				2196	return false;
				2197	// Is V2 is a vector load, don't do this transformation. We will try to use
				2198	// load folding shufps op.
				2199	if (ISD::isNON_EXTLoad(V2))
				2200	return false;
				2201
				2202	unsigned NumElems = Mask->getNumOperands();
				2203	if (NumElems != 2 && NumElems != 4)
				2204	return false;
				2205	for (unsigned i = 0, e = NumElems/2; i != e; ++i)
				2206	if (!isUndefOrEqual(Mask->getOperand(i), i))
				2207	return false;
				2208	for (unsigned i = NumElems/2; i != NumElems; ++i)
				2209	if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
				2210	return false;
				2211	return true;
				2212	}
				2213
				2214	/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
				2215	/// all the same.
				2216	static bool isSplatVector(SDNode *N) {
				2217	if (N->getOpcode() != ISD::BUILD_VECTOR)
				2218	return false;
				2219
				2220	SDOperand SplatValue = N->getOperand(0);
				2221	for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
				2222	if (N->getOperand(i) != SplatValue)
				2223	return false;
				2224	return true;
				2225	}
				2226
				2227	/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
				2228	/// to an undef.
				2229	static bool isUndefShuffle(SDNode *N) {
				2230	if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
				2231	return false;
				2232
				2233	SDOperand V1 = N->getOperand(0);
				2234	SDOperand V2 = N->getOperand(1);
				2235	SDOperand Mask = N->getOperand(2);
				2236	unsigned NumElems = Mask.getNumOperands();
				2237	for (unsigned i = 0; i != NumElems; ++i) {
				2238	SDOperand Arg = Mask.getOperand(i);
				2239	if (Arg.getOpcode() != ISD::UNDEF) {
				2240	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2241	if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
				2242	return false;
				2243	else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
				2244	return false;
				2245	}
				2246	}
				2247	return true;
				2248	}
				2249
				2250	/// isZeroNode - Returns true if Elt is a constant zero or a floating point
				2251	/// constant +0.0.
				2252	static inline bool isZeroNode(SDOperand Elt) {
				2253	return ((isa<ConstantSDNode>(Elt) &&
				2254	cast<ConstantSDNode>(Elt)->getValue() == 0) \|\|
				2255	(isa<ConstantFPSDNode>(Elt) &&
				2256	cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
				2257	}
				2258
				2259	/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
				2260	/// to an zero vector.
				2261	static bool isZeroShuffle(SDNode *N) {
				2262	if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
				2263	return false;
				2264
				2265	SDOperand V1 = N->getOperand(0);
				2266	SDOperand V2 = N->getOperand(1);
				2267	SDOperand Mask = N->getOperand(2);
				2268	unsigned NumElems = Mask.getNumOperands();
				2269	for (unsigned i = 0; i != NumElems; ++i) {
				2270	SDOperand Arg = Mask.getOperand(i);
				2271	if (Arg.getOpcode() != ISD::UNDEF) {
				2272	unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
				2273	if (Idx < NumElems) {
				2274	unsigned Opc = V1.Val->getOpcode();
				2275	if (Opc == ISD::UNDEF)
				2276	continue;
				2277	if (Opc != ISD::BUILD_VECTOR \|\|
				2278	!isZeroNode(V1.Val->getOperand(Idx)))
				2279	return false;
				2280	} else if (Idx >= NumElems) {
				2281	unsigned Opc = V2.Val->getOpcode();
				2282	if (Opc == ISD::UNDEF)
				2283	continue;
				2284	if (Opc != ISD::BUILD_VECTOR \|\|
				2285	!isZeroNode(V2.Val->getOperand(Idx - NumElems)))
				2286	return false;
				2287	}
				2288	}
				2289	}
				2290	return true;
				2291	}
				2292
				2293	/// getZeroVector - Returns a vector of specified type with all zero elements.
				2294	///
				2295	static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
				2296	assert(MVT::isVector(VT) && "Expected a vector type");
				2297	unsigned NumElems = MVT::getVectorNumElements(VT);
				2298	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				2299	bool isFP = MVT::isFloatingPoint(EVT);
				2300	SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
				2301	SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
				2302	return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
				2303	}
				2304
				2305	/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
				2306	/// that point to V2 points to its first element.
				2307	static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
				2308	assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
				2309
				2310	bool Changed = false;
				2311	SmallVector<SDOperand, 8> MaskVec;
				2312	unsigned NumElems = Mask.getNumOperands();
				2313	for (unsigned i = 0; i != NumElems; ++i) {
				2314	SDOperand Arg = Mask.getOperand(i);
				2315	if (Arg.getOpcode() != ISD::UNDEF) {
				2316	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2317	if (Val > NumElems) {
				2318	Arg = DAG.getConstant(NumElems, Arg.getValueType());
				2319	Changed = true;
				2320	}
				2321	}
				2322	MaskVec.push_back(Arg);
				2323	}
				2324
				2325	if (Changed)
				2326	Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
				2327	&MaskVec[0], MaskVec.size());
				2328	return Mask;
				2329	}
				2330
				2331	/// getMOVLMask - Returns a vector_shuffle mask for an movs{s\|d}, movd
				2332	/// operation of specified width.
				2333	static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
				2334	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2335	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2336
				2337	SmallVector<SDOperand, 8> MaskVec;
				2338	MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
				2339	for (unsigned i = 1; i != NumElems; ++i)
				2340	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2341	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2342	}
				2343
				2344	/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
				2345	/// of specified width.
				2346	static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
				2347	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2348	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2349	SmallVector<SDOperand, 8> MaskVec;
				2350	for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
				2351	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2352	MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
				2353	}
				2354	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2355	}
				2356
				2357	/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
				2358	/// of specified width.
				2359	static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
				2360	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2361	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2362	unsigned Half = NumElems/2;
				2363	SmallVector<SDOperand, 8> MaskVec;
				2364	for (unsigned i = 0; i != Half; ++i) {
				2365	MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
				2366	MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
				2367	}
				2368	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2369	}
				2370
				2371	/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
				2372	///
				2373	static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
				2374	SDOperand V1 = Op.getOperand(0);
				2375	SDOperand Mask = Op.getOperand(2);
				2376	MVT::ValueType VT = Op.getValueType();
				2377	unsigned NumElems = Mask.getNumOperands();
				2378	Mask = getUnpacklMask(NumElems, DAG);
				2379	while (NumElems != 4) {
				2380	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
				2381	NumElems >>= 1;
				2382	}
				2383	V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
				2384
				2385	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2386	Mask = getZeroVector(MaskVT, DAG);
				2387	SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
				2388	DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
				2389	return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
				2390	}
				2391
				2392	/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
				2393	/// vector of zero or undef vector.
				2394	static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
				2395	unsigned NumElems, unsigned Idx,
				2396	bool isZero, SelectionDAG &DAG) {
				2397	SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
				2398	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2399	MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
				2400	SDOperand Zero = DAG.getConstant(0, EVT);
				2401	SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
				2402	MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
				2403	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2404	&MaskVec[0], MaskVec.size());
				2405	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2406	}
				2407
				2408	/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
				2409	///
				2410	static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
				2411	unsigned NumNonZero, unsigned NumZero,
				2412	SelectionDAG &DAG, TargetLowering &TLI) {
				2413	if (NumNonZero > 8)
				2414	return SDOperand();
				2415
				2416	SDOperand V(0, 0);
				2417	bool First = true;
				2418	for (unsigned i = 0; i < 16; ++i) {
				2419	bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
				2420	if (ThisIsNonZero && First) {
				2421	if (NumZero)
				2422	V = getZeroVector(MVT::v8i16, DAG);
				2423	else
				2424	V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
				2425	First = false;
				2426	}
				2427
				2428	if ((i & 1) != 0) {
				2429	SDOperand ThisElt(0, 0), LastElt(0, 0);
				2430	bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
				2431	if (LastIsNonZero) {
				2432	LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
				2433	}
				2434	if (ThisIsNonZero) {
				2435	ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
				2436	ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
				2437	ThisElt, DAG.getConstant(8, MVT::i8));
				2438	if (LastIsNonZero)
				2439	ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
				2440	} else
				2441	ThisElt = LastElt;
				2442
				2443	if (ThisElt.Val)
				2444	V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
				2445	DAG.getConstant(i/2, TLI.getPointerTy()));
				2446	}
				2447	}
				2448
				2449	return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
				2450	}
				2451
				2452	/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
				2453	///
				2454	static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
				2455	unsigned NumNonZero, unsigned NumZero,
				2456	SelectionDAG &DAG, TargetLowering &TLI) {
				2457	if (NumNonZero > 4)
				2458	return SDOperand();
				2459
				2460	SDOperand V(0, 0);
				2461	bool First = true;
				2462	for (unsigned i = 0; i < 8; ++i) {
				2463	bool isNonZero = (NonZeros & (1 << i)) != 0;
				2464	if (isNonZero) {
				2465	if (First) {
				2466	if (NumZero)
				2467	V = getZeroVector(MVT::v8i16, DAG);
				2468	else
				2469	V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
				2470	First = false;
				2471	}
				2472	V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
				2473	DAG.getConstant(i, TLI.getPointerTy()));
				2474	}
				2475	}
				2476
				2477	return V;
				2478	}
				2479
				2480	SDOperand
				2481	X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				2482	// All zero's are handled with pxor.
				2483	if (ISD::isBuildVectorAllZeros(Op.Val))
				2484	return Op;
				2485
				2486	// All one's are handled with pcmpeqd.
				2487	if (ISD::isBuildVectorAllOnes(Op.Val))
				2488	return Op;
				2489
				2490	MVT::ValueType VT = Op.getValueType();
				2491	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				2492	unsigned EVTBits = MVT::getSizeInBits(EVT);
				2493
				2494	unsigned NumElems = Op.getNumOperands();
				2495	unsigned NumZero = 0;
				2496	unsigned NumNonZero = 0;
				2497	unsigned NonZeros = 0;
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2498	unsigned NumNonZeroImms = 0;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2499	std::set<SDOperand> Values;
				2500	for (unsigned i = 0; i < NumElems; ++i) {
				2501	SDOperand Elt = Op.getOperand(i);
				2502	if (Elt.getOpcode() != ISD::UNDEF) {
				2503	Values.insert(Elt);
				2504	if (isZeroNode(Elt))
				2505	NumZero++;
				2506	else {
				2507	NonZeros \|= (1 << i);
				2508	NumNonZero++;
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2509	if (Elt.getOpcode() == ISD::Constant \|\|
				2510	Elt.getOpcode() == ISD::ConstantFP)
				2511	NumNonZeroImms++;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2512	}
				2513	}
				2514	}
				2515
				2516	if (NumNonZero == 0) {
				2517	if (NumZero == 0)
				2518	// All undef vector. Return an UNDEF.
				2519	return DAG.getNode(ISD::UNDEF, VT);
				2520	else
				2521	// A mix of zero and undef. Return a zero vector.
				2522	return getZeroVector(VT, DAG);
				2523	}
				2524
				2525	// Splat is obviously ok. Let legalizer expand it to a shuffle.
				2526	if (Values.size() == 1)
				2527	return SDOperand();
				2528
				2529	// Special case for single non-zero element.
				2530	if (NumNonZero == 1) {
				2531	unsigned Idx = CountTrailingZeros_32(NonZeros);
				2532	SDOperand Item = Op.getOperand(Idx);
				2533	Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
				2534	if (Idx == 0)
				2535	// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
				2536	return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
				2537	NumZero > 0, DAG);
				2538
				2539	if (EVTBits == 32) {
				2540	// Turn it into a shuffle of zero and zero-extended scalar to vector.
				2541	Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
				2542	DAG);
				2543	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2544	MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
				2545	SmallVector<SDOperand, 8> MaskVec;
				2546	for (unsigned i = 0; i < NumElems; i++)
				2547	MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
				2548	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2549	&MaskVec[0], MaskVec.size());
				2550	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
				2551	DAG.getNode(ISD::UNDEF, VT), Mask);
				2552	}
				2553	}
				2554
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2555	// A vector full of immediates; various special cases are already
				2556	// handled, so this is best done with a single constant-pool load.
				2557	if (NumNonZero == NumNonZeroImms)
				2558	return SDOperand();
				2559
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2560	// Let legalizer expand 2-wide build_vectors.
				2561	if (EVTBits == 64)
				2562	return SDOperand();
				2563
				2564	// If element VT is < 32 bits, convert it to inserts into a zero vector.
				2565	if (EVTBits == 8 && NumElems == 16) {
				2566	SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
				2567	*this);
				2568	if (V.Val) return V;
				2569	}
				2570
				2571	if (EVTBits == 16 && NumElems == 8) {
				2572	SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
				2573	*this);
				2574	if (V.Val) return V;
				2575	}
				2576
				2577	// If element VT is == 32 bits, turn it into a number of shuffles.
				2578	SmallVector<SDOperand, 8> V;
				2579	V.resize(NumElems);
				2580	if (NumElems == 4 && NumZero > 0) {
				2581	for (unsigned i = 0; i < 4; ++i) {
				2582	bool isZero = !(NonZeros & (1 << i));
				2583	if (isZero)
				2584	V[i] = getZeroVector(VT, DAG);
				2585	else
				2586	V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
				2587	}
				2588
				2589	for (unsigned i = 0; i < 2; ++i) {
				2590	switch ((NonZeros & (0x3 << i2)) >> (i2)) {
				2591	default: break;
				2592	case 0:
				2593	V[i] = V[i*2]; // Must be a zero vector.
				2594	break;
				2595	case 1:
				2596	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2+1], V[i2],
				2597	getMOVLMask(NumElems, DAG));
				2598	break;
				2599	case 2:
				2600	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2], V[i2+1],
				2601	getMOVLMask(NumElems, DAG));
				2602	break;
				2603	case 3:
				2604	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2], V[i2+1],
				2605	getUnpacklMask(NumElems, DAG));
				2606	break;
				2607	}
				2608	}
				2609
				2610	// Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
				2611	// clears the upper bits.
				2612	// FIXME: we can do the same for v4f32 case when we know both parts of
				2613	// the lower half come from scalar_to_vector (loadf32). We should do
				2614	// that in post legalizer dag combiner with target specific hooks.
				2615	if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
				2616	return V[0];
				2617	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2618	MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
				2619	SmallVector<SDOperand, 8> MaskVec;
				2620	bool Reverse = (NonZeros & 0x3) == 2;
				2621	for (unsigned i = 0; i < 2; ++i)
				2622	if (Reverse)
				2623	MaskVec.push_back(DAG.getConstant(1-i, EVT));
				2624	else
				2625	MaskVec.push_back(DAG.getConstant(i, EVT));
				2626	Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
				2627	for (unsigned i = 0; i < 2; ++i)
				2628	if (Reverse)
				2629	MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
				2630	else
				2631	MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
				2632	SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2633	&MaskVec[0], MaskVec.size());
				2634	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
				2635	}
				2636
				2637	if (Values.size() > 2) {
				2638	// Expand into a number of unpckl*.
				2639	// e.g. for v4f32
				2640	// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
				2641	// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
				2642	// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
				2643	SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
				2644	for (unsigned i = 0; i < NumElems; ++i)
				2645	V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
				2646	NumElems >>= 1;
				2647	while (NumElems != 0) {
				2648	for (unsigned i = 0; i < NumElems; ++i)
				2649	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
				2650	UnpckMask);
				2651	NumElems >>= 1;
				2652	}
				2653	return V[0];
				2654	}
				2655
				2656	return SDOperand();
				2657	}
				2658
				2659	SDOperand
				2660	X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
				2661	SDOperand V1 = Op.getOperand(0);
				2662	SDOperand V2 = Op.getOperand(1);
				2663	SDOperand PermMask = Op.getOperand(2);
				2664	MVT::ValueType VT = Op.getValueType();
				2665	unsigned NumElems = PermMask.getNumOperands();
				2666	bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
				2667	bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
				2668	bool V1IsSplat = false;
				2669	bool V2IsSplat = false;
				2670
				2671	if (isUndefShuffle(Op.Val))
				2672	return DAG.getNode(ISD::UNDEF, VT);
				2673
				2674	if (isZeroShuffle(Op.Val))
				2675	return getZeroVector(VT, DAG);
				2676
				2677	if (isIdentityMask(PermMask.Val))
				2678	return V1;
				2679	else if (isIdentityMask(PermMask.Val, true))
				2680	return V2;
				2681
				2682	if (isSplatMask(PermMask.Val)) {
				2683	if (NumElems <= 4) return Op;
				2684	// Promote it to a v4i32 splat.
				2685	return PromoteSplat(Op, DAG);
				2686	}
				2687
				2688	if (X86::isMOVLMask(PermMask.Val))
				2689	return (V1IsUndef) ? V2 : Op;
				2690
				2691	if (X86::isMOVSHDUPMask(PermMask.Val) \|\|
				2692	X86::isMOVSLDUPMask(PermMask.Val) \|\|
				2693	X86::isMOVHLPSMask(PermMask.Val) \|\|
				2694	X86::isMOVHPMask(PermMask.Val) \|\|
				2695	X86::isMOVLPMask(PermMask.Val))
				2696	return Op;
				2697
				2698	if (ShouldXformToMOVHLPS(PermMask.Val) \|\|
				2699	ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
				2700	return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2701
				2702	bool Commuted = false;
				2703	V1IsSplat = isSplatVector(V1.Val);
				2704	V2IsSplat = isSplatVector(V2.Val);
				2705	if ((V1IsSplat \|\| V1IsUndef) && !(V2IsSplat \|\| V2IsUndef)) {
				2706	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2707	std::swap(V1IsSplat, V2IsSplat);
				2708	std::swap(V1IsUndef, V2IsUndef);
				2709	Commuted = true;
				2710	}
				2711
				2712	if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
				2713	if (V2IsUndef) return V1;
				2714	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2715	if (V2IsSplat) {
				2716	// V2 is a splat, so the mask may be malformed. That is, it may point
				2717	// to any V2 element. The instruction selectior won't like this. Get
				2718	// a corrected mask and commute to form a proper MOVS{S\|D}.
				2719	SDOperand NewMask = getMOVLMask(NumElems, DAG);
				2720	if (NewMask.Val != PermMask.Val)
				2721	Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2722	}
				2723	return Op;
				2724	}
				2725
				2726	if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) \|\|
				2727	X86::isUNPCKH_v_undef_Mask(PermMask.Val) \|\|
				2728	X86::isUNPCKLMask(PermMask.Val) \|\|
				2729	X86::isUNPCKHMask(PermMask.Val))
				2730	return Op;
				2731
				2732	if (V2IsSplat) {
				2733	// Normalize mask so all entries that point to V2 points to its first
				2734	// element then try to match unpck{h\|l} again. If match, return a
				2735	// new vector_shuffle with the corrected mask.
				2736	SDOperand NewMask = NormalizeMask(PermMask, DAG);
				2737	if (NewMask.Val != PermMask.Val) {
				2738	if (X86::isUNPCKLMask(PermMask.Val, true)) {
				2739	SDOperand NewMask = getUnpacklMask(NumElems, DAG);
				2740	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2741	} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
				2742	SDOperand NewMask = getUnpackhMask(NumElems, DAG);
				2743	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2744	}
				2745	}
				2746	}
				2747
				2748	// Normalize the node to match x86 shuffle ops if needed
				2749	if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
				2750	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2751
				2752	if (Commuted) {
				2753	// Commute is back and try unpck* again.
				2754	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2755	if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) \|\|
				2756	X86::isUNPCKH_v_undef_Mask(PermMask.Val) \|\|
				2757	X86::isUNPCKLMask(PermMask.Val) \|\|
				2758	X86::isUNPCKHMask(PermMask.Val))
				2759	return Op;
				2760	}
				2761
				2762	// If VT is integer, try PSHUF* first, then SHUFP*.
				2763	if (MVT::isInteger(VT)) {
				2764	if (X86::isPSHUFDMask(PermMask.Val) \|\|
				2765	X86::isPSHUFHWMask(PermMask.Val) \|\|
				2766	X86::isPSHUFLWMask(PermMask.Val)) {
				2767	if (V2.getOpcode() != ISD::UNDEF)
				2768	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
				2769	DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
				2770	return Op;
				2771	}
				2772
				2773	if (X86::isSHUFPMask(PermMask.Val) &&
				2774	MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
				2775	return Op;
				2776
				2777	// Handle v8i16 shuffle high / low shuffle node pair.
				2778	if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
				2779	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2780	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2781	SmallVector<SDOperand, 8> MaskVec;
				2782	for (unsigned i = 0; i != 4; ++i)
				2783	MaskVec.push_back(PermMask.getOperand(i));
				2784	for (unsigned i = 4; i != 8; ++i)
				2785	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2786	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2787	&MaskVec[0], MaskVec.size());
				2788	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2789	MaskVec.clear();
				2790	for (unsigned i = 0; i != 4; ++i)
				2791	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2792	for (unsigned i = 4; i != 8; ++i)
				2793	MaskVec.push_back(PermMask.getOperand(i));
				2794	Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
				2795	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2796	}
				2797	} else {
				2798	// Floating point cases in the other order.
				2799	if (X86::isSHUFPMask(PermMask.Val))
				2800	return Op;
				2801	if (X86::isPSHUFDMask(PermMask.Val) \|\|
				2802	X86::isPSHUFHWMask(PermMask.Val) \|\|
				2803	X86::isPSHUFLWMask(PermMask.Val)) {
				2804	if (V2.getOpcode() != ISD::UNDEF)
				2805	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
				2806	DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
				2807	return Op;
				2808	}
				2809	}
				2810
				2811	if (NumElems == 4 &&
				2812	// Don't do this for MMX.
				2813	MVT::getSizeInBits(VT) != 64) {
				2814	MVT::ValueType MaskVT = PermMask.getValueType();
				2815	MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
				2816	SmallVector<std::pair<int, int>, 8> Locs;
				2817	Locs.reserve(NumElems);
				2818	SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2819	SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2820	unsigned NumHi = 0;
				2821	unsigned NumLo = 0;
				2822	// If no more than two elements come from either vector. This can be
				2823	// implemented with two shuffles. First shuffle gather the elements.
				2824	// The second shuffle, which takes the first shuffle as both of its
				2825	// vector operands, put the elements into the right order.
				2826	for (unsigned i = 0; i != NumElems; ++i) {
				2827	SDOperand Elt = PermMask.getOperand(i);
				2828	if (Elt.getOpcode() == ISD::UNDEF) {
				2829	Locs[i] = std::make_pair(-1, -1);
				2830	} else {
				2831	unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
				2832	if (Val < NumElems) {
				2833	Locs[i] = std::make_pair(0, NumLo);
				2834	Mask1[NumLo] = Elt;
				2835	NumLo++;
				2836	} else {
				2837	Locs[i] = std::make_pair(1, NumHi);
				2838	if (2+NumHi < NumElems)
				2839	Mask1[2+NumHi] = Elt;
				2840	NumHi++;
				2841	}
				2842	}
				2843	}
				2844	if (NumLo <= 2 && NumHi <= 2) {
				2845	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2846	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2847	&Mask1[0], Mask1.size()));
				2848	for (unsigned i = 0; i != NumElems; ++i) {
				2849	if (Locs[i].first == -1)
				2850	continue;
				2851	else {
				2852	unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
				2853	Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
				2854	Mask2[i] = DAG.getConstant(Idx, MaskEVT);
				2855	}
				2856	}
				2857
				2858	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
				2859	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2860	&Mask2[0], Mask2.size()));
				2861	}
				2862
				2863	// Break it into (shuffle shuffle_hi, shuffle_lo).
				2864	Locs.clear();
				2865	SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2866	SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2867	SmallVector<SDOperand,8> *MaskPtr = &LoMask;
				2868	unsigned MaskIdx = 0;
				2869	unsigned LoIdx = 0;
				2870	unsigned HiIdx = NumElems/2;
				2871	for (unsigned i = 0; i != NumElems; ++i) {
				2872	if (i == NumElems/2) {
				2873	MaskPtr = &HiMask;
				2874	MaskIdx = 1;
				2875	LoIdx = 0;
				2876	HiIdx = NumElems/2;
				2877	}
				2878	SDOperand Elt = PermMask.getOperand(i);
				2879	if (Elt.getOpcode() == ISD::UNDEF) {
				2880	Locs[i] = std::make_pair(-1, -1);
				2881	} else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
				2882	Locs[i] = std::make_pair(MaskIdx, LoIdx);
				2883	(*MaskPtr)[LoIdx] = Elt;
				2884	LoIdx++;
				2885	} else {
				2886	Locs[i] = std::make_pair(MaskIdx, HiIdx);
				2887	(*MaskPtr)[HiIdx] = Elt;
				2888	HiIdx++;
				2889	}
				2890	}
				2891
				2892	SDOperand LoShuffle =
				2893	DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2894	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2895	&LoMask[0], LoMask.size()));
				2896	SDOperand HiShuffle =
				2897	DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2898	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2899	&HiMask[0], HiMask.size()));
				2900	SmallVector<SDOperand, 8> MaskOps;
				2901	for (unsigned i = 0; i != NumElems; ++i) {
				2902	if (Locs[i].first == -1) {
				2903	MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
				2904	} else {
				2905	unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
				2906	MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
				2907	}
				2908	}
				2909	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
				2910	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2911	&MaskOps[0], MaskOps.size()));
				2912	}
				2913
				2914	return SDOperand();
				2915	}
				2916
				2917	SDOperand
				2918	X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				2919	if (!isa<ConstantSDNode>(Op.getOperand(1)))
				2920	return SDOperand();
				2921
				2922	MVT::ValueType VT = Op.getValueType();
				2923	// TODO: handle v16i8.
				2924	if (MVT::getSizeInBits(VT) == 16) {
				2925	// Transform it so it match pextrw which produces a 32-bit result.
				2926	MVT::ValueType EVT = (MVT::ValueType)(VT+1);
				2927	SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
				2928	Op.getOperand(0), Op.getOperand(1));
				2929	SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
				2930	DAG.getValueType(VT));
				2931	return DAG.getNode(ISD::TRUNCATE, VT, Assert);
				2932	} else if (MVT::getSizeInBits(VT) == 32) {
				2933	SDOperand Vec = Op.getOperand(0);
				2934	unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				2935	if (Idx == 0)
				2936	return Op;
				2937	// SHUFPS the element to the lowest double word, then movss.
				2938	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2939	SmallVector<SDOperand, 8> IdxVec;
				2940	IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
				2941	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2942	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2943	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2944	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2945	&IdxVec[0], IdxVec.size());
				2946	Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
				2947	Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
				2948	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
				2949	DAG.getConstant(0, getPointerTy()));
				2950	} else if (MVT::getSizeInBits(VT) == 64) {
				2951	SDOperand Vec = Op.getOperand(0);
				2952	unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				2953	if (Idx == 0)
				2954	return Op;
				2955
				2956	// UNPCKHPD the element to the lowest double word, then movsd.
				2957	// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
				2958	// to a f64mem, the whole operation is folded into a single MOVHPDmr.
				2959	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2960	SmallVector<SDOperand, 8> IdxVec;
				2961	IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
				2962	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2963	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2964	&IdxVec[0], IdxVec.size());
				2965	Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
				2966	Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
				2967	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
				2968	DAG.getConstant(0, getPointerTy()));
				2969	}
				2970
				2971	return SDOperand();
				2972	}
				2973
				2974	SDOperand
				2975	X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				2976	// Transform it so it match pinsrw which expects a 16-bit value in a GR32
				2977	// as its second argument.
				2978	MVT::ValueType VT = Op.getValueType();
				2979	MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
				2980	SDOperand N0 = Op.getOperand(0);
				2981	SDOperand N1 = Op.getOperand(1);
				2982	SDOperand N2 = Op.getOperand(2);
				2983	if (MVT::getSizeInBits(BaseVT) == 16) {
				2984	if (N1.getValueType() != MVT::i32)
				2985	N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
				2986	if (N2.getValueType() != MVT::i32)
				2987	N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
				2988	return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
				2989	} else if (MVT::getSizeInBits(BaseVT) == 32) {
				2990	unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
				2991	if (Idx == 0) {
				2992	// Use a movss.
				2993	N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
				2994	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2995	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2996	SmallVector<SDOperand, 8> MaskVec;
				2997	MaskVec.push_back(DAG.getConstant(4, BaseVT));
				2998	for (unsigned i = 1; i <= 3; ++i)
				2999	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				3000	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
				3001	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3002	&MaskVec[0], MaskVec.size()));
				3003	} else {
				3004	// Use two pinsrw instructions to insert a 32 bit value.
				3005	Idx <<= 1;
				3006	if (MVT::isFloatingPoint(N1.getValueType())) {
				3007	if (ISD::isNON_EXTLoad(N1.Val)) {
				3008	// Just load directly from f32mem to GR32.
				3009	LoadSDNode *LD = cast<LoadSDNode>(N1);
				3010	N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
				3011	LD->getSrcValue(), LD->getSrcValueOffset());
				3012	} else {
				3013	N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
				3014	N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
				3015	N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
				3016	DAG.getConstant(0, getPointerTy()));
				3017	}
				3018	}
				3019	N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
				3020	N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
				3021	DAG.getConstant(Idx, getPointerTy()));
				3022	N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
				3023	N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
				3024	DAG.getConstant(Idx+1, getPointerTy()));
				3025	return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
				3026	}
				3027	}
				3028
				3029	return SDOperand();
				3030	}
				3031
				3032	SDOperand
				3033	X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				3034	SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
				3035	return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
				3036	}
				3037
				3038	// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
				3039	// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
				3040	// one of the above mentioned nodes. It has to be wrapped because otherwise
				3041	// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
				3042	// be used to form addressing mode. These wrapped nodes will be selected
				3043	// into MOV32ri.
				3044	SDOperand
				3045	X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
				3046	ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
				3047	SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
				3048	getPointerTy(),
				3049	CP->getAlignment());
				3050	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3051	// With PIC, the address is actually $g + Offset.
				3052	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3053	!Subtarget->isPICStyleRIPRel()) {
				3054	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3055	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3056	Result);
				3057	}
				3058
				3059	return Result;
				3060	}
				3061
				3062	SDOperand
				3063	X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
				3064	GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
				3065	SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
				3066	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3067	// With PIC, the address is actually $g + Offset.
				3068	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3069	!Subtarget->isPICStyleRIPRel()) {
				3070	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3071	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3072	Result);
				3073	}
				3074
				3075	// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
				3076	// load the value at address GV, not the value of GV itself. This means that
				3077	// the GlobalAddress must be in the base or index register of the address, not
				3078	// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
				3079	// The same applies for external symbols during PIC codegen
				3080	if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
				3081	Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
				3082
				3083	return Result;
				3084	}
				3085
				3086	// Lower ISD::GlobalTLSAddress using the "general dynamic" model
				3087	static SDOperand
				3088	LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
				3089	const MVT::ValueType PtrVT) {
				3090	SDOperand InFlag;
				3091	SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
				3092	DAG.getNode(X86ISD::GlobalBaseReg,
				3093	PtrVT), InFlag);
				3094	InFlag = Chain.getValue(1);
				3095
				3096	// emit leal symbol@TLSGD(,%ebx,1), %eax
				3097	SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
				3098	SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
				3099	GA->getValueType(0),
				3100	GA->getOffset());
				3101	SDOperand Ops[] = { Chain, TGA, InFlag };
				3102	SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
				3103	InFlag = Result.getValue(2);
				3104	Chain = Result.getValue(1);
				3105
				3106	// call ___tls_get_addr. This function receives its argument in
				3107	// the register EAX.
				3108	Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
				3109	InFlag = Chain.getValue(1);
				3110
				3111	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				3112	SDOperand Ops1[] = { Chain,
				3113	DAG.getTargetExternalSymbol("___tls_get_addr",
				3114	PtrVT),
				3115	DAG.getRegister(X86::EAX, PtrVT),
				3116	DAG.getRegister(X86::EBX, PtrVT),
				3117	InFlag };
				3118	Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
				3119	InFlag = Chain.getValue(1);
				3120
				3121	return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
				3122	}
				3123
				3124	// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
				3125	// "local exec" model.
				3126	static SDOperand
				3127	LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
				3128	const MVT::ValueType PtrVT) {
				3129	// Get the Thread Pointer
				3130	SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
				3131	// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
				3132	// exec)
				3133	SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
				3134	GA->getValueType(0),
				3135	GA->getOffset());
				3136	SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
				3137
				3138	if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
				3139	Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
				3140
				3141	// The address of the thread local variable is the add of the thread
				3142	// pointer with the offset of the variable.
				3143	return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
				3144	}
				3145
				3146	SDOperand
				3147	X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
				3148	// TODO: implement the "local dynamic" model
				3149	// TODO: implement the "initial exec"model for pic executables
				3150	assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
				3151	"TLS not implemented for non-ELF and 64-bit targets");
				3152	GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
				3153	// If the relocation model is PIC, use the "General Dynamic" TLS Model,
				3154	// otherwise use the "Local Exec"TLS Model
				3155	if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
				3156	return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
				3157	else
				3158	return LowerToTLSExecModel(GA, DAG, getPointerTy());
				3159	}
				3160
				3161	SDOperand
				3162	X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
				3163	const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
				3164	SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
				3165	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3166	// With PIC, the address is actually $g + Offset.
				3167	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3168	!Subtarget->isPICStyleRIPRel()) {
				3169	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3170	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3171	Result);
				3172	}
				3173
				3174	return Result;
				3175	}
				3176
				3177	SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
				3178	JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
				3179	SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
				3180	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3181	// With PIC, the address is actually $g + Offset.
				3182	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3183	!Subtarget->isPICStyleRIPRel()) {
				3184	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3185	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3186	Result);
				3187	}
				3188
				3189	return Result;
				3190	}
				3191
				3192	SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
				3193	assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
				3194	"Not an i64 shift!");
				3195	bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
				3196	SDOperand ShOpLo = Op.getOperand(0);
				3197	SDOperand ShOpHi = Op.getOperand(1);
				3198	SDOperand ShAmt = Op.getOperand(2);
				3199	SDOperand Tmp1 = isSRA ?
				3200	DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
				3201	DAG.getConstant(0, MVT::i32);
				3202
				3203	SDOperand Tmp2, Tmp3;
				3204	if (Op.getOpcode() == ISD::SHL_PARTS) {
				3205	Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
				3206	Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
				3207	} else {
				3208	Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
				3209	Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
				3210	}
				3211
				3212	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3213	SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
				3214	DAG.getConstant(32, MVT::i8));
				3215	SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
				3216	SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
				3217
				3218	SDOperand Hi, Lo;
				3219	SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3220
				3221	VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
				3222	SmallVector<SDOperand, 4> Ops;
				3223	if (Op.getOpcode() == ISD::SHL_PARTS) {
				3224	Ops.push_back(Tmp2);
				3225	Ops.push_back(Tmp3);
				3226	Ops.push_back(CC);
				3227	Ops.push_back(InFlag);
				3228	Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3229	InFlag = Hi.getValue(1);
				3230
				3231	Ops.clear();
				3232	Ops.push_back(Tmp3);
				3233	Ops.push_back(Tmp1);
				3234	Ops.push_back(CC);
				3235	Ops.push_back(InFlag);
				3236	Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3237	} else {
				3238	Ops.push_back(Tmp2);
				3239	Ops.push_back(Tmp3);
				3240	Ops.push_back(CC);
				3241	Ops.push_back(InFlag);
				3242	Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3243	InFlag = Lo.getValue(1);
				3244
				3245	Ops.clear();
				3246	Ops.push_back(Tmp3);
				3247	Ops.push_back(Tmp1);
				3248	Ops.push_back(CC);
				3249	Ops.push_back(InFlag);
				3250	Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3251	}
				3252
				3253	VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
				3254	Ops.clear();
				3255	Ops.push_back(Lo);
				3256	Ops.push_back(Hi);
				3257	return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
				3258	}
				3259
				3260	SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
				3261	assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
				3262	Op.getOperand(0).getValueType() >= MVT::i16 &&
				3263	"Unknown SINT_TO_FP to lower!");
				3264
				3265	SDOperand Result;
				3266	MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
				3267	unsigned Size = MVT::getSizeInBits(SrcVT)/8;
				3268	MachineFunction &MF = DAG.getMachineFunction();
				3269	int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
				3270	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3271	SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
				3272	StackSlot, NULL, 0);
				3273
				3274	// Build the FILD
				3275	SDVTList Tys;
				3276	if (X86ScalarSSE)
				3277	Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
				3278	else
				3279	Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
				3280	SmallVector<SDOperand, 8> Ops;
				3281	Ops.push_back(Chain);
				3282	Ops.push_back(StackSlot);
				3283	Ops.push_back(DAG.getValueType(SrcVT));
				3284	Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
				3285	Tys, &Ops[0], Ops.size());
				3286
				3287	if (X86ScalarSSE) {
				3288	Chain = Result.getValue(1);
				3289	SDOperand InFlag = Result.getValue(2);
				3290
				3291	// FIXME: Currently the FST is flagged to the FILD_FLAG. This
				3292	// shouldn't be necessary except that RFP cannot be live across
				3293	// multiple blocks. When stackifier is fixed, they can be uncoupled.
				3294	MachineFunction &MF = DAG.getMachineFunction();
				3295	int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
				3296	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3297	Tys = DAG.getVTList(MVT::Other);
				3298	SmallVector<SDOperand, 8> Ops;
				3299	Ops.push_back(Chain);
				3300	Ops.push_back(Result);
				3301	Ops.push_back(StackSlot);
				3302	Ops.push_back(DAG.getValueType(Op.getValueType()));
				3303	Ops.push_back(InFlag);
				3304	Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
				3305	Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
				3306	}
				3307
				3308	return Result;
				3309	}
				3310
				3311	SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
				3312	assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
				3313	"Unknown FP_TO_SINT to lower!");
				3314	// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
				3315	// stack slot.
				3316	MachineFunction &MF = DAG.getMachineFunction();
				3317	unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
				3318	int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
				3319	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3320
				3321	unsigned Opc;
				3322	switch (Op.getValueType()) {
				3323	default: assert(0 && "Invalid FP_TO_SINT to lower!");
				3324	case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
				3325	case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
				3326	case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
				3327	}
				3328
				3329	SDOperand Chain = DAG.getEntryNode();
				3330	SDOperand Value = Op.getOperand(0);
				3331	if (X86ScalarSSE) {
				3332	assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
				3333	Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
				3334	SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
				3335	SDOperand Ops[] = {
				3336	Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
				3337	};
				3338	Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
				3339	Chain = Value.getValue(1);
				3340	SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
				3341	StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3342	}
				3343
				3344	// Build the FP_TO_INT*_IN_MEM
				3345	SDOperand Ops[] = { Chain, Value, StackSlot };
				3346	SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
				3347
				3348	// Load the result.
				3349	return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
				3350	}
				3351
				3352	SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
				3353	MVT::ValueType VT = Op.getValueType();
				3354	MVT::ValueType EltVT = VT;
				3355	if (MVT::isVector(VT))
				3356	EltVT = MVT::getVectorElementType(VT);
				3357	const Type *OpNTy = MVT::getTypeForValueType(EltVT);
				3358	std::vector<Constant*> CV;
				3359	if (EltVT == MVT::f64) {
				3360	Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)));
				3361	CV.push_back(C);
				3362	CV.push_back(C);
				3363	} else {
				3364	Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)));
				3365	CV.push_back(C);
				3366	CV.push_back(C);
				3367	CV.push_back(C);
				3368	CV.push_back(C);
				3369	}
Evan Cheng	75b3832	2007-07-27 01:37:47 +0000	[diff] [blame]	3370	Constant *CS = ConstantStruct::get(CV);
				3371	SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
				3372	SDVTList Tys = DAG.getVTList(VT, MVT::Other);
				3373	SmallVector<SDOperand, 3> Ops;
				3374	Ops.push_back(DAG.getEntryNode());
				3375	Ops.push_back(CPIdx);
				3376	Ops.push_back(DAG.getSrcValue(NULL));
				3377	SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3378	return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
				3379	}
				3380
				3381	SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
				3382	MVT::ValueType VT = Op.getValueType();
				3383	MVT::ValueType EltVT = VT;
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3384	unsigned EltNum = 1;
				3385	if (MVT::isVector(VT)) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3386	EltVT = MVT::getVectorElementType(VT);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3387	EltNum = MVT::getVectorNumElements(VT);
				3388	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3389	const Type *OpNTy = MVT::getTypeForValueType(EltVT);
				3390	std::vector<Constant*> CV;
				3391	if (EltVT == MVT::f64) {
				3392	Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63));
				3393	CV.push_back(C);
				3394	CV.push_back(C);
				3395	} else {
				3396	Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31));
				3397	CV.push_back(C);
				3398	CV.push_back(C);
				3399	CV.push_back(C);
				3400	CV.push_back(C);
				3401	}
Evan Cheng	75b3832	2007-07-27 01:37:47 +0000	[diff] [blame]	3402	Constant *CS = ConstantStruct::get(CV);
				3403	SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3404	if (MVT::isVector(VT)) {
Evan Cheng	75b3832	2007-07-27 01:37:47 +0000	[diff] [blame]	3405	SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3406	return DAG.getNode(ISD::BIT_CONVERT, VT,
				3407	DAG.getNode(ISD::XOR, MVT::v2i64,
				3408	DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
				3409	DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
				3410	} else {
Evan Cheng	75b3832	2007-07-27 01:37:47 +0000	[diff] [blame]	3411	SDVTList Tys = DAG.getVTList(VT, MVT::Other);
				3412	SmallVector<SDOperand, 3> Ops;
				3413	Ops.push_back(DAG.getEntryNode());
				3414	Ops.push_back(CPIdx);
				3415	Ops.push_back(DAG.getSrcValue(NULL));
				3416	SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3417	return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
				3418	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3419	}
				3420
				3421	SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
				3422	SDOperand Op0 = Op.getOperand(0);
				3423	SDOperand Op1 = Op.getOperand(1);
				3424	MVT::ValueType VT = Op.getValueType();
				3425	MVT::ValueType SrcVT = Op1.getValueType();
				3426	const Type *SrcTy = MVT::getTypeForValueType(SrcVT);
				3427
				3428	// If second operand is smaller, extend it first.
				3429	if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
				3430	Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
				3431	SrcVT = VT;
				3432	}
				3433
				3434	// First get the sign bit of second operand.
				3435	std::vector<Constant*> CV;
				3436	if (SrcVT == MVT::f64) {
				3437	CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63)));
				3438	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3439	} else {
				3440	CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31)));
				3441	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3442	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3443	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3444	}
Evan Cheng	75b3832	2007-07-27 01:37:47 +0000	[diff] [blame]	3445	Constant *CS = ConstantStruct::get(CV);
				3446	SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
				3447	SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other);
				3448	SmallVector<SDOperand, 3> Ops;
				3449	Ops.push_back(DAG.getEntryNode());
				3450	Ops.push_back(CPIdx);
				3451	Ops.push_back(DAG.getSrcValue(NULL));
				3452	SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3453	SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
				3454
				3455	// Shift sign bit right or left if the two operands have different types.
				3456	if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
				3457	// Op0 is MVT::f32, Op1 is MVT::f64.
				3458	SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
				3459	SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
				3460	DAG.getConstant(32, MVT::i32));
				3461	SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
				3462	SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
				3463	DAG.getConstant(0, getPointerTy()));
				3464	}
				3465
				3466	// Clear first operand sign bit.
				3467	CV.clear();
				3468	if (VT == MVT::f64) {
				3469	CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63))));
				3470	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3471	} else {
				3472	CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31))));
				3473	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3474	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3475	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3476	}
Evan Cheng	75b3832	2007-07-27 01:37:47 +0000	[diff] [blame]	3477	CS = ConstantStruct::get(CV);
				3478	CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
				3479	Tys = DAG.getVTList(VT, MVT::Other);
				3480	Ops.clear();
				3481	Ops.push_back(DAG.getEntryNode());
				3482	Ops.push_back(CPIdx);
				3483	Ops.push_back(DAG.getSrcValue(NULL));
				3484	SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3485	SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
				3486
				3487	// Or the value with the sign bit.
				3488	return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
				3489	}
				3490
				3491	SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
				3492	SDOperand Chain) {
				3493	assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
				3494	SDOperand Cond;
				3495	SDOperand Op0 = Op.getOperand(0);
				3496	SDOperand Op1 = Op.getOperand(1);
				3497	SDOperand CC = Op.getOperand(2);
				3498	ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
				3499	const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3500	const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
				3501	bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
				3502	unsigned X86CC;
				3503
				3504	if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
				3505	Op0, Op1, DAG)) {
				3506	SDOperand Ops1[] = { Chain, Op0, Op1 };
				3507	Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
				3508	SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
				3509	return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3510	}
				3511
				3512	assert(isFP && "Illegal integer SetCC!");
				3513
				3514	SDOperand COps[] = { Chain, Op0, Op1 };
				3515	Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
				3516
				3517	switch (SetCCOpcode) {
				3518	default: assert(false && "Illegal floating point SetCC!");
				3519	case ISD::SETOEQ: { // !PF & ZF
				3520	SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
				3521	SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
				3522	SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
				3523	Tmp1.getValue(1) };
				3524	SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3525	return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
				3526	}
				3527	case ISD::SETUNE: { // PF \| !ZF
				3528	SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
				3529	SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
				3530	SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
				3531	Tmp1.getValue(1) };
				3532	SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3533	return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
				3534	}
				3535	}
				3536	}
				3537
				3538	SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
				3539	bool addTest = true;
				3540	SDOperand Chain = DAG.getEntryNode();
				3541	SDOperand Cond = Op.getOperand(0);
				3542	SDOperand CC;
				3543	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3544
				3545	if (Cond.getOpcode() == ISD::SETCC)
				3546	Cond = LowerSETCC(Cond, DAG, Chain);
				3547
				3548	if (Cond.getOpcode() == X86ISD::SETCC) {
				3549	CC = Cond.getOperand(0);
				3550
				3551	// If condition flag is set by a X86ISD::CMP, then make a copy of it
				3552	// (since flag operand cannot be shared). Use it as the condition setting
				3553	// operand in place of the X86ISD::SETCC.
				3554	// If the X86ISD::SETCC has more than one use, then perhaps it's better
				3555	// to use a test instead of duplicating the X86ISD::CMP (for register
				3556	// pressure reason)?
				3557	SDOperand Cmp = Cond.getOperand(1);
				3558	unsigned Opc = Cmp.getOpcode();
				3559	bool IllegalFPCMov = !X86ScalarSSE &&
				3560	MVT::isFloatingPoint(Op.getValueType()) &&
				3561	!hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
				3562	if ((Opc == X86ISD::CMP \|\| Opc == X86ISD::COMI \|\| Opc == X86ISD::UCOMI) &&
				3563	!IllegalFPCMov) {
				3564	SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
				3565	Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
				3566	addTest = false;
				3567	}
				3568	}
				3569
				3570	if (addTest) {
				3571	CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3572	SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
				3573	Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
				3574	}
				3575
				3576	VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
				3577	SmallVector<SDOperand, 4> Ops;
				3578	// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
				3579	// condition is true.
				3580	Ops.push_back(Op.getOperand(2));
				3581	Ops.push_back(Op.getOperand(1));
				3582	Ops.push_back(CC);
				3583	Ops.push_back(Cond.getValue(1));
				3584	return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3585	}
				3586
				3587	SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
				3588	bool addTest = true;
				3589	SDOperand Chain = Op.getOperand(0);
				3590	SDOperand Cond = Op.getOperand(1);
				3591	SDOperand Dest = Op.getOperand(2);
				3592	SDOperand CC;
				3593	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3594
				3595	if (Cond.getOpcode() == ISD::SETCC)
				3596	Cond = LowerSETCC(Cond, DAG, Chain);
				3597
				3598	if (Cond.getOpcode() == X86ISD::SETCC) {
				3599	CC = Cond.getOperand(0);
				3600
				3601	// If condition flag is set by a X86ISD::CMP, then make a copy of it
				3602	// (since flag operand cannot be shared). Use it as the condition setting
				3603	// operand in place of the X86ISD::SETCC.
				3604	// If the X86ISD::SETCC has more than one use, then perhaps it's better
				3605	// to use a test instead of duplicating the X86ISD::CMP (for register
				3606	// pressure reason)?
				3607	SDOperand Cmp = Cond.getOperand(1);
				3608	unsigned Opc = Cmp.getOpcode();
				3609	if (Opc == X86ISD::CMP \|\| Opc == X86ISD::COMI \|\| Opc == X86ISD::UCOMI) {
				3610	SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
				3611	Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
				3612	addTest = false;
				3613	}
				3614	}
				3615
				3616	if (addTest) {
				3617	CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3618	SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
				3619	Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
				3620	}
				3621	return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
				3622	Cond, Op.getOperand(2), CC, Cond.getValue(1));
				3623	}
				3624
				3625	SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
				3626	unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3627
				3628	if (Subtarget->is64Bit())
				3629	return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
				3630	else
				3631	switch (CallingConv) {
				3632	default:
				3633	assert(0 && "Unsupported calling convention");
				3634	case CallingConv::Fast:
				3635	// TODO: Implement fastcc
				3636	// Falls through
				3637	case CallingConv::C:
				3638	case CallingConv::X86_StdCall:
				3639	return LowerCCCCallTo(Op, DAG, CallingConv);
				3640	case CallingConv::X86_FastCall:
				3641	return LowerFastCCCallTo(Op, DAG, CallingConv);
				3642	}
				3643	}
				3644
				3645
				3646	// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
				3647	// Calls to _alloca is needed to probe the stack when allocating more than 4k
				3648	// bytes in one go. Touching the stack at 4K increments is necessary to ensure
				3649	// that the guard pages used by the OS virtual memory manager are allocated in
				3650	// correct sequence.
				3651	SDOperand
				3652	X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
				3653	SelectionDAG &DAG) {
				3654	assert(Subtarget->isTargetCygMing() &&
				3655	"This should be used only on Cygwin/Mingw targets");
				3656
				3657	// Get the inputs.
				3658	SDOperand Chain = Op.getOperand(0);
				3659	SDOperand Size = Op.getOperand(1);
				3660	// FIXME: Ensure alignment here
				3661
				3662	SDOperand Flag;
				3663
				3664	MVT::ValueType IntPtr = getPointerTy();
				3665	MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
				3666
				3667	Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
				3668	Flag = Chain.getValue(1);
				3669
				3670	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				3671	SDOperand Ops[] = { Chain,
				3672	DAG.getTargetExternalSymbol("_alloca", IntPtr),
				3673	DAG.getRegister(X86::EAX, IntPtr),
				3674	Flag };
				3675	Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
				3676	Flag = Chain.getValue(1);
				3677
				3678	Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
				3679
				3680	std::vector<MVT::ValueType> Tys;
				3681	Tys.push_back(SPTy);
				3682	Tys.push_back(MVT::Other);
				3683	SDOperand Ops1[2] = { Chain.getValue(0), Chain };
				3684	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
				3685	}
				3686
				3687	SDOperand
				3688	X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
				3689	MachineFunction &MF = DAG.getMachineFunction();
				3690	const Function* Fn = MF.getFunction();
				3691	if (Fn->hasExternalLinkage() &&
				3692	Subtarget->isTargetCygMing() &&
				3693	Fn->getName() == "main")
				3694	MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
				3695
				3696	unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3697	if (Subtarget->is64Bit())
				3698	return LowerX86_64CCCArguments(Op, DAG);
				3699	else
				3700	switch(CC) {
				3701	default:
				3702	assert(0 && "Unsupported calling convention");
				3703	case CallingConv::Fast:
				3704	// TODO: implement fastcc.
				3705
				3706	// Falls through
				3707	case CallingConv::C:
				3708	return LowerCCCArguments(Op, DAG);
				3709	case CallingConv::X86_StdCall:
				3710	MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
				3711	return LowerCCCArguments(Op, DAG, true);
				3712	case CallingConv::X86_FastCall:
				3713	MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
				3714	return LowerFastCCArguments(Op, DAG);
				3715	}
				3716	}
				3717
				3718	SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
				3719	SDOperand InFlag(0, 0);
				3720	SDOperand Chain = Op.getOperand(0);
				3721	unsigned Align =
				3722	(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
				3723	if (Align == 0) Align = 1;
				3724
				3725	ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
				3726	// If not DWORD aligned, call memset if size is less than the threshold.
				3727	// It knows how to align to the right boundary first.
				3728	if ((Align & 3) != 0 \|\|
				3729	(I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
				3730	MVT::ValueType IntPtr = getPointerTy();
				3731	const Type *IntPtrTy = getTargetData()->getIntPtrType();
				3732	TargetLowering::ArgListTy Args;
				3733	TargetLowering::ArgListEntry Entry;
				3734	Entry.Node = Op.getOperand(1);
				3735	Entry.Ty = IntPtrTy;
				3736	Args.push_back(Entry);
				3737	// Extend the unsigned i8 argument to be an int value for the call.
				3738	Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
				3739	Entry.Ty = IntPtrTy;
				3740	Args.push_back(Entry);
				3741	Entry.Node = Op.getOperand(3);
				3742	Args.push_back(Entry);
				3743	std::pair<SDOperand,SDOperand> CallResult =
				3744	LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
				3745	DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
				3746	return CallResult.second;
				3747	}
				3748
				3749	MVT::ValueType AVT;
				3750	SDOperand Count;
				3751	ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
				3752	unsigned BytesLeft = 0;
				3753	bool TwoRepStos = false;
				3754	if (ValC) {
				3755	unsigned ValReg;
				3756	uint64_t Val = ValC->getValue() & 255;
				3757
				3758	// If the value is a constant, then we can potentially use larger sets.
				3759	switch (Align & 3) {
				3760	case 2: // WORD aligned
				3761	AVT = MVT::i16;
				3762	ValReg = X86::AX;
				3763	Val = (Val << 8) \| Val;
				3764	break;
				3765	case 0: // DWORD aligned
				3766	AVT = MVT::i32;
				3767	ValReg = X86::EAX;
				3768	Val = (Val << 8) \| Val;
				3769	Val = (Val << 16) \| Val;
				3770	if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned
				3771	AVT = MVT::i64;
				3772	ValReg = X86::RAX;
				3773	Val = (Val << 32) \| Val;
				3774	}
				3775	break;
				3776	default: // Byte aligned
				3777	AVT = MVT::i8;
				3778	ValReg = X86::AL;
				3779	Count = Op.getOperand(3);
				3780	break;
				3781	}
				3782
				3783	if (AVT > MVT::i8) {
				3784	if (I) {
				3785	unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
				3786	Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
				3787	BytesLeft = I->getValue() % UBytes;
				3788	} else {
				3789	assert(AVT >= MVT::i32 &&
				3790	"Do not use rep;stos if not at least DWORD aligned");
				3791	Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
				3792	Op.getOperand(3), DAG.getConstant(2, MVT::i8));
				3793	TwoRepStos = true;
				3794	}
				3795	}
				3796
				3797	Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
				3798	InFlag);
				3799	InFlag = Chain.getValue(1);
				3800	} else {
				3801	AVT = MVT::i8;
				3802	Count = Op.getOperand(3);
				3803	Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
				3804	InFlag = Chain.getValue(1);
				3805	}
				3806
				3807	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
				3808	Count, InFlag);
				3809	InFlag = Chain.getValue(1);
				3810	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
				3811	Op.getOperand(1), InFlag);
				3812	InFlag = Chain.getValue(1);
				3813
				3814	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3815	SmallVector<SDOperand, 8> Ops;
				3816	Ops.push_back(Chain);
				3817	Ops.push_back(DAG.getValueType(AVT));
				3818	Ops.push_back(InFlag);
				3819	Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
				3820
				3821	if (TwoRepStos) {
				3822	InFlag = Chain.getValue(1);
				3823	Count = Op.getOperand(3);
				3824	MVT::ValueType CVT = Count.getValueType();
				3825	SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
				3826	DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
				3827	Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
				3828	Left, InFlag);
				3829	InFlag = Chain.getValue(1);
				3830	Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3831	Ops.clear();
				3832	Ops.push_back(Chain);
				3833	Ops.push_back(DAG.getValueType(MVT::i8));
				3834	Ops.push_back(InFlag);
				3835	Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
				3836	} else if (BytesLeft) {
				3837	// Issue stores for the last 1 - 7 bytes.
				3838	SDOperand Value;
				3839	unsigned Val = ValC->getValue() & 255;
				3840	unsigned Offset = I->getValue() - BytesLeft;
				3841	SDOperand DstAddr = Op.getOperand(1);
				3842	MVT::ValueType AddrVT = DstAddr.getValueType();
				3843	if (BytesLeft >= 4) {
				3844	Val = (Val << 8) \| Val;
				3845	Val = (Val << 16) \| Val;
				3846	Value = DAG.getConstant(Val, MVT::i32);
				3847	Chain = DAG.getStore(Chain, Value,
				3848	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3849	DAG.getConstant(Offset, AddrVT)),
				3850	NULL, 0);
				3851	BytesLeft -= 4;
				3852	Offset += 4;
				3853	}
				3854	if (BytesLeft >= 2) {
				3855	Value = DAG.getConstant((Val << 8) \| Val, MVT::i16);
				3856	Chain = DAG.getStore(Chain, Value,
				3857	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3858	DAG.getConstant(Offset, AddrVT)),
				3859	NULL, 0);
				3860	BytesLeft -= 2;
				3861	Offset += 2;
				3862	}
				3863	if (BytesLeft == 1) {
				3864	Value = DAG.getConstant(Val, MVT::i8);
				3865	Chain = DAG.getStore(Chain, Value,
				3866	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3867	DAG.getConstant(Offset, AddrVT)),
				3868	NULL, 0);
				3869	}
				3870	}
				3871
				3872	return Chain;
				3873	}
				3874
				3875	SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
				3876	SDOperand Chain = Op.getOperand(0);
				3877	unsigned Align =
				3878	(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
				3879	if (Align == 0) Align = 1;
				3880
				3881	ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
				3882	// If not DWORD aligned, call memcpy if size is less than the threshold.
				3883	// It knows how to align to the right boundary first.
				3884	if ((Align & 3) != 0 \|\|
				3885	(I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
				3886	MVT::ValueType IntPtr = getPointerTy();
				3887	TargetLowering::ArgListTy Args;
				3888	TargetLowering::ArgListEntry Entry;
				3889	Entry.Ty = getTargetData()->getIntPtrType();
				3890	Entry.Node = Op.getOperand(1); Args.push_back(Entry);
				3891	Entry.Node = Op.getOperand(2); Args.push_back(Entry);
				3892	Entry.Node = Op.getOperand(3); Args.push_back(Entry);
				3893	std::pair<SDOperand,SDOperand> CallResult =
				3894	LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
				3895	DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
				3896	return CallResult.second;
				3897	}
				3898
				3899	MVT::ValueType AVT;
				3900	SDOperand Count;
				3901	unsigned BytesLeft = 0;
				3902	bool TwoRepMovs = false;
				3903	switch (Align & 3) {
				3904	case 2: // WORD aligned
				3905	AVT = MVT::i16;
				3906	break;
				3907	case 0: // DWORD aligned
				3908	AVT = MVT::i32;
				3909	if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
				3910	AVT = MVT::i64;
				3911	break;
				3912	default: // Byte aligned
				3913	AVT = MVT::i8;
				3914	Count = Op.getOperand(3);
				3915	break;
				3916	}
				3917
				3918	if (AVT > MVT::i8) {
				3919	if (I) {
				3920	unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
				3921	Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
				3922	BytesLeft = I->getValue() % UBytes;
				3923	} else {
				3924	assert(AVT >= MVT::i32 &&
				3925	"Do not use rep;movs if not at least DWORD aligned");
				3926	Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
				3927	Op.getOperand(3), DAG.getConstant(2, MVT::i8));
				3928	TwoRepMovs = true;
				3929	}
				3930	}
				3931
				3932	SDOperand InFlag(0, 0);
				3933	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
				3934	Count, InFlag);
				3935	InFlag = Chain.getValue(1);
				3936	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
				3937	Op.getOperand(1), InFlag);
				3938	InFlag = Chain.getValue(1);
				3939	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
				3940	Op.getOperand(2), InFlag);
				3941	InFlag = Chain.getValue(1);
				3942
				3943	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3944	SmallVector<SDOperand, 8> Ops;
				3945	Ops.push_back(Chain);
				3946	Ops.push_back(DAG.getValueType(AVT));
				3947	Ops.push_back(InFlag);
				3948	Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
				3949
				3950	if (TwoRepMovs) {
				3951	InFlag = Chain.getValue(1);
				3952	Count = Op.getOperand(3);
				3953	MVT::ValueType CVT = Count.getValueType();
				3954	SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
				3955	DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
				3956	Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
				3957	Left, InFlag);
				3958	InFlag = Chain.getValue(1);
				3959	Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3960	Ops.clear();
				3961	Ops.push_back(Chain);
				3962	Ops.push_back(DAG.getValueType(MVT::i8));
				3963	Ops.push_back(InFlag);
				3964	Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
				3965	} else if (BytesLeft) {
				3966	// Issue loads and stores for the last 1 - 7 bytes.
				3967	unsigned Offset = I->getValue() - BytesLeft;
				3968	SDOperand DstAddr = Op.getOperand(1);
				3969	MVT::ValueType DstVT = DstAddr.getValueType();
				3970	SDOperand SrcAddr = Op.getOperand(2);
				3971	MVT::ValueType SrcVT = SrcAddr.getValueType();
				3972	SDOperand Value;
				3973	if (BytesLeft >= 4) {
				3974	Value = DAG.getLoad(MVT::i32, Chain,
				3975	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				3976	DAG.getConstant(Offset, SrcVT)),
				3977	NULL, 0);
				3978	Chain = Value.getValue(1);
				3979	Chain = DAG.getStore(Chain, Value,
				3980	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				3981	DAG.getConstant(Offset, DstVT)),
				3982	NULL, 0);
				3983	BytesLeft -= 4;
				3984	Offset += 4;
				3985	}
				3986	if (BytesLeft >= 2) {
				3987	Value = DAG.getLoad(MVT::i16, Chain,
				3988	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				3989	DAG.getConstant(Offset, SrcVT)),
				3990	NULL, 0);
				3991	Chain = Value.getValue(1);
				3992	Chain = DAG.getStore(Chain, Value,
				3993	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				3994	DAG.getConstant(Offset, DstVT)),
				3995	NULL, 0);
				3996	BytesLeft -= 2;
				3997	Offset += 2;
				3998	}
				3999
				4000	if (BytesLeft == 1) {
				4001	Value = DAG.getLoad(MVT::i8, Chain,
				4002	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				4003	DAG.getConstant(Offset, SrcVT)),
				4004	NULL, 0);
				4005	Chain = Value.getValue(1);
				4006	Chain = DAG.getStore(Chain, Value,
				4007	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				4008	DAG.getConstant(Offset, DstVT)),
				4009	NULL, 0);
				4010	}
				4011	}
				4012
				4013	return Chain;
				4014	}
				4015
				4016	SDOperand
				4017	X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
				4018	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				4019	SDOperand TheOp = Op.getOperand(0);
				4020	SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
				4021	if (Subtarget->is64Bit()) {
				4022	SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
				4023	SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
				4024	MVT::i64, Copy1.getValue(2));
				4025	SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
				4026	DAG.getConstant(32, MVT::i8));
				4027	SDOperand Ops[] = {
				4028	DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
				4029	};
				4030
				4031	Tys = DAG.getVTList(MVT::i64, MVT::Other);
				4032	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
				4033	}
				4034
				4035	SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
				4036	SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
				4037	MVT::i32, Copy1.getValue(2));
				4038	SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
				4039	Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
				4040	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
				4041	}
				4042
				4043	SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
				4044	SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
				4045
				4046	if (!Subtarget->is64Bit()) {
				4047	// vastart just stores the address of the VarArgsFrameIndex slot into the
				4048	// memory location argument.
				4049	SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
				4050	return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
				4051	SV->getOffset());
				4052	}
				4053
				4054	// __va_list_tag:
				4055	// gp_offset (0 - 6 * 8)
				4056	// fp_offset (48 - 48 + 8 * 16)
				4057	// overflow_arg_area (point to parameters coming in memory).
				4058	// reg_save_area
				4059	SmallVector<SDOperand, 8> MemOps;
				4060	SDOperand FIN = Op.getOperand(1);
				4061	// Store gp_offset
				4062	SDOperand Store = DAG.getStore(Op.getOperand(0),
				4063	DAG.getConstant(VarArgsGPOffset, MVT::i32),
				4064	FIN, SV->getValue(), SV->getOffset());
				4065	MemOps.push_back(Store);
				4066
				4067	// Store fp_offset
				4068	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4069	DAG.getConstant(4, getPointerTy()));
				4070	Store = DAG.getStore(Op.getOperand(0),
				4071	DAG.getConstant(VarArgsFPOffset, MVT::i32),
				4072	FIN, SV->getValue(), SV->getOffset());
				4073	MemOps.push_back(Store);
				4074
				4075	// Store ptr to overflow_arg_area
				4076	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4077	DAG.getConstant(4, getPointerTy()));
				4078	SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
				4079	Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
				4080	SV->getOffset());
				4081	MemOps.push_back(Store);
				4082
				4083	// Store ptr to reg_save_area.
				4084	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4085	DAG.getConstant(8, getPointerTy()));
				4086	SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
				4087	Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
				4088	SV->getOffset());
				4089	MemOps.push_back(Store);
				4090	return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
				4091	}
				4092
				4093	SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
				4094	// X86-64 va_list is a struct { i32, i32, i8, i8 }.
				4095	SDOperand Chain = Op.getOperand(0);
				4096	SDOperand DstPtr = Op.getOperand(1);
				4097	SDOperand SrcPtr = Op.getOperand(2);
				4098	SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
				4099	SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));
				4100
				4101	SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
				4102	SrcSV->getValue(), SrcSV->getOffset());
				4103	Chain = SrcPtr.getValue(1);
				4104	for (unsigned i = 0; i < 3; ++i) {
				4105	SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
				4106	SrcSV->getValue(), SrcSV->getOffset());
				4107	Chain = Val.getValue(1);
				4108	Chain = DAG.getStore(Chain, Val, DstPtr,
				4109	DstSV->getValue(), DstSV->getOffset());
				4110	if (i == 2)
				4111	break;
				4112	SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
				4113	DAG.getConstant(8, getPointerTy()));
				4114	DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
				4115	DAG.getConstant(8, getPointerTy()));
				4116	}
				4117	return Chain;
				4118	}
				4119
				4120	SDOperand
				4121	X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
				4122	unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
				4123	switch (IntNo) {
				4124	default: return SDOperand(); // Don't custom lower most intrinsics.
				4125	// Comparison intrinsics.
				4126	case Intrinsic::x86_sse_comieq_ss:
				4127	case Intrinsic::x86_sse_comilt_ss:
				4128	case Intrinsic::x86_sse_comile_ss:
				4129	case Intrinsic::x86_sse_comigt_ss:
				4130	case Intrinsic::x86_sse_comige_ss:
				4131	case Intrinsic::x86_sse_comineq_ss:
				4132	case Intrinsic::x86_sse_ucomieq_ss:
				4133	case Intrinsic::x86_sse_ucomilt_ss:
				4134	case Intrinsic::x86_sse_ucomile_ss:
				4135	case Intrinsic::x86_sse_ucomigt_ss:
				4136	case Intrinsic::x86_sse_ucomige_ss:
				4137	case Intrinsic::x86_sse_ucomineq_ss:
				4138	case Intrinsic::x86_sse2_comieq_sd:
				4139	case Intrinsic::x86_sse2_comilt_sd:
				4140	case Intrinsic::x86_sse2_comile_sd:
				4141	case Intrinsic::x86_sse2_comigt_sd:
				4142	case Intrinsic::x86_sse2_comige_sd:
				4143	case Intrinsic::x86_sse2_comineq_sd:
				4144	case Intrinsic::x86_sse2_ucomieq_sd:
				4145	case Intrinsic::x86_sse2_ucomilt_sd:
				4146	case Intrinsic::x86_sse2_ucomile_sd:
				4147	case Intrinsic::x86_sse2_ucomigt_sd:
				4148	case Intrinsic::x86_sse2_ucomige_sd:
				4149	case Intrinsic::x86_sse2_ucomineq_sd: {
				4150	unsigned Opc = 0;
				4151	ISD::CondCode CC = ISD::SETCC_INVALID;
				4152	switch (IntNo) {
				4153	default: break;
				4154	case Intrinsic::x86_sse_comieq_ss:
				4155	case Intrinsic::x86_sse2_comieq_sd:
				4156	Opc = X86ISD::COMI;
				4157	CC = ISD::SETEQ;
				4158	break;
				4159	case Intrinsic::x86_sse_comilt_ss:
				4160	case Intrinsic::x86_sse2_comilt_sd:
				4161	Opc = X86ISD::COMI;
				4162	CC = ISD::SETLT;
				4163	break;
				4164	case Intrinsic::x86_sse_comile_ss:
				4165	case Intrinsic::x86_sse2_comile_sd:
				4166	Opc = X86ISD::COMI;
				4167	CC = ISD::SETLE;
				4168	break;
				4169	case Intrinsic::x86_sse_comigt_ss:
				4170	case Intrinsic::x86_sse2_comigt_sd:
				4171	Opc = X86ISD::COMI;
				4172	CC = ISD::SETGT;
				4173	break;
				4174	case Intrinsic::x86_sse_comige_ss:
				4175	case Intrinsic::x86_sse2_comige_sd:
				4176	Opc = X86ISD::COMI;
				4177	CC = ISD::SETGE;
				4178	break;
				4179	case Intrinsic::x86_sse_comineq_ss:
				4180	case Intrinsic::x86_sse2_comineq_sd:
				4181	Opc = X86ISD::COMI;
				4182	CC = ISD::SETNE;
				4183	break;
				4184	case Intrinsic::x86_sse_ucomieq_ss:
				4185	case Intrinsic::x86_sse2_ucomieq_sd:
				4186	Opc = X86ISD::UCOMI;
				4187	CC = ISD::SETEQ;
				4188	break;
				4189	case Intrinsic::x86_sse_ucomilt_ss:
				4190	case Intrinsic::x86_sse2_ucomilt_sd:
				4191	Opc = X86ISD::UCOMI;
				4192	CC = ISD::SETLT;
				4193	break;
				4194	case Intrinsic::x86_sse_ucomile_ss:
				4195	case Intrinsic::x86_sse2_ucomile_sd:
				4196	Opc = X86ISD::UCOMI;
				4197	CC = ISD::SETLE;
				4198	break;
				4199	case Intrinsic::x86_sse_ucomigt_ss:
				4200	case Intrinsic::x86_sse2_ucomigt_sd:
				4201	Opc = X86ISD::UCOMI;
				4202	CC = ISD::SETGT;
				4203	break;
				4204	case Intrinsic::x86_sse_ucomige_ss:
				4205	case Intrinsic::x86_sse2_ucomige_sd:
				4206	Opc = X86ISD::UCOMI;
				4207	CC = ISD::SETGE;
				4208	break;
				4209	case Intrinsic::x86_sse_ucomineq_ss:
				4210	case Intrinsic::x86_sse2_ucomineq_sd:
				4211	Opc = X86ISD::UCOMI;
				4212	CC = ISD::SETNE;
				4213	break;
				4214	}
				4215
				4216	unsigned X86CC;
				4217	SDOperand LHS = Op.getOperand(1);
				4218	SDOperand RHS = Op.getOperand(2);
				4219	translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
				4220
				4221	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				4222	SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
				4223	SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
				4224	VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
				4225	SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
				4226	SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
				4227	return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
				4228	}
				4229	}
				4230	}
				4231
				4232	SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
				4233	// Depths > 0 not supported yet!
				4234	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				4235	return SDOperand();
				4236
				4237	// Just load the return address
				4238	SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
				4239	return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
				4240	}
				4241
				4242	SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
				4243	// Depths > 0 not supported yet!
				4244	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				4245	return SDOperand();
				4246
				4247	SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
				4248	return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
				4249	DAG.getConstant(4, getPointerTy()));
				4250	}
				4251
				4252	SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
				4253	SelectionDAG &DAG) {
				4254	// Is not yet supported on x86-64
				4255	if (Subtarget->is64Bit())
				4256	return SDOperand();
				4257
				4258	return DAG.getConstant(8, getPointerTy());
				4259	}
				4260
				4261	SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
				4262	{
				4263	assert(!Subtarget->is64Bit() &&
				4264	"Lowering of eh_return builtin is not supported yet on x86-64");
				4265
				4266	MachineFunction &MF = DAG.getMachineFunction();
				4267	SDOperand Chain = Op.getOperand(0);
				4268	SDOperand Offset = Op.getOperand(1);
				4269	SDOperand Handler = Op.getOperand(2);
				4270
				4271	SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF),
				4272	getPointerTy());
				4273
				4274	SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
				4275	DAG.getConstant(-4UL, getPointerTy()));
				4276	StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
				4277	Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
				4278	Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
				4279	MF.addLiveOut(X86::ECX);
				4280
				4281	return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
				4282	Chain, DAG.getRegister(X86::ECX, getPointerTy()));
				4283	}
				4284
				4285	/// LowerOperation - Provide custom lowering hooks for some operations.
				4286	///
				4287	SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
				4288	switch (Op.getOpcode()) {
				4289	default: assert(0 && "Should not custom lower this!");
				4290	case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
				4291	case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
				4292	case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
				4293	case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
				4294	case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
				4295	case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
				4296	case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
				4297	case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
				4298	case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
				4299	case ISD::SHL_PARTS:
				4300	case ISD::SRA_PARTS:
				4301	case ISD::SRL_PARTS: return LowerShift(Op, DAG);
				4302	case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
				4303	case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
				4304	case ISD::FABS: return LowerFABS(Op, DAG);
				4305	case ISD::FNEG: return LowerFNEG(Op, DAG);
				4306	case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
				4307	case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode());
				4308	case ISD::SELECT: return LowerSELECT(Op, DAG);
				4309	case ISD::BRCOND: return LowerBRCOND(Op, DAG);
				4310	case ISD::JumpTable: return LowerJumpTable(Op, DAG);
				4311	case ISD::CALL: return LowerCALL(Op, DAG);
				4312	case ISD::RET: return LowerRET(Op, DAG);
				4313	case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
				4314	case ISD::MEMSET: return LowerMEMSET(Op, DAG);
				4315	case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
				4316	case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
				4317	case ISD::VASTART: return LowerVASTART(Op, DAG);
				4318	case ISD::VACOPY: return LowerVACOPY(Op, DAG);
				4319	case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
				4320	case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
				4321	case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
				4322	case ISD::FRAME_TO_ARGS_OFFSET:
				4323	return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
				4324	case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
				4325	case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
				4326	}
				4327	return SDOperand();
				4328	}
				4329
				4330	const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
				4331	switch (Opcode) {
				4332	default: return NULL;
				4333	case X86ISD::SHLD: return "X86ISD::SHLD";
				4334	case X86ISD::SHRD: return "X86ISD::SHRD";
				4335	case X86ISD::FAND: return "X86ISD::FAND";
				4336	case X86ISD::FOR: return "X86ISD::FOR";
				4337	case X86ISD::FXOR: return "X86ISD::FXOR";
				4338	case X86ISD::FSRL: return "X86ISD::FSRL";
				4339	case X86ISD::FILD: return "X86ISD::FILD";
				4340	case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
				4341	case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
				4342	case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
				4343	case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
				4344	case X86ISD::FLD: return "X86ISD::FLD";
				4345	case X86ISD::FST: return "X86ISD::FST";
				4346	case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
				4347	case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
				4348	case X86ISD::CALL: return "X86ISD::CALL";
				4349	case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
				4350	case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
				4351	case X86ISD::CMP: return "X86ISD::CMP";
				4352	case X86ISD::COMI: return "X86ISD::COMI";
				4353	case X86ISD::UCOMI: return "X86ISD::UCOMI";
				4354	case X86ISD::SETCC: return "X86ISD::SETCC";
				4355	case X86ISD::CMOV: return "X86ISD::CMOV";
				4356	case X86ISD::BRCOND: return "X86ISD::BRCOND";
				4357	case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
				4358	case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
				4359	case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
Evan Cheng	75b3832	2007-07-27 01:37:47 +0000	[diff] [blame]	4360	case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK";
				4361	case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA";
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4362	case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
				4363	case X86ISD::Wrapper: return "X86ISD::Wrapper";
				4364	case X86ISD::S2VEC: return "X86ISD::S2VEC";
				4365	case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
				4366	case X86ISD::PINSRW: return "X86ISD::PINSRW";
				4367	case X86ISD::FMAX: return "X86ISD::FMAX";
				4368	case X86ISD::FMIN: return "X86ISD::FMIN";
				4369	case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
				4370	case X86ISD::FRCP: return "X86ISD::FRCP";
				4371	case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
				4372	case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
				4373	case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
				4374	}
				4375	}
				4376
				4377	// isLegalAddressingMode - Return true if the addressing mode represented
				4378	// by AM is legal for this target, for a load/store of the specified type.
				4379	bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
				4380	const Type *Ty) const {
				4381	// X86 supports extremely general addressing modes.
				4382
				4383	// X86 allows a sign-extended 32-bit immediate field as a displacement.
				4384	if (AM.BaseOffs <= -(1LL << 32) \|\| AM.BaseOffs >= (1LL << 32)-1)
				4385	return false;
				4386
				4387	if (AM.BaseGV) {
				4388	// X86-64 only supports addr of globals in small code model.
				4389	if (Subtarget->is64Bit() &&
				4390	getTargetMachine().getCodeModel() != CodeModel::Small)
				4391	return false;
				4392
				4393	// We can only fold this if we don't need a load either.
				4394	if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
				4395	return false;
				4396	}
				4397
				4398	switch (AM.Scale) {
				4399	case 0:
				4400	case 1:
				4401	case 2:
				4402	case 4:
				4403	case 8:
				4404	// These scales always work.
				4405	break;
				4406	case 3:
				4407	case 5:
				4408	case 9:
				4409	// These scales are formed with basereg+scalereg. Only accept if there is
				4410	// no basereg yet.
				4411	if (AM.HasBaseReg)
				4412	return false;
				4413	break;
				4414	default: // Other stuff never works.
				4415	return false;
				4416	}
				4417
				4418	return true;
				4419	}
				4420
				4421
				4422	/// isShuffleMaskLegal - Targets can use this to indicate that they only
				4423	/// support some VECTOR_SHUFFLE operations, those with specific masks.
				4424	/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
				4425	/// are assumed to be legal.
				4426	bool
				4427	X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
				4428	// Only do shuffles on 128-bit vector types for now.
				4429	if (MVT::getSizeInBits(VT) == 64) return false;
				4430	return (Mask.Val->getNumOperands() <= 4 \|\|
				4431	isIdentityMask(Mask.Val) \|\|
				4432	isIdentityMask(Mask.Val, true) \|\|
				4433	isSplatMask(Mask.Val) \|\|
				4434	isPSHUFHW_PSHUFLWMask(Mask.Val) \|\|
				4435	X86::isUNPCKLMask(Mask.Val) \|\|
				4436	X86::isUNPCKHMask(Mask.Val) \|\|
				4437	X86::isUNPCKL_v_undef_Mask(Mask.Val) \|\|
				4438	X86::isUNPCKH_v_undef_Mask(Mask.Val));
				4439	}
				4440
				4441	bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
				4442	MVT::ValueType EVT,
				4443	SelectionDAG &DAG) const {
				4444	unsigned NumElts = BVOps.size();
				4445	// Only do shuffles on 128-bit vector types for now.
				4446	if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
				4447	if (NumElts == 2) return true;
				4448	if (NumElts == 4) {
				4449	return (isMOVLMask(&BVOps[0], 4) \|\|
				4450	isCommutedMOVL(&BVOps[0], 4, true) \|\|
				4451	isSHUFPMask(&BVOps[0], 4) \|\|
				4452	isCommutedSHUFP(&BVOps[0], 4));
				4453	}
				4454	return false;
				4455	}
				4456
				4457	//===----------------------------------------------------------------------===//
				4458	// X86 Scheduler Hooks
				4459	//===----------------------------------------------------------------------===//
				4460
				4461	MachineBasicBlock *
				4462	X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
				4463	MachineBasicBlock *BB) {
				4464	const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
				4465	switch (MI->getOpcode()) {
				4466	default: assert(false && "Unexpected instr type to insert");
				4467	case X86::CMOV_FR32:
				4468	case X86::CMOV_FR64:
				4469	case X86::CMOV_V4F32:
				4470	case X86::CMOV_V2F64:
				4471	case X86::CMOV_V2I64: {
				4472	// To "insert" a SELECT_CC instruction, we actually have to insert the
				4473	// diamond control-flow pattern. The incoming instruction knows the
				4474	// destination vreg to set, the condition code register to branch on, the
				4475	// true/false values to select between, and a branch opcode to use.
				4476	const BasicBlock *LLVM_BB = BB->getBasicBlock();
				4477	ilist<MachineBasicBlock>::iterator It = BB;
				4478	++It;
				4479
				4480	// thisMBB:
				4481	// ...
				4482	// TrueVal = ...
				4483	// cmpTY ccX, r1, r2
				4484	// bCC copy1MBB
				4485	// fallthrough --> copy0MBB
				4486	MachineBasicBlock *thisMBB = BB;
				4487	MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
				4488	MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
				4489	unsigned Opc =
				4490	X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
				4491	BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
				4492	MachineFunction *F = BB->getParent();
				4493	F->getBasicBlockList().insert(It, copy0MBB);
				4494	F->getBasicBlockList().insert(It, sinkMBB);
				4495	// Update machine-CFG edges by first adding all successors of the current
				4496	// block to the new block which will contain the Phi node for the select.
				4497	for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
				4498	e = BB->succ_end(); i != e; ++i)
				4499	sinkMBB->addSuccessor(*i);
				4500	// Next, remove all successors of the current block, and add the true
				4501	// and fallthrough blocks as its successors.
				4502	while(!BB->succ_empty())
				4503	BB->removeSuccessor(BB->succ_begin());
				4504	BB->addSuccessor(copy0MBB);
				4505	BB->addSuccessor(sinkMBB);
				4506
				4507	// copy0MBB:
				4508	// %FalseValue = ...
				4509	// # fallthrough to sinkMBB
				4510	BB = copy0MBB;
				4511
				4512	// Update machine-CFG edges
				4513	BB->addSuccessor(sinkMBB);
				4514
				4515	// sinkMBB:
				4516	// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
				4517	// ...
				4518	BB = sinkMBB;
				4519	BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
				4520	.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
				4521	.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
				4522
				4523	delete MI; // The pseudo instruction is gone now.
				4524	return BB;
				4525	}
				4526
				4527	case X86::FP32_TO_INT16_IN_MEM:
				4528	case X86::FP32_TO_INT32_IN_MEM:
				4529	case X86::FP32_TO_INT64_IN_MEM:
				4530	case X86::FP64_TO_INT16_IN_MEM:
				4531	case X86::FP64_TO_INT32_IN_MEM:
				4532	case X86::FP64_TO_INT64_IN_MEM: {
				4533	// Change the floating point control register to use "round towards zero"
				4534	// mode when truncating to an integer value.
				4535	MachineFunction *F = BB->getParent();
				4536	int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
				4537	addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
				4538
				4539	// Load the old value of the high byte of the control word...
				4540	unsigned OldCW =
				4541	F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
				4542	addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
				4543
				4544	// Set the high part to be round to zero...
				4545	addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
				4546	.addImm(0xC7F);
				4547
				4548	// Reload the modified control word now...
				4549	addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
				4550
				4551	// Restore the memory image of control word to original value
				4552	addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
				4553	.addReg(OldCW);
				4554
				4555	// Get the X86 opcode to use.
				4556	unsigned Opc;
				4557	switch (MI->getOpcode()) {
				4558	default: assert(0 && "illegal opcode!");
				4559	case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
				4560	case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
				4561	case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
				4562	case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
				4563	case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
				4564	case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
				4565	}
				4566
				4567	X86AddressMode AM;
				4568	MachineOperand &Op = MI->getOperand(0);
				4569	if (Op.isRegister()) {
				4570	AM.BaseType = X86AddressMode::RegBase;
				4571	AM.Base.Reg = Op.getReg();
				4572	} else {
				4573	AM.BaseType = X86AddressMode::FrameIndexBase;
				4574	AM.Base.FrameIndex = Op.getFrameIndex();
				4575	}
				4576	Op = MI->getOperand(1);
				4577	if (Op.isImmediate())
				4578	AM.Scale = Op.getImm();
				4579	Op = MI->getOperand(2);
				4580	if (Op.isImmediate())
				4581	AM.IndexReg = Op.getImm();
				4582	Op = MI->getOperand(3);
				4583	if (Op.isGlobalAddress()) {
				4584	AM.GV = Op.getGlobal();
				4585	} else {
				4586	AM.Disp = Op.getImm();
				4587	}
				4588	addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
				4589	.addReg(MI->getOperand(4).getReg());
				4590
				4591	// Reload the original control word now.
				4592	addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
				4593
				4594	delete MI; // The pseudo instruction is gone now.
				4595	return BB;
				4596	}
				4597	}
				4598	}
				4599
				4600	//===----------------------------------------------------------------------===//
				4601	// X86 Optimization Hooks
				4602	//===----------------------------------------------------------------------===//
				4603
				4604	void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
				4605	uint64_t Mask,
				4606	uint64_t &KnownZero,
				4607	uint64_t &KnownOne,
				4608	const SelectionDAG &DAG,
				4609	unsigned Depth) const {
				4610	unsigned Opc = Op.getOpcode();
				4611	assert((Opc >= ISD::BUILTIN_OP_END \|\|
				4612	Opc == ISD::INTRINSIC_WO_CHAIN \|\|
				4613	Opc == ISD::INTRINSIC_W_CHAIN \|\|
				4614	Opc == ISD::INTRINSIC_VOID) &&
				4615	"Should use MaskedValueIsZero if you don't know whether Op"
				4616	" is a target node!");
				4617
				4618	KnownZero = KnownOne = 0; // Don't know anything.
				4619	switch (Opc) {
				4620	default: break;
				4621	case X86ISD::SETCC:
				4622	KnownZero \|= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
				4623	break;
				4624	}
				4625	}
				4626
				4627	/// getShuffleScalarElt - Returns the scalar element that will make up the ith
				4628	/// element of the result of the vector shuffle.
				4629	static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
				4630	MVT::ValueType VT = N->getValueType(0);
				4631	SDOperand PermMask = N->getOperand(2);
				4632	unsigned NumElems = PermMask.getNumOperands();
				4633	SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
				4634	i %= NumElems;
				4635	if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
				4636	return (i == 0)
				4637	? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
				4638	} else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
				4639	SDOperand Idx = PermMask.getOperand(i);
				4640	if (Idx.getOpcode() == ISD::UNDEF)
				4641	return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
				4642	return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
				4643	}
				4644	return SDOperand();
				4645	}
				4646
				4647	/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
				4648	/// node is a GlobalAddress + an offset.
				4649	static bool isGAPlusOffset(SDNode N, GlobalValue &GA, int64_t &Offset) {
				4650	unsigned Opc = N->getOpcode();
				4651	if (Opc == X86ISD::Wrapper) {
				4652	if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
				4653	GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
				4654	return true;
				4655	}
				4656	} else if (Opc == ISD::ADD) {
				4657	SDOperand N1 = N->getOperand(0);
				4658	SDOperand N2 = N->getOperand(1);
				4659	if (isGAPlusOffset(N1.Val, GA, Offset)) {
				4660	ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
				4661	if (V) {
				4662	Offset += V->getSignExtended();
				4663	return true;
				4664	}
				4665	} else if (isGAPlusOffset(N2.Val, GA, Offset)) {
				4666	ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
				4667	if (V) {
				4668	Offset += V->getSignExtended();
				4669	return true;
				4670	}
				4671	}
				4672	}
				4673	return false;
				4674	}
				4675
				4676	/// isConsecutiveLoad - Returns true if N is loading from an address of Base
				4677	/// + Dist * Size.
				4678	static bool isConsecutiveLoad(SDNode N, SDNode Base, int Dist, int Size,
				4679	MachineFrameInfo *MFI) {
				4680	if (N->getOperand(0).Val != Base->getOperand(0).Val)
				4681	return false;
				4682
				4683	SDOperand Loc = N->getOperand(1);
				4684	SDOperand BaseLoc = Base->getOperand(1);
				4685	if (Loc.getOpcode() == ISD::FrameIndex) {
				4686	if (BaseLoc.getOpcode() != ISD::FrameIndex)
				4687	return false;
Dan Gohman	53491e9	2007-07-23 20:24:29 +0000	[diff] [blame]	4688	int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
				4689	int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4690	int FS = MFI->getObjectSize(FI);
				4691	int BFS = MFI->getObjectSize(BFI);
				4692	if (FS != BFS \|\| FS != Size) return false;
				4693	return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
				4694	} else {
				4695	GlobalValue *GV1 = NULL;
				4696	GlobalValue *GV2 = NULL;
				4697	int64_t Offset1 = 0;
				4698	int64_t Offset2 = 0;
				4699	bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
				4700	bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
				4701	if (isGA1 && isGA2 && GV1 == GV2)
				4702	return Offset1 == (Offset2 + Dist*Size);
				4703	}
				4704
				4705	return false;
				4706	}
				4707
				4708	static bool isBaseAlignment16(SDNode Base, MachineFrameInfo MFI,
				4709	const X86Subtarget *Subtarget) {
				4710	GlobalValue *GV;
				4711	int64_t Offset;
				4712	if (isGAPlusOffset(Base, GV, Offset))
				4713	return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
				4714	else {
				4715	assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
Dan Gohman	53491e9	2007-07-23 20:24:29 +0000	[diff] [blame]	4716	int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4717	if (BFI < 0)
				4718	// Fixed objects do not specify alignment, however the offsets are known.
				4719	return ((Subtarget->getStackAlignment() % 16) == 0 &&
				4720	(MFI->getObjectOffset(BFI) % 16) == 0);
				4721	else
				4722	return MFI->getObjectAlignment(BFI) >= 16;
				4723	}
				4724	return false;
				4725	}
				4726
				4727
				4728	/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
				4729	/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
				4730	/// if the load addresses are consecutive, non-overlapping, and in the right
				4731	/// order.
				4732	static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
				4733	const X86Subtarget *Subtarget) {
				4734	MachineFunction &MF = DAG.getMachineFunction();
				4735	MachineFrameInfo *MFI = MF.getFrameInfo();
				4736	MVT::ValueType VT = N->getValueType(0);
				4737	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				4738	SDOperand PermMask = N->getOperand(2);
				4739	int NumElems = (int)PermMask.getNumOperands();
				4740	SDNode *Base = NULL;
				4741	for (int i = 0; i < NumElems; ++i) {
				4742	SDOperand Idx = PermMask.getOperand(i);
				4743	if (Idx.getOpcode() == ISD::UNDEF) {
				4744	if (!Base) return SDOperand();
				4745	} else {
				4746	SDOperand Arg =
				4747	getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
				4748	if (!Arg.Val \|\| !ISD::isNON_EXTLoad(Arg.Val))
				4749	return SDOperand();
				4750	if (!Base)
				4751	Base = Arg.Val;
				4752	else if (!isConsecutiveLoad(Arg.Val, Base,
				4753	i, MVT::getSizeInBits(EVT)/8,MFI))
				4754	return SDOperand();
				4755	}
				4756	}
				4757
				4758	bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
				4759	if (isAlign16) {
Evan Cheng	75b3832	2007-07-27 01:37:47 +0000	[diff] [blame]	4760	LoadSDNode *LD = cast<LoadSDNode>(Base);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4761	return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
Evan Cheng	75b3832	2007-07-27 01:37:47 +0000	[diff] [blame]	4762	LD->getSrcValueOffset());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4763	} else {
Evan Cheng	75b3832	2007-07-27 01:37:47 +0000	[diff] [blame]	4764	// Just use movups, it's shorter.
				4765	SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
				4766	SmallVector<SDOperand, 3> Ops;
				4767	Ops.push_back(Base->getOperand(0));
				4768	Ops.push_back(Base->getOperand(1));
				4769	Ops.push_back(Base->getOperand(2));
				4770	return DAG.getNode(ISD::BIT_CONVERT, VT,
				4771	DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4772	}
				4773	}
				4774
				4775	/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
				4776	static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
				4777	const X86Subtarget *Subtarget) {
				4778	SDOperand Cond = N->getOperand(0);
				4779
				4780	// If we have SSE[12] support, try to form min/max nodes.
				4781	if (Subtarget->hasSSE2() &&
				4782	(N->getValueType(0) == MVT::f32 \|\| N->getValueType(0) == MVT::f64)) {
				4783	if (Cond.getOpcode() == ISD::SETCC) {
				4784	// Get the LHS/RHS of the select.
				4785	SDOperand LHS = N->getOperand(1);
				4786	SDOperand RHS = N->getOperand(2);
				4787	ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
				4788
				4789	unsigned Opcode = 0;
				4790	if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
				4791	switch (CC) {
				4792	default: break;
				4793	case ISD::SETOLE: // (X <= Y) ? X : Y -> min
				4794	case ISD::SETULE:
				4795	case ISD::SETLE:
				4796	if (!UnsafeFPMath) break;
				4797	// FALL THROUGH.
				4798	case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
				4799	case ISD::SETLT:
				4800	Opcode = X86ISD::FMIN;
				4801	break;
				4802
				4803	case ISD::SETOGT: // (X > Y) ? X : Y -> max
				4804	case ISD::SETUGT:
				4805	case ISD::SETGT:
				4806	if (!UnsafeFPMath) break;
				4807	// FALL THROUGH.
				4808	case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
				4809	case ISD::SETGE:
				4810	Opcode = X86ISD::FMAX;
				4811	break;
				4812	}
				4813	} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
				4814	switch (CC) {
				4815	default: break;
				4816	case ISD::SETOGT: // (X > Y) ? Y : X -> min
				4817	case ISD::SETUGT:
				4818	case ISD::SETGT:
				4819	if (!UnsafeFPMath) break;
				4820	// FALL THROUGH.
				4821	case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
				4822	case ISD::SETGE:
				4823	Opcode = X86ISD::FMIN;
				4824	break;
				4825
				4826	case ISD::SETOLE: // (X <= Y) ? Y : X -> max
				4827	case ISD::SETULE:
				4828	case ISD::SETLE:
				4829	if (!UnsafeFPMath) break;
				4830	// FALL THROUGH.
				4831	case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
				4832	case ISD::SETLT:
				4833	Opcode = X86ISD::FMAX;
				4834	break;
				4835	}
				4836	}
				4837
				4838	if (Opcode)
				4839	return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
				4840	}
				4841
				4842	}
				4843
				4844	return SDOperand();
				4845	}
				4846
				4847
				4848	SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
				4849	DAGCombinerInfo &DCI) const {
				4850	SelectionDAG &DAG = DCI.DAG;
				4851	switch (N->getOpcode()) {
				4852	default: break;
				4853	case ISD::VECTOR_SHUFFLE:
				4854	return PerformShuffleCombine(N, DAG, Subtarget);
				4855	case ISD::SELECT:
				4856	return PerformSELECTCombine(N, DAG, Subtarget);
				4857	}
				4858
				4859	return SDOperand();
				4860	}
				4861
				4862	//===----------------------------------------------------------------------===//
				4863	// X86 Inline Assembly Support
				4864	//===----------------------------------------------------------------------===//
				4865
				4866	/// getConstraintType - Given a constraint letter, return the type of
				4867	/// constraint it is for this target.
				4868	X86TargetLowering::ConstraintType
				4869	X86TargetLowering::getConstraintType(const std::string &Constraint) const {
				4870	if (Constraint.size() == 1) {
				4871	switch (Constraint[0]) {
				4872	case 'A':
				4873	case 'r':
				4874	case 'R':
				4875	case 'l':
				4876	case 'q':
				4877	case 'Q':
				4878	case 'x':
				4879	case 'Y':
				4880	return C_RegisterClass;
				4881	default:
				4882	break;
				4883	}
				4884	}
				4885	return TargetLowering::getConstraintType(Constraint);
				4886	}
				4887
				4888	/// isOperandValidForConstraint - Return the specified operand (possibly
				4889	/// modified) if the specified SDOperand is valid for the specified target
				4890	/// constraint letter, otherwise return null.
				4891	SDOperand X86TargetLowering::
				4892	isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
				4893	switch (Constraint) {
				4894	default: break;
				4895	case 'I':
				4896	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
				4897	if (C->getValue() <= 31)
				4898	return DAG.getTargetConstant(C->getValue(), Op.getValueType());
				4899	}
				4900	return SDOperand(0,0);
				4901	case 'N':
				4902	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
				4903	if (C->getValue() <= 255)
				4904	return DAG.getTargetConstant(C->getValue(), Op.getValueType());
				4905	}
				4906	return SDOperand(0,0);
				4907	case 'i': {
				4908	// Literal immediates are always ok.
				4909	if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op))
				4910	return DAG.getTargetConstant(CST->getValue(), Op.getValueType());
				4911
				4912	// If we are in non-pic codegen mode, we allow the address of a global (with
				4913	// an optional displacement) to be used with 'i'.
				4914	GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
				4915	int64_t Offset = 0;
				4916
				4917	// Match either (GA) or (GA+C)
				4918	if (GA) {
				4919	Offset = GA->getOffset();
				4920	} else if (Op.getOpcode() == ISD::ADD) {
				4921	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
				4922	GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
				4923	if (C && GA) {
				4924	Offset = GA->getOffset()+C->getValue();
				4925	} else {
				4926	C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
				4927	GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
				4928	if (C && GA)
				4929	Offset = GA->getOffset()+C->getValue();
				4930	else
				4931	C = 0, GA = 0;
				4932	}
				4933	}
				4934
				4935	if (GA) {
				4936	// If addressing this global requires a load (e.g. in PIC mode), we can't
				4937	// match.
				4938	if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
				4939	false))
				4940	return SDOperand(0, 0);
				4941
				4942	Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
				4943	Offset);
				4944	return Op;
				4945	}
				4946
				4947	// Otherwise, not valid for this mode.
				4948	return SDOperand(0, 0);
				4949	}
				4950	}
				4951	return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
				4952	}
				4953
				4954	std::vector<unsigned> X86TargetLowering::
				4955	getRegClassForInlineAsmConstraint(const std::string &Constraint,
				4956	MVT::ValueType VT) const {
				4957	if (Constraint.size() == 1) {
				4958	// FIXME: not handling fp-stack yet!
				4959	switch (Constraint[0]) { // GCC X86 Constraint Letters
				4960	default: break; // Unknown constraint letter
				4961	case 'A': // EAX/EDX
				4962	if (VT == MVT::i32 \|\| VT == MVT::i64)
				4963	return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
				4964	break;
				4965	case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
				4966	case 'Q': // Q_REGS
				4967	if (VT == MVT::i32)
				4968	return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
				4969	else if (VT == MVT::i16)
				4970	return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
				4971	else if (VT == MVT::i8)
				4972	return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0);
				4973	break;
				4974	}
				4975	}
				4976
				4977	return std::vector<unsigned>();
				4978	}
				4979
				4980	std::pair<unsigned, const TargetRegisterClass*>
				4981	X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
				4982	MVT::ValueType VT) const {
				4983	// First, see if this is a constraint that directly corresponds to an LLVM
				4984	// register class.
				4985	if (Constraint.size() == 1) {
				4986	// GCC Constraint Letters
				4987	switch (Constraint[0]) {
				4988	default: break;
				4989	case 'r': // GENERAL_REGS
				4990	case 'R': // LEGACY_REGS
				4991	case 'l': // INDEX_REGS
				4992	if (VT == MVT::i64 && Subtarget->is64Bit())
				4993	return std::make_pair(0U, X86::GR64RegisterClass);
				4994	if (VT == MVT::i32)
				4995	return std::make_pair(0U, X86::GR32RegisterClass);
				4996	else if (VT == MVT::i16)
				4997	return std::make_pair(0U, X86::GR16RegisterClass);
				4998	else if (VT == MVT::i8)
				4999	return std::make_pair(0U, X86::GR8RegisterClass);
				5000	break;
				5001	case 'y': // MMX_REGS if MMX allowed.
				5002	if (!Subtarget->hasMMX()) break;
				5003	return std::make_pair(0U, X86::VR64RegisterClass);
				5004	break;
				5005	case 'Y': // SSE_REGS if SSE2 allowed
				5006	if (!Subtarget->hasSSE2()) break;
				5007	// FALL THROUGH.
				5008	case 'x': // SSE_REGS if SSE1 allowed
				5009	if (!Subtarget->hasSSE1()) break;
				5010
				5011	switch (VT) {
				5012	default: break;
				5013	// Scalar SSE types.
				5014	case MVT::f32:
				5015	case MVT::i32:
				5016	return std::make_pair(0U, X86::FR32RegisterClass);
				5017	case MVT::f64:
				5018	case MVT::i64:
				5019	return std::make_pair(0U, X86::FR64RegisterClass);
				5020	// Vector types.
				5021	case MVT::v16i8:
				5022	case MVT::v8i16:
				5023	case MVT::v4i32:
				5024	case MVT::v2i64:
				5025	case MVT::v4f32:
				5026	case MVT::v2f64:
				5027	return std::make_pair(0U, X86::VR128RegisterClass);
				5028	}
				5029	break;
				5030	}
				5031	}
				5032
				5033	// Use the default implementation in TargetLowering to convert the register
				5034	// constraint into a member of a register class.
				5035	std::pair<unsigned, const TargetRegisterClass*> Res;
				5036	Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
				5037
				5038	// Not found as a standard register?
				5039	if (Res.second == 0) {
				5040	// GCC calls "st(0)" just plain "st".
				5041	if (StringsEqualNoCase("{st}", Constraint)) {
				5042	Res.first = X86::ST0;
				5043	Res.second = X86::RSTRegisterClass;
				5044	}
				5045
				5046	return Res;
				5047	}
				5048
				5049	// Otherwise, check to see if this is a register class of the wrong value
				5050	// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
				5051	// turn into {ax},{dx}.
				5052	if (Res.second->hasType(VT))
				5053	return Res; // Correct type already, nothing to do.
				5054
				5055	// All of the single-register GCC register classes map their values onto
				5056	// 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
				5057	// really want an 8-bit or 32-bit register, map to the appropriate register
				5058	// class and return the appropriate register.
				5059	if (Res.second != X86::GR16RegisterClass)
				5060	return Res;
				5061
				5062	if (VT == MVT::i8) {
				5063	unsigned DestReg = 0;
				5064	switch (Res.first) {
				5065	default: break;
				5066	case X86::AX: DestReg = X86::AL; break;
				5067	case X86::DX: DestReg = X86::DL; break;
				5068	case X86::CX: DestReg = X86::CL; break;
				5069	case X86::BX: DestReg = X86::BL; break;
				5070	}
				5071	if (DestReg) {
				5072	Res.first = DestReg;
				5073	Res.second = Res.second = X86::GR8RegisterClass;
				5074	}
				5075	} else if (VT == MVT::i32) {
				5076	unsigned DestReg = 0;
				5077	switch (Res.first) {
				5078	default: break;
				5079	case X86::AX: DestReg = X86::EAX; break;
				5080	case X86::DX: DestReg = X86::EDX; break;
				5081	case X86::CX: DestReg = X86::ECX; break;
				5082	case X86::BX: DestReg = X86::EBX; break;
				5083	case X86::SI: DestReg = X86::ESI; break;
				5084	case X86::DI: DestReg = X86::EDI; break;
				5085	case X86::BP: DestReg = X86::EBP; break;
				5086	case X86::SP: DestReg = X86::ESP; break;
				5087	}
				5088	if (DestReg) {
				5089	Res.first = DestReg;
				5090	Res.second = Res.second = X86::GR32RegisterClass;
				5091	}
				5092	} else if (VT == MVT::i64) {
				5093	unsigned DestReg = 0;
				5094	switch (Res.first) {
				5095	default: break;
				5096	case X86::AX: DestReg = X86::RAX; break;
				5097	case X86::DX: DestReg = X86::RDX; break;
				5098	case X86::CX: DestReg = X86::RCX; break;
				5099	case X86::BX: DestReg = X86::RBX; break;
				5100	case X86::SI: DestReg = X86::RSI; break;
				5101	case X86::DI: DestReg = X86::RDI; break;
				5102	case X86::BP: DestReg = X86::RBP; break;
				5103	case X86::SP: DestReg = X86::RSP; break;
				5104	}
				5105	if (DestReg) {
				5106	Res.first = DestReg;
				5107	Res.second = Res.second = X86::GR64RegisterClass;
				5108	}
				5109	}
				5110
				5111	return Res;
				5112	}