Blame - lib/Target/X86/X86ISelLowering.cpp - fp2-dev/platform/external/llvm

blob: f8ff6a055c3239aa7c1f6429015d4a9e2162672c [file] [log] [blame]

Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1	//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file defines the interfaces that X86 uses to lower LLVM code into a
				11	// selection DAG.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "X86.h"
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	16	#include "X86CodeEmitter.h"
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	17	#include "X86InstrBuilder.h"
				18	#include "X86ISelLowering.h"
				19	#include "X86MachineFunctionInfo.h"
				20	#include "X86TargetMachine.h"
				21	#include "llvm/CallingConv.h"
				22	#include "llvm/Constants.h"
				23	#include "llvm/DerivedTypes.h"
				24	#include "llvm/GlobalVariable.h"
				25	#include "llvm/Function.h"
				26	#include "llvm/Intrinsics.h"
				27	#include "llvm/ADT/VectorExtras.h"
				28	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				29	#include "llvm/CodeGen/CallingConvLower.h"
				30	#include "llvm/CodeGen/MachineFrameInfo.h"
				31	#include "llvm/CodeGen/MachineFunction.h"
				32	#include "llvm/CodeGen/MachineInstrBuilder.h"
				33	#include "llvm/CodeGen/SelectionDAG.h"
				34	#include "llvm/CodeGen/SSARegMap.h"
				35	#include "llvm/Support/MathExtras.h"
				36	#include "llvm/Target/TargetOptions.h"
				37	#include "llvm/ADT/StringExtras.h"
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	38	#include "llvm/ParameterAttributes.h"
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	39	using namespace llvm;
				40
				41	X86TargetLowering::X86TargetLowering(TargetMachine &TM)
				42	: TargetLowering(TM) {
					// Overview: caches the subtarget, selects scalar-SSE mode and the stack
					// pointer register, registers the register class used for each value
					// type, and then records, per (operation, value type) pair, whether the
					// node is Legal, Promote, Expand or Custom. Later calls for the same pair
					// override earlier ones, so the order of the statements below matters.
				43	Subtarget = &TM.getSubtarget<X86Subtarget>();
				44	X86ScalarSSE = Subtarget->hasSSE2();
				45	X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
				46
				47	RegInfo = TM.getRegisterInfo();
				48
				49	// Set up the TargetLowering object.
				50
				51	// X86 is weird, it always uses i8 for shift amounts and setcc results.
				52	setShiftAmountType(MVT::i8);
				53	setSetCCResultType(MVT::i8);
				54	setSetCCResultContents(ZeroOrOneSetCCResult);
				55	setSchedulingPreference(SchedulingForRegPressure);
				56	setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
				57	setStackPointerRegisterToSaveRestore(X86StackPtr);
				58
				59	if (Subtarget->isTargetDarwin()) {
				60	// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
					// NOTE(review): the comment above does not match the code, which clears
					// both underscore flags for Darwin - confirm which one is intended.
				61	setUseUnderscoreSetJmp(false);
				62	setUseUnderscoreLongJmp(false);
				63	} else if (Subtarget->isTargetMingw()) {
				64	// MS runtime is weird: it exports _setjmp, but longjmp!
				65	setUseUnderscoreSetJmp(true);
				66	setUseUnderscoreLongJmp(false);
				67	} else {
				68	setUseUnderscoreSetJmp(true);
				69	setUseUnderscoreLongJmp(true);
				70	}
				71
				72	// Set up the register classes.
				73	addRegisterClass(MVT::i8, X86::GR8RegisterClass);
				74	addRegisterClass(MVT::i16, X86::GR16RegisterClass);
				75	addRegisterClass(MVT::i32, X86::GR32RegisterClass);
				76	if (Subtarget->is64Bit())
				77	addRegisterClass(MVT::i64, X86::GR64RegisterClass);
				78
				79	setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
				80
				81	// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
				82	// operation.
				83	setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
				84	setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
				85	setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
				86
				87	if (Subtarget->is64Bit()) {
				88	setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
				89	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
				90	} else {
				91	if (X86ScalarSSE)
				92	// If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
				93	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
				94	else
				95	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
				96	}
				97
				98	// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
				99	// this operation.
				100	setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
				101	setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
				102	// SSE has no i16 to fp conversion, only i32
				103	if (X86ScalarSSE)
				104	setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
				105	else {
				106	setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
				107	setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
				108	}
				109
				110	if (!Subtarget->is64Bit()) {
				111	// Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
				112	setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
				113	setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
				114	}
				115
				116	// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
				117	// this operation.
				118	setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
				119	setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
				120
				121	if (X86ScalarSSE) {
				122	setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
				123	} else {
				124	setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
				125	setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
				126	}
				127
				128	// Handle FP_TO_UINT by promoting the destination to a larger signed
				129	// conversion.
				130	setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
				131	setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
				132	setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
				133
				134	if (Subtarget->is64Bit()) {
				135	setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
				136	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
				137	} else {
				138	if (X86ScalarSSE && !Subtarget->hasSSE3())
				139	// Expand FP_TO_UINT into a select.
				140	// FIXME: We would like to use a Custom expander here eventually to do
				141	// the optimal thing for SSE vs. the default expansion in the legalizer.
				142	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
				143	else
				144	// With SSE3 we can use fisttpll to convert to a signed i64.
				145	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
				146	}
				147
				148	// TODO: when we have SSE, these could be more efficient, by using movd/movq.
				149	if (!X86ScalarSSE) {
				150	setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
				151	setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
				152	}
				153
				154	setOperationAction(ISD::BR_JT , MVT::Other, Expand);
				155	setOperationAction(ISD::BRCOND , MVT::Other, Custom);
				156	setOperationAction(ISD::BR_CC , MVT::Other, Expand);
				157	setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
				158	setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
				159	if (Subtarget->is64Bit())
				160	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
				161	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
				162	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
				163	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
				164	setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
				165	setOperationAction(ISD::FREM , MVT::f64 , Expand);
				166
				167	setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
				168	setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
				169	setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
				170	setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
				171	setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
				172	setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
				173	setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
				174	setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
				175	setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
				176	if (Subtarget->is64Bit()) {
				177	setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
				178	setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
				179	setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
				180	}
				181
				182	setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
				183	setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
				184
				185	// These should be promoted to a larger select which is supported.
				186	setOperationAction(ISD::SELECT , MVT::i1 , Promote);
				187	setOperationAction(ISD::SELECT , MVT::i8 , Promote);
				188	// X86 wants to expand cmov itself.
				189	setOperationAction(ISD::SELECT , MVT::i16 , Custom);
				190	setOperationAction(ISD::SELECT , MVT::i32 , Custom);
				191	setOperationAction(ISD::SELECT , MVT::f32 , Custom);
				192	setOperationAction(ISD::SELECT , MVT::f64 , Custom);
				193	setOperationAction(ISD::SETCC , MVT::i8 , Custom);
				194	setOperationAction(ISD::SETCC , MVT::i16 , Custom);
				195	setOperationAction(ISD::SETCC , MVT::i32 , Custom);
				196	setOperationAction(ISD::SETCC , MVT::f32 , Custom);
				197	setOperationAction(ISD::SETCC , MVT::f64 , Custom);
				198	if (Subtarget->is64Bit()) {
				199	setOperationAction(ISD::SELECT , MVT::i64 , Custom);
				200	setOperationAction(ISD::SETCC , MVT::i64 , Custom);
				201	}
				202	// X86 ret instruction may pop stack.
				203	setOperationAction(ISD::RET , MVT::Other, Custom);
				204	if (!Subtarget->is64Bit())
				205	setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
				206
				207	// Darwin ABI issue.
				208	setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
				209	setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
				210	setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
				211	setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
				212	setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
				213	if (Subtarget->is64Bit()) {
				214	setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
				215	setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
				216	setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
				217	setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
				218	}
				219	// 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
				220	setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
				221	setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
				222	setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
				223	// X86 wants to expand memset / memcpy itself.
				224	setOperationAction(ISD::MEMSET , MVT::Other, Custom);
				225	setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
				226
				227	// We don't have line number support yet.
				228	setOperationAction(ISD::LOCATION, MVT::Other, Expand);
				229	setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
				230	// FIXME - use subtarget debug flags
					// LABEL is expanded only when the target is none of Darwin, ELF or
					// Cygwin/MinGW.
				231	if (!Subtarget->isTargetDarwin() &&
				232	!Subtarget->isTargetELF() &&
				233	!Subtarget->isTargetCygMing())
				234	setOperationAction(ISD::LABEL, MVT::Other, Expand);
				235
				236	setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
				237	setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
				238	setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
				239	setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
				240	if (Subtarget->is64Bit()) {
				241	// FIXME: Verify
				242	setExceptionPointerRegister(X86::RAX);
				243	setExceptionSelectorRegister(X86::RDX);
				244	} else {
				245	setExceptionPointerRegister(X86::EAX);
				246	setExceptionSelectorRegister(X86::EDX);
				247	}
				248
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	249	setOperationAction(ISD::ADJUST_TRAMP, MVT::i32, Expand);
				250	setOperationAction(ISD::ADJUST_TRAMP, MVT::i64, Expand);
				251	setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
				252
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	253	// VASTART needs to be custom lowered to use the VarArgsFrameIndex
				254	setOperationAction(ISD::VASTART , MVT::Other, Custom);
				255	setOperationAction(ISD::VAARG , MVT::Other, Expand);
				256	setOperationAction(ISD::VAEND , MVT::Other, Expand);
				257	if (Subtarget->is64Bit())
				258	setOperationAction(ISD::VACOPY , MVT::Other, Custom);
				259	else
				260	setOperationAction(ISD::VACOPY , MVT::Other, Expand);
				261
				262	setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
				263	setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
				264	if (Subtarget->is64Bit())
				265	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
				266	if (Subtarget->isTargetCygMing())
				267	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
				268	else
				269	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
				270
					// Scalar FP setup: with X86ScalarSSE, f32/f64 use the FR32/FR64 register
					// classes; otherwise they use the RFP32/RFP64 register classes.
				271	if (X86ScalarSSE) {
				272	// Set up the FP register classes.
				273	addRegisterClass(MVT::f32, X86::FR32RegisterClass);
				274	addRegisterClass(MVT::f64, X86::FR64RegisterClass);
				275
				276	// Use ANDPD to simulate FABS.
				277	setOperationAction(ISD::FABS , MVT::f64, Custom);
				278	setOperationAction(ISD::FABS , MVT::f32, Custom);
				279
				280	// Use XORP to simulate FNEG.
				281	setOperationAction(ISD::FNEG , MVT::f64, Custom);
				282	setOperationAction(ISD::FNEG , MVT::f32, Custom);
				283
				284	// Use ANDPD and ORPD to simulate FCOPYSIGN.
				285	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
				286	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
				287
				288	// We don't support sin/cos/fmod
				289	setOperationAction(ISD::FSIN , MVT::f64, Expand);
				290	setOperationAction(ISD::FCOS , MVT::f64, Expand);
				291	setOperationAction(ISD::FREM , MVT::f64, Expand); // already set to Expand earlier in this constructor
				292	setOperationAction(ISD::FSIN , MVT::f32, Expand);
				293	setOperationAction(ISD::FCOS , MVT::f32, Expand);
				294	setOperationAction(ISD::FREM , MVT::f32, Expand);
				295
				296	// Expand FP immediates into loads from the stack, except for the special
				297	// cases we handle.
				298	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				299	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
				300	addLegalFPImmediate(+0.0); // xorps / xorpd
				301	} else {
				302	// Set up the FP register classes.
				303	addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
				304	addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
				305
				306	setOperationAction(ISD::UNDEF, MVT::f64, Expand);
				307	setOperationAction(ISD::UNDEF, MVT::f32, Expand);
				308	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
				309	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
				310	setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
				311
					// FSIN/FCOS on f64 are expanded unless UnsafeFPMath is set.
				312	if (!UnsafeFPMath) {
				313	setOperationAction(ISD::FSIN , MVT::f64 , Expand);
				314	setOperationAction(ISD::FCOS , MVT::f64 , Expand);
				315	}
				316
				317	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				318	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
				319	addLegalFPImmediate(+0.0); // FLD0
				320	addLegalFPImmediate(+1.0); // FLD1
				321	addLegalFPImmediate(-0.0); // FLD0/FCHS
				322	addLegalFPImmediate(-1.0); // FLD1/FCHS
				323	}
				324
Dale Johannesen	4ab00bd	2007-08-05 18:49:15 +0000	[diff] [blame]	325	// Long double always uses X87.
				326	addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
				327
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	328	// First set operation action for all vector types to expand. Then we
				329	// will selectively turn on ones that can be effectively codegen'd.
				330	for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
				331	VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
				332	setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
				333	setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
				334	setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
				335	setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
				336	setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
				337	setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
				338	setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
				339	setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
				340	setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
				341	setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
				342	setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
				343	setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
				344	setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
				345	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
				346	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				347	setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				348	setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
				349	setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
				350	setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
				351	setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
				352	setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
				353	setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
				354	setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
				355	}
				356
					// MMX: all four 64-bit vector types share the VR64 register class.
				357	if (Subtarget->hasMMX()) {
				358	addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
				359	addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
				360	addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
				361	addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
				362
				363	// FIXME: add MMX packed arithmetics
				364
				365	setOperationAction(ISD::ADD, MVT::v8i8, Legal);
				366	setOperationAction(ISD::ADD, MVT::v4i16, Legal);
				367	setOperationAction(ISD::ADD, MVT::v2i32, Legal);
				368	setOperationAction(ISD::ADD, MVT::v1i64, Legal);
				369
				370	setOperationAction(ISD::SUB, MVT::v8i8, Legal);
				371	setOperationAction(ISD::SUB, MVT::v4i16, Legal);
				372	setOperationAction(ISD::SUB, MVT::v2i32, Legal);
				373
				374	setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
				375	setOperationAction(ISD::MUL, MVT::v4i16, Legal);
				376
					// Bitwise ops and loads on v8i8/v4i16/v2i32 are promoted to v1i64,
					// where they are legal.
				377	setOperationAction(ISD::AND, MVT::v8i8, Promote);
				378	AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
				379	setOperationAction(ISD::AND, MVT::v4i16, Promote);
				380	AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
				381	setOperationAction(ISD::AND, MVT::v2i32, Promote);
				382	AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
				383	setOperationAction(ISD::AND, MVT::v1i64, Legal);
				384
				385	setOperationAction(ISD::OR, MVT::v8i8, Promote);
				386	AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
				387	setOperationAction(ISD::OR, MVT::v4i16, Promote);
				388	AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
				389	setOperationAction(ISD::OR, MVT::v2i32, Promote);
				390	AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
				391	setOperationAction(ISD::OR, MVT::v1i64, Legal);
				392
				393	setOperationAction(ISD::XOR, MVT::v8i8, Promote);
				394	AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
				395	setOperationAction(ISD::XOR, MVT::v4i16, Promote);
				396	AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
				397	setOperationAction(ISD::XOR, MVT::v2i32, Promote);
				398	AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
				399	setOperationAction(ISD::XOR, MVT::v1i64, Legal);
				400
				401	setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
				402	AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
				403	setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
				404	AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
				405	setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
				406	AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
				407	setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
				408
				409	setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
				410	setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
				411	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
				412	setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
				413
				414	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
				415	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
				416	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
				417	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
				418
				419	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
				420	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
				421	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
				422	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
				423	}
				424
					// SSE1: v4f32 lives in the VR128 register class.
				425	if (Subtarget->hasSSE1()) {
				426	addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
				427
				428	setOperationAction(ISD::FADD, MVT::v4f32, Legal);
				429	setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
				430	setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
				431	setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
				432	setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
				433	setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	434	setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
				435	setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
				436	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
				437	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
				438	setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
				439	}
				440
					// SSE2: the remaining 128-bit vector types also use VR128.
				441	if (Subtarget->hasSSE2()) {
				442	addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
				443	addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
				444	addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
				445	addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
				446	addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
				447
				448	setOperationAction(ISD::ADD, MVT::v16i8, Legal);
				449	setOperationAction(ISD::ADD, MVT::v8i16, Legal);
				450	setOperationAction(ISD::ADD, MVT::v4i32, Legal);
				451	setOperationAction(ISD::ADD, MVT::v2i64, Legal);
				452	setOperationAction(ISD::SUB, MVT::v16i8, Legal);
				453	setOperationAction(ISD::SUB, MVT::v8i16, Legal);
				454	setOperationAction(ISD::SUB, MVT::v4i32, Legal);
				455	setOperationAction(ISD::SUB, MVT::v2i64, Legal);
				456	setOperationAction(ISD::MUL, MVT::v8i16, Legal);
				457	setOperationAction(ISD::FADD, MVT::v2f64, Legal);
				458	setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
				459	setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
				460	setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
				461	setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
				462	setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	463
				464	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
				465	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
				466	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
				467	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
				468	// Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
				469	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
				470
				471	// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
					// The loop walks the enumerators from v16i8 up to, but not including,
					// v2i64; v2f64 and v2i64 are handled explicitly after it.
					// NOTE(review): assumes v16i8, v8i16, v4i32 are consecutive MVT values
					// immediately before v2i64 - confirm against the MVT enum.
				472	for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
				473	setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
				474	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
				475	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
				476	}
				477	setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
				478	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
				479	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
				480	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
				481	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
				482	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
				483
				484	// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
				485	for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
				486	setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
				487	AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
				488	setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
				489	AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
				490	setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
				491	AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
				492	setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
				493	AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
				494	setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
				495	AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
				496	}
				497
				498	// v2f64 and v2i64 loads are legal; custom lower v2i64 and v2f64 selects.
				499	setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
				500	setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
				501	setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
				502	setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
				503	}
				504
				505	// We want to custom lower some of our intrinsics.
				506	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
				507
				508	// We have target-specific dag combine patterns for the following nodes:
				509	setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
				510	setTargetDAGCombine(ISD::SELECT);
				511
					// Called only after every register class above has been added.
				512	computeRegisterProperties();
				513
				514	// FIXME: These should be based on subtarget info. Plus, the values should
				515	// be smaller when we are in optimizing for size mode.
				516	maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
				517	maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
				518	maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
				519	allowUnalignedMemoryAccesses = true; // x86 supports it!
				520	}
				521
				522
				523	//===----------------------------------------------------------------------===//
				524	// Return Value Calling Convention Implementation
				525	//===----------------------------------------------------------------------===//
				526
				527	#include "X86GenCallingConv.inc"
				528
				529	/// LowerRET - Lower an ISD::RET node.
					///
					/// Operand 0 of the RET node is the incoming chain; the i-th returned value
					/// is read from operand i*2+1. Return locations are assigned with RetCC_X86.
					/// The result is an X86ISD::RET_FLAG node carrying the chain, the number of
					/// bytes to pop on return (getBytesToPopOnReturn()) and, when one was
					/// produced, the flag that glues the register copies to the return.
					///
					/// A single value returned in ST0 is handled separately from ordinary
					/// register returns: it is emitted through X86ISD::FP_SET_RESULT instead of
					/// a CopyToReg.
				530	SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
				531	assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
				532
				533	SmallVector<CCValAssign, 16> RVLocs;
				534	unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
				535	bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
				536	CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
				537	CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
				538
				539
				540	// If this is the first return lowered for this function, add the regs to the
				541	// liveout set for the function.
				542	if (DAG.getMachineFunction().liveout_empty()) {
				543	for (unsigned i = 0; i != RVLocs.size(); ++i)
				544	if (RVLocs[i].isRegLoc())
				545	DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
				546	}
				547
				548	SDOperand Chain = Op.getOperand(0);
				549	SDOperand Flag;
				550
				551	// Copy the result values into the output registers.
					// This path is taken for everything except "exactly one value, in ST0".
				552	if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
				553	RVLocs[0].getLocReg() != X86::ST0) {
				554	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				555	CCValAssign &VA = RVLocs[i];
				556	assert(VA.isRegLoc() && "Can only return in registers!");
					// Thread the flag through so the copies stay glued together.
				557	Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
				558	Flag);
				559	Flag = Chain.getValue(1);
				560	}
				561	} else {
				562	// We need to handle a destination of ST0 specially, because it isn't really
				563	// a register.
				564	SDOperand Value = Op.getOperand(1);
				565
				566	// If this is an FP return with ScalarSSE, we need to move the value from
				567	// an XMM register onto the fp-stack.
				568	if (X86ScalarSSE) {
				569	SDOperand MemLoc;
				570
				571	// If this is a load into a scalarsse value, don't store the loaded value
				572	// back to the stack, only to reload it: just replace the scalar-sse load.
				573	if (ISD::isNON_EXTLoad(Value.Val) &&
				574	(Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
					// Reuse the load's own chain and address for the FLD below.
				575	Chain = Value.getOperand(0);
				576	MemLoc = Value.getOperand(1);
				577	} else {
				578	// Spill the value to memory and reload it into top of stack.
					// The slot's size and alignment are both the value's size in bytes.
				579	unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
				580	MachineFunction &MF = DAG.getMachineFunction();
				581	int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
				582	MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
				583	Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
				584	}
					// Load the value from memory onto the FP stack.
				585	SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
				586	SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
				587	Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
				588	Chain = Value.getValue(1);
				589	}
				590
					// FP_SET_RESULT yields a chain and a flag for the RET_FLAG node.
				591	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				592	SDOperand Ops[] = { Chain, Value };
				593	Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
				594	Flag = Chain.getValue(1);
				595	}
				596
					// Flag.Val is null when nothing was returned (no copies were emitted).
				597	SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
				598	if (Flag.Val)
				599	return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
				600	else
				601	return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
				602	}
				603
				604
				605	/// LowerCallResult - Lower the result values of an ISD::CALL into the
				606	/// appropriate copies out of appropriate physical registers. This assumes that
				607	/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
				608	/// being lowered. This returns an SDNode with the same number of values as the
				609	/// ISD::CALL.
					///
					/// Result locations are assigned with RetCC_X86. The returned node is an
					/// ISD::MERGE_VALUES whose operands are the result values followed by the
					/// final chain. A single result in ST0 is read with X86ISD::FP_GET_RESULT
					/// rather than a CopyFromReg.
				610	SDNode *X86TargetLowering::
				611	LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
				612	unsigned CallingConv, SelectionDAG &DAG) {
				613
				614	// Assign locations to each value returned by this call.
				615	SmallVector<CCValAssign, 16> RVLocs;
					// Operand 2 of the call node is read as the vararg indicator.
				616	bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
				617	CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
				618	CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
				619
				620
				621	SmallVector<SDOperand, 8> ResultVals;
				622
				623	// Copy all of the result registers out of their specified physreg.
					// NOTE(review): unlike the matching test in LowerRET, getLocReg() is used
					// here without an isRegLoc() check - confirm that call results are always
					// assigned to registers.
				624	if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
				625	for (unsigned i = 0; i != RVLocs.size(); ++i) {
					// Chain is set to value #1 of the CopyFromReg node; values #2 and #0 of
					// that same node are then used as the new flag and the copied result.
				626	Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
				627	RVLocs[i].getValVT(), InFlag).getValue(1);
				628	InFlag = Chain.getValue(2);
				629	ResultVals.push_back(Chain.getValue(0));
				630	}
				631	} else {
				632	// Copies from the FP stack are special, as ST0 isn't a valid register
				633	// before the fp stackifier runs.
				634
				635	// Copy ST0 into an RFP register with FP_GET_RESULT.
				636	SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
				637	SDOperand GROps[] = { Chain, InFlag };
				638	SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
				639	Chain = RetVal.getValue(1);
				640	InFlag = RetVal.getValue(2);
				641
				642	// If we are using ScalarSSE, store ST(0) to the stack and reload it into
				643	// an XMM register.
				644	if (X86ScalarSSE) {
				645	// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
				646	// shouldn't be necessary except that RFP cannot be live across
				647	// multiple blocks. When stackifier is fixed, they can be uncoupled.
				648	MachineFunction &MF = DAG.getMachineFunction();
					// NOTE(review): the slot is a fixed 8 bytes with 8-byte alignment, which
					// presumably assumes the result is at most 64 bits wide - confirm this
					// path is never reached with an f80 result.
				649	int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
				650	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				651	SDOperand Ops[] = {
				652	Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
				653	};
				654	Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
				655	RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
				656	Chain = RetVal.getValue(1);
				657	}
				658	ResultVals.push_back(RetVal);
				659	}
				660
				661	// Merge everything together with a MERGE_VALUES node.
				662	ResultVals.push_back(Chain);
				663	return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
				664	&ResultVals[0], ResultVals.size()).Val;
				665	}
				666
				667
				668	//===----------------------------------------------------------------------===//
				669	// C & StdCall Calling Convention implementation
				670	//===----------------------------------------------------------------------===//
				671	// The StdCall calling convention is the standard for many Windows API
				672	// routines. It differs from the C calling convention only slightly: the
				673	// callee, not the caller, cleans up the stack. Symbols are also decorated
				674	// in a special way. It doesn't support any vector arguments.
				675
				676	/// AddLiveIn - This helper function adds the specified physical register to the
				677	/// MachineFunction as a live in value. It also creates a corresponding virtual
				678	/// register for it.
				679	static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
				680	const TargetRegisterClass *RC) {
				681	assert(RC->contains(PReg) && "Not the correct regclass!");
				682	unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
				683	MF.addLiveIn(PReg, VReg);
				684	return VReg;
				685	}
				686
				687	SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
				688	bool isStdCall) {
				689	unsigned NumArgs = Op.Val->getNumValues() - 1;
				690	MachineFunction &MF = DAG.getMachineFunction();
				691	MachineFrameInfo *MFI = MF.getFrameInfo();
				692	SDOperand Root = Op.getOperand(0);
				693	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				694
				695	// Assign locations to all of the incoming arguments.
				696	SmallVector<CCValAssign, 16> ArgLocs;
				697	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				698	getTargetMachine(), ArgLocs);
				699	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
				700
				701	SmallVector<SDOperand, 8> ArgValues;
				702	unsigned LastVal = ~0U;
				703	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				704	CCValAssign &VA = ArgLocs[i];
				705	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				706	// places.
				707	assert(VA.getValNo() != LastVal &&
				708	"Don't support value assigned to multiple locs yet");
				709	LastVal = VA.getValNo();
				710
				711	if (VA.isRegLoc()) {
				712	MVT::ValueType RegVT = VA.getLocVT();
				713	TargetRegisterClass *RC;
				714	if (RegVT == MVT::i32)
				715	RC = X86::GR32RegisterClass;
				716	else {
				717	assert(MVT::isVector(RegVT));
				718	RC = X86::VR128RegisterClass;
				719	}
				720
				721	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				722	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				723
				724	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				725	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				726	// right size.
				727	if (VA.getLocInfo() == CCValAssign::SExt)
				728	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				729	DAG.getValueType(VA.getValVT()));
				730	else if (VA.getLocInfo() == CCValAssign::ZExt)
				731	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				732	DAG.getValueType(VA.getValVT()));
				733
				734	if (VA.getLocInfo() != CCValAssign::Full)
				735	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				736
				737	ArgValues.push_back(ArgValue);
				738	} else {
				739	assert(VA.isMemLoc());
				740
				741	// Create the nodes corresponding to a load from this parameter slot.
				742	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				743	VA.getLocMemOffset());
				744	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
				745	ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
				746	}
				747	}
				748
				749	unsigned StackSize = CCInfo.getNextStackOffset();
				750
				751	ArgValues.push_back(Root);
				752
				753	// If the function takes variable number of arguments, make a frame index for
				754	// the start of the first vararg value... for expansion of llvm.va_start.
				755	if (isVarArg)
				756	VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
				757
				758	if (isStdCall && !isVarArg) {
				759	BytesToPopOnReturn = StackSize; // Callee pops everything..
				760	BytesCallerReserves = 0;
				761	} else {
				762	BytesToPopOnReturn = 0; // Callee pops nothing.
				763
				764	// If this is an sret function, the return should pop the hidden pointer.
				765	if (NumArgs &&
				766	(cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
				767	ISD::ParamFlags::StructReturn))
				768	BytesToPopOnReturn = 4;
				769
				770	BytesCallerReserves = StackSize;
				771	}
				772
				773	RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
				774	ReturnAddrIndex = 0; // No return address slot generated yet.
				775
				776	MF.getInfo<X86MachineFunctionInfo>()
				777	->setBytesToPopOnReturn(BytesToPopOnReturn);
				778
				779	// Return the new list of results.
				780	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				781	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				782	}
				783
				784	SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
				785	unsigned CC) {
				786	SDOperand Chain = Op.getOperand(0);
				787	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				788	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				789	SDOperand Callee = Op.getOperand(4);
				790	unsigned NumOps = (Op.getNumOperands() - 5) / 2;
				791
				792	// Analyze operands of the call, assigning locations to each operand.
				793	SmallVector<CCValAssign, 16> ArgLocs;
				794	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				795	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
				796
				797	// Get a count of how many bytes are to be pushed on the stack.
				798	unsigned NumBytes = CCInfo.getNextStackOffset();
				799
				800	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				801
				802	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				803	SmallVector<SDOperand, 8> MemOpChains;
				804
				805	SDOperand StackPtr;
				806
				807	// Walk the register/memloc assignments, inserting copies/loads.
				808	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				809	CCValAssign &VA = ArgLocs[i];
				810	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				811
				812	// Promote the value if needed.
				813	switch (VA.getLocInfo()) {
				814	default: assert(0 && "Unknown loc info!");
				815	case CCValAssign::Full: break;
				816	case CCValAssign::SExt:
				817	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				818	break;
				819	case CCValAssign::ZExt:
				820	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				821	break;
				822	case CCValAssign::AExt:
				823	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				824	break;
				825	}
				826
				827	if (VA.isRegLoc()) {
				828	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				829	} else {
				830	assert(VA.isMemLoc());
				831	if (StackPtr.Val == 0)
				832	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
				833	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				834	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				835	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				836	}
				837	}
				838
				839	// If the first argument is an sret pointer, remember it.
				840	bool isSRet = NumOps &&
				841	(cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
				842	ISD::ParamFlags::StructReturn);
				843
				844	if (!MemOpChains.empty())
				845	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				846	&MemOpChains[0], MemOpChains.size());
				847
				848	// Build a sequence of copy-to-reg nodes chained together with token chain
				849	// and flag operands which copy the outgoing args into registers.
				850	SDOperand InFlag;
				851	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				852	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				853	InFlag);
				854	InFlag = Chain.getValue(1);
				855	}
				856
				857	// ELF / PIC requires GOT in the EBX register before function calls via PLT
				858	// GOT pointer.
				859	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				860	Subtarget->isPICStyleGOT()) {
				861	Chain = DAG.getCopyToReg(Chain, X86::EBX,
				862	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				863	InFlag);
				864	InFlag = Chain.getValue(1);
				865	}
				866
				867	// If the callee is a GlobalAddress node (quite common, every direct call is)
				868	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				869	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				870	// We should use extra load for direct calls to dllimported functions in
				871	// non-JIT mode.
				872	if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				873	getTargetMachine(), true))
				874	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				875	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				876	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				877
				878	// Returns a chain & a flag for retval copy to use.
				879	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				880	SmallVector<SDOperand, 8> Ops;
				881	Ops.push_back(Chain);
				882	Ops.push_back(Callee);
				883
				884	// Add argument registers to the end of the list so that they are known live
				885	// into the call.
				886	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				887	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				888	RegsToPass[i].second.getValueType()));
				889
				890	// Add an implicit use GOT pointer in EBX.
				891	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				892	Subtarget->isPICStyleGOT())
				893	Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
				894
				895	if (InFlag.Val)
				896	Ops.push_back(InFlag);
				897
				898	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				899	NodeTys, &Ops[0], Ops.size());
				900	InFlag = Chain.getValue(1);
				901
				902	// Create the CALLSEQ_END node.
				903	unsigned NumBytesForCalleeToPush = 0;
				904
				905	if (CC == CallingConv::X86_StdCall) {
				906	if (isVarArg)
				907	NumBytesForCalleeToPush = isSRet ? 4 : 0;
				908	else
				909	NumBytesForCalleeToPush = NumBytes;
				910	} else {
				911	// If this is is a call to a struct-return function, the callee
				912	// pops the hidden struct pointer, so we have to push it back.
				913	// This is common for Darwin/X86, Linux & Mingw32 targets.
				914	NumBytesForCalleeToPush = isSRet ? 4 : 0;
				915	}
				916
				917	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				918	Ops.clear();
				919	Ops.push_back(Chain);
				920	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				921	Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
				922	Ops.push_back(InFlag);
				923	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				924	InFlag = Chain.getValue(1);
				925
				926	// Handle result values, copying them out of physregs into vregs that we
				927	// return.
				928	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				929	}
				930
				931
				932	//===----------------------------------------------------------------------===//
				933	// FastCall Calling Convention implementation
				934	//===----------------------------------------------------------------------===//
				935	//
				936	// The X86 'fastcall' calling convention passes up to two integer arguments in
				937	// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
				938	// and requires that the callee pop its arguments off the stack (allowing proper
				939	// tail calls), and has the same return value conventions as C calling convs.
				940	//
				941	// This calling convention always arranges for the callee pop value to be 8n+4
				942	// bytes, which is needed for tail recursion elimination and stack alignment
				943	// reasons.
				944	SDOperand
				945	X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
				946	MachineFunction &MF = DAG.getMachineFunction();
				947	MachineFrameInfo *MFI = MF.getFrameInfo();
				948	SDOperand Root = Op.getOperand(0);
				949	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				950
				951	// Assign locations to all of the incoming arguments.
				952	SmallVector<CCValAssign, 16> ArgLocs;
				953	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				954	getTargetMachine(), ArgLocs);
				955	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);
				956
				957	SmallVector<SDOperand, 8> ArgValues;
				958	unsigned LastVal = ~0U;
				959	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				960	CCValAssign &VA = ArgLocs[i];
				961	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				962	// places.
				963	assert(VA.getValNo() != LastVal &&
				964	"Don't support value assigned to multiple locs yet");
				965	LastVal = VA.getValNo();
				966
				967	if (VA.isRegLoc()) {
				968	MVT::ValueType RegVT = VA.getLocVT();
				969	TargetRegisterClass *RC;
				970	if (RegVT == MVT::i32)
				971	RC = X86::GR32RegisterClass;
				972	else {
				973	assert(MVT::isVector(RegVT));
				974	RC = X86::VR128RegisterClass;
				975	}
				976
				977	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				978	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				979
				980	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				981	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				982	// right size.
				983	if (VA.getLocInfo() == CCValAssign::SExt)
				984	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				985	DAG.getValueType(VA.getValVT()));
				986	else if (VA.getLocInfo() == CCValAssign::ZExt)
				987	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				988	DAG.getValueType(VA.getValVT()));
				989
				990	if (VA.getLocInfo() != CCValAssign::Full)
				991	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				992
				993	ArgValues.push_back(ArgValue);
				994	} else {
				995	assert(VA.isMemLoc());
				996
				997	// Create the nodes corresponding to a load from this parameter slot.
				998	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				999	VA.getLocMemOffset());
				1000	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
				1001	ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
				1002	}
				1003	}
				1004
				1005	ArgValues.push_back(Root);
				1006
				1007	unsigned StackSize = CCInfo.getNextStackOffset();
				1008
				1009	if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
				1010	// Make sure the instruction takes 8n+4 bytes to make sure the start of the
				1011	// arguments and the arguments after the retaddr has been pushed are aligned.
				1012	if ((StackSize & 7) == 0)
				1013	StackSize += 4;
				1014	}
				1015
				1016	VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
				1017	RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
				1018	ReturnAddrIndex = 0; // No return address slot generated yet.
				1019	BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
				1020	BytesCallerReserves = 0;
				1021
				1022	MF.getInfo<X86MachineFunctionInfo>()
				1023	->setBytesToPopOnReturn(BytesToPopOnReturn);
				1024
				1025	// Return the new list of results.
				1026	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				1027	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				1028	}
				1029
				1030	SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
				1031	unsigned CC) {
				1032	SDOperand Chain = Op.getOperand(0);
				1033	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1034	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1035	SDOperand Callee = Op.getOperand(4);
				1036
				1037	// Analyze operands of the call, assigning locations to each operand.
				1038	SmallVector<CCValAssign, 16> ArgLocs;
				1039	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				1040	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);
				1041
				1042	// Get a count of how many bytes are to be pushed on the stack.
				1043	unsigned NumBytes = CCInfo.getNextStackOffset();
				1044
				1045	if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
				1046	// Make sure the instruction takes 8n+4 bytes to make sure the start of the
				1047	// arguments and the arguments after the retaddr has been pushed are aligned.
				1048	if ((NumBytes & 7) == 0)
				1049	NumBytes += 4;
				1050	}
				1051
				1052	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				1053
				1054	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				1055	SmallVector<SDOperand, 8> MemOpChains;
				1056
				1057	SDOperand StackPtr;
				1058
				1059	// Walk the register/memloc assignments, inserting copies/loads.
				1060	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1061	CCValAssign &VA = ArgLocs[i];
				1062	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				1063
				1064	// Promote the value if needed.
				1065	switch (VA.getLocInfo()) {
				1066	default: assert(0 && "Unknown loc info!");
				1067	case CCValAssign::Full: break;
				1068	case CCValAssign::SExt:
				1069	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				1070	break;
				1071	case CCValAssign::ZExt:
				1072	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				1073	break;
				1074	case CCValAssign::AExt:
				1075	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				1076	break;
				1077	}
				1078
				1079	if (VA.isRegLoc()) {
				1080	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1081	} else {
				1082	assert(VA.isMemLoc());
				1083	if (StackPtr.Val == 0)
				1084	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
				1085	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				1086	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				1087	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1088	}
				1089	}
				1090
				1091	if (!MemOpChains.empty())
				1092	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1093	&MemOpChains[0], MemOpChains.size());
				1094
				1095	// Build a sequence of copy-to-reg nodes chained together with token chain
				1096	// and flag operands which copy the outgoing args into registers.
				1097	SDOperand InFlag;
				1098	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1099	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1100	InFlag);
				1101	InFlag = Chain.getValue(1);
				1102	}
				1103
				1104	// If the callee is a GlobalAddress node (quite common, every direct call is)
				1105	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				1106	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1107	// We should use extra load for direct calls to dllimported functions in
				1108	// non-JIT mode.
				1109	if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				1110	getTargetMachine(), true))
				1111	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				1112	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1113	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				1114
				1115	// ELF / PIC requires GOT in the EBX register before function calls via PLT
				1116	// GOT pointer.
				1117	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				1118	Subtarget->isPICStyleGOT()) {
				1119	Chain = DAG.getCopyToReg(Chain, X86::EBX,
				1120	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				1121	InFlag);
				1122	InFlag = Chain.getValue(1);
				1123	}
				1124
				1125	// Returns a chain & a flag for retval copy to use.
				1126	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1127	SmallVector<SDOperand, 8> Ops;
				1128	Ops.push_back(Chain);
				1129	Ops.push_back(Callee);
				1130
				1131	// Add argument registers to the end of the list so that they are known live
				1132	// into the call.
				1133	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1134	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1135	RegsToPass[i].second.getValueType()));
				1136
				1137	// Add an implicit use GOT pointer in EBX.
				1138	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				1139	Subtarget->isPICStyleGOT())
				1140	Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
				1141
				1142	if (InFlag.Val)
				1143	Ops.push_back(InFlag);
				1144
				1145	// FIXME: Do not generate X86ISD::TAILCALL for now.
				1146	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				1147	NodeTys, &Ops[0], Ops.size());
				1148	InFlag = Chain.getValue(1);
				1149
				1150	// Returns a flag for retval copy to use.
				1151	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1152	Ops.clear();
				1153	Ops.push_back(Chain);
				1154	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1155	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1156	Ops.push_back(InFlag);
				1157	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1158	InFlag = Chain.getValue(1);
				1159
				1160	// Handle result values, copying them out of physregs into vregs that we
				1161	// return.
				1162	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1163	}
				1164
				1165
				1166	//===----------------------------------------------------------------------===//
				1167	// X86-64 C Calling Convention implementation
				1168	//===----------------------------------------------------------------------===//
				1169
				1170	SDOperand
				1171	X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
				1172	MachineFunction &MF = DAG.getMachineFunction();
				1173	MachineFrameInfo *MFI = MF.getFrameInfo();
				1174	SDOperand Root = Op.getOperand(0);
				1175	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1176
				1177	static const unsigned GPR64ArgRegs[] = {
				1178	X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
				1179	};
				1180	static const unsigned XMMArgRegs[] = {
				1181	X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
				1182	X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
				1183	};
				1184
				1185
				1186	// Assign locations to all of the incoming arguments.
				1187	SmallVector<CCValAssign, 16> ArgLocs;
				1188	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				1189	getTargetMachine(), ArgLocs);
				1190	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
				1191
				1192	SmallVector<SDOperand, 8> ArgValues;
				1193	unsigned LastVal = ~0U;
				1194	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1195	CCValAssign &VA = ArgLocs[i];
				1196	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				1197	// places.
				1198	assert(VA.getValNo() != LastVal &&
				1199	"Don't support value assigned to multiple locs yet");
				1200	LastVal = VA.getValNo();
				1201
				1202	if (VA.isRegLoc()) {
				1203	MVT::ValueType RegVT = VA.getLocVT();
				1204	TargetRegisterClass *RC;
				1205	if (RegVT == MVT::i32)
				1206	RC = X86::GR32RegisterClass;
				1207	else if (RegVT == MVT::i64)
				1208	RC = X86::GR64RegisterClass;
				1209	else if (RegVT == MVT::f32)
				1210	RC = X86::FR32RegisterClass;
				1211	else if (RegVT == MVT::f64)
				1212	RC = X86::FR64RegisterClass;
				1213	else {
				1214	assert(MVT::isVector(RegVT));
				1215	if (MVT::getSizeInBits(RegVT) == 64) {
				1216	RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
				1217	RegVT = MVT::i64;
				1218	} else
				1219	RC = X86::VR128RegisterClass;
				1220	}
				1221
				1222	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				1223	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				1224
				1225	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				1226	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				1227	// right size.
				1228	if (VA.getLocInfo() == CCValAssign::SExt)
				1229	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				1230	DAG.getValueType(VA.getValVT()));
				1231	else if (VA.getLocInfo() == CCValAssign::ZExt)
				1232	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				1233	DAG.getValueType(VA.getValVT()));
				1234
				1235	if (VA.getLocInfo() != CCValAssign::Full)
				1236	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				1237
				1238	// Handle MMX values passed in GPRs.
				1239	if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
				1240	MVT::getSizeInBits(RegVT) == 64)
				1241	ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
				1242
				1243	ArgValues.push_back(ArgValue);
				1244	} else {
				1245	assert(VA.isMemLoc());
				1246
				1247	// Create the nodes corresponding to a load from this parameter slot.
				1248	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				1249	VA.getLocMemOffset());
				1250	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
				1251	ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
				1252	}
				1253	}
				1254
				1255	unsigned StackSize = CCInfo.getNextStackOffset();
				1256
				1257	// If the function takes variable number of arguments, make a frame index for
				1258	// the start of the first vararg value... for expansion of llvm.va_start.
				1259	if (isVarArg) {
				1260	unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
				1261	unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
				1262
				1263	// For X86-64, if there are vararg parameters that are passed via
				1264	// registers, then we must store them to their spots on the stack so they
				1265	// may be loaded by deferencing the result of va_next.
				1266	VarArgsGPOffset = NumIntRegs * 8;
				1267	VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
				1268	VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
				1269	RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
				1270
				1271	// Store the integer parameter registers.
				1272	SmallVector<SDOperand, 8> MemOps;
				1273	SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
				1274	SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
				1275	DAG.getConstant(VarArgsGPOffset, getPointerTy()));
				1276	for (; NumIntRegs != 6; ++NumIntRegs) {
				1277	unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
				1278	X86::GR64RegisterClass);
				1279	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
				1280	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1281	MemOps.push_back(Store);
				1282	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				1283	DAG.getConstant(8, getPointerTy()));
				1284	}
				1285
				1286	// Now store the XMM (fp + vector) parameter registers.
				1287	FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
				1288	DAG.getConstant(VarArgsFPOffset, getPointerTy()));
				1289	for (; NumXMMRegs != 8; ++NumXMMRegs) {
				1290	unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
				1291	X86::VR128RegisterClass);
				1292	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
				1293	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1294	MemOps.push_back(Store);
				1295	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				1296	DAG.getConstant(16, getPointerTy()));
				1297	}
				1298	if (!MemOps.empty())
				1299	Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1300	&MemOps[0], MemOps.size());
				1301	}
				1302
				1303	ArgValues.push_back(Root);
				1304
				1305	ReturnAddrIndex = 0; // No return address slot generated yet.
				1306	BytesToPopOnReturn = 0; // Callee pops nothing.
				1307	BytesCallerReserves = StackSize;
				1308
				1309	// Return the new list of results.
				1310	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				1311	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				1312	}
				1313
				1314	SDOperand
				1315	X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
				1316	unsigned CC) {
				1317	SDOperand Chain = Op.getOperand(0);
				1318	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1319	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1320	SDOperand Callee = Op.getOperand(4);
				1321
				1322	// Analyze operands of the call, assigning locations to each operand.
				1323	SmallVector<CCValAssign, 16> ArgLocs;
				1324	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				1325	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
				1326
				1327	// Get a count of how many bytes are to be pushed on the stack.
				1328	unsigned NumBytes = CCInfo.getNextStackOffset();
				1329	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				1330
				1331	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				1332	SmallVector<SDOperand, 8> MemOpChains;
				1333
				1334	SDOperand StackPtr;
				1335
				1336	// Walk the register/memloc assignments, inserting copies/loads.
				1337	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1338	CCValAssign &VA = ArgLocs[i];
				1339	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				1340
				1341	// Promote the value if needed.
				1342	switch (VA.getLocInfo()) {
				1343	default: assert(0 && "Unknown loc info!");
				1344	case CCValAssign::Full: break;
				1345	case CCValAssign::SExt:
				1346	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				1347	break;
				1348	case CCValAssign::ZExt:
				1349	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				1350	break;
				1351	case CCValAssign::AExt:
				1352	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				1353	break;
				1354	}
				1355
				1356	if (VA.isRegLoc()) {
				1357	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1358	} else {
				1359	assert(VA.isMemLoc());
				1360	if (StackPtr.Val == 0)
				1361	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
				1362	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				1363	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				1364	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1365	}
				1366	}
				1367
				1368	if (!MemOpChains.empty())
				1369	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1370	&MemOpChains[0], MemOpChains.size());
				1371
				1372	// Build a sequence of copy-to-reg nodes chained together with token chain
				1373	// and flag operands which copy the outgoing args into registers.
				1374	SDOperand InFlag;
				1375	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1376	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1377	InFlag);
				1378	InFlag = Chain.getValue(1);
				1379	}
				1380
				1381	if (isVarArg) {
				1382	// From AMD64 ABI document:
				1383	// For calls that may call functions that use varargs or stdargs
				1384	// (prototype-less calls or calls to functions containing ellipsis (...) in
				1385	// the declaration) %al is used as hidden argument to specify the number
				1386	// of SSE registers used. The contents of %al do not need to match exactly
				1387	// the number of registers, but must be an ubound on the number of SSE
				1388	// registers used and is in the range 0 - 8 inclusive.
				1389
				1390	// Count the number of XMM registers allocated.
				1391	static const unsigned XMMArgRegs[] = {
				1392	X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
				1393	X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
				1394	};
				1395	unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
				1396
				1397	Chain = DAG.getCopyToReg(Chain, X86::AL,
				1398	DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
				1399	InFlag = Chain.getValue(1);
				1400	}
				1401
				1402	// If the callee is a GlobalAddress node (quite common, every direct call is)
				1403	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				1404	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1405	// We should use extra load for direct calls to dllimported functions in
				1406	// non-JIT mode.
				1407	if (getTargetMachine().getCodeModel() != CodeModel::Large
				1408	&& !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				1409	getTargetMachine(), true))
				1410	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				1411	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1412	if (getTargetMachine().getCodeModel() != CodeModel::Large)
				1413	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				1414
				1415	// Returns a chain & a flag for retval copy to use.
				1416	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1417	SmallVector<SDOperand, 8> Ops;
				1418	Ops.push_back(Chain);
				1419	Ops.push_back(Callee);
				1420
				1421	// Add argument registers to the end of the list so that they are known live
				1422	// into the call.
				1423	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1424	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1425	RegsToPass[i].second.getValueType()));
				1426
				1427	if (InFlag.Val)
				1428	Ops.push_back(InFlag);
				1429
				1430	// FIXME: Do not generate X86ISD::TAILCALL for now.
				1431	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				1432	NodeTys, &Ops[0], Ops.size());
				1433	InFlag = Chain.getValue(1);
				1434
				1435	// Returns a flag for retval copy to use.
				1436	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1437	Ops.clear();
				1438	Ops.push_back(Chain);
				1439	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1440	Ops.push_back(DAG.getConstant(0, getPointerTy()));
				1441	Ops.push_back(InFlag);
				1442	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1443	InFlag = Chain.getValue(1);
				1444
				1445	// Handle result values, copying them out of physregs into vregs that we
				1446	// return.
				1447	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1448	}
				1449
				1450
				1451	//===----------------------------------------------------------------------===//
				1452	// Other Lowering Hooks
				1453	//===----------------------------------------------------------------------===//
				1454
				1455
				1456	SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
				1457	if (ReturnAddrIndex == 0) {
				1458	// Set up a frame object for the return address.
				1459	MachineFunction &MF = DAG.getMachineFunction();
				1460	if (Subtarget->is64Bit())
				1461	ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
				1462	else
				1463	ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
				1464	}
				1465
				1466	return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
				1467	}
				1468
				1469
				1470
				1471	/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
				1472	/// specific condition code. It returns a false if it cannot do a direct
				1473	/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
				1474	/// needed.
				1475	static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
				1476	unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
				1477	SelectionDAG &DAG) {
				1478	X86CC = X86::COND_INVALID;
				1479	if (!isFP) {
				1480	if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
				1481	if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
				1482	// X > -1 -> X == 0, jump !sign.
				1483	RHS = DAG.getConstant(0, RHS.getValueType());
				1484	X86CC = X86::COND_NS;
				1485	return true;
				1486	} else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
				1487	// X < 0 -> X == 0, jump on sign.
				1488	X86CC = X86::COND_S;
				1489	return true;
				1490	}
				1491	}
				1492
				1493	switch (SetCCOpcode) {
				1494	default: break;
				1495	case ISD::SETEQ: X86CC = X86::COND_E; break;
				1496	case ISD::SETGT: X86CC = X86::COND_G; break;
				1497	case ISD::SETGE: X86CC = X86::COND_GE; break;
				1498	case ISD::SETLT: X86CC = X86::COND_L; break;
				1499	case ISD::SETLE: X86CC = X86::COND_LE; break;
				1500	case ISD::SETNE: X86CC = X86::COND_NE; break;
				1501	case ISD::SETULT: X86CC = X86::COND_B; break;
				1502	case ISD::SETUGT: X86CC = X86::COND_A; break;
				1503	case ISD::SETULE: X86CC = X86::COND_BE; break;
				1504	case ISD::SETUGE: X86CC = X86::COND_AE; break;
				1505	}
				1506	} else {
				1507	// On a floating point condition, the flags are set as follows:
				1508	// ZF PF CF op
				1509	// 0 \| 0 \| 0 \| X > Y
				1510	// 0 \| 0 \| 1 \| X < Y
				1511	// 1 \| 0 \| 0 \| X == Y
				1512	// 1 \| 1 \| 1 \| unordered
				1513	bool Flip = false;
				1514	switch (SetCCOpcode) {
				1515	default: break;
				1516	case ISD::SETUEQ:
				1517	case ISD::SETEQ: X86CC = X86::COND_E; break;
				1518	case ISD::SETOLT: Flip = true; // Fallthrough
				1519	case ISD::SETOGT:
				1520	case ISD::SETGT: X86CC = X86::COND_A; break;
				1521	case ISD::SETOLE: Flip = true; // Fallthrough
				1522	case ISD::SETOGE:
				1523	case ISD::SETGE: X86CC = X86::COND_AE; break;
				1524	case ISD::SETUGT: Flip = true; // Fallthrough
				1525	case ISD::SETULT:
				1526	case ISD::SETLT: X86CC = X86::COND_B; break;
				1527	case ISD::SETUGE: Flip = true; // Fallthrough
				1528	case ISD::SETULE:
				1529	case ISD::SETLE: X86CC = X86::COND_BE; break;
				1530	case ISD::SETONE:
				1531	case ISD::SETNE: X86CC = X86::COND_NE; break;
				1532	case ISD::SETUO: X86CC = X86::COND_P; break;
				1533	case ISD::SETO: X86CC = X86::COND_NP; break;
				1534	}
				1535	if (Flip)
				1536	std::swap(LHS, RHS);
				1537	}
				1538
				1539	return X86CC != X86::COND_INVALID;
				1540	}
				1541
				1542	/// hasFPCMov - is there a floating point cmov for the specific X86 condition
				1543	/// code. Current x86 isa includes the following FP cmov instructions:
				1544	/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
				1545	static bool hasFPCMov(unsigned X86CC) {
				1546	switch (X86CC) {
				1547	default:
				1548	return false;
				1549	case X86::COND_B:
				1550	case X86::COND_BE:
				1551	case X86::COND_E:
				1552	case X86::COND_P:
				1553	case X86::COND_A:
				1554	case X86::COND_AE:
				1555	case X86::COND_NE:
				1556	case X86::COND_NP:
				1557	return true;
				1558	}
				1559	}
				1560
				1561	/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
				1562	/// true if Op is undef or if its value falls within the specified range (L, H].
				1563	static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
				1564	if (Op.getOpcode() == ISD::UNDEF)
				1565	return true;
				1566
				1567	unsigned Val = cast<ConstantSDNode>(Op)->getValue();
				1568	return (Val >= Low && Val < Hi);
				1569	}
				1570
				1571	/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
				1572	/// true if Op is undef or if its value equal to the specified value.
				1573	static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
				1574	if (Op.getOpcode() == ISD::UNDEF)
				1575	return true;
				1576	return cast<ConstantSDNode>(Op)->getValue() == Val;
				1577	}
				1578
				1579	/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
				1580	/// specifies a shuffle of elements that is suitable for input to PSHUFD.
				1581	bool X86::isPSHUFDMask(SDNode *N) {
				1582	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1583
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	1584	if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1585	return false;
				1586
				1587	// Check if the value doesn't reference the second vector.
				1588	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
				1589	SDOperand Arg = N->getOperand(i);
				1590	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1591	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	1592	if (cast<ConstantSDNode>(Arg)->getValue() >= e)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1593	return false;
				1594	}
				1595
				1596	return true;
				1597	}
				1598
				1599	/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
				1600	/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
				1601	bool X86::isPSHUFHWMask(SDNode *N) {
				1602	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1603
				1604	if (N->getNumOperands() != 8)
				1605	return false;
				1606
				1607	// Lower quadword copied in order.
				1608	for (unsigned i = 0; i != 4; ++i) {
				1609	SDOperand Arg = N->getOperand(i);
				1610	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1611	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1612	if (cast<ConstantSDNode>(Arg)->getValue() != i)
				1613	return false;
				1614	}
				1615
				1616	// Upper quadword shuffled.
				1617	for (unsigned i = 4; i != 8; ++i) {
				1618	SDOperand Arg = N->getOperand(i);
				1619	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1620	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1621	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1622	if (Val < 4 \|\| Val > 7)
				1623	return false;
				1624	}
				1625
				1626	return true;
				1627	}
				1628
				1629	/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
				1630	/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
				1631	bool X86::isPSHUFLWMask(SDNode *N) {
				1632	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1633
				1634	if (N->getNumOperands() != 8)
				1635	return false;
				1636
				1637	// Upper quadword copied in order.
				1638	for (unsigned i = 4; i != 8; ++i)
				1639	if (!isUndefOrEqual(N->getOperand(i), i))
				1640	return false;
				1641
				1642	// Lower quadword shuffled.
				1643	for (unsigned i = 0; i != 4; ++i)
				1644	if (!isUndefOrInRange(N->getOperand(i), 0, 4))
				1645	return false;
				1646
				1647	return true;
				1648	}
				1649
				1650	/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
				1651	/// specifies a shuffle of elements that is suitable for input to SHUFP*.
				1652	static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
				1653	if (NumElems != 2 && NumElems != 4) return false;
				1654
				1655	unsigned Half = NumElems / 2;
				1656	for (unsigned i = 0; i < Half; ++i)
				1657	if (!isUndefOrInRange(Elems[i], 0, NumElems))
				1658	return false;
				1659	for (unsigned i = Half; i < NumElems; ++i)
				1660	if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
				1661	return false;
				1662
				1663	return true;
				1664	}
				1665
				1666	bool X86::isSHUFPMask(SDNode *N) {
				1667	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1668	return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
				1669	}
				1670
				1671	/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
				1672	/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
				1673	/// half elements to come from vector 1 (which would equal the dest.) and
				1674	/// the upper half to come from vector 2.
				1675	static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
				1676	if (NumOps != 2 && NumOps != 4) return false;
				1677
				1678	unsigned Half = NumOps / 2;
				1679	for (unsigned i = 0; i < Half; ++i)
				1680	if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
				1681	return false;
				1682	for (unsigned i = Half; i < NumOps; ++i)
				1683	if (!isUndefOrInRange(Ops[i], 0, NumOps))
				1684	return false;
				1685	return true;
				1686	}
				1687
				1688	static bool isCommutedSHUFP(SDNode *N) {
				1689	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1690	return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
				1691	}
				1692
				1693	/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
				1694	/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
				1695	bool X86::isMOVHLPSMask(SDNode *N) {
				1696	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1697
				1698	if (N->getNumOperands() != 4)
				1699	return false;
				1700
				1701	// Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
				1702	return isUndefOrEqual(N->getOperand(0), 6) &&
				1703	isUndefOrEqual(N->getOperand(1), 7) &&
				1704	isUndefOrEqual(N->getOperand(2), 2) &&
				1705	isUndefOrEqual(N->getOperand(3), 3);
				1706	}
				1707
				1708	/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
				1709	/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
				1710	/// <2, 3, 2, 3>
				1711	bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
				1712	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1713
				1714	if (N->getNumOperands() != 4)
				1715	return false;
				1716
				1717	// Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
				1718	return isUndefOrEqual(N->getOperand(0), 2) &&
				1719	isUndefOrEqual(N->getOperand(1), 3) &&
				1720	isUndefOrEqual(N->getOperand(2), 2) &&
				1721	isUndefOrEqual(N->getOperand(3), 3);
				1722	}
				1723
				1724	/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
				1725	/// specifies a shuffle of elements that is suitable for input to MOVLP{S\|D}.
				1726	bool X86::isMOVLPMask(SDNode *N) {
				1727	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1728
				1729	unsigned NumElems = N->getNumOperands();
				1730	if (NumElems != 2 && NumElems != 4)
				1731	return false;
				1732
				1733	for (unsigned i = 0; i < NumElems/2; ++i)
				1734	if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
				1735	return false;
				1736
				1737	for (unsigned i = NumElems/2; i < NumElems; ++i)
				1738	if (!isUndefOrEqual(N->getOperand(i), i))
				1739	return false;
				1740
				1741	return true;
				1742	}
				1743
				1744	/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
				1745	/// specifies a shuffle of elements that is suitable for input to MOVHP{S\|D}
				1746	/// and MOVLHPS.
				1747	bool X86::isMOVHPMask(SDNode *N) {
				1748	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1749
				1750	unsigned NumElems = N->getNumOperands();
				1751	if (NumElems != 2 && NumElems != 4)
				1752	return false;
				1753
				1754	for (unsigned i = 0; i < NumElems/2; ++i)
				1755	if (!isUndefOrEqual(N->getOperand(i), i))
				1756	return false;
				1757
				1758	for (unsigned i = 0; i < NumElems/2; ++i) {
				1759	SDOperand Arg = N->getOperand(i + NumElems/2);
				1760	if (!isUndefOrEqual(Arg, i + NumElems))
				1761	return false;
				1762	}
				1763
				1764	return true;
				1765	}
				1766
				1767	/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
				1768	/// specifies a shuffle of elements that is suitable for input to UNPCKL.
				1769	bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
				1770	bool V2IsSplat = false) {
				1771	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1772	return false;
				1773
				1774	for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
				1775	SDOperand BitI = Elts[i];
				1776	SDOperand BitI1 = Elts[i+1];
				1777	if (!isUndefOrEqual(BitI, j))
				1778	return false;
				1779	if (V2IsSplat) {
				1780	if (isUndefOrEqual(BitI1, NumElts))
				1781	return false;
				1782	} else {
				1783	if (!isUndefOrEqual(BitI1, j + NumElts))
				1784	return false;
				1785	}
				1786	}
				1787
				1788	return true;
				1789	}
				1790
				1791	bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
				1792	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1793	return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
				1794	}
				1795
				1796	/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
				1797	/// specifies a shuffle of elements that is suitable for input to UNPCKH.
				1798	bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
				1799	bool V2IsSplat = false) {
				1800	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1801	return false;
				1802
				1803	for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
				1804	SDOperand BitI = Elts[i];
				1805	SDOperand BitI1 = Elts[i+1];
				1806	if (!isUndefOrEqual(BitI, j + NumElts/2))
				1807	return false;
				1808	if (V2IsSplat) {
				1809	if (isUndefOrEqual(BitI1, NumElts))
				1810	return false;
				1811	} else {
				1812	if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
				1813	return false;
				1814	}
				1815	}
				1816
				1817	return true;
				1818	}
				1819
				1820	bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
				1821	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1822	return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
				1823	}
				1824
				1825	/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
				1826	/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
				1827	/// <0, 0, 1, 1>
				1828	bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
				1829	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1830
				1831	unsigned NumElems = N->getNumOperands();
				1832	if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
				1833	return false;
				1834
				1835	for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
				1836	SDOperand BitI = N->getOperand(i);
				1837	SDOperand BitI1 = N->getOperand(i+1);
				1838
				1839	if (!isUndefOrEqual(BitI, j))
				1840	return false;
				1841	if (!isUndefOrEqual(BitI1, j))
				1842	return false;
				1843	}
				1844
				1845	return true;
				1846	}
				1847
				1848	/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
				1849	/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
				1850	/// <2, 2, 3, 3>
				1851	bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
				1852	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1853
				1854	unsigned NumElems = N->getNumOperands();
				1855	if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
				1856	return false;
				1857
				1858	for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
				1859	SDOperand BitI = N->getOperand(i);
				1860	SDOperand BitI1 = N->getOperand(i + 1);
				1861
				1862	if (!isUndefOrEqual(BitI, j))
				1863	return false;
				1864	if (!isUndefOrEqual(BitI1, j))
				1865	return false;
				1866	}
				1867
				1868	return true;
				1869	}
				1870
				1871	/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
				1872	/// specifies a shuffle of elements that is suitable for input to MOVSS,
				1873	/// MOVSD, and MOVD, i.e. setting the lowest element.
				1874	static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
				1875	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1876	return false;
				1877
				1878	if (!isUndefOrEqual(Elts[0], NumElts))
				1879	return false;
				1880
				1881	for (unsigned i = 1; i < NumElts; ++i) {
				1882	if (!isUndefOrEqual(Elts[i], i))
				1883	return false;
				1884	}
				1885
				1886	return true;
				1887	}
				1888
				1889	bool X86::isMOVLMask(SDNode *N) {
				1890	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1891	return ::isMOVLMask(N->op_begin(), N->getNumOperands());
				1892	}
				1893
				1894	/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
				1895	/// of what x86 movss want. X86 movs requires the lowest element to be lowest
				1896	/// element of vector 2 and the other elements to come from vector 1 in order.
				1897	static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
				1898	bool V2IsSplat = false,
				1899	bool V2IsUndef = false) {
				1900	if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
				1901	return false;
				1902
				1903	if (!isUndefOrEqual(Ops[0], 0))
				1904	return false;
				1905
				1906	for (unsigned i = 1; i < NumOps; ++i) {
				1907	SDOperand Arg = Ops[i];
				1908	if (!(isUndefOrEqual(Arg, i+NumOps) \|\|
				1909	(V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) \|\|
				1910	(V2IsSplat && isUndefOrEqual(Arg, NumOps))))
				1911	return false;
				1912	}
				1913
				1914	return true;
				1915	}
				1916
				1917	static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
				1918	bool V2IsUndef = false) {
				1919	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1920	return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
				1921	V2IsSplat, V2IsUndef);
				1922	}
				1923
				1924	/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
				1925	/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
				1926	bool X86::isMOVSHDUPMask(SDNode *N) {
				1927	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1928
				1929	if (N->getNumOperands() != 4)
				1930	return false;
				1931
				1932	// Expect 1, 1, 3, 3
				1933	for (unsigned i = 0; i < 2; ++i) {
				1934	SDOperand Arg = N->getOperand(i);
				1935	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1936	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1937	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1938	if (Val != 1) return false;
				1939	}
				1940
				1941	bool HasHi = false;
				1942	for (unsigned i = 2; i < 4; ++i) {
				1943	SDOperand Arg = N->getOperand(i);
				1944	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1945	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1946	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1947	if (Val != 3) return false;
				1948	HasHi = true;
				1949	}
				1950
				1951	// Don't use movshdup if it can be done with a shufps.
				1952	return HasHi;
				1953	}
				1954
				1955	/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
				1956	/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
				1957	bool X86::isMOVSLDUPMask(SDNode *N) {
				1958	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1959
				1960	if (N->getNumOperands() != 4)
				1961	return false;
				1962
				1963	// Expect 0, 0, 2, 2
				1964	for (unsigned i = 0; i < 2; ++i) {
				1965	SDOperand Arg = N->getOperand(i);
				1966	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1967	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1968	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1969	if (Val != 0) return false;
				1970	}
				1971
				1972	bool HasHi = false;
				1973	for (unsigned i = 2; i < 4; ++i) {
				1974	SDOperand Arg = N->getOperand(i);
				1975	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1976	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1977	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1978	if (Val != 2) return false;
				1979	HasHi = true;
				1980	}
				1981
				1982	// Don't use movshdup if it can be done with a shufps.
				1983	return HasHi;
				1984	}
				1985
				1986	/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
				1987	/// specifies a identity operation on the LHS or RHS.
				1988	static bool isIdentityMask(SDNode *N, bool RHS = false) {
				1989	unsigned NumElems = N->getNumOperands();
				1990	for (unsigned i = 0; i < NumElems; ++i)
				1991	if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
				1992	return false;
				1993	return true;
				1994	}
				1995
				1996	/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
				1997	/// a splat of a single element.
				1998	static bool isSplatMask(SDNode *N) {
				1999	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2000
				2001	// This is a splat operation if each element of the permute is the same, and
				2002	// if the value doesn't reference the second vector.
				2003	unsigned NumElems = N->getNumOperands();
				2004	SDOperand ElementBase;
				2005	unsigned i = 0;
				2006	for (; i != NumElems; ++i) {
				2007	SDOperand Elt = N->getOperand(i);
				2008	if (isa<ConstantSDNode>(Elt)) {
				2009	ElementBase = Elt;
				2010	break;
				2011	}
				2012	}
				2013
				2014	if (!ElementBase.Val)
				2015	return false;
				2016
				2017	for (; i != NumElems; ++i) {
				2018	SDOperand Arg = N->getOperand(i);
				2019	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2020	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2021	if (Arg != ElementBase) return false;
				2022	}
				2023
				2024	// Make sure it is a splat of the first vector operand.
				2025	return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
				2026	}
				2027
				2028	/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
				2029	/// a splat of a single element and it's a 2 or 4 element mask.
				2030	bool X86::isSplatMask(SDNode *N) {
				2031	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2032
				2033	// We can only splat 64-bit, and 32-bit quantities with a single instruction.
				2034	if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
				2035	return false;
				2036	return ::isSplatMask(N);
				2037	}
				2038
				2039	/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
				2040	/// specifies a splat of zero element.
				2041	bool X86::isSplatLoMask(SDNode *N) {
				2042	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2043
				2044	for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
				2045	if (!isUndefOrEqual(N->getOperand(i), 0))
				2046	return false;
				2047	return true;
				2048	}
				2049
				2050	/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
				2051	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
				2052	/// instructions.
				2053	unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
				2054	unsigned NumOperands = N->getNumOperands();
				2055	unsigned Shift = (NumOperands == 4) ? 2 : 1;
				2056	unsigned Mask = 0;
				2057	for (unsigned i = 0; i < NumOperands; ++i) {
				2058	unsigned Val = 0;
				2059	SDOperand Arg = N->getOperand(NumOperands-i-1);
				2060	if (Arg.getOpcode() != ISD::UNDEF)
				2061	Val = cast<ConstantSDNode>(Arg)->getValue();
				2062	if (Val >= NumOperands) Val -= NumOperands;
				2063	Mask \|= Val;
				2064	if (i != NumOperands - 1)
				2065	Mask <<= Shift;
				2066	}
				2067
				2068	return Mask;
				2069	}
				2070
				2071	/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
				2072	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
				2073	/// instructions.
				2074	unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
				2075	unsigned Mask = 0;
				2076	// 8 nodes, but we only care about the last 4.
				2077	for (unsigned i = 7; i >= 4; --i) {
				2078	unsigned Val = 0;
				2079	SDOperand Arg = N->getOperand(i);
				2080	if (Arg.getOpcode() != ISD::UNDEF)
				2081	Val = cast<ConstantSDNode>(Arg)->getValue();
				2082	Mask \|= (Val - 4);
				2083	if (i != 4)
				2084	Mask <<= 2;
				2085	}
				2086
				2087	return Mask;
				2088	}
				2089
				2090	/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
				2091	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
				2092	/// instructions.
				2093	unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
				2094	unsigned Mask = 0;
				2095	// 8 nodes, but we only care about the first 4.
				2096	for (int i = 3; i >= 0; --i) {
				2097	unsigned Val = 0;
				2098	SDOperand Arg = N->getOperand(i);
				2099	if (Arg.getOpcode() != ISD::UNDEF)
				2100	Val = cast<ConstantSDNode>(Arg)->getValue();
				2101	Mask \|= Val;
				2102	if (i != 0)
				2103	Mask <<= 2;
				2104	}
				2105
				2106	return Mask;
				2107	}
				2108
				2109	/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
				2110	/// specifies a 8 element shuffle that can be broken into a pair of
				2111	/// PSHUFHW and PSHUFLW.
				2112	static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
				2113	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2114
				2115	if (N->getNumOperands() != 8)
				2116	return false;
				2117
				2118	// Lower quadword shuffled.
				2119	for (unsigned i = 0; i != 4; ++i) {
				2120	SDOperand Arg = N->getOperand(i);
				2121	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2122	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2123	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2124	if (Val > 4)
				2125	return false;
				2126	}
				2127
				2128	// Upper quadword shuffled.
				2129	for (unsigned i = 4; i != 8; ++i) {
				2130	SDOperand Arg = N->getOperand(i);
				2131	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2132	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2133	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2134	if (Val < 4 \|\| Val > 7)
				2135	return false;
				2136	}
				2137
				2138	return true;
				2139	}
				2140
				2141	/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
				2142	/// values in ther permute mask.
				2143	static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
				2144	SDOperand &V2, SDOperand &Mask,
				2145	SelectionDAG &DAG) {
				2146	MVT::ValueType VT = Op.getValueType();
				2147	MVT::ValueType MaskVT = Mask.getValueType();
				2148	MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
				2149	unsigned NumElems = Mask.getNumOperands();
				2150	SmallVector<SDOperand, 8> MaskVec;
				2151
				2152	for (unsigned i = 0; i != NumElems; ++i) {
				2153	SDOperand Arg = Mask.getOperand(i);
				2154	if (Arg.getOpcode() == ISD::UNDEF) {
				2155	MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
				2156	continue;
				2157	}
				2158	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2159	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2160	if (Val < NumElems)
				2161	MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
				2162	else
				2163	MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
				2164	}
				2165
				2166	std::swap(V1, V2);
				2167	Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2168	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2169	}
				2170
				2171	/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
				2172	/// match movhlps. The lower half elements should come from upper half of
				2173	/// V1 (and in order), and the upper half elements should come from the upper
				2174	/// half of V2 (and in order).
				2175	static bool ShouldXformToMOVHLPS(SDNode *Mask) {
				2176	unsigned NumElems = Mask->getNumOperands();
				2177	if (NumElems != 4)
				2178	return false;
				2179	for (unsigned i = 0, e = 2; i != e; ++i)
				2180	if (!isUndefOrEqual(Mask->getOperand(i), i+2))
				2181	return false;
				2182	for (unsigned i = 2; i != 4; ++i)
				2183	if (!isUndefOrEqual(Mask->getOperand(i), i+4))
				2184	return false;
				2185	return true;
				2186	}
				2187
				2188	/// isScalarLoadToVector - Returns true if the node is a scalar load that
				2189	/// is promoted to a vector.
				2190	static inline bool isScalarLoadToVector(SDNode *N) {
				2191	if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
				2192	N = N->getOperand(0).Val;
				2193	return ISD::isNON_EXTLoad(N);
				2194	}
				2195	return false;
				2196	}
				2197
				2198	/// ShouldXformToMOVLP{S\|D} - Return true if the node should be transformed to
				2199	/// match movlp{s\|d}. The lower half elements should come from lower half of
				2200	/// V1 (and in order), and the upper half elements should come from the upper
				2201	/// half of V2 (and in order). And since V1 will become the source of the
				2202	/// MOVLP, it must be either a vector load or a scalar load to vector.
				2203	static bool ShouldXformToMOVLP(SDNode V1, SDNode V2, SDNode *Mask) {
				2204	if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
				2205	return false;
				2206	// Is V2 is a vector load, don't do this transformation. We will try to use
				2207	// load folding shufps op.
				2208	if (ISD::isNON_EXTLoad(V2))
				2209	return false;
				2210
				2211	unsigned NumElems = Mask->getNumOperands();
				2212	if (NumElems != 2 && NumElems != 4)
				2213	return false;
				2214	for (unsigned i = 0, e = NumElems/2; i != e; ++i)
				2215	if (!isUndefOrEqual(Mask->getOperand(i), i))
				2216	return false;
				2217	for (unsigned i = NumElems/2; i != NumElems; ++i)
				2218	if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
				2219	return false;
				2220	return true;
				2221	}
				2222
				2223	/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
				2224	/// all the same.
				2225	static bool isSplatVector(SDNode *N) {
				2226	if (N->getOpcode() != ISD::BUILD_VECTOR)
				2227	return false;
				2228
				2229	SDOperand SplatValue = N->getOperand(0);
				2230	for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
				2231	if (N->getOperand(i) != SplatValue)
				2232	return false;
				2233	return true;
				2234	}
				2235
				2236	/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
				2237	/// to an undef.
				2238	static bool isUndefShuffle(SDNode *N) {
				2239	if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
				2240	return false;
				2241
				2242	SDOperand V1 = N->getOperand(0);
				2243	SDOperand V2 = N->getOperand(1);
				2244	SDOperand Mask = N->getOperand(2);
				2245	unsigned NumElems = Mask.getNumOperands();
				2246	for (unsigned i = 0; i != NumElems; ++i) {
				2247	SDOperand Arg = Mask.getOperand(i);
				2248	if (Arg.getOpcode() != ISD::UNDEF) {
				2249	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2250	if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
				2251	return false;
				2252	else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
				2253	return false;
				2254	}
				2255	}
				2256	return true;
				2257	}
				2258
				2259	/// isZeroNode - Returns true if Elt is a constant zero or a floating point
				2260	/// constant +0.0.
				2261	static inline bool isZeroNode(SDOperand Elt) {
				2262	return ((isa<ConstantSDNode>(Elt) &&
				2263	cast<ConstantSDNode>(Elt)->getValue() == 0) \|\|
				2264	(isa<ConstantFPSDNode>(Elt) &&
				2265	cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
				2266	}
				2267
				2268	/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
				2269	/// to an zero vector.
				2270	static bool isZeroShuffle(SDNode *N) {
				2271	if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
				2272	return false;
				2273
				2274	SDOperand V1 = N->getOperand(0);
				2275	SDOperand V2 = N->getOperand(1);
				2276	SDOperand Mask = N->getOperand(2);
				2277	unsigned NumElems = Mask.getNumOperands();
				2278	for (unsigned i = 0; i != NumElems; ++i) {
				2279	SDOperand Arg = Mask.getOperand(i);
				2280	if (Arg.getOpcode() != ISD::UNDEF) {
				2281	unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
				2282	if (Idx < NumElems) {
				2283	unsigned Opc = V1.Val->getOpcode();
				2284	if (Opc == ISD::UNDEF)
				2285	continue;
				2286	if (Opc != ISD::BUILD_VECTOR \|\|
				2287	!isZeroNode(V1.Val->getOperand(Idx)))
				2288	return false;
				2289	} else if (Idx >= NumElems) {
				2290	unsigned Opc = V2.Val->getOpcode();
				2291	if (Opc == ISD::UNDEF)
				2292	continue;
				2293	if (Opc != ISD::BUILD_VECTOR \|\|
				2294	!isZeroNode(V2.Val->getOperand(Idx - NumElems)))
				2295	return false;
				2296	}
				2297	}
				2298	}
				2299	return true;
				2300	}
				2301
				2302	/// getZeroVector - Returns a vector of specified type with all zero elements.
				2303	///
				2304	static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
				2305	assert(MVT::isVector(VT) && "Expected a vector type");
				2306	unsigned NumElems = MVT::getVectorNumElements(VT);
				2307	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				2308	bool isFP = MVT::isFloatingPoint(EVT);
				2309	SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
				2310	SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
				2311	return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
				2312	}
				2313
				2314	/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
				2315	/// that point to V2 points to its first element.
				2316	static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
				2317	assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
				2318
				2319	bool Changed = false;
				2320	SmallVector<SDOperand, 8> MaskVec;
				2321	unsigned NumElems = Mask.getNumOperands();
				2322	for (unsigned i = 0; i != NumElems; ++i) {
				2323	SDOperand Arg = Mask.getOperand(i);
				2324	if (Arg.getOpcode() != ISD::UNDEF) {
				2325	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2326	if (Val > NumElems) {
				2327	Arg = DAG.getConstant(NumElems, Arg.getValueType());
				2328	Changed = true;
				2329	}
				2330	}
				2331	MaskVec.push_back(Arg);
				2332	}
				2333
				2334	if (Changed)
				2335	Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
				2336	&MaskVec[0], MaskVec.size());
				2337	return Mask;
				2338	}
				2339
				2340	/// getMOVLMask - Returns a vector_shuffle mask for an movs{s\|d}, movd
				2341	/// operation of specified width.
				2342	static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
				2343	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2344	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2345
				2346	SmallVector<SDOperand, 8> MaskVec;
				2347	MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
				2348	for (unsigned i = 1; i != NumElems; ++i)
				2349	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2350	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2351	}
				2352
				2353	/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
				2354	/// of specified width.
				2355	static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
				2356	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2357	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2358	SmallVector<SDOperand, 8> MaskVec;
				2359	for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
				2360	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2361	MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
				2362	}
				2363	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2364	}
				2365
				2366	/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
				2367	/// of specified width.
				2368	static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
				2369	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2370	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2371	unsigned Half = NumElems/2;
				2372	SmallVector<SDOperand, 8> MaskVec;
				2373	for (unsigned i = 0; i != Half; ++i) {
				2374	MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
				2375	MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
				2376	}
				2377	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2378	}
				2379
				2380	/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
				2381	///
				2382	static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
				2383	SDOperand V1 = Op.getOperand(0);
				2384	SDOperand Mask = Op.getOperand(2);
				2385	MVT::ValueType VT = Op.getValueType();
				2386	unsigned NumElems = Mask.getNumOperands();
				2387	Mask = getUnpacklMask(NumElems, DAG);
				2388	while (NumElems != 4) {
				2389	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
				2390	NumElems >>= 1;
				2391	}
				2392	V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
				2393
				2394	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2395	Mask = getZeroVector(MaskVT, DAG);
				2396	SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
				2397	DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
				2398	return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
				2399	}
				2400
				2401	/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
				2402	/// vector of zero or undef vector.
				2403	static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
				2404	unsigned NumElems, unsigned Idx,
				2405	bool isZero, SelectionDAG &DAG) {
				2406	SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
				2407	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2408	MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
				2409	SDOperand Zero = DAG.getConstant(0, EVT);
				2410	SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
				2411	MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
				2412	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2413	&MaskVec[0], MaskVec.size());
				2414	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2415	}
				2416
				2417	/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
				2418	///
				2419	static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
				2420	unsigned NumNonZero, unsigned NumZero,
				2421	SelectionDAG &DAG, TargetLowering &TLI) {
				2422	if (NumNonZero > 8)
				2423	return SDOperand();
				2424
				2425	SDOperand V(0, 0);
				2426	bool First = true;
				2427	for (unsigned i = 0; i < 16; ++i) {
				2428	bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
				2429	if (ThisIsNonZero && First) {
				2430	if (NumZero)
				2431	V = getZeroVector(MVT::v8i16, DAG);
				2432	else
				2433	V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
				2434	First = false;
				2435	}
				2436
				2437	if ((i & 1) != 0) {
				2438	SDOperand ThisElt(0, 0), LastElt(0, 0);
				2439	bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
				2440	if (LastIsNonZero) {
				2441	LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
				2442	}
				2443	if (ThisIsNonZero) {
				2444	ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
				2445	ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
				2446	ThisElt, DAG.getConstant(8, MVT::i8));
				2447	if (LastIsNonZero)
				2448	ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
				2449	} else
				2450	ThisElt = LastElt;
				2451
				2452	if (ThisElt.Val)
				2453	V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
				2454	DAG.getConstant(i/2, TLI.getPointerTy()));
				2455	}
				2456	}
				2457
				2458	return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
				2459	}
				2460
				2461	/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
				2462	///
				2463	static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
				2464	unsigned NumNonZero, unsigned NumZero,
				2465	SelectionDAG &DAG, TargetLowering &TLI) {
				2466	if (NumNonZero > 4)
				2467	return SDOperand();
				2468
				2469	SDOperand V(0, 0);
				2470	bool First = true;
				2471	for (unsigned i = 0; i < 8; ++i) {
				2472	bool isNonZero = (NonZeros & (1 << i)) != 0;
				2473	if (isNonZero) {
				2474	if (First) {
				2475	if (NumZero)
				2476	V = getZeroVector(MVT::v8i16, DAG);
				2477	else
				2478	V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
				2479	First = false;
				2480	}
				2481	V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
				2482	DAG.getConstant(i, TLI.getPointerTy()));
				2483	}
				2484	}
				2485
				2486	return V;
				2487	}
				2488
				2489	SDOperand
				2490	X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				2491	// All zero's are handled with pxor.
				2492	if (ISD::isBuildVectorAllZeros(Op.Val))
				2493	return Op;
				2494
				2495	// All one's are handled with pcmpeqd.
				2496	if (ISD::isBuildVectorAllOnes(Op.Val))
				2497	return Op;
				2498
				2499	MVT::ValueType VT = Op.getValueType();
				2500	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				2501	unsigned EVTBits = MVT::getSizeInBits(EVT);
				2502
				2503	unsigned NumElems = Op.getNumOperands();
				2504	unsigned NumZero = 0;
				2505	unsigned NumNonZero = 0;
				2506	unsigned NonZeros = 0;
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2507	unsigned NumNonZeroImms = 0;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2508	std::set<SDOperand> Values;
				2509	for (unsigned i = 0; i < NumElems; ++i) {
				2510	SDOperand Elt = Op.getOperand(i);
				2511	if (Elt.getOpcode() != ISD::UNDEF) {
				2512	Values.insert(Elt);
				2513	if (isZeroNode(Elt))
				2514	NumZero++;
				2515	else {
				2516	NonZeros \|= (1 << i);
				2517	NumNonZero++;
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2518	if (Elt.getOpcode() == ISD::Constant \|\|
				2519	Elt.getOpcode() == ISD::ConstantFP)
				2520	NumNonZeroImms++;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2521	}
				2522	}
				2523	}
				2524
				2525	if (NumNonZero == 0) {
				2526	if (NumZero == 0)
				2527	// All undef vector. Return an UNDEF.
				2528	return DAG.getNode(ISD::UNDEF, VT);
				2529	else
				2530	// A mix of zero and undef. Return a zero vector.
				2531	return getZeroVector(VT, DAG);
				2532	}
				2533
				2534	// Splat is obviously ok. Let legalizer expand it to a shuffle.
				2535	if (Values.size() == 1)
				2536	return SDOperand();
				2537
				2538	// Special case for single non-zero element.
				2539	if (NumNonZero == 1) {
				2540	unsigned Idx = CountTrailingZeros_32(NonZeros);
				2541	SDOperand Item = Op.getOperand(Idx);
				2542	Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
				2543	if (Idx == 0)
				2544	// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
				2545	return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
				2546	NumZero > 0, DAG);
				2547
				2548	if (EVTBits == 32) {
				2549	// Turn it into a shuffle of zero and zero-extended scalar to vector.
				2550	Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
				2551	DAG);
				2552	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2553	MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
				2554	SmallVector<SDOperand, 8> MaskVec;
				2555	for (unsigned i = 0; i < NumElems; i++)
				2556	MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
				2557	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2558	&MaskVec[0], MaskVec.size());
				2559	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
				2560	DAG.getNode(ISD::UNDEF, VT), Mask);
				2561	}
				2562	}
				2563
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2564	// A vector full of immediates; various special cases are already
				2565	// handled, so this is best done with a single constant-pool load.
				2566	if (NumNonZero == NumNonZeroImms)
				2567	return SDOperand();
				2568
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2569	// Let legalizer expand 2-wide build_vectors.
				2570	if (EVTBits == 64)
				2571	return SDOperand();
				2572
				2573	// If element VT is < 32 bits, convert it to inserts into a zero vector.
				2574	if (EVTBits == 8 && NumElems == 16) {
				2575	SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
				2576	*this);
				2577	if (V.Val) return V;
				2578	}
				2579
				2580	if (EVTBits == 16 && NumElems == 8) {
				2581	SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
				2582	*this);
				2583	if (V.Val) return V;
				2584	}
				2585
				2586	// If element VT is == 32 bits, turn it into a number of shuffles.
				2587	SmallVector<SDOperand, 8> V;
				2588	V.resize(NumElems);
				2589	if (NumElems == 4 && NumZero > 0) {
				2590	for (unsigned i = 0; i < 4; ++i) {
				2591	bool isZero = !(NonZeros & (1 << i));
				2592	if (isZero)
				2593	V[i] = getZeroVector(VT, DAG);
				2594	else
				2595	V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
				2596	}
				2597
				2598	for (unsigned i = 0; i < 2; ++i) {
				2599	switch ((NonZeros & (0x3 << i2)) >> (i2)) {
				2600	default: break;
				2601	case 0:
				2602	V[i] = V[i*2]; // Must be a zero vector.
				2603	break;
				2604	case 1:
				2605	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2+1], V[i2],
				2606	getMOVLMask(NumElems, DAG));
				2607	break;
				2608	case 2:
				2609	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2], V[i2+1],
				2610	getMOVLMask(NumElems, DAG));
				2611	break;
				2612	case 3:
				2613	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2], V[i2+1],
				2614	getUnpacklMask(NumElems, DAG));
				2615	break;
				2616	}
				2617	}
				2618
				2619	// Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
				2620	// clears the upper bits.
				2621	// FIXME: we can do the same for v4f32 case when we know both parts of
				2622	// the lower half come from scalar_to_vector (loadf32). We should do
				2623	// that in post legalizer dag combiner with target specific hooks.
				2624	if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
				2625	return V[0];
				2626	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2627	MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
				2628	SmallVector<SDOperand, 8> MaskVec;
				2629	bool Reverse = (NonZeros & 0x3) == 2;
				2630	for (unsigned i = 0; i < 2; ++i)
				2631	if (Reverse)
				2632	MaskVec.push_back(DAG.getConstant(1-i, EVT));
				2633	else
				2634	MaskVec.push_back(DAG.getConstant(i, EVT));
				2635	Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
				2636	for (unsigned i = 0; i < 2; ++i)
				2637	if (Reverse)
				2638	MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
				2639	else
				2640	MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
				2641	SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2642	&MaskVec[0], MaskVec.size());
				2643	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
				2644	}
				2645
				2646	if (Values.size() > 2) {
				2647	// Expand into a number of unpckl*.
				2648	// e.g. for v4f32
				2649	// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
				2650	// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
				2651	// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
				2652	SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
				2653	for (unsigned i = 0; i < NumElems; ++i)
				2654	V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
				2655	NumElems >>= 1;
				2656	while (NumElems != 0) {
				2657	for (unsigned i = 0; i < NumElems; ++i)
				2658	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
				2659	UnpckMask);
				2660	NumElems >>= 1;
				2661	}
				2662	return V[0];
				2663	}
				2664
				2665	return SDOperand();
				2666	}
				2667
				2668	SDOperand
				2669	X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
				2670	SDOperand V1 = Op.getOperand(0);
				2671	SDOperand V2 = Op.getOperand(1);
				2672	SDOperand PermMask = Op.getOperand(2);
				2673	MVT::ValueType VT = Op.getValueType();
				2674	unsigned NumElems = PermMask.getNumOperands();
				2675	bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
				2676	bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
				2677	bool V1IsSplat = false;
				2678	bool V2IsSplat = false;
				2679
				2680	if (isUndefShuffle(Op.Val))
				2681	return DAG.getNode(ISD::UNDEF, VT);
				2682
				2683	if (isZeroShuffle(Op.Val))
				2684	return getZeroVector(VT, DAG);
				2685
				2686	if (isIdentityMask(PermMask.Val))
				2687	return V1;
				2688	else if (isIdentityMask(PermMask.Val, true))
				2689	return V2;
				2690
				2691	if (isSplatMask(PermMask.Val)) {
				2692	if (NumElems <= 4) return Op;
				2693	// Promote it to a v4i32 splat.
				2694	return PromoteSplat(Op, DAG);
				2695	}
				2696
				2697	if (X86::isMOVLMask(PermMask.Val))
				2698	return (V1IsUndef) ? V2 : Op;
				2699
				2700	if (X86::isMOVSHDUPMask(PermMask.Val) \|\|
				2701	X86::isMOVSLDUPMask(PermMask.Val) \|\|
				2702	X86::isMOVHLPSMask(PermMask.Val) \|\|
				2703	X86::isMOVHPMask(PermMask.Val) \|\|
				2704	X86::isMOVLPMask(PermMask.Val))
				2705	return Op;
				2706
				2707	if (ShouldXformToMOVHLPS(PermMask.Val) \|\|
				2708	ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
				2709	return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2710
				2711	bool Commuted = false;
				2712	V1IsSplat = isSplatVector(V1.Val);
				2713	V2IsSplat = isSplatVector(V2.Val);
				2714	if ((V1IsSplat \|\| V1IsUndef) && !(V2IsSplat \|\| V2IsUndef)) {
				2715	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2716	std::swap(V1IsSplat, V2IsSplat);
				2717	std::swap(V1IsUndef, V2IsUndef);
				2718	Commuted = true;
				2719	}
				2720
				2721	if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
				2722	if (V2IsUndef) return V1;
				2723	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2724	if (V2IsSplat) {
				2725	// V2 is a splat, so the mask may be malformed. That is, it may point
				2726	// to any V2 element. The instruction selectior won't like this. Get
				2727	// a corrected mask and commute to form a proper MOVS{S\|D}.
				2728	SDOperand NewMask = getMOVLMask(NumElems, DAG);
				2729	if (NewMask.Val != PermMask.Val)
				2730	Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2731	}
				2732	return Op;
				2733	}
				2734
				2735	if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) \|\|
				2736	X86::isUNPCKH_v_undef_Mask(PermMask.Val) \|\|
				2737	X86::isUNPCKLMask(PermMask.Val) \|\|
				2738	X86::isUNPCKHMask(PermMask.Val))
				2739	return Op;
				2740
				2741	if (V2IsSplat) {
				2742	// Normalize mask so all entries that point to V2 points to its first
				2743	// element then try to match unpck{h\|l} again. If match, return a
				2744	// new vector_shuffle with the corrected mask.
				2745	SDOperand NewMask = NormalizeMask(PermMask, DAG);
				2746	if (NewMask.Val != PermMask.Val) {
				2747	if (X86::isUNPCKLMask(PermMask.Val, true)) {
				2748	SDOperand NewMask = getUnpacklMask(NumElems, DAG);
				2749	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2750	} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
				2751	SDOperand NewMask = getUnpackhMask(NumElems, DAG);
				2752	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2753	}
				2754	}
				2755	}
				2756
				2757	// Normalize the node to match x86 shuffle ops if needed
				2758	if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
				2759	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2760
				2761	if (Commuted) {
				2762	// Commute is back and try unpck* again.
				2763	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2764	if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) \|\|
				2765	X86::isUNPCKH_v_undef_Mask(PermMask.Val) \|\|
				2766	X86::isUNPCKLMask(PermMask.Val) \|\|
				2767	X86::isUNPCKHMask(PermMask.Val))
				2768	return Op;
				2769	}
				2770
				2771	// If VT is integer, try PSHUF* first, then SHUFP*.
				2772	if (MVT::isInteger(VT)) {
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	2773	// MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
				2774	// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
				2775	if (((MVT::getSizeInBits(VT) != 64 \|\| NumElems == 4) &&
				2776	X86::isPSHUFDMask(PermMask.Val)) \|\|
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2777	X86::isPSHUFHWMask(PermMask.Val) \|\|
				2778	X86::isPSHUFLWMask(PermMask.Val)) {
				2779	if (V2.getOpcode() != ISD::UNDEF)
				2780	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
				2781	DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
				2782	return Op;
				2783	}
				2784
				2785	if (X86::isSHUFPMask(PermMask.Val) &&
				2786	MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
				2787	return Op;
				2788
				2789	// Handle v8i16 shuffle high / low shuffle node pair.
				2790	if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
				2791	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2792	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2793	SmallVector<SDOperand, 8> MaskVec;
				2794	for (unsigned i = 0; i != 4; ++i)
				2795	MaskVec.push_back(PermMask.getOperand(i));
				2796	for (unsigned i = 4; i != 8; ++i)
				2797	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2798	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2799	&MaskVec[0], MaskVec.size());
				2800	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2801	MaskVec.clear();
				2802	for (unsigned i = 0; i != 4; ++i)
				2803	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2804	for (unsigned i = 4; i != 8; ++i)
				2805	MaskVec.push_back(PermMask.getOperand(i));
				2806	Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
				2807	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2808	}
				2809	} else {
				2810	// Floating point cases in the other order.
				2811	if (X86::isSHUFPMask(PermMask.Val))
				2812	return Op;
				2813	if (X86::isPSHUFDMask(PermMask.Val) \|\|
				2814	X86::isPSHUFHWMask(PermMask.Val) \|\|
				2815	X86::isPSHUFLWMask(PermMask.Val)) {
				2816	if (V2.getOpcode() != ISD::UNDEF)
				2817	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
				2818	DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
				2819	return Op;
				2820	}
				2821	}
				2822
				2823	if (NumElems == 4 &&
				2824	// Don't do this for MMX.
				2825	MVT::getSizeInBits(VT) != 64) {
				2826	MVT::ValueType MaskVT = PermMask.getValueType();
				2827	MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
				2828	SmallVector<std::pair<int, int>, 8> Locs;
				2829	Locs.reserve(NumElems);
				2830	SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2831	SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2832	unsigned NumHi = 0;
				2833	unsigned NumLo = 0;
				2834	// If no more than two elements come from either vector. This can be
				2835	// implemented with two shuffles. First shuffle gather the elements.
				2836	// The second shuffle, which takes the first shuffle as both of its
				2837	// vector operands, put the elements into the right order.
				2838	for (unsigned i = 0; i != NumElems; ++i) {
				2839	SDOperand Elt = PermMask.getOperand(i);
				2840	if (Elt.getOpcode() == ISD::UNDEF) {
				2841	Locs[i] = std::make_pair(-1, -1);
				2842	} else {
				2843	unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
				2844	if (Val < NumElems) {
				2845	Locs[i] = std::make_pair(0, NumLo);
				2846	Mask1[NumLo] = Elt;
				2847	NumLo++;
				2848	} else {
				2849	Locs[i] = std::make_pair(1, NumHi);
				2850	if (2+NumHi < NumElems)
				2851	Mask1[2+NumHi] = Elt;
				2852	NumHi++;
				2853	}
				2854	}
				2855	}
				2856	if (NumLo <= 2 && NumHi <= 2) {
				2857	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2858	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2859	&Mask1[0], Mask1.size()));
				2860	for (unsigned i = 0; i != NumElems; ++i) {
				2861	if (Locs[i].first == -1)
				2862	continue;
				2863	else {
				2864	unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
				2865	Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
				2866	Mask2[i] = DAG.getConstant(Idx, MaskEVT);
				2867	}
				2868	}
				2869
				2870	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
				2871	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2872	&Mask2[0], Mask2.size()));
				2873	}
				2874
				2875	// Break it into (shuffle shuffle_hi, shuffle_lo).
				2876	Locs.clear();
				2877	SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2878	SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2879	SmallVector<SDOperand,8> *MaskPtr = &LoMask;
				2880	unsigned MaskIdx = 0;
				2881	unsigned LoIdx = 0;
				2882	unsigned HiIdx = NumElems/2;
				2883	for (unsigned i = 0; i != NumElems; ++i) {
				2884	if (i == NumElems/2) {
				2885	MaskPtr = &HiMask;
				2886	MaskIdx = 1;
				2887	LoIdx = 0;
				2888	HiIdx = NumElems/2;
				2889	}
				2890	SDOperand Elt = PermMask.getOperand(i);
				2891	if (Elt.getOpcode() == ISD::UNDEF) {
				2892	Locs[i] = std::make_pair(-1, -1);
				2893	} else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
				2894	Locs[i] = std::make_pair(MaskIdx, LoIdx);
				2895	(*MaskPtr)[LoIdx] = Elt;
				2896	LoIdx++;
				2897	} else {
				2898	Locs[i] = std::make_pair(MaskIdx, HiIdx);
				2899	(*MaskPtr)[HiIdx] = Elt;
				2900	HiIdx++;
				2901	}
				2902	}
				2903
				2904	SDOperand LoShuffle =
				2905	DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2906	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2907	&LoMask[0], LoMask.size()));
				2908	SDOperand HiShuffle =
				2909	DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2910	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2911	&HiMask[0], HiMask.size()));
				2912	SmallVector<SDOperand, 8> MaskOps;
				2913	for (unsigned i = 0; i != NumElems; ++i) {
				2914	if (Locs[i].first == -1) {
				2915	MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
				2916	} else {
				2917	unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
				2918	MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
				2919	}
				2920	}
				2921	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
				2922	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2923	&MaskOps[0], MaskOps.size()));
				2924	}
				2925
				2926	return SDOperand();
				2927	}
				2928
				2929	SDOperand
				2930	X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				2931	if (!isa<ConstantSDNode>(Op.getOperand(1)))
				2932	return SDOperand();
				2933
				2934	MVT::ValueType VT = Op.getValueType();
				2935	// TODO: handle v16i8.
				2936	if (MVT::getSizeInBits(VT) == 16) {
				2937	// Transform it so it match pextrw which produces a 32-bit result.
				2938	MVT::ValueType EVT = (MVT::ValueType)(VT+1);
				2939	SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
				2940	Op.getOperand(0), Op.getOperand(1));
				2941	SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
				2942	DAG.getValueType(VT));
				2943	return DAG.getNode(ISD::TRUNCATE, VT, Assert);
				2944	} else if (MVT::getSizeInBits(VT) == 32) {
				2945	SDOperand Vec = Op.getOperand(0);
				2946	unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				2947	if (Idx == 0)
				2948	return Op;
				2949	// SHUFPS the element to the lowest double word, then movss.
				2950	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2951	SmallVector<SDOperand, 8> IdxVec;
				2952	IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
				2953	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2954	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2955	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2956	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2957	&IdxVec[0], IdxVec.size());
				2958	Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
				2959	Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
				2960	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
				2961	DAG.getConstant(0, getPointerTy()));
				2962	} else if (MVT::getSizeInBits(VT) == 64) {
				2963	SDOperand Vec = Op.getOperand(0);
				2964	unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				2965	if (Idx == 0)
				2966	return Op;
				2967
				2968	// UNPCKHPD the element to the lowest double word, then movsd.
				2969	// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
				2970	// to a f64mem, the whole operation is folded into a single MOVHPDmr.
				2971	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2972	SmallVector<SDOperand, 8> IdxVec;
				2973	IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
				2974	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				2975	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2976	&IdxVec[0], IdxVec.size());
				2977	Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
				2978	Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
				2979	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
				2980	DAG.getConstant(0, getPointerTy()));
				2981	}
				2982
				2983	return SDOperand();
				2984	}
				2985
				2986	SDOperand
				2987	X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				2988	// Transform it so it match pinsrw which expects a 16-bit value in a GR32
				2989	// as its second argument.
				2990	MVT::ValueType VT = Op.getValueType();
				2991	MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
				2992	SDOperand N0 = Op.getOperand(0);
				2993	SDOperand N1 = Op.getOperand(1);
				2994	SDOperand N2 = Op.getOperand(2);
				2995	if (MVT::getSizeInBits(BaseVT) == 16) {
				2996	if (N1.getValueType() != MVT::i32)
				2997	N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
				2998	if (N2.getValueType() != MVT::i32)
				2999	N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
				3000	return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
				3001	} else if (MVT::getSizeInBits(BaseVT) == 32) {
				3002	unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
				3003	if (Idx == 0) {
				3004	// Use a movss.
				3005	N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
				3006	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				3007	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				3008	SmallVector<SDOperand, 8> MaskVec;
				3009	MaskVec.push_back(DAG.getConstant(4, BaseVT));
				3010	for (unsigned i = 1; i <= 3; ++i)
				3011	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				3012	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
				3013	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3014	&MaskVec[0], MaskVec.size()));
				3015	} else {
				3016	// Use two pinsrw instructions to insert a 32 bit value.
				3017	Idx <<= 1;
				3018	if (MVT::isFloatingPoint(N1.getValueType())) {
Evan Cheng	1eea675	2007-07-31 06:21:44 +0000	[diff] [blame]	3019	N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
				3020	N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
				3021	N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
				3022	DAG.getConstant(0, getPointerTy()));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3023	}
				3024	N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
				3025	N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
				3026	DAG.getConstant(Idx, getPointerTy()));
				3027	N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
				3028	N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
				3029	DAG.getConstant(Idx+1, getPointerTy()));
				3030	return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
				3031	}
				3032	}
				3033
				3034	return SDOperand();
				3035	}
				3036
				3037	SDOperand
				3038	X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				3039	SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
				3040	return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
				3041	}
				3042
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form an addressing mode. These wrapped nodes will be selected
// into MOV32ri.
				3049	SDOperand
				3050	X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
				3051	ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
				3052	SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
				3053	getPointerTy(),
				3054	CP->getAlignment());
				3055	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3056	// With PIC, the address is actually $g + Offset.
				3057	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3058	!Subtarget->isPICStyleRIPRel()) {
				3059	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3060	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3061	Result);
				3062	}
				3063
				3064	return Result;
				3065	}
				3066
				3067	SDOperand
				3068	X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
				3069	GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
				3070	SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
				3071	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3072	// With PIC, the address is actually $g + Offset.
				3073	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3074	!Subtarget->isPICStyleRIPRel()) {
				3075	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3076	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3077	Result);
				3078	}
				3079
				3080	// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
				3081	// load the value at address GV, not the value of GV itself. This means that
				3082	// the GlobalAddress must be in the base or index register of the address, not
				3083	// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
				3084	// The same applies for external symbols during PIC codegen
				3085	if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
				3086	Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
				3087
				3088	return Result;
				3089	}
				3090
				3091	// Lower ISD::GlobalTLSAddress using the "general dynamic" model
				3092	static SDOperand
				3093	LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
				3094	const MVT::ValueType PtrVT) {
				3095	SDOperand InFlag;
				3096	SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
				3097	DAG.getNode(X86ISD::GlobalBaseReg,
				3098	PtrVT), InFlag);
				3099	InFlag = Chain.getValue(1);
				3100
				3101	// emit leal symbol@TLSGD(,%ebx,1), %eax
				3102	SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
				3103	SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
				3104	GA->getValueType(0),
				3105	GA->getOffset());
				3106	SDOperand Ops[] = { Chain, TGA, InFlag };
				3107	SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
				3108	InFlag = Result.getValue(2);
				3109	Chain = Result.getValue(1);
				3110
				3111	// call ___tls_get_addr. This function receives its argument in
				3112	// the register EAX.
				3113	Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
				3114	InFlag = Chain.getValue(1);
				3115
				3116	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				3117	SDOperand Ops1[] = { Chain,
				3118	DAG.getTargetExternalSymbol("___tls_get_addr",
				3119	PtrVT),
				3120	DAG.getRegister(X86::EAX, PtrVT),
				3121	DAG.getRegister(X86::EBX, PtrVT),
				3122	InFlag };
				3123	Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
				3124	InFlag = Chain.getValue(1);
				3125
				3126	return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
				3127	}
				3128
				3129	// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
				3130	// "local exec" model.
				3131	static SDOperand
				3132	LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
				3133	const MVT::ValueType PtrVT) {
				3134	// Get the Thread Pointer
				3135	SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
				3136	// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
				3137	// exec)
				3138	SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
				3139	GA->getValueType(0),
				3140	GA->getOffset());
				3141	SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
				3142
				3143	if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
				3144	Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
				3145
				3146	// The address of the thread local variable is the add of the thread
				3147	// pointer with the offset of the variable.
				3148	return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
				3149	}
				3150
				3151	SDOperand
				3152	X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
				3153	// TODO: implement the "local dynamic" model
				3154	// TODO: implement the "initial exec"model for pic executables
				3155	assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
				3156	"TLS not implemented for non-ELF and 64-bit targets");
				3157	GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
				3158	// If the relocation model is PIC, use the "General Dynamic" TLS Model,
				3159	// otherwise use the "Local Exec"TLS Model
				3160	if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
				3161	return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
				3162	else
				3163	return LowerToTLSExecModel(GA, DAG, getPointerTy());
				3164	}
				3165
				3166	SDOperand
				3167	X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
				3168	const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
				3169	SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
				3170	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3171	// With PIC, the address is actually $g + Offset.
				3172	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3173	!Subtarget->isPICStyleRIPRel()) {
				3174	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3175	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3176	Result);
				3177	}
				3178
				3179	return Result;
				3180	}
				3181
				3182	SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
				3183	JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
				3184	SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
				3185	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3186	// With PIC, the address is actually $g + Offset.
				3187	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3188	!Subtarget->isPICStyleRIPRel()) {
				3189	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3190	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3191	Result);
				3192	}
				3193
				3194	return Result;
				3195	}
				3196
				3197	SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
				3198	assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
				3199	"Not an i64 shift!");
				3200	bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
				3201	SDOperand ShOpLo = Op.getOperand(0);
				3202	SDOperand ShOpHi = Op.getOperand(1);
				3203	SDOperand ShAmt = Op.getOperand(2);
				3204	SDOperand Tmp1 = isSRA ?
				3205	DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
				3206	DAG.getConstant(0, MVT::i32);
				3207
				3208	SDOperand Tmp2, Tmp3;
				3209	if (Op.getOpcode() == ISD::SHL_PARTS) {
				3210	Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
				3211	Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
				3212	} else {
				3213	Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
				3214	Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
				3215	}
				3216
				3217	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3218	SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
				3219	DAG.getConstant(32, MVT::i8));
				3220	SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
				3221	SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
				3222
				3223	SDOperand Hi, Lo;
				3224	SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3225
				3226	VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
				3227	SmallVector<SDOperand, 4> Ops;
				3228	if (Op.getOpcode() == ISD::SHL_PARTS) {
				3229	Ops.push_back(Tmp2);
				3230	Ops.push_back(Tmp3);
				3231	Ops.push_back(CC);
				3232	Ops.push_back(InFlag);
				3233	Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3234	InFlag = Hi.getValue(1);
				3235
				3236	Ops.clear();
				3237	Ops.push_back(Tmp3);
				3238	Ops.push_back(Tmp1);
				3239	Ops.push_back(CC);
				3240	Ops.push_back(InFlag);
				3241	Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3242	} else {
				3243	Ops.push_back(Tmp2);
				3244	Ops.push_back(Tmp3);
				3245	Ops.push_back(CC);
				3246	Ops.push_back(InFlag);
				3247	Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3248	InFlag = Lo.getValue(1);
				3249
				3250	Ops.clear();
				3251	Ops.push_back(Tmp3);
				3252	Ops.push_back(Tmp1);
				3253	Ops.push_back(CC);
				3254	Ops.push_back(InFlag);
				3255	Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3256	}
				3257
				3258	VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
				3259	Ops.clear();
				3260	Ops.push_back(Lo);
				3261	Ops.push_back(Hi);
				3262	return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
				3263	}
				3264
				3265	SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
				3266	assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
				3267	Op.getOperand(0).getValueType() >= MVT::i16 &&
				3268	"Unknown SINT_TO_FP to lower!");
				3269
				3270	SDOperand Result;
				3271	MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
				3272	unsigned Size = MVT::getSizeInBits(SrcVT)/8;
				3273	MachineFunction &MF = DAG.getMachineFunction();
				3274	int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
				3275	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3276	SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
				3277	StackSlot, NULL, 0);
				3278
				3279	// Build the FILD
				3280	SDVTList Tys;
				3281	if (X86ScalarSSE)
				3282	Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
				3283	else
				3284	Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
				3285	SmallVector<SDOperand, 8> Ops;
				3286	Ops.push_back(Chain);
				3287	Ops.push_back(StackSlot);
				3288	Ops.push_back(DAG.getValueType(SrcVT));
				3289	Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
				3290	Tys, &Ops[0], Ops.size());
				3291
				3292	if (X86ScalarSSE) {
				3293	Chain = Result.getValue(1);
				3294	SDOperand InFlag = Result.getValue(2);
				3295
				3296	// FIXME: Currently the FST is flagged to the FILD_FLAG. This
				3297	// shouldn't be necessary except that RFP cannot be live across
				3298	// multiple blocks. When stackifier is fixed, they can be uncoupled.
				3299	MachineFunction &MF = DAG.getMachineFunction();
				3300	int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
				3301	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3302	Tys = DAG.getVTList(MVT::Other);
				3303	SmallVector<SDOperand, 8> Ops;
				3304	Ops.push_back(Chain);
				3305	Ops.push_back(Result);
				3306	Ops.push_back(StackSlot);
				3307	Ops.push_back(DAG.getValueType(Op.getValueType()));
				3308	Ops.push_back(InFlag);
				3309	Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
				3310	Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
				3311	}
				3312
				3313	return Result;
				3314	}
				3315
				3316	SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
				3317	assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
				3318	"Unknown FP_TO_SINT to lower!");
				3319	// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
				3320	// stack slot.
				3321	MachineFunction &MF = DAG.getMachineFunction();
				3322	unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
				3323	int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
				3324	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3325
				3326	unsigned Opc;
				3327	switch (Op.getValueType()) {
				3328	default: assert(0 && "Invalid FP_TO_SINT to lower!");
				3329	case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
				3330	case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
				3331	case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
				3332	}
				3333
				3334	SDOperand Chain = DAG.getEntryNode();
				3335	SDOperand Value = Op.getOperand(0);
				3336	if (X86ScalarSSE) {
				3337	assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
				3338	Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
				3339	SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
				3340	SDOperand Ops[] = {
				3341	Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
				3342	};
				3343	Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
				3344	Chain = Value.getValue(1);
				3345	SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
				3346	StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3347	}
				3348
				3349	// Build the FP_TO_INT*_IN_MEM
				3350	SDOperand Ops[] = { Chain, Value, StackSlot };
				3351	SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
				3352
				3353	// Load the result.
				3354	return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
				3355	}
				3356
				3357	SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
				3358	MVT::ValueType VT = Op.getValueType();
				3359	MVT::ValueType EltVT = VT;
				3360	if (MVT::isVector(VT))
				3361	EltVT = MVT::getVectorElementType(VT);
				3362	const Type *OpNTy = MVT::getTypeForValueType(EltVT);
				3363	std::vector<Constant*> CV;
				3364	if (EltVT == MVT::f64) {
				3365	Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)));
				3366	CV.push_back(C);
				3367	CV.push_back(C);
				3368	} else {
				3369	Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)));
				3370	CV.push_back(C);
				3371	CV.push_back(C);
				3372	CV.push_back(C);
				3373	CV.push_back(C);
				3374	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3375	Constant *C = ConstantVector::get(CV);
				3376	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3377	SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3378	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3379	return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
				3380	}
				3381
				3382	SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
				3383	MVT::ValueType VT = Op.getValueType();
				3384	MVT::ValueType EltVT = VT;
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3385	unsigned EltNum = 1;
				3386	if (MVT::isVector(VT)) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3387	EltVT = MVT::getVectorElementType(VT);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3388	EltNum = MVT::getVectorNumElements(VT);
				3389	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3390	const Type *OpNTy = MVT::getTypeForValueType(EltVT);
				3391	std::vector<Constant*> CV;
				3392	if (EltVT == MVT::f64) {
				3393	Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63));
				3394	CV.push_back(C);
				3395	CV.push_back(C);
				3396	} else {
				3397	Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31));
				3398	CV.push_back(C);
				3399	CV.push_back(C);
				3400	CV.push_back(C);
				3401	CV.push_back(C);
				3402	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3403	Constant *C = ConstantVector::get(CV);
				3404	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3405	SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3406	false, 16);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3407	if (MVT::isVector(VT)) {
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3408	return DAG.getNode(ISD::BIT_CONVERT, VT,
				3409	DAG.getNode(ISD::XOR, MVT::v2i64,
				3410	DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
				3411	DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
				3412	} else {
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3413	return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
				3414	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3415	}
				3416
				3417	SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
				3418	SDOperand Op0 = Op.getOperand(0);
				3419	SDOperand Op1 = Op.getOperand(1);
				3420	MVT::ValueType VT = Op.getValueType();
				3421	MVT::ValueType SrcVT = Op1.getValueType();
				3422	const Type *SrcTy = MVT::getTypeForValueType(SrcVT);
				3423
				3424	// If second operand is smaller, extend it first.
				3425	if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
				3426	Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
				3427	SrcVT = VT;
				3428	}
				3429
				3430	// First get the sign bit of second operand.
				3431	std::vector<Constant*> CV;
				3432	if (SrcVT == MVT::f64) {
				3433	CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63)));
				3434	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3435	} else {
				3436	CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31)));
				3437	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3438	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3439	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3440	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3441	Constant *C = ConstantVector::get(CV);
				3442	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3443	SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3444	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3445	SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
				3446
				3447	// Shift sign bit right or left if the two operands have different types.
				3448	if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
				3449	// Op0 is MVT::f32, Op1 is MVT::f64.
				3450	SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
				3451	SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
				3452	DAG.getConstant(32, MVT::i32));
				3453	SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
				3454	SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
				3455	DAG.getConstant(0, getPointerTy()));
				3456	}
				3457
				3458	// Clear first operand sign bit.
				3459	CV.clear();
				3460	if (VT == MVT::f64) {
				3461	CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63))));
				3462	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3463	} else {
				3464	CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31))));
				3465	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3466	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3467	CV.push_back(ConstantFP::get(SrcTy, 0.0));
				3468	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3469	C = ConstantVector::get(CV);
				3470	CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3471	SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3472	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3473	SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
				3474
				3475	// Or the value with the sign bit.
				3476	return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
				3477	}
				3478
				3479	SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
				3480	SDOperand Chain) {
				3481	assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
				3482	SDOperand Cond;
				3483	SDOperand Op0 = Op.getOperand(0);
				3484	SDOperand Op1 = Op.getOperand(1);
				3485	SDOperand CC = Op.getOperand(2);
				3486	ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
				3487	const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3488	const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
				3489	bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
				3490	unsigned X86CC;
				3491
				3492	if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
				3493	Op0, Op1, DAG)) {
				3494	SDOperand Ops1[] = { Chain, Op0, Op1 };
				3495	Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
				3496	SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
				3497	return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3498	}
				3499
				3500	assert(isFP && "Illegal integer SetCC!");
				3501
				3502	SDOperand COps[] = { Chain, Op0, Op1 };
				3503	Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
				3504
				3505	switch (SetCCOpcode) {
				3506	default: assert(false && "Illegal floating point SetCC!");
				3507	case ISD::SETOEQ: { // !PF & ZF
				3508	SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
				3509	SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
				3510	SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
				3511	Tmp1.getValue(1) };
				3512	SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3513	return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
				3514	}
				3515	case ISD::SETUNE: { // PF \| !ZF
				3516	SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
				3517	SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
				3518	SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
				3519	Tmp1.getValue(1) };
				3520	SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3521	return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
				3522	}
				3523	}
				3524	}
				3525
				3526	SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
				3527	bool addTest = true;
				3528	SDOperand Chain = DAG.getEntryNode();
				3529	SDOperand Cond = Op.getOperand(0);
				3530	SDOperand CC;
				3531	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3532
				3533	if (Cond.getOpcode() == ISD::SETCC)
				3534	Cond = LowerSETCC(Cond, DAG, Chain);
				3535
				3536	if (Cond.getOpcode() == X86ISD::SETCC) {
				3537	CC = Cond.getOperand(0);
				3538
				3539	// If condition flag is set by a X86ISD::CMP, then make a copy of it
				3540	// (since flag operand cannot be shared). Use it as the condition setting
				3541	// operand in place of the X86ISD::SETCC.
				3542	// If the X86ISD::SETCC has more than one use, then perhaps it's better
				3543	// to use a test instead of duplicating the X86ISD::CMP (for register
				3544	// pressure reason)?
				3545	SDOperand Cmp = Cond.getOperand(1);
				3546	unsigned Opc = Cmp.getOpcode();
				3547	bool IllegalFPCMov = !X86ScalarSSE &&
				3548	MVT::isFloatingPoint(Op.getValueType()) &&
				3549	!hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
				3550	if ((Opc == X86ISD::CMP \|\| Opc == X86ISD::COMI \|\| Opc == X86ISD::UCOMI) &&
				3551	!IllegalFPCMov) {
				3552	SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
				3553	Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
				3554	addTest = false;
				3555	}
				3556	}
				3557
				3558	if (addTest) {
				3559	CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3560	SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
				3561	Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
				3562	}
				3563
				3564	VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
				3565	SmallVector<SDOperand, 4> Ops;
				3566	// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
				3567	// condition is true.
				3568	Ops.push_back(Op.getOperand(2));
				3569	Ops.push_back(Op.getOperand(1));
				3570	Ops.push_back(CC);
				3571	Ops.push_back(Cond.getValue(1));
				3572	return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3573	}
				3574
				3575	SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
				3576	bool addTest = true;
				3577	SDOperand Chain = Op.getOperand(0);
				3578	SDOperand Cond = Op.getOperand(1);
				3579	SDOperand Dest = Op.getOperand(2);
				3580	SDOperand CC;
				3581	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3582
				3583	if (Cond.getOpcode() == ISD::SETCC)
				3584	Cond = LowerSETCC(Cond, DAG, Chain);
				3585
				3586	if (Cond.getOpcode() == X86ISD::SETCC) {
				3587	CC = Cond.getOperand(0);
				3588
				3589	// If condition flag is set by a X86ISD::CMP, then make a copy of it
				3590	// (since flag operand cannot be shared). Use it as the condition setting
				3591	// operand in place of the X86ISD::SETCC.
				3592	// If the X86ISD::SETCC has more than one use, then perhaps it's better
				3593	// to use a test instead of duplicating the X86ISD::CMP (for register
				3594	// pressure reason)?
				3595	SDOperand Cmp = Cond.getOperand(1);
				3596	unsigned Opc = Cmp.getOpcode();
				3597	if (Opc == X86ISD::CMP \|\| Opc == X86ISD::COMI \|\| Opc == X86ISD::UCOMI) {
				3598	SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
				3599	Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
				3600	addTest = false;
				3601	}
				3602	}
				3603
				3604	if (addTest) {
				3605	CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3606	SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
				3607	Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
				3608	}
				3609	return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
				3610	Cond, Op.getOperand(2), CC, Cond.getValue(1));
				3611	}
				3612
				3613	SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
				3614	unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3615
				3616	if (Subtarget->is64Bit())
				3617	return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
				3618	else
				3619	switch (CallingConv) {
				3620	default:
				3621	assert(0 && "Unsupported calling convention");
				3622	case CallingConv::Fast:
				3623	// TODO: Implement fastcc
				3624	// Falls through
				3625	case CallingConv::C:
				3626	case CallingConv::X86_StdCall:
				3627	return LowerCCCCallTo(Op, DAG, CallingConv);
				3628	case CallingConv::X86_FastCall:
				3629	return LowerFastCCCallTo(Op, DAG, CallingConv);
				3630	}
				3631	}
				3632
				3633
				3634	// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
				3635	// Calls to _alloca is needed to probe the stack when allocating more than 4k
				3636	// bytes in one go. Touching the stack at 4K increments is necessary to ensure
				3637	// that the guard pages used by the OS virtual memory manager are allocated in
				3638	// correct sequence.
				3639	SDOperand
				3640	X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
				3641	SelectionDAG &DAG) {
				3642	assert(Subtarget->isTargetCygMing() &&
				3643	"This should be used only on Cygwin/Mingw targets");
				3644
				3645	// Get the inputs.
				3646	SDOperand Chain = Op.getOperand(0);
				3647	SDOperand Size = Op.getOperand(1);
				3648	// FIXME: Ensure alignment here
				3649
				3650	SDOperand Flag;
				3651
				3652	MVT::ValueType IntPtr = getPointerTy();
				3653	MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
				3654
				3655	Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
				3656	Flag = Chain.getValue(1);
				3657
				3658	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				3659	SDOperand Ops[] = { Chain,
				3660	DAG.getTargetExternalSymbol("_alloca", IntPtr),
				3661	DAG.getRegister(X86::EAX, IntPtr),
				3662	Flag };
				3663	Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
				3664	Flag = Chain.getValue(1);
				3665
				3666	Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
				3667
				3668	std::vector<MVT::ValueType> Tys;
				3669	Tys.push_back(SPTy);
				3670	Tys.push_back(MVT::Other);
				3671	SDOperand Ops1[2] = { Chain.getValue(0), Chain };
				3672	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
				3673	}
				3674
				3675	SDOperand
				3676	X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
				3677	MachineFunction &MF = DAG.getMachineFunction();
				3678	const Function* Fn = MF.getFunction();
				3679	if (Fn->hasExternalLinkage() &&
				3680	Subtarget->isTargetCygMing() &&
				3681	Fn->getName() == "main")
				3682	MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
				3683
				3684	unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3685	if (Subtarget->is64Bit())
				3686	return LowerX86_64CCCArguments(Op, DAG);
				3687	else
				3688	switch(CC) {
				3689	default:
				3690	assert(0 && "Unsupported calling convention");
				3691	case CallingConv::Fast:
				3692	// TODO: implement fastcc.
				3693
				3694	// Falls through
				3695	case CallingConv::C:
				3696	return LowerCCCArguments(Op, DAG);
				3697	case CallingConv::X86_StdCall:
				3698	MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
				3699	return LowerCCCArguments(Op, DAG, true);
				3700	case CallingConv::X86_FastCall:
				3701	MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
				3702	return LowerFastCCArguments(Op, DAG);
				3703	}
				3704	}
				3705
				3706	SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
				3707	SDOperand InFlag(0, 0);
				3708	SDOperand Chain = Op.getOperand(0);
				3709	unsigned Align =
				3710	(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
				3711	if (Align == 0) Align = 1;
				3712
				3713	ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
				3714	// If not DWORD aligned, call memset if size is less than the threshold.
				3715	// It knows how to align to the right boundary first.
				3716	if ((Align & 3) != 0 \|\|
				3717	(I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
				3718	MVT::ValueType IntPtr = getPointerTy();
				3719	const Type *IntPtrTy = getTargetData()->getIntPtrType();
				3720	TargetLowering::ArgListTy Args;
				3721	TargetLowering::ArgListEntry Entry;
				3722	Entry.Node = Op.getOperand(1);
				3723	Entry.Ty = IntPtrTy;
				3724	Args.push_back(Entry);
				3725	// Extend the unsigned i8 argument to be an int value for the call.
				3726	Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
				3727	Entry.Ty = IntPtrTy;
				3728	Args.push_back(Entry);
				3729	Entry.Node = Op.getOperand(3);
				3730	Args.push_back(Entry);
				3731	std::pair<SDOperand,SDOperand> CallResult =
				3732	LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
				3733	DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
				3734	return CallResult.second;
				3735	}
				3736
				3737	MVT::ValueType AVT;
				3738	SDOperand Count;
				3739	ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
				3740	unsigned BytesLeft = 0;
				3741	bool TwoRepStos = false;
				3742	if (ValC) {
				3743	unsigned ValReg;
				3744	uint64_t Val = ValC->getValue() & 255;
				3745
				3746	// If the value is a constant, then we can potentially use larger sets.
				3747	switch (Align & 3) {
				3748	case 2: // WORD aligned
				3749	AVT = MVT::i16;
				3750	ValReg = X86::AX;
				3751	Val = (Val << 8) \| Val;
				3752	break;
				3753	case 0: // DWORD aligned
				3754	AVT = MVT::i32;
				3755	ValReg = X86::EAX;
				3756	Val = (Val << 8) \| Val;
				3757	Val = (Val << 16) \| Val;
				3758	if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned
				3759	AVT = MVT::i64;
				3760	ValReg = X86::RAX;
				3761	Val = (Val << 32) \| Val;
				3762	}
				3763	break;
				3764	default: // Byte aligned
				3765	AVT = MVT::i8;
				3766	ValReg = X86::AL;
				3767	Count = Op.getOperand(3);
				3768	break;
				3769	}
				3770
				3771	if (AVT > MVT::i8) {
				3772	if (I) {
				3773	unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
				3774	Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
				3775	BytesLeft = I->getValue() % UBytes;
				3776	} else {
				3777	assert(AVT >= MVT::i32 &&
				3778	"Do not use rep;stos if not at least DWORD aligned");
				3779	Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
				3780	Op.getOperand(3), DAG.getConstant(2, MVT::i8));
				3781	TwoRepStos = true;
				3782	}
				3783	}
				3784
				3785	Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
				3786	InFlag);
				3787	InFlag = Chain.getValue(1);
				3788	} else {
				3789	AVT = MVT::i8;
				3790	Count = Op.getOperand(3);
				3791	Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
				3792	InFlag = Chain.getValue(1);
				3793	}
				3794
				3795	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
				3796	Count, InFlag);
				3797	InFlag = Chain.getValue(1);
				3798	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
				3799	Op.getOperand(1), InFlag);
				3800	InFlag = Chain.getValue(1);
				3801
				3802	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3803	SmallVector<SDOperand, 8> Ops;
				3804	Ops.push_back(Chain);
				3805	Ops.push_back(DAG.getValueType(AVT));
				3806	Ops.push_back(InFlag);
				3807	Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
				3808
				3809	if (TwoRepStos) {
				3810	InFlag = Chain.getValue(1);
				3811	Count = Op.getOperand(3);
				3812	MVT::ValueType CVT = Count.getValueType();
				3813	SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
				3814	DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
				3815	Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
				3816	Left, InFlag);
				3817	InFlag = Chain.getValue(1);
				3818	Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3819	Ops.clear();
				3820	Ops.push_back(Chain);
				3821	Ops.push_back(DAG.getValueType(MVT::i8));
				3822	Ops.push_back(InFlag);
				3823	Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
				3824	} else if (BytesLeft) {
				3825	// Issue stores for the last 1 - 7 bytes.
				3826	SDOperand Value;
				3827	unsigned Val = ValC->getValue() & 255;
				3828	unsigned Offset = I->getValue() - BytesLeft;
				3829	SDOperand DstAddr = Op.getOperand(1);
				3830	MVT::ValueType AddrVT = DstAddr.getValueType();
				3831	if (BytesLeft >= 4) {
				3832	Val = (Val << 8) \| Val;
				3833	Val = (Val << 16) \| Val;
				3834	Value = DAG.getConstant(Val, MVT::i32);
				3835	Chain = DAG.getStore(Chain, Value,
				3836	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3837	DAG.getConstant(Offset, AddrVT)),
				3838	NULL, 0);
				3839	BytesLeft -= 4;
				3840	Offset += 4;
				3841	}
				3842	if (BytesLeft >= 2) {
				3843	Value = DAG.getConstant((Val << 8) \| Val, MVT::i16);
				3844	Chain = DAG.getStore(Chain, Value,
				3845	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3846	DAG.getConstant(Offset, AddrVT)),
				3847	NULL, 0);
				3848	BytesLeft -= 2;
				3849	Offset += 2;
				3850	}
				3851	if (BytesLeft == 1) {
				3852	Value = DAG.getConstant(Val, MVT::i8);
				3853	Chain = DAG.getStore(Chain, Value,
				3854	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3855	DAG.getConstant(Offset, AddrVT)),
				3856	NULL, 0);
				3857	}
				3858	}
				3859
				3860	return Chain;
				3861	}
				3862
				3863	SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
				3864	SDOperand Chain = Op.getOperand(0);
				3865	unsigned Align =
				3866	(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
				3867	if (Align == 0) Align = 1;
				3868
				3869	ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
				3870	// If not DWORD aligned, call memcpy if size is less than the threshold.
				3871	// It knows how to align to the right boundary first.
				3872	if ((Align & 3) != 0 \|\|
				3873	(I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
				3874	MVT::ValueType IntPtr = getPointerTy();
				3875	TargetLowering::ArgListTy Args;
				3876	TargetLowering::ArgListEntry Entry;
				3877	Entry.Ty = getTargetData()->getIntPtrType();
				3878	Entry.Node = Op.getOperand(1); Args.push_back(Entry);
				3879	Entry.Node = Op.getOperand(2); Args.push_back(Entry);
				3880	Entry.Node = Op.getOperand(3); Args.push_back(Entry);
				3881	std::pair<SDOperand,SDOperand> CallResult =
				3882	LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
				3883	DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
				3884	return CallResult.second;
				3885	}
				3886
				3887	MVT::ValueType AVT;
				3888	SDOperand Count;
				3889	unsigned BytesLeft = 0;
				3890	bool TwoRepMovs = false;
				3891	switch (Align & 3) {
				3892	case 2: // WORD aligned
				3893	AVT = MVT::i16;
				3894	break;
				3895	case 0: // DWORD aligned
				3896	AVT = MVT::i32;
				3897	if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
				3898	AVT = MVT::i64;
				3899	break;
				3900	default: // Byte aligned
				3901	AVT = MVT::i8;
				3902	Count = Op.getOperand(3);
				3903	break;
				3904	}
				3905
				3906	if (AVT > MVT::i8) {
				3907	if (I) {
				3908	unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
				3909	Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
				3910	BytesLeft = I->getValue() % UBytes;
				3911	} else {
				3912	assert(AVT >= MVT::i32 &&
				3913	"Do not use rep;movs if not at least DWORD aligned");
				3914	Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
				3915	Op.getOperand(3), DAG.getConstant(2, MVT::i8));
				3916	TwoRepMovs = true;
				3917	}
				3918	}
				3919
				3920	SDOperand InFlag(0, 0);
				3921	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
				3922	Count, InFlag);
				3923	InFlag = Chain.getValue(1);
				3924	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
				3925	Op.getOperand(1), InFlag);
				3926	InFlag = Chain.getValue(1);
				3927	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
				3928	Op.getOperand(2), InFlag);
				3929	InFlag = Chain.getValue(1);
				3930
				3931	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3932	SmallVector<SDOperand, 8> Ops;
				3933	Ops.push_back(Chain);
				3934	Ops.push_back(DAG.getValueType(AVT));
				3935	Ops.push_back(InFlag);
				3936	Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
				3937
				3938	if (TwoRepMovs) {
				3939	InFlag = Chain.getValue(1);
				3940	Count = Op.getOperand(3);
				3941	MVT::ValueType CVT = Count.getValueType();
				3942	SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
				3943	DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
				3944	Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
				3945	Left, InFlag);
				3946	InFlag = Chain.getValue(1);
				3947	Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3948	Ops.clear();
				3949	Ops.push_back(Chain);
				3950	Ops.push_back(DAG.getValueType(MVT::i8));
				3951	Ops.push_back(InFlag);
				3952	Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
				3953	} else if (BytesLeft) {
				3954	// Issue loads and stores for the last 1 - 7 bytes.
				3955	unsigned Offset = I->getValue() - BytesLeft;
				3956	SDOperand DstAddr = Op.getOperand(1);
				3957	MVT::ValueType DstVT = DstAddr.getValueType();
				3958	SDOperand SrcAddr = Op.getOperand(2);
				3959	MVT::ValueType SrcVT = SrcAddr.getValueType();
				3960	SDOperand Value;
				3961	if (BytesLeft >= 4) {
				3962	Value = DAG.getLoad(MVT::i32, Chain,
				3963	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				3964	DAG.getConstant(Offset, SrcVT)),
				3965	NULL, 0);
				3966	Chain = Value.getValue(1);
				3967	Chain = DAG.getStore(Chain, Value,
				3968	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				3969	DAG.getConstant(Offset, DstVT)),
				3970	NULL, 0);
				3971	BytesLeft -= 4;
				3972	Offset += 4;
				3973	}
				3974	if (BytesLeft >= 2) {
				3975	Value = DAG.getLoad(MVT::i16, Chain,
				3976	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				3977	DAG.getConstant(Offset, SrcVT)),
				3978	NULL, 0);
				3979	Chain = Value.getValue(1);
				3980	Chain = DAG.getStore(Chain, Value,
				3981	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				3982	DAG.getConstant(Offset, DstVT)),
				3983	NULL, 0);
				3984	BytesLeft -= 2;
				3985	Offset += 2;
				3986	}
				3987
				3988	if (BytesLeft == 1) {
				3989	Value = DAG.getLoad(MVT::i8, Chain,
				3990	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				3991	DAG.getConstant(Offset, SrcVT)),
				3992	NULL, 0);
				3993	Chain = Value.getValue(1);
				3994	Chain = DAG.getStore(Chain, Value,
				3995	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				3996	DAG.getConstant(Offset, DstVT)),
				3997	NULL, 0);
				3998	}
				3999	}
				4000
				4001	return Chain;
				4002	}
				4003
				4004	SDOperand
				4005	X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
				4006	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				4007	SDOperand TheOp = Op.getOperand(0);
				4008	SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
				4009	if (Subtarget->is64Bit()) {
				4010	SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
				4011	SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
				4012	MVT::i64, Copy1.getValue(2));
				4013	SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
				4014	DAG.getConstant(32, MVT::i8));
				4015	SDOperand Ops[] = {
				4016	DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
				4017	};
				4018
				4019	Tys = DAG.getVTList(MVT::i64, MVT::Other);
				4020	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
				4021	}
				4022
				4023	SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
				4024	SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
				4025	MVT::i32, Copy1.getValue(2));
				4026	SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
				4027	Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
				4028	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
				4029	}
				4030
				4031	SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
				4032	SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
				4033
				4034	if (!Subtarget->is64Bit()) {
				4035	// vastart just stores the address of the VarArgsFrameIndex slot into the
				4036	// memory location argument.
				4037	SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
				4038	return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
				4039	SV->getOffset());
				4040	}
				4041
				4042	// __va_list_tag:
				4043	// gp_offset (0 - 6 * 8)
				4044	// fp_offset (48 - 48 + 8 * 16)
				4045	// overflow_arg_area (point to parameters coming in memory).
				4046	// reg_save_area
				4047	SmallVector<SDOperand, 8> MemOps;
				4048	SDOperand FIN = Op.getOperand(1);
				4049	// Store gp_offset
				4050	SDOperand Store = DAG.getStore(Op.getOperand(0),
				4051	DAG.getConstant(VarArgsGPOffset, MVT::i32),
				4052	FIN, SV->getValue(), SV->getOffset());
				4053	MemOps.push_back(Store);
				4054
				4055	// Store fp_offset
				4056	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4057	DAG.getConstant(4, getPointerTy()));
				4058	Store = DAG.getStore(Op.getOperand(0),
				4059	DAG.getConstant(VarArgsFPOffset, MVT::i32),
				4060	FIN, SV->getValue(), SV->getOffset());
				4061	MemOps.push_back(Store);
				4062
				4063	// Store ptr to overflow_arg_area
				4064	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4065	DAG.getConstant(4, getPointerTy()));
				4066	SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
				4067	Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
				4068	SV->getOffset());
				4069	MemOps.push_back(Store);
				4070
				4071	// Store ptr to reg_save_area.
				4072	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4073	DAG.getConstant(8, getPointerTy()));
				4074	SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
				4075	Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
				4076	SV->getOffset());
				4077	MemOps.push_back(Store);
				4078	return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
				4079	}
				4080
				4081	SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
				4082	// X86-64 va_list is a struct { i32, i32, i8, i8 }.
				4083	SDOperand Chain = Op.getOperand(0);
				4084	SDOperand DstPtr = Op.getOperand(1);
				4085	SDOperand SrcPtr = Op.getOperand(2);
				4086	SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
				4087	SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));
				4088
				4089	SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
				4090	SrcSV->getValue(), SrcSV->getOffset());
				4091	Chain = SrcPtr.getValue(1);
				4092	for (unsigned i = 0; i < 3; ++i) {
				4093	SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
				4094	SrcSV->getValue(), SrcSV->getOffset());
				4095	Chain = Val.getValue(1);
				4096	Chain = DAG.getStore(Chain, Val, DstPtr,
				4097	DstSV->getValue(), DstSV->getOffset());
				4098	if (i == 2)
				4099	break;
				4100	SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
				4101	DAG.getConstant(8, getPointerTy()));
				4102	DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
				4103	DAG.getConstant(8, getPointerTy()));
				4104	}
				4105	return Chain;
				4106	}
				4107
				4108	SDOperand
				4109	X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
				4110	unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
				4111	switch (IntNo) {
				4112	default: return SDOperand(); // Don't custom lower most intrinsics.
				4113	// Comparison intrinsics.
				4114	case Intrinsic::x86_sse_comieq_ss:
				4115	case Intrinsic::x86_sse_comilt_ss:
				4116	case Intrinsic::x86_sse_comile_ss:
				4117	case Intrinsic::x86_sse_comigt_ss:
				4118	case Intrinsic::x86_sse_comige_ss:
				4119	case Intrinsic::x86_sse_comineq_ss:
				4120	case Intrinsic::x86_sse_ucomieq_ss:
				4121	case Intrinsic::x86_sse_ucomilt_ss:
				4122	case Intrinsic::x86_sse_ucomile_ss:
				4123	case Intrinsic::x86_sse_ucomigt_ss:
				4124	case Intrinsic::x86_sse_ucomige_ss:
				4125	case Intrinsic::x86_sse_ucomineq_ss:
				4126	case Intrinsic::x86_sse2_comieq_sd:
				4127	case Intrinsic::x86_sse2_comilt_sd:
				4128	case Intrinsic::x86_sse2_comile_sd:
				4129	case Intrinsic::x86_sse2_comigt_sd:
				4130	case Intrinsic::x86_sse2_comige_sd:
				4131	case Intrinsic::x86_sse2_comineq_sd:
				4132	case Intrinsic::x86_sse2_ucomieq_sd:
				4133	case Intrinsic::x86_sse2_ucomilt_sd:
				4134	case Intrinsic::x86_sse2_ucomile_sd:
				4135	case Intrinsic::x86_sse2_ucomigt_sd:
				4136	case Intrinsic::x86_sse2_ucomige_sd:
				4137	case Intrinsic::x86_sse2_ucomineq_sd: {
				4138	unsigned Opc = 0;
				4139	ISD::CondCode CC = ISD::SETCC_INVALID;
				4140	switch (IntNo) {
				4141	default: break;
				4142	case Intrinsic::x86_sse_comieq_ss:
				4143	case Intrinsic::x86_sse2_comieq_sd:
				4144	Opc = X86ISD::COMI;
				4145	CC = ISD::SETEQ;
				4146	break;
				4147	case Intrinsic::x86_sse_comilt_ss:
				4148	case Intrinsic::x86_sse2_comilt_sd:
				4149	Opc = X86ISD::COMI;
				4150	CC = ISD::SETLT;
				4151	break;
				4152	case Intrinsic::x86_sse_comile_ss:
				4153	case Intrinsic::x86_sse2_comile_sd:
				4154	Opc = X86ISD::COMI;
				4155	CC = ISD::SETLE;
				4156	break;
				4157	case Intrinsic::x86_sse_comigt_ss:
				4158	case Intrinsic::x86_sse2_comigt_sd:
				4159	Opc = X86ISD::COMI;
				4160	CC = ISD::SETGT;
				4161	break;
				4162	case Intrinsic::x86_sse_comige_ss:
				4163	case Intrinsic::x86_sse2_comige_sd:
				4164	Opc = X86ISD::COMI;
				4165	CC = ISD::SETGE;
				4166	break;
				4167	case Intrinsic::x86_sse_comineq_ss:
				4168	case Intrinsic::x86_sse2_comineq_sd:
				4169	Opc = X86ISD::COMI;
				4170	CC = ISD::SETNE;
				4171	break;
				4172	case Intrinsic::x86_sse_ucomieq_ss:
				4173	case Intrinsic::x86_sse2_ucomieq_sd:
				4174	Opc = X86ISD::UCOMI;
				4175	CC = ISD::SETEQ;
				4176	break;
				4177	case Intrinsic::x86_sse_ucomilt_ss:
				4178	case Intrinsic::x86_sse2_ucomilt_sd:
				4179	Opc = X86ISD::UCOMI;
				4180	CC = ISD::SETLT;
				4181	break;
				4182	case Intrinsic::x86_sse_ucomile_ss:
				4183	case Intrinsic::x86_sse2_ucomile_sd:
				4184	Opc = X86ISD::UCOMI;
				4185	CC = ISD::SETLE;
				4186	break;
				4187	case Intrinsic::x86_sse_ucomigt_ss:
				4188	case Intrinsic::x86_sse2_ucomigt_sd:
				4189	Opc = X86ISD::UCOMI;
				4190	CC = ISD::SETGT;
				4191	break;
				4192	case Intrinsic::x86_sse_ucomige_ss:
				4193	case Intrinsic::x86_sse2_ucomige_sd:
				4194	Opc = X86ISD::UCOMI;
				4195	CC = ISD::SETGE;
				4196	break;
				4197	case Intrinsic::x86_sse_ucomineq_ss:
				4198	case Intrinsic::x86_sse2_ucomineq_sd:
				4199	Opc = X86ISD::UCOMI;
				4200	CC = ISD::SETNE;
				4201	break;
				4202	}
				4203
				4204	unsigned X86CC;
				4205	SDOperand LHS = Op.getOperand(1);
				4206	SDOperand RHS = Op.getOperand(2);
				4207	translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
				4208
				4209	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				4210	SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
				4211	SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
				4212	VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
				4213	SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
				4214	SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
				4215	return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
				4216	}
				4217	}
				4218	}
				4219
				4220	SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
				4221	// Depths > 0 not supported yet!
				4222	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				4223	return SDOperand();
				4224
				4225	// Just load the return address
				4226	SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
				4227	return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
				4228	}
				4229
				4230	SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
				4231	// Depths > 0 not supported yet!
				4232	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				4233	return SDOperand();
				4234
				4235	SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
				4236	return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
				4237	DAG.getConstant(4, getPointerTy()));
				4238	}
				4239
				4240	SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
				4241	SelectionDAG &DAG) {
				4242	// Is not yet supported on x86-64
				4243	if (Subtarget->is64Bit())
				4244	return SDOperand();
				4245
				4246	return DAG.getConstant(8, getPointerTy());
				4247	}
				4248
				4249	SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
				4250	{
				4251	assert(!Subtarget->is64Bit() &&
				4252	"Lowering of eh_return builtin is not supported yet on x86-64");
				4253
				4254	MachineFunction &MF = DAG.getMachineFunction();
				4255	SDOperand Chain = Op.getOperand(0);
				4256	SDOperand Offset = Op.getOperand(1);
				4257	SDOperand Handler = Op.getOperand(2);
				4258
				4259	SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF),
				4260	getPointerTy());
				4261
				4262	SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
				4263	DAG.getConstant(-4UL, getPointerTy()));
				4264	StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
				4265	Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
				4266	Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
				4267	MF.addLiveOut(X86::ECX);
				4268
				4269	return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
				4270	Chain, DAG.getRegister(X86::ECX, getPointerTy()));
				4271	}
				4272
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4273	SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
				4274	SelectionDAG &DAG) {
				4275	SDOperand Root = Op.getOperand(0);
				4276	SDOperand Trmp = Op.getOperand(1); // trampoline
				4277	SDOperand FPtr = Op.getOperand(2); // nested function
				4278	SDOperand Nest = Op.getOperand(3); // 'nest' parameter value
				4279
				4280	SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));
				4281
				4282	if (Subtarget->is64Bit()) {
				4283	return SDOperand(); // not yet supported
				4284	} else {
				4285	Function Func = (Function )
				4286	cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
				4287	unsigned CC = Func->getCallingConv();
				4288	unsigned char NestReg;
				4289
				4290	switch (CC) {
				4291	default:
				4292	assert(0 && "Unsupported calling convention");
				4293	case CallingConv::C:
				4294	case CallingConv::Fast:
				4295	case CallingConv::X86_StdCall: {
				4296	// Pass 'nest' parameter in ECX.
				4297	// Must be kept in sync with X86CallingConv.td
				4298	NestReg = N86::ECX;
				4299
				4300	// Check that ECX wasn't needed by an 'inreg' parameter.
				4301	const FunctionType *FTy = Func->getFunctionType();
				4302	const ParamAttrsList *Attrs = FTy->getParamAttrs();
				4303
				4304	if (Attrs && !Func->isVarArg()) {
				4305	unsigned InRegCount = 0;
				4306	unsigned Idx = 1;
				4307
				4308	for (FunctionType::param_iterator I = FTy->param_begin(),
				4309	E = FTy->param_end(); I != E; ++I, ++Idx)
				4310	if (Attrs->paramHasAttr(Idx, ParamAttr::InReg))
				4311	// FIXME: should only count parameters that are lowered to integers.
				4312	InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;
				4313
				4314	if (InRegCount > 2) {
				4315	cerr << "Nest register in use - reduce number of inreg parameters!\n";
				4316	abort();
				4317	}
				4318	}
				4319	break;
				4320	}
				4321	case CallingConv::X86_FastCall:
				4322	// Pass 'nest' parameter in EAX.
				4323	// Must be kept in sync with X86CallingConv.td
				4324	NestReg = N86::EAX;
				4325	break;
				4326	}
				4327
				4328	SDOperand OutChains[4];
				4329	SDOperand Addr, Disp;
				4330
				4331	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
				4332	Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
				4333
				4334	const unsigned char MOV32ri = 0xB8;
				4335	const unsigned char JMP = 0xE9;
				4336
				4337	OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri\|NestReg, MVT::i8),
				4338	Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
				4339
				4340	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
				4341	OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
				4342	TrmpSV->getOffset() + 1, false, 1);
				4343
				4344	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
				4345	OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
				4346	TrmpSV->getValue() + 5, TrmpSV->getOffset());
				4347
				4348	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
				4349	OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
				4350	TrmpSV->getOffset() + 6, false, 1);
				4351
				4352	return DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4);
				4353	}
				4354	}
				4355
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4356	/// LowerOperation - Provide custom lowering hooks for some operations.
				4357	///
				4358	SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
				4359	switch (Op.getOpcode()) {
				4360	default: assert(0 && "Should not custom lower this!");
				4361	case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
				4362	case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
				4363	case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
				4364	case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
				4365	case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
				4366	case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
				4367	case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
				4368	case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
				4369	case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
				4370	case ISD::SHL_PARTS:
				4371	case ISD::SRA_PARTS:
				4372	case ISD::SRL_PARTS: return LowerShift(Op, DAG);
				4373	case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
				4374	case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
				4375	case ISD::FABS: return LowerFABS(Op, DAG);
				4376	case ISD::FNEG: return LowerFNEG(Op, DAG);
				4377	case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
				4378	case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode());
				4379	case ISD::SELECT: return LowerSELECT(Op, DAG);
				4380	case ISD::BRCOND: return LowerBRCOND(Op, DAG);
				4381	case ISD::JumpTable: return LowerJumpTable(Op, DAG);
				4382	case ISD::CALL: return LowerCALL(Op, DAG);
				4383	case ISD::RET: return LowerRET(Op, DAG);
				4384	case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
				4385	case ISD::MEMSET: return LowerMEMSET(Op, DAG);
				4386	case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
				4387	case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
				4388	case ISD::VASTART: return LowerVASTART(Op, DAG);
				4389	case ISD::VACOPY: return LowerVACOPY(Op, DAG);
				4390	case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
				4391	case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
				4392	case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
				4393	case ISD::FRAME_TO_ARGS_OFFSET:
				4394	return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
				4395	case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
				4396	case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4397	case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4398	}
				4399	return SDOperand();
				4400	}
				4401
				4402	const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
				4403	switch (Opcode) {
				4404	default: return NULL;
				4405	case X86ISD::SHLD: return "X86ISD::SHLD";
				4406	case X86ISD::SHRD: return "X86ISD::SHRD";
				4407	case X86ISD::FAND: return "X86ISD::FAND";
				4408	case X86ISD::FOR: return "X86ISD::FOR";
				4409	case X86ISD::FXOR: return "X86ISD::FXOR";
				4410	case X86ISD::FSRL: return "X86ISD::FSRL";
				4411	case X86ISD::FILD: return "X86ISD::FILD";
				4412	case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
				4413	case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
				4414	case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
				4415	case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
				4416	case X86ISD::FLD: return "X86ISD::FLD";
				4417	case X86ISD::FST: return "X86ISD::FST";
				4418	case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
				4419	case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
				4420	case X86ISD::CALL: return "X86ISD::CALL";
				4421	case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
				4422	case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
				4423	case X86ISD::CMP: return "X86ISD::CMP";
				4424	case X86ISD::COMI: return "X86ISD::COMI";
				4425	case X86ISD::UCOMI: return "X86ISD::UCOMI";
				4426	case X86ISD::SETCC: return "X86ISD::SETCC";
				4427	case X86ISD::CMOV: return "X86ISD::CMOV";
				4428	case X86ISD::BRCOND: return "X86ISD::BRCOND";
				4429	case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
				4430	case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
				4431	case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4432	case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
				4433	case X86ISD::Wrapper: return "X86ISD::Wrapper";
				4434	case X86ISD::S2VEC: return "X86ISD::S2VEC";
				4435	case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
				4436	case X86ISD::PINSRW: return "X86ISD::PINSRW";
				4437	case X86ISD::FMAX: return "X86ISD::FMAX";
				4438	case X86ISD::FMIN: return "X86ISD::FMIN";
				4439	case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
				4440	case X86ISD::FRCP: return "X86ISD::FRCP";
				4441	case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
				4442	case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
				4443	case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
				4444	}
				4445	}
				4446
				4447	// isLegalAddressingMode - Return true if the addressing mode represented
				4448	// by AM is legal for this target, for a load/store of the specified type.
				4449	bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
				4450	const Type *Ty) const {
				4451	// X86 supports extremely general addressing modes.
				4452
				4453	// X86 allows a sign-extended 32-bit immediate field as a displacement.
				4454	if (AM.BaseOffs <= -(1LL << 32) \|\| AM.BaseOffs >= (1LL << 32)-1)
				4455	return false;
				4456
				4457	if (AM.BaseGV) {
Evan Cheng	6a1f3f1	2007-08-01 23:46:47 +0000	[diff] [blame]	4458	// We can only fold this if we don't need an extra load.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4459	if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
				4460	return false;
Evan Cheng	6a1f3f1	2007-08-01 23:46:47 +0000	[diff] [blame]	4461
				4462	// X86-64 only supports addr of globals in small code model.
				4463	if (Subtarget->is64Bit()) {
				4464	if (getTargetMachine().getCodeModel() != CodeModel::Small)
				4465	return false;
				4466	// If lower 4G is not available, then we must use rip-relative addressing.
				4467	if (AM.BaseOffs \|\| AM.Scale > 1)
				4468	return false;
				4469	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4470	}
				4471
				4472	switch (AM.Scale) {
				4473	case 0:
				4474	case 1:
				4475	case 2:
				4476	case 4:
				4477	case 8:
				4478	// These scales always work.
				4479	break;
				4480	case 3:
				4481	case 5:
				4482	case 9:
				4483	// These scales are formed with basereg+scalereg. Only accept if there is
				4484	// no basereg yet.
				4485	if (AM.HasBaseReg)
				4486	return false;
				4487	break;
				4488	default: // Other stuff never works.
				4489	return false;
				4490	}
				4491
				4492	return true;
				4493	}
				4494
				4495
				4496	/// isShuffleMaskLegal - Targets can use this to indicate that they only
				4497	/// support some VECTOR_SHUFFLE operations, those with specific masks.
				4498	/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
				4499	/// are assumed to be legal.
				4500	bool
				4501	X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
				4502	// Only do shuffles on 128-bit vector types for now.
				4503	if (MVT::getSizeInBits(VT) == 64) return false;
				4504	return (Mask.Val->getNumOperands() <= 4 \|\|
				4505	isIdentityMask(Mask.Val) \|\|
				4506	isIdentityMask(Mask.Val, true) \|\|
				4507	isSplatMask(Mask.Val) \|\|
				4508	isPSHUFHW_PSHUFLWMask(Mask.Val) \|\|
				4509	X86::isUNPCKLMask(Mask.Val) \|\|
				4510	X86::isUNPCKHMask(Mask.Val) \|\|
				4511	X86::isUNPCKL_v_undef_Mask(Mask.Val) \|\|
				4512	X86::isUNPCKH_v_undef_Mask(Mask.Val));
				4513	}
				4514
				4515	bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
				4516	MVT::ValueType EVT,
				4517	SelectionDAG &DAG) const {
				4518	unsigned NumElts = BVOps.size();
				4519	// Only do shuffles on 128-bit vector types for now.
				4520	if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
				4521	if (NumElts == 2) return true;
				4522	if (NumElts == 4) {
				4523	return (isMOVLMask(&BVOps[0], 4) \|\|
				4524	isCommutedMOVL(&BVOps[0], 4, true) \|\|
				4525	isSHUFPMask(&BVOps[0], 4) \|\|
				4526	isCommutedSHUFP(&BVOps[0], 4));
				4527	}
				4528	return false;
				4529	}
				4530
				4531	//===----------------------------------------------------------------------===//
				4532	// X86 Scheduler Hooks
				4533	//===----------------------------------------------------------------------===//
				4534
				4535	MachineBasicBlock *
				4536	X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
				4537	MachineBasicBlock *BB) {
				4538	const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
				4539	switch (MI->getOpcode()) {
				4540	default: assert(false && "Unexpected instr type to insert");
				4541	case X86::CMOV_FR32:
				4542	case X86::CMOV_FR64:
				4543	case X86::CMOV_V4F32:
				4544	case X86::CMOV_V2F64:
				4545	case X86::CMOV_V2I64: {
				4546	// To "insert" a SELECT_CC instruction, we actually have to insert the
				4547	// diamond control-flow pattern. The incoming instruction knows the
				4548	// destination vreg to set, the condition code register to branch on, the
				4549	// true/false values to select between, and a branch opcode to use.
				4550	const BasicBlock *LLVM_BB = BB->getBasicBlock();
				4551	ilist<MachineBasicBlock>::iterator It = BB;
				4552	++It;
				4553
				4554	// thisMBB:
				4555	// ...
				4556	// TrueVal = ...
				4557	// cmpTY ccX, r1, r2
				4558	// bCC copy1MBB
				4559	// fallthrough --> copy0MBB
				4560	MachineBasicBlock *thisMBB = BB;
				4561	MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
				4562	MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
				4563	unsigned Opc =
				4564	X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
				4565	BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
				4566	MachineFunction *F = BB->getParent();
				4567	F->getBasicBlockList().insert(It, copy0MBB);
				4568	F->getBasicBlockList().insert(It, sinkMBB);
				4569	// Update machine-CFG edges by first adding all successors of the current
				4570	// block to the new block which will contain the Phi node for the select.
				4571	for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
				4572	e = BB->succ_end(); i != e; ++i)
				4573	sinkMBB->addSuccessor(*i);
				4574	// Next, remove all successors of the current block, and add the true
				4575	// and fallthrough blocks as its successors.
				4576	while(!BB->succ_empty())
				4577	BB->removeSuccessor(BB->succ_begin());
				4578	BB->addSuccessor(copy0MBB);
				4579	BB->addSuccessor(sinkMBB);
				4580
				4581	// copy0MBB:
				4582	// %FalseValue = ...
				4583	// # fallthrough to sinkMBB
				4584	BB = copy0MBB;
				4585
				4586	// Update machine-CFG edges
				4587	BB->addSuccessor(sinkMBB);
				4588
				4589	// sinkMBB:
				4590	// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
				4591	// ...
				4592	BB = sinkMBB;
				4593	BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
				4594	.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
				4595	.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
				4596
				4597	delete MI; // The pseudo instruction is gone now.
				4598	return BB;
				4599	}
				4600
				4601	case X86::FP32_TO_INT16_IN_MEM:
				4602	case X86::FP32_TO_INT32_IN_MEM:
				4603	case X86::FP32_TO_INT64_IN_MEM:
				4604	case X86::FP64_TO_INT16_IN_MEM:
				4605	case X86::FP64_TO_INT32_IN_MEM:
Dale Johannesen	6d0e36a	2007-08-07 01:17:37 +0000	[diff] [blame^]	4606	case X86::FP64_TO_INT64_IN_MEM:
				4607	case X86::FP80_TO_INT16_IN_MEM:
				4608	case X86::FP80_TO_INT32_IN_MEM:
				4609	case X86::FP80_TO_INT64_IN_MEM: {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4610	// Change the floating point control register to use "round towards zero"
				4611	// mode when truncating to an integer value.
				4612	MachineFunction *F = BB->getParent();
				4613	int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
				4614	addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
				4615
				4616	// Load the old value of the high byte of the control word...
				4617	unsigned OldCW =
				4618	F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
				4619	addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
				4620
				4621	// Set the high part to be round to zero...
				4622	addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
				4623	.addImm(0xC7F);
				4624
				4625	// Reload the modified control word now...
				4626	addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
				4627
				4628	// Restore the memory image of control word to original value
				4629	addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
				4630	.addReg(OldCW);
				4631
				4632	// Get the X86 opcode to use.
				4633	unsigned Opc;
				4634	switch (MI->getOpcode()) {
				4635	default: assert(0 && "illegal opcode!");
				4636	case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
				4637	case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
				4638	case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
				4639	case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
				4640	case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
				4641	case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
Dale Johannesen	6d0e36a	2007-08-07 01:17:37 +0000	[diff] [blame^]	4642	case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
				4643	case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
				4644	case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4645	}
				4646
				4647	X86AddressMode AM;
				4648	MachineOperand &Op = MI->getOperand(0);
				4649	if (Op.isRegister()) {
				4650	AM.BaseType = X86AddressMode::RegBase;
				4651	AM.Base.Reg = Op.getReg();
				4652	} else {
				4653	AM.BaseType = X86AddressMode::FrameIndexBase;
				4654	AM.Base.FrameIndex = Op.getFrameIndex();
				4655	}
				4656	Op = MI->getOperand(1);
				4657	if (Op.isImmediate())
				4658	AM.Scale = Op.getImm();
				4659	Op = MI->getOperand(2);
				4660	if (Op.isImmediate())
				4661	AM.IndexReg = Op.getImm();
				4662	Op = MI->getOperand(3);
				4663	if (Op.isGlobalAddress()) {
				4664	AM.GV = Op.getGlobal();
				4665	} else {
				4666	AM.Disp = Op.getImm();
				4667	}
				4668	addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
				4669	.addReg(MI->getOperand(4).getReg());
				4670
				4671	// Reload the original control word now.
				4672	addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
				4673
				4674	delete MI; // The pseudo instruction is gone now.
				4675	return BB;
				4676	}
				4677	}
				4678	}
				4679
				4680	//===----------------------------------------------------------------------===//
				4681	// X86 Optimization Hooks
				4682	//===----------------------------------------------------------------------===//
				4683
				4684	void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
				4685	uint64_t Mask,
				4686	uint64_t &KnownZero,
				4687	uint64_t &KnownOne,
				4688	const SelectionDAG &DAG,
				4689	unsigned Depth) const {
				4690	unsigned Opc = Op.getOpcode();
				4691	assert((Opc >= ISD::BUILTIN_OP_END \|\|
				4692	Opc == ISD::INTRINSIC_WO_CHAIN \|\|
				4693	Opc == ISD::INTRINSIC_W_CHAIN \|\|
				4694	Opc == ISD::INTRINSIC_VOID) &&
				4695	"Should use MaskedValueIsZero if you don't know whether Op"
				4696	" is a target node!");
				4697
				4698	KnownZero = KnownOne = 0; // Don't know anything.
				4699	switch (Opc) {
				4700	default: break;
				4701	case X86ISD::SETCC:
				4702	KnownZero \|= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
				4703	break;
				4704	}
				4705	}
				4706
				4707	/// getShuffleScalarElt - Returns the scalar element that will make up the ith
				4708	/// element of the result of the vector shuffle.
				4709	static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
				4710	MVT::ValueType VT = N->getValueType(0);
				4711	SDOperand PermMask = N->getOperand(2);
				4712	unsigned NumElems = PermMask.getNumOperands();
				4713	SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
				4714	i %= NumElems;
				4715	if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
				4716	return (i == 0)
				4717	? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
				4718	} else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
				4719	SDOperand Idx = PermMask.getOperand(i);
				4720	if (Idx.getOpcode() == ISD::UNDEF)
				4721	return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
				4722	return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
				4723	}
				4724	return SDOperand();
				4725	}
				4726
				4727	/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
				4728	/// node is a GlobalAddress + an offset.
				4729	static bool isGAPlusOffset(SDNode N, GlobalValue &GA, int64_t &Offset) {
				4730	unsigned Opc = N->getOpcode();
				4731	if (Opc == X86ISD::Wrapper) {
				4732	if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
				4733	GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
				4734	return true;
				4735	}
				4736	} else if (Opc == ISD::ADD) {
				4737	SDOperand N1 = N->getOperand(0);
				4738	SDOperand N2 = N->getOperand(1);
				4739	if (isGAPlusOffset(N1.Val, GA, Offset)) {
				4740	ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
				4741	if (V) {
				4742	Offset += V->getSignExtended();
				4743	return true;
				4744	}
				4745	} else if (isGAPlusOffset(N2.Val, GA, Offset)) {
				4746	ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
				4747	if (V) {
				4748	Offset += V->getSignExtended();
				4749	return true;
				4750	}
				4751	}
				4752	}
				4753	return false;
				4754	}
				4755
				4756	/// isConsecutiveLoad - Returns true if N is loading from an address of Base
				4757	/// + Dist * Size.
				4758	static bool isConsecutiveLoad(SDNode N, SDNode Base, int Dist, int Size,
				4759	MachineFrameInfo *MFI) {
				4760	if (N->getOperand(0).Val != Base->getOperand(0).Val)
				4761	return false;
				4762
				4763	SDOperand Loc = N->getOperand(1);
				4764	SDOperand BaseLoc = Base->getOperand(1);
				4765	if (Loc.getOpcode() == ISD::FrameIndex) {
				4766	if (BaseLoc.getOpcode() != ISD::FrameIndex)
				4767	return false;
Dan Gohman	53491e9	2007-07-23 20:24:29 +0000	[diff] [blame]	4768	int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
				4769	int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4770	int FS = MFI->getObjectSize(FI);
				4771	int BFS = MFI->getObjectSize(BFI);
				4772	if (FS != BFS \|\| FS != Size) return false;
				4773	return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
				4774	} else {
				4775	GlobalValue *GV1 = NULL;
				4776	GlobalValue *GV2 = NULL;
				4777	int64_t Offset1 = 0;
				4778	int64_t Offset2 = 0;
				4779	bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
				4780	bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
				4781	if (isGA1 && isGA2 && GV1 == GV2)
				4782	return Offset1 == (Offset2 + Dist*Size);
				4783	}
				4784
				4785	return false;
				4786	}
				4787
				4788	static bool isBaseAlignment16(SDNode Base, MachineFrameInfo MFI,
				4789	const X86Subtarget *Subtarget) {
				4790	GlobalValue *GV;
				4791	int64_t Offset;
				4792	if (isGAPlusOffset(Base, GV, Offset))
				4793	return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
				4794	else {
				4795	assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
Dan Gohman	53491e9	2007-07-23 20:24:29 +0000	[diff] [blame]	4796	int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4797	if (BFI < 0)
				4798	// Fixed objects do not specify alignment, however the offsets are known.
				4799	return ((Subtarget->getStackAlignment() % 16) == 0 &&
				4800	(MFI->getObjectOffset(BFI) % 16) == 0);
				4801	else
				4802	return MFI->getObjectAlignment(BFI) >= 16;
				4803	}
				4804	return false;
				4805	}
				4806
				4807
				4808	/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
				4809	/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
				4810	/// if the load addresses are consecutive, non-overlapping, and in the right
				4811	/// order.
				4812	static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
				4813	const X86Subtarget *Subtarget) {
				4814	MachineFunction &MF = DAG.getMachineFunction();
				4815	MachineFrameInfo *MFI = MF.getFrameInfo();
				4816	MVT::ValueType VT = N->getValueType(0);
				4817	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				4818	SDOperand PermMask = N->getOperand(2);
				4819	int NumElems = (int)PermMask.getNumOperands();
				4820	SDNode *Base = NULL;
				4821	for (int i = 0; i < NumElems; ++i) {
				4822	SDOperand Idx = PermMask.getOperand(i);
				4823	if (Idx.getOpcode() == ISD::UNDEF) {
				4824	if (!Base) return SDOperand();
				4825	} else {
				4826	SDOperand Arg =
				4827	getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
				4828	if (!Arg.Val \|\| !ISD::isNON_EXTLoad(Arg.Val))
				4829	return SDOperand();
				4830	if (!Base)
				4831	Base = Arg.Val;
				4832	else if (!isConsecutiveLoad(Arg.Val, Base,
				4833	i, MVT::getSizeInBits(EVT)/8,MFI))
				4834	return SDOperand();
				4835	}
				4836	}
				4837
				4838	bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4839	LoadSDNode *LD = cast<LoadSDNode>(Base);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4840	if (isAlign16) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4841	return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4842	LD->getSrcValueOffset(), LD->isVolatile());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4843	} else {
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4844	return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
				4845	LD->getSrcValueOffset(), LD->isVolatile(),
				4846	LD->getAlignment());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4847	}
				4848	}
				4849
				4850	/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
				4851	static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
				4852	const X86Subtarget *Subtarget) {
				4853	SDOperand Cond = N->getOperand(0);
				4854
				4855	// If we have SSE[12] support, try to form min/max nodes.
				4856	if (Subtarget->hasSSE2() &&
				4857	(N->getValueType(0) == MVT::f32 \|\| N->getValueType(0) == MVT::f64)) {
				4858	if (Cond.getOpcode() == ISD::SETCC) {
				4859	// Get the LHS/RHS of the select.
				4860	SDOperand LHS = N->getOperand(1);
				4861	SDOperand RHS = N->getOperand(2);
				4862	ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
				4863
				4864	unsigned Opcode = 0;
				4865	if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
				4866	switch (CC) {
				4867	default: break;
				4868	case ISD::SETOLE: // (X <= Y) ? X : Y -> min
				4869	case ISD::SETULE:
				4870	case ISD::SETLE:
				4871	if (!UnsafeFPMath) break;
				4872	// FALL THROUGH.
				4873	case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
				4874	case ISD::SETLT:
				4875	Opcode = X86ISD::FMIN;
				4876	break;
				4877
				4878	case ISD::SETOGT: // (X > Y) ? X : Y -> max
				4879	case ISD::SETUGT:
				4880	case ISD::SETGT:
				4881	if (!UnsafeFPMath) break;
				4882	// FALL THROUGH.
				4883	case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
				4884	case ISD::SETGE:
				4885	Opcode = X86ISD::FMAX;
				4886	break;
				4887	}
				4888	} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
				4889	switch (CC) {
				4890	default: break;
				4891	case ISD::SETOGT: // (X > Y) ? Y : X -> min
				4892	case ISD::SETUGT:
				4893	case ISD::SETGT:
				4894	if (!UnsafeFPMath) break;
				4895	// FALL THROUGH.
				4896	case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
				4897	case ISD::SETGE:
				4898	Opcode = X86ISD::FMIN;
				4899	break;
				4900
				4901	case ISD::SETOLE: // (X <= Y) ? Y : X -> max
				4902	case ISD::SETULE:
				4903	case ISD::SETLE:
				4904	if (!UnsafeFPMath) break;
				4905	// FALL THROUGH.
				4906	case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
				4907	case ISD::SETLT:
				4908	Opcode = X86ISD::FMAX;
				4909	break;
				4910	}
				4911	}
				4912
				4913	if (Opcode)
				4914	return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
				4915	}
				4916
				4917	}
				4918
				4919	return SDOperand();
				4920	}
				4921
				4922
				4923	SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
				4924	DAGCombinerInfo &DCI) const {
				4925	SelectionDAG &DAG = DCI.DAG;
				4926	switch (N->getOpcode()) {
				4927	default: break;
				4928	case ISD::VECTOR_SHUFFLE:
				4929	return PerformShuffleCombine(N, DAG, Subtarget);
				4930	case ISD::SELECT:
				4931	return PerformSELECTCombine(N, DAG, Subtarget);
				4932	}
				4933
				4934	return SDOperand();
				4935	}
				4936
				4937	//===----------------------------------------------------------------------===//
				4938	// X86 Inline Assembly Support
				4939	//===----------------------------------------------------------------------===//
				4940
				4941	/// getConstraintType - Given a constraint letter, return the type of
				4942	/// constraint it is for this target.
				4943	X86TargetLowering::ConstraintType
				4944	X86TargetLowering::getConstraintType(const std::string &Constraint) const {
				4945	if (Constraint.size() == 1) {
				4946	switch (Constraint[0]) {
				4947	case 'A':
				4948	case 'r':
				4949	case 'R':
				4950	case 'l':
				4951	case 'q':
				4952	case 'Q':
				4953	case 'x':
				4954	case 'Y':
				4955	return C_RegisterClass;
				4956	default:
				4957	break;
				4958	}
				4959	}
				4960	return TargetLowering::getConstraintType(Constraint);
				4961	}
				4962
				4963	/// isOperandValidForConstraint - Return the specified operand (possibly
				4964	/// modified) if the specified SDOperand is valid for the specified target
				4965	/// constraint letter, otherwise return null.
				4966	SDOperand X86TargetLowering::
				4967	isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
				4968	switch (Constraint) {
				4969	default: break;
				4970	case 'I':
				4971	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
				4972	if (C->getValue() <= 31)
				4973	return DAG.getTargetConstant(C->getValue(), Op.getValueType());
				4974	}
				4975	return SDOperand(0,0);
				4976	case 'N':
				4977	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
				4978	if (C->getValue() <= 255)
				4979	return DAG.getTargetConstant(C->getValue(), Op.getValueType());
				4980	}
				4981	return SDOperand(0,0);
				4982	case 'i': {
				4983	// Literal immediates are always ok.
				4984	if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op))
				4985	return DAG.getTargetConstant(CST->getValue(), Op.getValueType());
				4986
				4987	// If we are in non-pic codegen mode, we allow the address of a global (with
				4988	// an optional displacement) to be used with 'i'.
				4989	GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
				4990	int64_t Offset = 0;
				4991
				4992	// Match either (GA) or (GA+C)
				4993	if (GA) {
				4994	Offset = GA->getOffset();
				4995	} else if (Op.getOpcode() == ISD::ADD) {
				4996	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
				4997	GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
				4998	if (C && GA) {
				4999	Offset = GA->getOffset()+C->getValue();
				5000	} else {
				5001	C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
				5002	GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
				5003	if (C && GA)
				5004	Offset = GA->getOffset()+C->getValue();
				5005	else
				5006	C = 0, GA = 0;
				5007	}
				5008	}
				5009
				5010	if (GA) {
				5011	// If addressing this global requires a load (e.g. in PIC mode), we can't
				5012	// match.
				5013	if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
				5014	false))
				5015	return SDOperand(0, 0);
				5016
				5017	Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
				5018	Offset);
				5019	return Op;
				5020	}
				5021
				5022	// Otherwise, not valid for this mode.
				5023	return SDOperand(0, 0);
				5024	}
				5025	}
				5026	return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
				5027	}
				5028
				5029	std::vector<unsigned> X86TargetLowering::
				5030	getRegClassForInlineAsmConstraint(const std::string &Constraint,
				5031	MVT::ValueType VT) const {
				5032	if (Constraint.size() == 1) {
				5033	// FIXME: not handling fp-stack yet!
				5034	switch (Constraint[0]) { // GCC X86 Constraint Letters
				5035	default: break; // Unknown constraint letter
				5036	case 'A': // EAX/EDX
				5037	if (VT == MVT::i32 \|\| VT == MVT::i64)
				5038	return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
				5039	break;
				5040	case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
				5041	case 'Q': // Q_REGS
				5042	if (VT == MVT::i32)
				5043	return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
				5044	else if (VT == MVT::i16)
				5045	return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
				5046	else if (VT == MVT::i8)
				5047	return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0);
				5048	break;
				5049	}
				5050	}
				5051
				5052	return std::vector<unsigned>();
				5053	}
				5054
				5055	std::pair<unsigned, const TargetRegisterClass*>
				5056	X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
				5057	MVT::ValueType VT) const {
				5058	// First, see if this is a constraint that directly corresponds to an LLVM
				5059	// register class.
				5060	if (Constraint.size() == 1) {
				5061	// GCC Constraint Letters
				5062	switch (Constraint[0]) {
				5063	default: break;
				5064	case 'r': // GENERAL_REGS
				5065	case 'R': // LEGACY_REGS
				5066	case 'l': // INDEX_REGS
				5067	if (VT == MVT::i64 && Subtarget->is64Bit())
				5068	return std::make_pair(0U, X86::GR64RegisterClass);
				5069	if (VT == MVT::i32)
				5070	return std::make_pair(0U, X86::GR32RegisterClass);
				5071	else if (VT == MVT::i16)
				5072	return std::make_pair(0U, X86::GR16RegisterClass);
				5073	else if (VT == MVT::i8)
				5074	return std::make_pair(0U, X86::GR8RegisterClass);
				5075	break;
				5076	case 'y': // MMX_REGS if MMX allowed.
				5077	if (!Subtarget->hasMMX()) break;
				5078	return std::make_pair(0U, X86::VR64RegisterClass);
				5079	break;
				5080	case 'Y': // SSE_REGS if SSE2 allowed
				5081	if (!Subtarget->hasSSE2()) break;
				5082	// FALL THROUGH.
				5083	case 'x': // SSE_REGS if SSE1 allowed
				5084	if (!Subtarget->hasSSE1()) break;
				5085
				5086	switch (VT) {
				5087	default: break;
				5088	// Scalar SSE types.
				5089	case MVT::f32:
				5090	case MVT::i32:
				5091	return std::make_pair(0U, X86::FR32RegisterClass);
				5092	case MVT::f64:
				5093	case MVT::i64:
				5094	return std::make_pair(0U, X86::FR64RegisterClass);
				5095	// Vector types.
				5096	case MVT::v16i8:
				5097	case MVT::v8i16:
				5098	case MVT::v4i32:
				5099	case MVT::v2i64:
				5100	case MVT::v4f32:
				5101	case MVT::v2f64:
				5102	return std::make_pair(0U, X86::VR128RegisterClass);
				5103	}
				5104	break;
				5105	}
				5106	}
				5107
				5108	// Use the default implementation in TargetLowering to convert the register
				5109	// constraint into a member of a register class.
				5110	std::pair<unsigned, const TargetRegisterClass*> Res;
				5111	Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
				5112
				5113	// Not found as a standard register?
				5114	if (Res.second == 0) {
				5115	// GCC calls "st(0)" just plain "st".
				5116	if (StringsEqualNoCase("{st}", Constraint)) {
				5117	Res.first = X86::ST0;
				5118	Res.second = X86::RSTRegisterClass;
				5119	}
				5120
				5121	return Res;
				5122	}
				5123
				5124	// Otherwise, check to see if this is a register class of the wrong value
				5125	// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
				5126	// turn into {ax},{dx}.
				5127	if (Res.second->hasType(VT))
				5128	return Res; // Correct type already, nothing to do.
				5129
				5130	// All of the single-register GCC register classes map their values onto
				5131	// 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
				5132	// really want an 8-bit or 32-bit register, map to the appropriate register
				5133	// class and return the appropriate register.
				5134	if (Res.second != X86::GR16RegisterClass)
				5135	return Res;
				5136
				5137	if (VT == MVT::i8) {
				5138	unsigned DestReg = 0;
				5139	switch (Res.first) {
				5140	default: break;
				5141	case X86::AX: DestReg = X86::AL; break;
				5142	case X86::DX: DestReg = X86::DL; break;
				5143	case X86::CX: DestReg = X86::CL; break;
				5144	case X86::BX: DestReg = X86::BL; break;
				5145	}
				5146	if (DestReg) {
				5147	Res.first = DestReg;
				5148	Res.second = Res.second = X86::GR8RegisterClass;
				5149	}
				5150	} else if (VT == MVT::i32) {
				5151	unsigned DestReg = 0;
				5152	switch (Res.first) {
				5153	default: break;
				5154	case X86::AX: DestReg = X86::EAX; break;
				5155	case X86::DX: DestReg = X86::EDX; break;
				5156	case X86::CX: DestReg = X86::ECX; break;
				5157	case X86::BX: DestReg = X86::EBX; break;
				5158	case X86::SI: DestReg = X86::ESI; break;
				5159	case X86::DI: DestReg = X86::EDI; break;
				5160	case X86::BP: DestReg = X86::EBP; break;
				5161	case X86::SP: DestReg = X86::ESP; break;
				5162	}
				5163	if (DestReg) {
				5164	Res.first = DestReg;
				5165	Res.second = Res.second = X86::GR32RegisterClass;
				5166	}
				5167	} else if (VT == MVT::i64) {
				5168	unsigned DestReg = 0;
				5169	switch (Res.first) {
				5170	default: break;
				5171	case X86::AX: DestReg = X86::RAX; break;
				5172	case X86::DX: DestReg = X86::RDX; break;
				5173	case X86::CX: DestReg = X86::RCX; break;
				5174	case X86::BX: DestReg = X86::RBX; break;
				5175	case X86::SI: DestReg = X86::RSI; break;
				5176	case X86::DI: DestReg = X86::RDI; break;
				5177	case X86::BP: DestReg = X86::RBP; break;
				5178	case X86::SP: DestReg = X86::RSP; break;
				5179	}
				5180	if (DestReg) {
				5181	Res.first = DestReg;
				5182	Res.second = Res.second = X86::GR64RegisterClass;
				5183	}
				5184	}
				5185
				5186	return Res;
				5187	}