Blame - llvm/lib/Target/X86/X86InstrCompiler.td - toolchain/llvm-project

blob: 3feae6d31fddf43348296dfbd56e930022d58928 [file] [log] [blame]

Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1	//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -- tablegen --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file describes the various pseudo instructions used by the compiler,
				11	// as well as Pat patterns used during instruction selection.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	//===----------------------------------------------------------------------===//
				16	// Pattern Matching Support
				17
				18	def GetLo32XForm : SDNodeXForm<imm, [{
				19	// Transformation function: get the low 32 bits.
				20	return getI32Imm((unsigned)N->getZExtValue());
				21	}]>;
				22
				23	def GetLo8XForm : SDNodeXForm<imm, [{
				24	// Transformation function: get the low 8 bits.
				25	return getI8Imm((uint8_t)N->getZExtValue());
				26	}]>;
				27
				28
				29	//===----------------------------------------------------------------------===//
				30	// Random Pseudo Instructions.
				31
				32	// PIC base construction. This expands to code that looks like this:
				33	// call $next_inst
				34	// popl %destreg
				35	let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in
				36	def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
				37	"", []>;
				38
				39
				40	// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
				41	// a stack adjustment and the codegen must know that they may modify the stack
				42	// pointer before prolog-epilog rewriting occurs.
				43	// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
				44	// sub / add which can clobber EFLAGS.
				45	let Defs = [ESP, EFLAGS], Uses = [ESP] in {
					// Call-frame pseudos for the NotLP64 case. ADJCALLSTACKDOWN32 has an empty
					// pattern list; it is selected by the Pat that follows this let block.
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame^]	46	def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	47	"#ADJCALLSTACKDOWN",
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame^]	48	[]>,
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	49	Requires<[NotLP64]>;
				50	def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
				51	"#ADJCALLSTACKUP",
				52	[(X86callseq_end timm:$amt1, timm:$amt2)]>,
				53	Requires<[NotLP64]>;
				54	}
					// The X86callseq_start node matched here carries a single immediate, so the
					// pseudo's second operand ($amt2) is always emitted as the constant 0.
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame^]	55	def : Pat<(X86callseq_start timm:$amt1),
				56	(ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>;
				57
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	58
				59	// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
				60	// a stack adjustment and the codegen must know that they may modify the stack
				61	// pointer before prolog-epilog rewriting occurs.
				62	// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
				63	// sub / add which can clobber EFLAGS.
				64	let Defs = [RSP, EFLAGS], Uses = [RSP] in {
					// Call-frame pseudos for the IsLP64 case. ADJCALLSTACKDOWN64 has an empty
					// pattern list; it is selected by the Pat that follows this let block.
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame^]	65	def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	66	"#ADJCALLSTACKDOWN",
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame^]	67	[]>,
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	68	Requires<[IsLP64]>;
				69	def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
				70	"#ADJCALLSTACKUP",
				71	[(X86callseq_end timm:$amt1, timm:$amt2)]>,
				72	Requires<[IsLP64]>;
				73	}
					// The X86callseq_start node matched here carries a single immediate, so the
					// pseudo's second operand ($amt2) is always emitted as the constant 0.
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame^]	74	def : Pat<(X86callseq_start timm:$amt1),
				75	(ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	76
				77
				78	// x86-64 va_start lowering magic.
				79	let usesCustomInserter = 1, Defs = [EFLAGS] in {
				80	def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
				81	(outs),
				82	(ins GR8:$al,
				83	i64imm:$regsavefi, i64imm:$offset,
				84	variable_ops),
				85	"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
				86	[(X86vastart_save_xmm_regs GR8:$al,
				87	imm:$regsavefi,
				88	imm:$offset),
				89	(implicit EFLAGS)]>;
				90
				91	// The VAARG_64 pseudo-instruction takes the address of the va_list,
				92	// and places the address of the next argument into a register.
				93	let Defs = [EFLAGS] in
				94	def VAARG_64 : I<0, Pseudo,
				95	(outs GR64:$dst),
				96	(ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
				97	"#VAARG_64 $dst, $ap, $size, $mode, $align",
				98	[(set GR64:$dst,
				99	(X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
				100	(implicit EFLAGS)]>;
				101
				102	// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
				103	// targets. These calls are needed to probe the stack when allocating more than
				104	// 4k bytes in one go. Touching the stack at 4K increments is necessary to
				105	// ensure that the guard pages used by the OS virtual memory manager are
				106	// allocated in correct sequence.
				107	// The main reason for having a separate instruction is the extra unmodelled
				108	// effects (compared to ordinary calls), such as the stack pointer change.
				109
				110	let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
				111	def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
				112	"# dynamic stack allocation",
				113	[(X86WinAlloca)]>;
				114
				115	// When using segmented stacks these are lowered into instructions which first
				116	// check if the current stacklet has enough free memory. If it does, memory is
				117	// allocated by bumping the stack pointer. Otherwise memory is allocated from
				118	// the heap.
				119
				120	let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
				121	def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
				122	"# variable sized alloca for segmented stacks",
				123	[(set GR32:$dst,
				124	(X86SegAlloca GR32:$size))]>,
				125	Requires<[NotLP64]>;
				126
				127	let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
				128	def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
				129	"# variable sized alloca for segmented stacks",
				130	[(set GR64:$dst,
				131	(X86SegAlloca GR64:$size))]>,
				132	Requires<[In64BitMode]>;
				133	}
				134
				135	// The MSVC runtime contains an _ftol2 routine for converting floating-point
				136	// to integer values. It has a strange calling convention: the input is
				137	// popped from the x87 stack, and the return value is given in EDX:EAX. ECX is
				138	// used as a temporary register. No other registers (aside from flags) are
				139	// touched.
				140	// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
				141	// variant is unnecessary.
				142
				143	let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in {
				144	def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
				145	"# win32 fptoui",
				146	[(X86WinFTOL RFP32:$src)]>,
				147	Requires<[Not64BitMode]>;
				148
				149	def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
				150	"# win32 fptoui",
				151	[(X86WinFTOL RFP64:$src)]>,
				152	Requires<[Not64BitMode]>;
				153	}
				154
				155	//===----------------------------------------------------------------------===//
				156	// EH Pseudo Instructions
				157	//
				158	let SchedRW = [WriteSystem] in {
				159	let isTerminator = 1, isReturn = 1, isBarrier = 1,
				160	hasCtrlDep = 1, isCodeGenOnly = 1 in {
				161	def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
				162	"ret\t#eh_return, addr: $addr",
				163	[(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
				164
				165	}
				166
				167	let isTerminator = 1, isReturn = 1, isBarrier = 1,
				168	hasCtrlDep = 1, isCodeGenOnly = 1 in {
				169	def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
				170	"ret\t#eh_return, addr: $addr",
				171	[(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
				172
				173	}
				174
				175	let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
				176	usesCustomInserter = 1 in {
				177	def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
				178	"#EH_SJLJ_SETJMP32",
				179	[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
				180	Requires<[Not64BitMode]>;
				181	def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
				182	"#EH_SJLJ_SETJMP64",
				183	[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
				184	Requires<[In64BitMode]>;
				185	let isTerminator = 1 in {
				186	def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
				187	"#EH_SJLJ_LONGJMP32",
				188	[(X86eh_sjlj_longjmp addr:$buf)]>,
				189	Requires<[Not64BitMode]>;
				190	def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
				191	"#EH_SJLJ_LONGJMP64",
				192	[(X86eh_sjlj_longjmp addr:$buf)]>,
				193	Requires<[In64BitMode]>;
				194	}
				195	}
				196	} // SchedRW
				197
				198	let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
					// Codegen-only pseudo whose single operand is a branch target. It is flagged
					// as a branch and a terminator, and no selection pattern is attached to it.
					// NOTE(review): presumably created directly by the SjLj setjmp lowering code
					// rather than by pattern matching -- confirm against the custom inserter.
				199	def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
				200	"#EH_SjLj_Setup\t$dst", []>;
				201	}
				202
				203	//===----------------------------------------------------------------------===//
				204	// Pseudo instructions used by unwind info.
				205	//
				206	let isPseudo = 1 in {
				207	def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
				208	"#SEH_PushReg $reg", []>;
				209	def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
				210	"#SEH_SaveReg $reg, $dst", []>;
				211	def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
				212	"#SEH_SaveXMM $reg, $dst", []>;
				213	def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
				214	"#SEH_StackAlloc $size", []>;
				215	def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
				216	"#SEH_SetFrame $reg, $offset", []>;
				217	def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
				218	"#SEH_PushFrame $mode", []>;
				219	def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
				220	"#SEH_EndPrologue", []>;
				221	def SEH_Epilogue : I<0, Pseudo, (outs), (ins),
				222	"#SEH_Epilogue", []>;
				223	}
				224
				225	//===----------------------------------------------------------------------===//
				226	// Pseudo instructions used by segmented stacks.
				227	//
				228
				229	// This is lowered into a RET instruction by MCInstLower. We need
				230	// this so that we don't have to have a MachineBasicBlock which ends
				231	// with a RET and also has successors.
				232	let isPseudo = 1 in {
				233	def MORESTACK_RET: I<0, Pseudo, (outs), (ins),
				234	"", []>;
				235
				236	// This instruction is lowered to a RET followed by a MOV. The two
				237	// instructions are not generated on a higher level since then the
				238	// verifier sees a MachineBasicBlock ending with a non-terminator.
				239	def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
				240	"", []>;
				241	}
				242
				243	//===----------------------------------------------------------------------===//
				244	// Alias Instructions
				245	//===----------------------------------------------------------------------===//
				246
				247	// Alias instruction mapping movr0 to xor.
				248	// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
				249	let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
				250	isPseudo = 1 in
				251	def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
				252	[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
				253
				254	// Other widths can also make use of the 32-bit xor, which may have a smaller
				255	// encoding and avoid partial register updates.
				256	def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
				257	def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
				258	def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
				259	let AddedComplexity = 20;
				260	}
				261
				262	// Materialize an i64 constant whose top 32 bits are zero. This could theoretically
				263	// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
				264	// that would make it more difficult to rematerialize.
				265	let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
				266	isCodeGenOnly = 1, hasSideEffects = 0 in
				267	def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
				268	"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;
				269
				270	// This 64-bit pseudo-move can be used for both a 64-bit constant that is
				271	// actually the zero-extension of a 32-bit constant, and for labels in the
				272	// x86-64 small code model.
				273	def mov64imm32 : ComplexPattern<i64, 1, "SelectMOV64Imm32", [imm, X86Wrapper]>;
				274
				275	let AddedComplexity = 1 in
				276	def : Pat<(i64 mov64imm32:$src),
				277	(SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>;
				278
				279	// Use sbb to materialize carry bit.
				280	let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
				281	// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
				282	// However, Pat<> can't replicate the destination reg into the inputs of the
				283	// result.
				284	def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",
				285	[(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				286	def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
				287	[(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				288	def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
				289	[(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				290	def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
				291	[(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				292	} // Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU]
				293
				294
				295	def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				296	(SETB_C16r)>;
				297	def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				298	(SETB_C32r)>;
				299	def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				300	(SETB_C64r)>;
				301
				302	def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				303	(SETB_C16r)>;
				304	def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				305	(SETB_C32r)>;
				306	def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				307	(SETB_C64r)>;
				308
				309	// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and
				310	// will be eliminated and that the sbb can be extended up to a wider type. When
				311	// this happens, it is great. However, if we are left with an 8-bit sbb and an
				312	// and, we might as well just match it as a setb.
				313	def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
				314	(SETBr)>;
				315
				316	// (add OP, SETB) -> (adc OP, 0)
				317	def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
				318	(ADC8ri GR8:$op, 0)>;
				319	def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
				320	(ADC32ri8 GR32:$op, 0)>;
				321	def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
				322	(ADC64ri8 GR64:$op, 0)>;
				323
				324	// (sub OP, SETB) -> (sbb OP, 0)
				325	def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				326	(SBB8ri GR8:$op, 0)>;
				327	def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				328	(SBB32ri8 GR32:$op, 0)>;
				329	def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				330	(SBB64ri8 GR64:$op, 0)>;
				331
				332	// (sub OP, SETCC_CARRY) -> (adc OP, 0)
				333	def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
				334	(ADC8ri GR8:$op, 0)>;
				335	def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
				336	(ADC32ri8 GR32:$op, 0)>;
				337	def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
				338	(ADC64ri8 GR64:$op, 0)>;
				339
				340	//===----------------------------------------------------------------------===//
				341	// String Pseudo Instructions
				342	//
				343	let SchedRW = [WriteMicrocoded] in {
				344	let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
				345	def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb\|rep movsb}",
				346	[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
				347	Requires<[Not64BitMode]>;
				348	def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw\|rep movsw}",
				349	[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,
				350	Requires<[Not64BitMode]>;
				351	def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl\|rep movsd}",
				352	[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,
				353	Requires<[Not64BitMode]>;
				354	}
				355
				356	let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
				357	def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb\|rep movsb}",
				358	[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
				359	Requires<[In64BitMode]>;
				360	def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw\|rep movsw}",
				361	[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,
				362	Requires<[In64BitMode]>;
				363	def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl\|rep movsd}",
				364	[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,
				365	Requires<[In64BitMode]>;
				366	def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq\|rep movsq}",
				367	[(X86rep_movs i64)], IIC_REP_MOVS>, REP,
				368	Requires<[In64BitMode]>;
				369	}
				370
				371	// FIXME: Should use "(X86rep_stos AL)" as the pattern.
				372	let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
				373	let Uses = [AL,ECX,EDI] in
				374	def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb\|rep stosb}",
				375	[(X86rep_stos i8)], IIC_REP_STOS>, REP,
				376	Requires<[Not64BitMode]>;
				377	let Uses = [AX,ECX,EDI] in
				378	def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw\|rep stosw}",
				379	[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,
				380	Requires<[Not64BitMode]>;
				381	let Uses = [EAX,ECX,EDI] in
				382	def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl\|rep stosd}",
				383	[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
				384	Requires<[Not64BitMode]>;
				385	}
				386
				387	let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
				388	let Uses = [AL,RCX,RDI] in
				389	def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb\|rep stosb}",
				390	[(X86rep_stos i8)], IIC_REP_STOS>, REP,
				391	Requires<[In64BitMode]>;
				392	let Uses = [AX,RCX,RDI] in
				393	def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw\|rep stosw}",
				394	[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,
				395	Requires<[In64BitMode]>;
				396	let Uses = [RAX,RCX,RDI] in
				397	def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl\|rep stosd}",
				398	[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
				399	Requires<[In64BitMode]>;
				400
				401	let Uses = [RAX,RCX,RDI] in
				402	def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq\|rep stosq}",
				403	[(X86rep_stos i64)], IIC_REP_STOS>, REP,
				404	Requires<[In64BitMode]>;
				405	}
				406	} // SchedRW
				407
				408	//===----------------------------------------------------------------------===//
				409	// Thread Local Storage Instructions
				410	//
				411
				412	// ELF TLS Support
				413	// All calls clobber the non-callee saved registers. ESP is marked as
				414	// a use to prevent stack-pointer assignments that appear immediately
				415	// before calls from potentially appearing dead.
				416	let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
				417	ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
				418	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
				419	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
				420	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
				421	Uses = [ESP] in {
				422	def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				423	"# TLS_addr32",
				424	[(X86tlsaddr tls32addr:$sym)]>,
				425	Requires<[Not64BitMode]>;
				426	def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				427	"# TLS_base_addr32",
				428	[(X86tlsbaseaddr tls32baseaddr:$sym)]>,
				429	Requires<[Not64BitMode]>;
				430	}
				431
				432	// All calls clobber the non-callee saved registers. RSP is marked as
				433	// a use to prevent stack-pointer assignments that appear immediately
				434	// before calls from potentially appearing dead.
				435	let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
				436	FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
				437	ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
				438	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
				439	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
				440	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
				441	Uses = [RSP] in {
				442	def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				443	"# TLS_addr64",
				444	[(X86tlsaddr tls64addr:$sym)]>,
				445	Requires<[In64BitMode]>;
				446	def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				447	"# TLS_base_addr64",
				448	[(X86tlsbaseaddr tls64baseaddr:$sym)]>,
				449	Requires<[In64BitMode]>;
				450	}
				451
				452	// Darwin TLS Support
				453	// For i386, the address of the thunk is passed on the stack, on return the
				454	// address of the variable is in %eax. %ecx is trashed during the function
				455	// call. All other registers are preserved.
				456	let Defs = [EAX, ECX, EFLAGS],
				457	Uses = [ESP],
				458	usesCustomInserter = 1 in
				459	def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				460	"# TLSCall_32",
				461	[(X86TLSCall addr:$sym)]>,
				462	Requires<[Not64BitMode]>;
				463
				464	// For x86_64, the address of the thunk is passed in %rdi, on return
				465	// the address of the variable is in %rax. All other registers are preserved.
				466	let Defs = [RAX, EFLAGS],
				467	Uses = [RSP, RDI],
				468	usesCustomInserter = 1 in
				469	def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				470	"# TLSCall_64",
				471	[(X86TLSCall addr:$sym)]>,
				472	Requires<[In64BitMode]>;
				473
				474
				475	//===----------------------------------------------------------------------===//
				476	// Conditional Move Pseudo Instructions
				477
				478	// X86 doesn't have 8-bit conditional moves. Use a customInserter to
				479	// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
				480	// however that requires promoting the operands, and can induce additional
				481	// i8 register pressure.
				482	let usesCustomInserter = 1, Uses = [EFLAGS] in {
				483	def CMOV_GR8 : I<0, Pseudo,
				484	(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
				485	"#CMOV_GR8 PSEUDO!",
				486	[(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
				487	imm:$cond, EFLAGS))]>;
				488
				489	let Predicates = [NoCMov] in {
				490	def CMOV_GR32 : I<0, Pseudo,
				491	(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
				492	"#CMOV_GR32* PSEUDO!",
				493	[(set GR32:$dst,
				494	(X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
				495	def CMOV_GR16 : I<0, Pseudo,
				496	(outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
				497	"#CMOV_GR16* PSEUDO!",
				498	[(set GR16:$dst,
				499	(X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
				500	} // Predicates = [NoCMov]
				501
				502	// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
				503	// SSE1.
				504	let Predicates = [FPStackf32] in
				505	def CMOV_RFP32 : I<0, Pseudo,
				506	(outs RFP32:$dst),
				507	(ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
				508	"#CMOV_RFP32 PSEUDO!",
				509	[(set RFP32:$dst,
				510	(X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
				511	EFLAGS))]>;
				512	// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
				513	// SSE2.
				514	let Predicates = [FPStackf64] in
				515	def CMOV_RFP64 : I<0, Pseudo,
				516	(outs RFP64:$dst),
				517	(ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
				518	"#CMOV_RFP64 PSEUDO!",
				519	[(set RFP64:$dst,
				520	(X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
				521	EFLAGS))]>;
				522	def CMOV_RFP80 : I<0, Pseudo,
				523	(outs RFP80:$dst),
				524	(ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
				525	"#CMOV_RFP80 PSEUDO!",
				526	[(set RFP80:$dst,
				527	(X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
				528	EFLAGS))]>;
				529	} // usesCustomInserter = 1, Uses = [EFLAGS]
				530
				531
				532	//===----------------------------------------------------------------------===//
				533	// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
				534	//===----------------------------------------------------------------------===//
				535
				536	// FIXME: Use normal instructions and add lock prefix dynamically.
				537
				538	// Memory barriers
				539
				540	// TODO: Get this to fold the constant into the instruction.
				541	let isCodeGenOnly = 1, Defs = [EFLAGS] in
				542	def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
				543	"or{l}\t{$zero, $dst\|$dst, $zero}",
				544	[], IIC_ALU_MEM>, Requires<[Not64BitMode]>, LOCK,
				545	Sched<[WriteALULd, WriteRMW]>;
				546
				547	let hasSideEffects = 1 in
				548	def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
				549	"#MEMBARRIER",
				550	[(X86MemBarrier)]>, Sched<[WriteLoad]>;
				551
				552	// RegOpc corresponds to the mr version of the instruction
				553	// ImmOpc corresponds to the mi version of the instruction
				554	// ImmOpc8 corresponds to the mi8 version of the instruction
				555	// ImmMod corresponds to the instruction format of the mi and mi8 versions
				556	multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
				557	Format ImmMod, string mnemonic> {
					// Defines the memory-destination forms of one arithmetic operation: mr for
					// 8/16/32/64 bits, mi for 8/16/32 bits, mi32 for 64 bits, and mi8 for
					// 16/32/64 bits. mnemonic is the base mnemonic; each def appends its own
					// size suffix. Every def has an empty pattern list and carries the LOCK
					// prefix class.
				558	let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
				559	SchedRW = [WriteALULd, WriteRMW] in {
					// The opcode byte of each def is rebuilt from bits 7..1 of the template
					// parameter with bit 0 forced: 0 for the 8-bit forms, 1 for the
					// 16/32/64-bit forms.
					// NOTE(review): the mr forms below use IIC_ALU_NONMEM while the mi/mi8
					// forms use IIC_ALU_MEM, although all of them take a memory destination --
					// confirm whether the itinerary difference is intentional.
				560
				561	def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				562	RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
				563	MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
				564	!strconcat(mnemonic, "{b}\t",
				565	"{$src2, $dst\|$dst, $src2}"),
				566	[], IIC_ALU_NONMEM>, LOCK;
				567	def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				568	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				569	MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
				570	!strconcat(mnemonic, "{w}\t",
				571	"{$src2, $dst\|$dst, $src2}"),
				572	[], IIC_ALU_NONMEM>, OpSize16, LOCK;
				573	def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				574	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				575	MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
				576	!strconcat(mnemonic, "{l}\t",
				577	"{$src2, $dst\|$dst, $src2}"),
				578	[], IIC_ALU_NONMEM>, OpSize32, LOCK;
				579	def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				580	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				581	MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
				582	!strconcat(mnemonic, "{q}\t",
				583	"{$src2, $dst\|$dst, $src2}"),
				584	[], IIC_ALU_NONMEM>, LOCK;
				585
				586	def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				587	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
				588	ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
				589	!strconcat(mnemonic, "{b}\t",
				590	"{$src2, $dst\|$dst, $src2}"),
				591	[], IIC_ALU_MEM>, LOCK;
				592
				593	def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				594	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				595	ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
				596	!strconcat(mnemonic, "{w}\t",
				597	"{$src2, $dst\|$dst, $src2}"),
				598	[], IIC_ALU_MEM>, OpSize16, LOCK;
				599
				600	def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				601	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				602	ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
				603	!strconcat(mnemonic, "{l}\t",
				604	"{$src2, $dst\|$dst, $src2}"),
				605	[], IIC_ALU_MEM>, OpSize32, LOCK;
				606
				607	def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				608	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				609	ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
				610	!strconcat(mnemonic, "{q}\t",
				611	"{$src2, $dst\|$dst, $src2}"),
				612	[], IIC_ALU_MEM>, LOCK;
				613
				614	def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				615	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				616	ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
				617	!strconcat(mnemonic, "{w}\t",
				618	"{$src2, $dst\|$dst, $src2}"),
				619	[], IIC_ALU_MEM>, OpSize16, LOCK;
				620	def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				621	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				622	ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
				623	!strconcat(mnemonic, "{l}\t",
				624	"{$src2, $dst\|$dst, $src2}"),
				625	[], IIC_ALU_MEM>, OpSize32, LOCK;
				626	def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				627	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				628	ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
				629	!strconcat(mnemonic, "{q}\t",
				630	"{$src2, $dst\|$dst, $src2}"),
				631	[], IIC_ALU_MEM>, LOCK;
				632
				633	}
				634
				635	}
				636
				637	defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
				638	defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
				639	defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
				640	defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
				641	defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
				642
				643	// Optimized codegen when the non-memory output is not used.
				644	multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
				645	string mnemonic> {
					// Opc8 is the opcode of the 8-bit form; Opc is shared by the 16/32/64-bit
					// forms. Form is the instruction format used by every width, and mnemonic
					// is the base mnemonic to which each def appends its size suffix.
					// Each def takes only a memory operand, has an empty pattern list, and
					// carries the LOCK prefix class.
				646	let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
				647	SchedRW = [WriteALULd, WriteRMW] in {
				648
				649	def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
				650	!strconcat(mnemonic, "{b}\t$dst"),
				651	[], IIC_UNARY_MEM>, LOCK;
				652	def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
				653	!strconcat(mnemonic, "{w}\t$dst"),
				654	[], IIC_UNARY_MEM>, OpSize16, LOCK;
				655	def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
				656	!strconcat(mnemonic, "{l}\t$dst"),
				657	[], IIC_UNARY_MEM>, OpSize32, LOCK;
				658	def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
				659	!strconcat(mnemonic, "{q}\t$dst"),
				660	[], IIC_UNARY_MEM>, LOCK;
				661	}
				662	}
				663
				664	defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">;
				665	defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">;
				666
				667	// Atomic compare and swap.
				668	multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
				669	SDPatternOperator frag, X86MemOperand x86memop,
				670	InstrItinClass itin> {
					// Defines a single codegen-only, LOCK-prefixed instruction whose only
					// explicit operand is the memory location $ptr; frag is matched on that
					// address alone. x86memop selects the memory operand class and itin the
					// itinerary class.
					// No Defs/Uses are set here: the LCMPXCHG8B and LCMPXCHG16B instantiations
					// wrap their defm in a let that supplies the implicit registers.
				671	let isCodeGenOnly = 1 in {
				672	def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
				673	!strconcat(mnemonic, "\t$ptr"),
				674	[(frag addr:$ptr)], itin>, TB, LOCK;
				675	}
				676	}
				677
				678	multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
				679	string mnemonic, SDPatternOperator frag,
				680	InstrItinClass itin8, InstrItinClass itin> {
					// Defines one LOCK-prefixed def per operand width (8/16/32/64 bits). Opc8
					// and itin8 apply to the 8-bit form; Opc and itin to the wider forms.
					// Each def implicitly uses and defines the A register of matching width
					// (AL/AX/EAX/RAX) and also defines EFLAGS.
					// The trailing constant passed to frag (1, 2, 4, 8) matches the operand
					// width in bytes.
				681	let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
				682	let Defs = [AL, EFLAGS], Uses = [AL] in
				683	def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
				684	!strconcat(mnemonic, "{b}\t{$swap, $ptr\|$ptr, $swap}"),
				685	[(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
				686	let Defs = [AX, EFLAGS], Uses = [AX] in
				687	def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
				688	!strconcat(mnemonic, "{w}\t{$swap, $ptr\|$ptr, $swap}"),
				689	[(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK;
				690	let Defs = [EAX, EFLAGS], Uses = [EAX] in
				691	def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
				692	!strconcat(mnemonic, "{l}\t{$swap, $ptr\|$ptr, $swap}"),
				693	[(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK;
				694	let Defs = [RAX, EFLAGS], Uses = [RAX] in
				695	def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
				696	!strconcat(mnemonic, "{q}\t{$swap, $ptr\|$ptr, $swap}"),
				697	[(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
				698	}
				699	}
				700
				701	let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
				702	SchedRW = [WriteALULd, WriteRMW] in {
				703	defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
				704	X86cas8, i64mem,
				705	IIC_CMPX_LOCK_8B>;
				706	}
				707
				708	let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
				709	Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
				710	defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
				711	X86cas16, i128mem,
				712	IIC_CMPX_LOCK_16B>, REX_W;
				713	}
				714
				715	defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
				716	X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;
				717
				718	// Atomic exchange and add
					//
					// ATOMIC_LOAD_BINOP defines NAME#8/16/32/64, each taking a register $val and
					// a memory location $ptr and producing a register result $dst that is tied
					// to $val ("$val = $dst").
					//   opc8         - opcode used by the 8-bit form.
					//   opc          - opcode shared by the 16-, 32- and 64-bit forms.
					//   mnemonic     - base mnemonic; a size suffix is appended per form.
					//   frag         - name prefix of the PatFrag to match; "_8", "_16", "_32" or
					//                  "_64" is appended and the result is looked up with !cast.
					//   itin8 / itin - itinerary classes for the 8-bit / wider forms.
					// No prefix or opcode-map classes are applied inside the multiclass; they are
					// left to the instantiation (the LXADD defm appends TB, LOCK).
				719	multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
				720	string frag,
				721	InstrItinClass itin8, InstrItinClass itin> {
				722	let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
				723	SchedRW = [WriteALULd, WriteRMW] in {
				724	def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
				725	(ins GR8:$val, i8mem:$ptr),
				726	!strconcat(mnemonic, "{b}\t{$val, $ptr\|$ptr, $val}"),
				727	[(set GR8:$dst,
				728	(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
				729	itin8>;
				730	def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
				731	(ins GR16:$val, i16mem:$ptr),
				732	!strconcat(mnemonic, "{w}\t{$val, $ptr\|$ptr, $val}"),
				733	[(set
				734	GR16:$dst,
				735	(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
				736	itin>, OpSize16;
				737	def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
				738	(ins GR32:$val, i32mem:$ptr),
				739	!strconcat(mnemonic, "{l}\t{$val, $ptr\|$ptr, $val}"),
				740	[(set
				741	GR32:$dst,
				742	(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
				743	itin>, OpSize32;
				744	def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
				745	(ins GR64:$val, i64mem:$ptr),
				746	!strconcat(mnemonic, "{q}\t{$val, $ptr\|$ptr, $val}"),
				747	[(set
				748	GR64:$dst,
				749	(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
				750	itin>;
				751	} // Constraints, Defs, isCodeGenOnly, SchedRW
				752	} // multiclass ATOMIC_LOAD_BINOP
				753
				754	defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
				755	IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
				756	TB, LOCK;
				757
				758	/* The following multiclass tries to make sure that in code like
				759	* x.store (immediate op x.load(acquire), release)
				760	* an operation directly on memory is generated instead of wasting a register.
				761	* It is not automatic as atomic_store/load are only lowered to MOV instructions
				762	* extremely late to prevent them from being accidentally reordered in the backend
				763	* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
				764	*/
					// RELEASE_BINOP_MI defines pseudo instructions (opcode 0, Pseudo format) that
					// match "atomic_store(dst, op(atomic_load(dst), imm))" on a single address.
					//   op - name of the binary operator ("add", "and", ...), resolved with
					//        !cast<PatFrag>.
					// Forms: NAME#8mi (i8 immediate), NAME#32mi (i32 immediate) and NAME#64mi32
					// (64-bit memory operand, immediate matched by i64immSExt32).
				765	multiclass RELEASE_BINOP_MI<string op> {
				766	def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
				767	"#RELEASE_BINOP PSEUDO!",
				768	[(atomic_store_8 addr:$dst, (!cast<PatFrag>(op)
				769	(atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
				770	// NAME#16 is not generated as 16-bit arithmetic instructions are considered
				771	// costly and avoided as far as possible by this backend anyway
				772	def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
				773	"#RELEASE_BINOP PSEUDO!",
				774	[(atomic_store_32 addr:$dst, (!cast<PatFrag>(op)
				775	(atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
				776	def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
				777	"#RELEASE_BINOP PSEUDO!",
				778	[(atomic_store_64 addr:$dst, (!cast<PatFrag>(op)
				779	(atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
				780	} // multiclass RELEASE_BINOP_MI
				781	defm RELEASE_ADD : RELEASE_BINOP_MI<"add">;
				782	defm RELEASE_AND : RELEASE_BINOP_MI<"and">;
				783	defm RELEASE_OR : RELEASE_BINOP_MI<"or">;
				784	defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">;
				785	// Note: we don't deal with sub, because subtractions of constants are
				786	// optimized into additions before this code can run
				787
					// RELEASE_UNOP defines memory-only pseudo instructions (opcode 0, Pseudo
					// format) NAME#8m/16m/32m/64m. Each one matches an atomic store to $dst of the
					// value described by the corresponding dag parameter.
					//   dag8, dag16, dag32, dag64 - value expression stored by the 8-, 16-, 32-
					//                               and 64-bit form respectively. The dags refer
					//                               to the operand by the name addr:$dst (see the
					//                               RELEASE_INC / RELEASE_DEC instantiations).
				788	multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
				789	def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
				790	"#RELEASE_UNOP PSEUDO!",
				791	[(atomic_store_8 addr:$dst, dag8)]>;
				792	def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
				793	"#RELEASE_UNOP PSEUDO!",
				794	[(atomic_store_16 addr:$dst, dag16)]>;
				795	def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
				796	"#RELEASE_UNOP PSEUDO!",
				797	[(atomic_store_32 addr:$dst, dag32)]>;
				798	def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
				799	"#RELEASE_UNOP PSEUDO!",
				800	[(atomic_store_64 addr:$dst, dag64)]>;
				801	} // multiclass RELEASE_UNOP
				802
				803	defm RELEASE_INC : RELEASE_UNOP<
				804	(add (atomic_load_8 addr:$dst), (i8 1)),
				805	(add (atomic_load_16 addr:$dst), (i16 1)),
				806	(add (atomic_load_32 addr:$dst), (i32 1)),
				807	(add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
				808	defm RELEASE_DEC : RELEASE_UNOP<
				809	(add (atomic_load_8 addr:$dst), (i8 -1)),
				810	(add (atomic_load_16 addr:$dst), (i16 -1)),
				811	(add (atomic_load_32 addr:$dst), (i32 -1)),
				812	(add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
				813	/*
				814	TODO: These don't work because the type inference of TableGen fails.
				815	TODO: find a way to fix it.
				816	defm RELEASE_NEG : RELEASE_UNOP<
				817	(ineg (atomic_load_8 addr:$dst)),
				818	(ineg (atomic_load_16 addr:$dst)),
				819	(ineg (atomic_load_32 addr:$dst)),
				820	(ineg (atomic_load_64 addr:$dst))>;
				821	defm RELEASE_NOT : RELEASE_UNOP<
				822	(not (atomic_load_8 addr:$dst)),
				823	(not (atomic_load_16 addr:$dst)),
				824	(not (atomic_load_32 addr:$dst)),
				825	(not (atomic_load_64 addr:$dst))>;
				826	*/
				827
					// RELEASE_MOV*mi: pseudo instructions (opcode 0, Pseudo format) selected for
					// an atomic store of an immediate to memory. The 64-bit form takes an
					// i64i32imm operand and matches only i64immSExt32 immediates.
				828	def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
				829	"#RELEASE_MOV PSEUDO !",
				830	[(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
				831	def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
				832	"#RELEASE_MOV PSEUDO !",
				833	[(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
				834	def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
				835	"#RELEASE_MOV PSEUDO !",
				836	[(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
				837	def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
				838	"#RELEASE_MOV PSEUDO !",
				839	[(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
				840
					// RELEASE_MOV*mr: pseudo instructions selected for an atomic store of a
					// register to memory, one per register width.
				841	def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
				842	"#RELEASE_MOV PSEUDO!",
				843	[(atomic_store_8 addr:$dst, GR8 :$src)]>;
				844	def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
				845	"#RELEASE_MOV PSEUDO!",
				846	[(atomic_store_16 addr:$dst, GR16:$src)]>;
				847	def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
				848	"#RELEASE_MOV PSEUDO!",
				849	[(atomic_store_32 addr:$dst, GR32:$src)]>;
				850	def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
				851	"#RELEASE_MOV PSEUDO!",
				852	[(atomic_store_64 addr:$dst, GR64:$src)]>;
				853
					// ACQUIRE_MOV*rm: pseudo instructions selected for an atomic load from
					// memory into a register, one per register width.
				854	def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
				855	"#ACQUIRE_MOV PSEUDO!",
				856	[(set GR8:$dst, (atomic_load_8 addr:$src))]>;
				857	def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
				858	"#ACQUIRE_MOV PSEUDO!",
				859	[(set GR16:$dst, (atomic_load_16 addr:$src))]>;
				860	def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
				861	"#ACQUIRE_MOV PSEUDO!",
				862	[(set GR32:$dst, (atomic_load_32 addr:$src))]>;
				863	def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
				864	"#ACQUIRE_MOV PSEUDO!",
				865	[(set GR64:$dst, (atomic_load_64 addr:$src))]>;
				866	//===----------------------------------------------------------------------===//
				867	// Conditional Move Pseudo Instructions.
				868	//===----------------------------------------------------------------------===//
				869
				870	// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
				871	// instruction selection into a branch sequence.
				872	let Uses = [EFLAGS], usesCustomInserter = 1 in {
				873	def CMOV_FR32 : I<0, Pseudo,
				874	(outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
				875	"#CMOV_FR32 PSEUDO!",
				876	[(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
				877	EFLAGS))]>;
				878	def CMOV_FR64 : I<0, Pseudo,
				879	(outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
				880	"#CMOV_FR64 PSEUDO!",
				881	[(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
				882	EFLAGS))]>;
				883	def CMOV_V4F32 : I<0, Pseudo,
				884	(outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
				885	"#CMOV_V4F32 PSEUDO!",
				886	[(set VR128:$dst,
				887	(v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
				888	EFLAGS)))]>;
				889	def CMOV_V2F64 : I<0, Pseudo,
				890	(outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
				891	"#CMOV_V2F64 PSEUDO!",
				892	[(set VR128:$dst,
				893	(v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
				894	EFLAGS)))]>;
				895	def CMOV_V2I64 : I<0, Pseudo,
				896	(outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
				897	"#CMOV_V2I64 PSEUDO!",
				898	[(set VR128:$dst,
				899	(v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
				900	EFLAGS)))]>;
				901	def CMOV_V8F32 : I<0, Pseudo,
				902	(outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
				903	"#CMOV_V8F32 PSEUDO!",
				904	[(set VR256:$dst,
				905	(v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond,
				906	EFLAGS)))]>;
				907	def CMOV_V4F64 : I<0, Pseudo,
				908	(outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
				909	"#CMOV_V4F64 PSEUDO!",
				910	[(set VR256:$dst,
				911	(v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
				912	EFLAGS)))]>;
				913	def CMOV_V4I64 : I<0, Pseudo,
				914	(outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
				915	"#CMOV_V4I64 PSEUDO!",
				916	[(set VR256:$dst,
				917	(v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
				918	EFLAGS)))]>;
				919	def CMOV_V8I64 : I<0, Pseudo,
				920	(outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
				921	"#CMOV_V8I64 PSEUDO!",
				922	[(set VR512:$dst,
				923	(v8i64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
				924	EFLAGS)))]>;
				925	def CMOV_V8F64 : I<0, Pseudo,
				926	(outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
				927	"#CMOV_V8F64 PSEUDO!",
				928	[(set VR512:$dst,
				929	(v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
				930	EFLAGS)))]>;
				931	def CMOV_V16F32 : I<0, Pseudo,
				932	(outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
				933	"#CMOV_V16F32 PSEUDO!",
				934	[(set VR512:$dst,
				935	(v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond,
				936	EFLAGS)))]>;
				937	}
				938
				939
				940	//===----------------------------------------------------------------------===//
				941	// DAG Pattern Matching Rules
				942	//===----------------------------------------------------------------------===//
				943
				944	// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
				945	def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
				946	def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
				947	def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
				948	def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
				949	def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
				950	def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
				951
				952	def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
				953	(ADD32ri GR32:$src1, tconstpool:$src2)>;
				954	def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
				955	(ADD32ri GR32:$src1, tjumptable:$src2)>;
				956	def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
				957	(ADD32ri GR32:$src1, tglobaladdr:$src2)>;
				958	def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
				959	(ADD32ri GR32:$src1, texternalsym:$src2)>;
				960	def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
				961	(ADD32ri GR32:$src1, tblockaddress:$src2)>;
				962
				963	def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
				964	(MOV32mi addr:$dst, tglobaladdr:$src)>;
				965	def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
				966	(MOV32mi addr:$dst, texternalsym:$src)>;
				967	def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
				968	(MOV32mi addr:$dst, tblockaddress:$src)>;
				969
				970	// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
				971	// code model mode, should use 'movabs'. FIXME: This is really a hack, the
				972	// 'movabs' predicate should handle this sort of thing.
				973	def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
				974	(MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
				975	def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
				976	(MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
				977	def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
				978	(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
				979	def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
				980	(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
				981	def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
				982	(MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
				983
				984	// In kernel code model, we can get the address of a label
				985	// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
				986	// the MOV64ri32 should accept these.
				987	def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
				988	(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
				989	def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
				990	(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
				991	def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
				992	(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
				993	def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
				994	(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
				995	def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
				996	(MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
				997
				998	// If we have small model and -static mode, it is safe to store global addresses
				999	// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
				1000	// for MOV64mi32 should handle this sort of thing.
				1001	def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
				1002	(MOV64mi32 addr:$dst, tconstpool:$src)>,
				1003	Requires<[NearData, IsStatic]>;
				1004	def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
				1005	(MOV64mi32 addr:$dst, tjumptable:$src)>,
				1006	Requires<[NearData, IsStatic]>;
				1007	def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
				1008	(MOV64mi32 addr:$dst, tglobaladdr:$src)>,
				1009	Requires<[NearData, IsStatic]>;
				1010	def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
				1011	(MOV64mi32 addr:$dst, texternalsym:$src)>,
				1012	Requires<[NearData, IsStatic]>;
				1013	def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
				1014	(MOV64mi32 addr:$dst, tblockaddress:$src)>,
				1015	Requires<[NearData, IsStatic]>;
				1016
				1017	def : Pat<(i32 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
				1018	def : Pat<(i64 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV64ri texternalsym:$dst)>;
				1019
				1020	// Calls
				1021
				1022	// tls has some funny stuff here...
				1023	// This corresponds to movabs $foo@tpoff, %rax
				1024	def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
				1025	(MOV64ri32 tglobaltlsaddr :$dst)>;
				1026	// This corresponds to add $foo@tpoff, %rax
				1027	def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
				1028	(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
				1029
				1030
				1031	// Direct PC relative function call for small code model. 32-bit displacement
				1032	// sign extended to 64-bit.
				1033	def : Pat<(X86call (i64 tglobaladdr:$dst)),
				1034	(CALL64pcrel32 tglobaladdr:$dst)>;
				1035	def : Pat<(X86call (i64 texternalsym:$dst)),
				1036	(CALL64pcrel32 texternalsym:$dst)>;
				1037
				1038	// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
				1039	// can never use callee-saved registers. That is the purpose of the GR64_TC
				1040	// register classes.
				1041	//
				1042	// The only volatile register that is never used by the calling convention is
				1043	// %r11. This happens when calling a vararg function with 6 arguments.
				1044	//
				1045	// Match an X86tcret that uses less than 7 volatile registers.
					// The predicate counts the RegisterSDNode operands of the node and rejects
					// the match once more than 6 have been seen.
				1046	def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
				1047	(X86tcret node:$ptr, node:$off), [{
				1048	// X86tcret args: (*chain, ptr, imm, regs..., glue)
					// Operands 0-2 are chain, ptr and imm, so the scan starts at index 3.
				1049	unsigned NumRegs = 0;
				1050	for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
					// Only register operands are counted; bail out on the 7th one.
				1051	if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)
				1052	return false;
				1053	return true;
				1054	}]>;
				1055
				1056	def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
				1057	(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
				1058	Requires<[Not64BitMode]>;
				1059
				1060	// FIXME: This is disabled for 32-bit PIC mode because the global base
				1061	// register which is part of the address mode may be assigned a
				1062	// callee-saved register.
				1063	def : Pat<(X86tcret (load addr:$dst), imm:$off),
				1064	(TCRETURNmi addr:$dst, imm:$off)>,
				1065	Requires<[Not64BitMode, IsNotPIC]>;
				1066
				1067	def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
				1068	(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
				1069	Requires<[NotLP64]>;
				1070
				1071	def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
				1072	(TCRETURNdi texternalsym:$dst, imm:$off)>,
				1073	Requires<[NotLP64]>;
				1074
				1075	def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
				1076	(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
				1077	Requires<[In64BitMode]>;
				1078
				1079	// Don't fold loads into X86tcret requiring more than 6 regs.
				1080	// There wouldn't be enough scratch registers for base+index.
				1081	def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
				1082	(TCRETURNmi64 addr:$dst, imm:$off)>,
				1083	Requires<[In64BitMode]>;
				1084
				1085	def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
				1086	(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
				1087	Requires<[IsLP64]>;
				1088
				1089	def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
				1090	(TCRETURNdi64 texternalsym:$dst, imm:$off)>,
				1091	Requires<[IsLP64]>;
				1092
				1093	// Normal calls, with various flavors of addresses.
				1094	def : Pat<(X86call (i32 tglobaladdr:$dst)),
				1095	(CALLpcrel32 tglobaladdr:$dst)>;
				1096	def : Pat<(X86call (i32 texternalsym:$dst)),
				1097	(CALLpcrel32 texternalsym:$dst)>;
				1098	def : Pat<(X86call (i32 imm:$dst)),
				1099	(CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
				1100
				1101	// Comparisons.
				1102
				1103	// TEST R,R is smaller than CMP R,0
				1104	def : Pat<(X86cmp GR8:$src1, 0),
				1105	(TEST8rr GR8:$src1, GR8:$src1)>;
				1106	def : Pat<(X86cmp GR16:$src1, 0),
				1107	(TEST16rr GR16:$src1, GR16:$src1)>;
				1108	def : Pat<(X86cmp GR32:$src1, 0),
				1109	(TEST32rr GR32:$src1, GR32:$src1)>;
				1110	def : Pat<(X86cmp GR64:$src1, 0),
				1111	(TEST64rr GR64:$src1, GR64:$src1)>;
				1112
				1113	// Conditional moves with folded loads with operands swapped and conditions
				1114	// inverted.
					//
					// CMOVmr emits three anonymous patterns (16-, 32- and 64-bit) for an X86cmov
					// whose first operand is a load and whose second operand is a register. The
					// selected instruction receives the operands in the opposite order
					// (register, then address).
					//   InvertedCond           - condition code matched in the X86cmov node.
					//   Inst16, Inst32, Inst64 - instructions to select for each width; the
					//                            instantiations below pair every condition with
					//                            the CMOV of the opposite condition
					//                            (e.g. X86_COND_B with CMOVAE*rm).
					// All patterns are guarded by the HasCMov predicate. No 8-bit pattern is
					// generated.
				1115	multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
				1116	Instruction Inst64> {
				1117	let Predicates = [HasCMov] in {
				1118	def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
				1119	(Inst16 GR16:$src2, addr:$src1)>;
				1120	def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
				1121	(Inst32 GR32:$src2, addr:$src1)>;
				1122	def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
				1123	(Inst64 GR64:$src2, addr:$src1)>;
				1124	} // Predicates
				1125	} // multiclass CMOVmr
				1126
				1127	defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
				1128	defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
				1129	defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
				1130	defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
				1131	defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
				1132	defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
				1133	defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
				1134	defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
				1135	defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
				1136	defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
				1137	defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
				1138	defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
				1139	defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
				1140	defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
				1141	defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
				1142	defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
				1143
				1144	// zextload bool -> zextload byte
				1145	def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
				1146	def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
				1147	def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
				1148	def : Pat<(zextloadi64i1 addr:$src),
				1149	(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
				1150
				1151	// extload bool -> extload byte
				1152	// When extloading from 16-bit and smaller memory locations into 64-bit
				1153	// registers, use zero-extending loads so that the entire 64-bit register is
				1154	// defined, avoiding partial-register updates.
				1155
				1156	def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
				1157	def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
				1158	def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
				1159	def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
				1160	def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
				1161	def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
				1162
				1163	// For other extloads, use subregs, since the high contents of the register are
				1164	// defined after an extload.
				1165	def : Pat<(extloadi64i1 addr:$src),
				1166	(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
				1167	def : Pat<(extloadi64i8 addr:$src),
				1168	(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
				1169	def : Pat<(extloadi64i16 addr:$src),
				1170	(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
				1171	def : Pat<(extloadi64i32 addr:$src),
				1172	(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
				1173
				1174	// anyext. Define these to do an explicit zero-extend to
				1175	// avoid partial-register updates.
				1176	def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
				1177	(MOVZX32rr8 GR8 :$src), sub_16bit)>;
				1178	def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
				1179
				1180	// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
				1181	def : Pat<(i32 (anyext GR16:$src)),
				1182	(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
				1183
				1184	def : Pat<(i64 (anyext GR8 :$src)),
				1185	(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;
				1186	def : Pat<(i64 (anyext GR16:$src)),
				1187	(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
				1188	def : Pat<(i64 (anyext GR32:$src)),
				1189	(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
				1190
				1191
				1192	// Most instructions that define a 32-bit result implicitly zero the high
				1193	// half of the 64-bit register, with a few exceptions: Truncate can be
				1194	// lowered to EXTRACT_SUBREG. CopyFromReg may be copying from a truncate.
				1195	// And x86's cmov doesn't do anything if the condition is false. But any
				1196	// other 32-bit operation will zero-extend up to 64 bits.
					//
					// def32 matches an i32 GR32 value whose defining node has none of the
					// opcodes rejected below. AssertSext is rejected as well, in addition to the
					// cases described above.
				1197	def def32 : PatLeaf<(i32 GR32:$src), [{
				1198	return N->getOpcode() != ISD::TRUNCATE &&
				1199	N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
				1200	N->getOpcode() != ISD::CopyFromReg &&
				1201	N->getOpcode() != ISD::AssertSext &&
				1202	N->getOpcode() != X86ISD::CMOV;
				1203	}]>;
				1204
				1205	// In the case of a 32-bit def that is known to implicitly zero-extend,
				1206	// we can use a SUBREG_TO_REG.
				1207	def : Pat<(i64 (zext def32:$src)),
				1208	(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
				1209
				1210	//===----------------------------------------------------------------------===//
				1211	// Pattern match OR as ADD
				1212	//===----------------------------------------------------------------------===//
				1213
				1214	// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
				1215	// 3-addressified into an LEA instruction to avoid copies. However, we also
				1216	// want to finally emit these instructions as an or at the end of the code
				1217	// generator to make the generated code easier to read. To do this, we select
				1218	// into "disjoint bits" pseudo ops.
				1219
				1220	// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
					// The predicate returns true when the two operands cannot both have the same
					// bit set, so that 'or' and 'add' produce the same value.
				1221	def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
					// Constant right-hand side: every bit set in the constant must be known
					// zero in the left-hand operand.
				1222	if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
				1223	return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
				1224
					// General case: compute the known bits of both operands. A bit is possibly
					// set when it is not known zero; the operands are disjoint when no bit is
					// possibly set in both of them.
				1225	APInt KnownZero0, KnownOne0;
				1226	CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
				1227	APInt KnownZero1, KnownOne1;
				1228	CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
				1229	return (~KnownZero0 & ~KnownZero1) == 0;
				1230	}]>;
				1231
				1232
				1233	// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
				1234	// Try this before selecting to OR.
					// The *_DB ("disjoint bits") pseudos below are selected for or_is_add nodes.
					// They have opcode 0, the Pseudo format and an empty assembly string; the
					// trailing comment on each asm-string line names the or/add forms the pseudo
					// stands for.
				1235	let AddedComplexity = 5, SchedRW = [WriteALU] in {
				1236
					// All forms tie $src1 to $dst, define EFLAGS and are marked convertible to
					// three-address form.
				1237	let isConvertibleToThreeAddress = 1,
				1238	Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
					// Only the register-register forms are commutable.
				1239	let isCommutable = 1 in {
				1240	def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
				1241	"", // orw/addw REG, REG
				1242	[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
				1243	def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
				1244	"", // orl/addl REG, REG
				1245	[(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
				1246	def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
				1247	"", // orq/addq REG, REG
				1248	[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
				1249	} // isCommutable
				1250
				1251	// NOTE: These are order specific, we want the ri8 forms to be listed
				1252	// first so that they are slightly preferred to the ri forms.
				1253
				1254	def ADD16ri8_DB : I<0, Pseudo,
				1255	(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
				1256	"", // orw/addw REG, imm8
				1257	[(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
				1258	def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
				1259	"", // orw/addw REG, imm
				1260	[(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
				1261
				1262	def ADD32ri8_DB : I<0, Pseudo,
				1263	(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
				1264	"", // orl/addl REG, imm8
				1265	[(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
				1266	def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
				1267	"", // orl/addl REG, imm
				1268	[(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
				1269
				1270
					// The 64-bit immediate forms accept only sign-extended 8-bit or 32-bit
					// immediates (i64immSExt8 / i64immSExt32).
				1271	def ADD64ri8_DB : I<0, Pseudo,
				1272	(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
				1273	"", // orq/addq REG, imm8
				1274	[(set GR64:$dst, (or_is_add GR64:$src1,
				1275	i64immSExt8:$src2))]>;
				1276	def ADD64ri32_DB : I<0, Pseudo,
				1277	(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
				1278	"", // orq/addq REG, imm
				1279	[(set GR64:$dst, (or_is_add GR64:$src1,
				1280	i64immSExt32:$src2))]>;
				1281	} // isConvertibleToThreeAddress, Constraints, Defs
				1282	} // AddedComplexity, SchedRW
				1283
				1284
				1285	//===----------------------------------------------------------------------===//
				1286	// Some peepholes
				1287	//===----------------------------------------------------------------------===//
				1288
				1289	// Odd encoding trick: -128 fits into an 8-bit immediate field while
				1290	// +128 doesn't, so in this special case use a sub instead of an add.
				1291	def : Pat<(add GR16:$src1, 128),
				1292	(SUB16ri8 GR16:$src1, -128)>;
				1293	def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
				1294	(SUB16mi8 addr:$dst, -128)>;
				1295
				1296	def : Pat<(add GR32:$src1, 128),
				1297	(SUB32ri8 GR32:$src1, -128)>;
				1298	def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
				1299	(SUB32mi8 addr:$dst, -128)>;
				1300
				1301	def : Pat<(add GR64:$src1, 128),
				1302	(SUB64ri8 GR64:$src1, -128)>;
				1303	def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
				1304	(SUB64mi8 addr:$dst, -128)>;
				1305
				1306	// The same trick applies for 32-bit immediate fields in 64-bit
				1307	// instructions.
					// Adding 0x80000000 (+2^31) does not fit in a sign-extended 32-bit immediate,
					// but subtracting 0xffffffff80000000 (-2^31) does and yields the same result.
					// Both patterns must match exactly 0x0000000080000000 (16 hex digits): any
					// other constant would be rewritten into an add of 2^31 and miscompile.
				1308	def : Pat<(add GR64:$src1, 0x0000000080000000),
				1309	(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
				1310	def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
				1311	(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
				1312
				1313	// To avoid needing to materialize an immediate in a register, use a 32-bit AND
				1314	// with implicit zero-extension instead of a 64-bit AND if the immediate has at
				1315	// least 32 bits of leading zeros. If in addition the last 32 bits can be
				1316	// represented with a sign extension of an 8-bit constant, use that.
				1317
				1318	def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
				1319	(SUBREG_TO_REG
				1320	(i64 0),
				1321	(AND32ri8
				1322	(EXTRACT_SUBREG GR64:$src, sub_32bit),
				1323	(i32 (GetLo8XForm imm:$imm))),
				1324	sub_32bit)>;
				1325
				1326	def : Pat<(and GR64:$src, i64immZExt32:$imm),
				1327	(SUBREG_TO_REG
				1328	(i64 0),
				1329	(AND32ri
				1330	(EXTRACT_SUBREG GR64:$src, sub_32bit),
				1331	(i32 (GetLo32XForm imm:$imm))),
				1332	sub_32bit)>;
				1333
				1334
				1335	// r & (2^16-1) ==> movz
				1336	def : Pat<(and GR32:$src1, 0xffff),
				1337	(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
				1338	// r & (2^8-1) ==> movz
				1339	def : Pat<(and GR32:$src1, 0xff),
				1340	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
				1341	GR32_ABCD)),
				1342	sub_8bit))>,
				1343	Requires<[Not64BitMode]>;
				1344	// r & (2^8-1) ==> movz
				1345	def : Pat<(and GR16:$src1, 0xff),
				1346	(EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
				1347	(i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
				1348	sub_16bit)>,
				1349	Requires<[Not64BitMode]>;
				1350
				1351	// r & (2^32-1) ==> 32-bit mov (implicitly zero-extends into the 64-bit reg)
				1352	def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
				1353	(SUBREG_TO_REG (i64 0),
				1354	(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
				1355	sub_32bit)>;
				1356	// r & (2^16-1) ==> movz
				1357	def : Pat<(and GR64:$src, 0xffff),
				1358	(SUBREG_TO_REG (i64 0),
				1359	(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
				1360	sub_32bit)>;
				1361	// r & (2^8-1) ==> movz
				1362	def : Pat<(and GR64:$src, 0xff),
				1363	(SUBREG_TO_REG (i64 0),
				1364	(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
				1365	sub_32bit)>;
				1366	// r & (2^8-1) ==> movz
				1367	def : Pat<(and GR32:$src1, 0xff),
				1368	(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
				1369	Requires<[In64BitMode]>;
				1370	// r & (2^8-1) ==> movz
				1371	def : Pat<(and GR16:$src1, 0xff),
				1372	(EXTRACT_SUBREG (MOVZX32rr8 (i8
				1373	(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
				1374	Requires<[In64BitMode]>;
				1375
				1376
				1377	// sext_inreg patterns
				1378	def : Pat<(sext_inreg GR32:$src, i16),
				1379	(MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
				1380	def : Pat<(sext_inreg GR32:$src, i8),
				1381	(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1382	GR32_ABCD)),
				1383	sub_8bit))>,
				1384	Requires<[Not64BitMode]>;
				1385
				1386	def : Pat<(sext_inreg GR16:$src, i8),
				1387	(EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
				1388	(i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
				1389	sub_16bit)>,
				1390	Requires<[Not64BitMode]>;
				1391
				1392	def : Pat<(sext_inreg GR64:$src, i32),
				1393	(MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
				1394	def : Pat<(sext_inreg GR64:$src, i16),
				1395	(MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
				1396	def : Pat<(sext_inreg GR64:$src, i8),
				1397	(MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
				1398	def : Pat<(sext_inreg GR32:$src, i8),
				1399	(MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
				1400	Requires<[In64BitMode]>;
				1401	def : Pat<(sext_inreg GR16:$src, i8),
				1402	(EXTRACT_SUBREG (MOVSX32rr8
				1403	(EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
				1404	Requires<[In64BitMode]>;
				1405
				1406	// sext, sext_load, zext, zext_load
				1407	def: Pat<(i16 (sext GR8:$src)),
				1408	(EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
				1409	def: Pat<(sextloadi16i8 addr:$src),
				1410	(EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
				1411	def: Pat<(i16 (zext GR8:$src)),
				1412	(EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
				1413	def: Pat<(zextloadi16i8 addr:$src),
				1414	(EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
				1415
				1416	// trunc patterns
				1417	def : Pat<(i16 (trunc GR32:$src)),
				1418	(EXTRACT_SUBREG GR32:$src, sub_16bit)>;
				1419	def : Pat<(i8 (trunc GR32:$src)),
				1420	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1421	sub_8bit)>,
				1422	Requires<[Not64BitMode]>;
				1423	def : Pat<(i8 (trunc GR16:$src)),
				1424	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1425	sub_8bit)>,
				1426	Requires<[Not64BitMode]>;
				1427	def : Pat<(i32 (trunc GR64:$src)),
				1428	(EXTRACT_SUBREG GR64:$src, sub_32bit)>;
				1429	def : Pat<(i16 (trunc GR64:$src)),
				1430	(EXTRACT_SUBREG GR64:$src, sub_16bit)>;
				1431	def : Pat<(i8 (trunc GR64:$src)),
				1432	(EXTRACT_SUBREG GR64:$src, sub_8bit)>;
				1433	def : Pat<(i8 (trunc GR32:$src)),
				1434	(EXTRACT_SUBREG GR32:$src, sub_8bit)>,
				1435	Requires<[In64BitMode]>;
				1436	def : Pat<(i8 (trunc GR16:$src)),
				1437	(EXTRACT_SUBREG GR16:$src, sub_8bit)>,
				1438	Requires<[In64BitMode]>;
				1439
				1440	// h-register tricks (non-64-bit mode): read bits 15:8 via sub_8bit_hi of an ABCD-class register.
				1441	def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
				1442	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1443	sub_8bit_hi)>,
				1444	Requires<[Not64BitMode]>;
				1445	def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
				1446	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1447	sub_8bit_hi)>,
				1448	Requires<[Not64BitMode]>;
				1449	def : Pat<(srl GR16:$src, (i8 8)),
				1450	(EXTRACT_SUBREG
				1451	(MOVZX32rr8
				1452	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1453	sub_8bit_hi)),
				1454	sub_16bit)>,
				1455	Requires<[Not64BitMode]>;
				1456	def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
				1457	(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
				1458	GR16_ABCD)),
				1459	sub_8bit_hi))>,
				1460	Requires<[Not64BitMode]>;
				1461	def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
				1462	(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
				1463	GR16_ABCD)),
				1464	sub_8bit_hi))>,
				1465	Requires<[Not64BitMode]>;
				1466	def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
				1467	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1468	GR32_ABCD)),
				1469	sub_8bit_hi))>,
				1470	Requires<[Not64BitMode]>;
				1471	def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
				1472	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1473	GR32_ABCD)),
				1474	sub_8bit_hi))>,
				1475	Requires<[Not64BitMode]>;
				1476
				1477	// h-register tricks.
				1478	// For now, be conservative on x86-64 and use an h-register extract only if the
				1479	// value is immediately zero-extended or stored, which are somewhat common
				1480	// cases. This uses a bunch of code to prevent a register requiring a REX prefix
				1481	// from being allocated in the same instruction as the h register, as there's
				1482	// currently no way to describe this requirement to the register allocator.
				1483
				1484	// h-register extract and zero-extend.
				1485	def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
				1486	(SUBREG_TO_REG
				1487	(i64 0),
				1488	(MOVZX32_NOREXrr8
				1489	(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
				1490	sub_8bit_hi)),
				1491	sub_32bit)>;
				1492	def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
				1493	(MOVZX32_NOREXrr8
				1494	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1495	sub_8bit_hi))>,
				1496	Requires<[In64BitMode]>;
				1497	def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
				1498	(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1499	GR32_ABCD)),
				1500	sub_8bit_hi))>,
				1501	Requires<[In64BitMode]>;
				1502	def : Pat<(srl GR16:$src, (i8 8)),
				1503	(EXTRACT_SUBREG
				1504	(MOVZX32_NOREXrr8
				1505	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1506	sub_8bit_hi)),
				1507	sub_16bit)>,
				1508	Requires<[In64BitMode]>;
				1509	def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
				1510	(MOVZX32_NOREXrr8
				1511	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1512	sub_8bit_hi))>,
				1513	Requires<[In64BitMode]>;
				1514	def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
				1515	(MOVZX32_NOREXrr8
				1516	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1517	sub_8bit_hi))>,
				1518	Requires<[In64BitMode]>;
				1519	def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
				1520	(SUBREG_TO_REG
				1521	(i64 0),
				1522	(MOVZX32_NOREXrr8
				1523	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1524	sub_8bit_hi)),
				1525	sub_32bit)>;
				1526	def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
				1527	(SUBREG_TO_REG
				1528	(i64 0),
				1529	(MOVZX32_NOREXrr8
				1530	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1531	sub_8bit_hi)),
				1532	sub_32bit)>;
				1533
				1534	// h-register extract and store.
				1535	def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
				1536	(MOV8mr_NOREX
				1537	addr:$dst,
				1538	(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
				1539	sub_8bit_hi))>;
				1540	def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
				1541	(MOV8mr_NOREX
				1542	addr:$dst,
				1543	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1544	sub_8bit_hi))>,
				1545	Requires<[In64BitMode]>;
				1546	def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
				1547	(MOV8mr_NOREX
				1548	addr:$dst,
				1549	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1550	sub_8bit_hi))>,
				1551	Requires<[In64BitMode]>;
				1552
				1553
				1554	// (shl x, 1) ==> (add x, x)
				1555	// Note that if x is undef (immediate or otherwise), we could theoretically
				1556	// end up with the two uses of x getting different values, producing a result
				1557	// where the least significant bit is not 0. However, the probability of this
				1558	// happening is considered low enough that this is officially not a
				1559	// "real problem".
				1560	def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
				1561	def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
				1562	def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
				1563	def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
				1564
				1565	// Helper imms matching a mask with >= 5 (immShift32) or >= 6 (immShift64) trailing one bits, i.e. a mask that doesn't change significant shift bits.
				1566	def immShift32 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 5; }]>;
				1567	def immShift64 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 6; }]>;
				1568
				1569	// Shift amount is implicitly masked.
				1570	multiclass MaskedShiftAmountPats<SDNode frag, string name> {
				1571	// (shift x (and y, 31)) ==> (shift x, y)
				1572	def : Pat<(frag GR8:$src1, (and CL, immShift32)),
				1573	(!cast<Instruction>(name # "8rCL") GR8:$src1)>;
				1574	def : Pat<(frag GR16:$src1, (and CL, immShift32)),
				1575	(!cast<Instruction>(name # "16rCL") GR16:$src1)>;
				1576	def : Pat<(frag GR32:$src1, (and CL, immShift32)),
				1577	(!cast<Instruction>(name # "32rCL") GR32:$src1)>;
				1578	def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
				1579	(!cast<Instruction>(name # "8mCL") addr:$dst)>;
				1580	def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
				1581	(!cast<Instruction>(name # "16mCL") addr:$dst)>;
				1582	def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
				1583	(!cast<Instruction>(name # "32mCL") addr:$dst)>;
				1584
				1585	// (shift x (and y, 63)) ==> (shift x, y); any mask accepted by immShift64 qualifies
				1586	def : Pat<(frag GR64:$src1, (and CL, immShift64)),
				1587	(!cast<Instruction>(name # "64rCL") GR64:$src1)>;
				1588	def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
				1589	(!cast<Instruction>(name # "64mCL") addr:$dst)>;
				1590	}
				1591
				1592	defm : MaskedShiftAmountPats<shl, "SHL">;
				1593	defm : MaskedShiftAmountPats<srl, "SHR">;
				1594	defm : MaskedShiftAmountPats<sra, "SAR">;
				1595	defm : MaskedShiftAmountPats<rotl, "ROL">;
				1596	defm : MaskedShiftAmountPats<rotr, "ROR">;
				1597
				1598	// (anyext (setcc_carry)) -> (setcc_carry)
				1599	def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				1600	(SETB_C16r)>;
				1601	def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				1602	(SETB_C32r)>;
				1603	def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
				1604	(SETB_C32r)>;
				1605
				1606
				1607
				1608
				1609	//===----------------------------------------------------------------------===//
				1610	// EFLAGS-defining Patterns
				1611	//===----------------------------------------------------------------------===//
				1612
				1613	// add reg, reg
				1614	def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
				1615	def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
				1616	def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
				1617
				1618	// add reg, mem
				1619	def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
				1620	(ADD8rm GR8:$src1, addr:$src2)>;
				1621	def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
				1622	(ADD16rm GR16:$src1, addr:$src2)>;
				1623	def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
				1624	(ADD32rm GR32:$src1, addr:$src2)>;
				1625
				1626	// add reg, imm
				1627	def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
				1628	def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
				1629	def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
				1630	def : Pat<(add GR16:$src1, i16immSExt8:$src2),
				1631	(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1632	def : Pat<(add GR32:$src1, i32immSExt8:$src2),
				1633	(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1634
				1635	// sub reg, reg
				1636	def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
				1637	def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
				1638	def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
				1639
				1640	// sub reg, mem
				1641	def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
				1642	(SUB8rm GR8:$src1, addr:$src2)>;
				1643	def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
				1644	(SUB16rm GR16:$src1, addr:$src2)>;
				1645	def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
				1646	(SUB32rm GR32:$src1, addr:$src2)>;
				1647
				1648	// sub reg, imm
				1649	def : Pat<(sub GR8:$src1, imm:$src2),
				1650	(SUB8ri GR8:$src1, imm:$src2)>;
				1651	def : Pat<(sub GR16:$src1, imm:$src2),
				1652	(SUB16ri GR16:$src1, imm:$src2)>;
				1653	def : Pat<(sub GR32:$src1, imm:$src2),
				1654	(SUB32ri GR32:$src1, imm:$src2)>;
				1655	def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
				1656	(SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1657	def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
				1658	(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1659
				1660	// sub 0, reg
				1661	def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>;
				1662	def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
				1663	def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
				1664	def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
				1665
				1666	// mul reg, reg
				1667	def : Pat<(mul GR16:$src1, GR16:$src2),
				1668	(IMUL16rr GR16:$src1, GR16:$src2)>;
				1669	def : Pat<(mul GR32:$src1, GR32:$src2),
				1670	(IMUL32rr GR32:$src1, GR32:$src2)>;
				1671
				1672	// mul reg, mem
				1673	def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
				1674	(IMUL16rm GR16:$src1, addr:$src2)>;
				1675	def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
				1676	(IMUL32rm GR32:$src1, addr:$src2)>;
				1677
				1678	// mul reg, imm
				1679	def : Pat<(mul GR16:$src1, imm:$src2),
				1680	(IMUL16rri GR16:$src1, imm:$src2)>;
				1681	def : Pat<(mul GR32:$src1, imm:$src2),
				1682	(IMUL32rri GR32:$src1, imm:$src2)>;
				1683	def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
				1684	(IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
				1685	def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
				1686	(IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
				1687
				1688	// reg = mul mem, imm
				1689	def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
				1690	(IMUL16rmi addr:$src1, imm:$src2)>;
				1691	def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
				1692	(IMUL32rmi addr:$src1, imm:$src2)>;
				1693	def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
				1694	(IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
				1695	def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
				1696	(IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
				1697
				1698	// Patterns for nodes that do not produce flags, for instructions that do.
				1699
				1700	// addition
				1701	def : Pat<(add GR64:$src1, GR64:$src2),
				1702	(ADD64rr GR64:$src1, GR64:$src2)>;
				1703	def : Pat<(add GR64:$src1, i64immSExt8:$src2),
				1704	(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1705	def : Pat<(add GR64:$src1, i64immSExt32:$src2),
				1706	(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1707	def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
				1708	(ADD64rm GR64:$src1, addr:$src2)>;
				1709
				1710	// subtraction
				1711	def : Pat<(sub GR64:$src1, GR64:$src2),
				1712	(SUB64rr GR64:$src1, GR64:$src2)>;
				1713	def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
				1714	(SUB64rm GR64:$src1, addr:$src2)>;
				1715	def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
				1716	(SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1717	def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
				1718	(SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1719
				1720	// Multiply
				1721	def : Pat<(mul GR64:$src1, GR64:$src2),
				1722	(IMUL64rr GR64:$src1, GR64:$src2)>;
				1723	def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
				1724	(IMUL64rm GR64:$src1, addr:$src2)>;
				1725	def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
				1726	(IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
				1727	def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
				1728	(IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
				1729	def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
				1730	(IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
				1731	def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
				1732	(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
				1733
				1734	// Increment/Decrement reg.
				1735	// Do not select INC/DEC on subtargets where they are slow.
				1736	let Predicates = [NotSlowIncDec] in {
				1737	def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>;
				1738	def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>;
				1739	def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>;
				1740	def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
				1741	def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>;
				1742	def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
				1743	def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
				1744	def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
				1745	}
				1746
				1747	// or reg/reg.
				1748	def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
				1749	def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
				1750	def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
				1751	def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
				1752
				1753	// or reg/mem
				1754	def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
				1755	(OR8rm GR8:$src1, addr:$src2)>;
				1756	def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
				1757	(OR16rm GR16:$src1, addr:$src2)>;
				1758	def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
				1759	(OR32rm GR32:$src1, addr:$src2)>;
				1760	def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
				1761	(OR64rm GR64:$src1, addr:$src2)>;
				1762
				1763	// or reg/imm
				1764	def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
				1765	def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
				1766	def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
				1767	def : Pat<(or GR16:$src1, i16immSExt8:$src2),
				1768	(OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1769	def : Pat<(or GR32:$src1, i32immSExt8:$src2),
				1770	(OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1771	def : Pat<(or GR64:$src1, i64immSExt8:$src2),
				1772	(OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1773	def : Pat<(or GR64:$src1, i64immSExt32:$src2),
				1774	(OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1775
				1776	// xor reg/reg
				1777	def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
				1778	def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
				1779	def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
				1780	def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
				1781
				1782	// xor reg/mem
				1783	def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
				1784	(XOR8rm GR8:$src1, addr:$src2)>;
				1785	def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
				1786	(XOR16rm GR16:$src1, addr:$src2)>;
				1787	def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
				1788	(XOR32rm GR32:$src1, addr:$src2)>;
				1789	def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
				1790	(XOR64rm GR64:$src1, addr:$src2)>;
				1791
				1792	// xor reg/imm
				1793	def : Pat<(xor GR8:$src1, imm:$src2),
				1794	(XOR8ri GR8:$src1, imm:$src2)>;
				1795	def : Pat<(xor GR16:$src1, imm:$src2),
				1796	(XOR16ri GR16:$src1, imm:$src2)>;
				1797	def : Pat<(xor GR32:$src1, imm:$src2),
				1798	(XOR32ri GR32:$src1, imm:$src2)>;
				1799	def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
				1800	(XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1801	def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
				1802	(XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1803	def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
				1804	(XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1805	def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
				1806	(XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1807
				1808	// and reg/reg
				1809	def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
				1810	def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
				1811	def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
				1812	def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
				1813
				1814	// and reg/mem
				1815	def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
				1816	(AND8rm GR8:$src1, addr:$src2)>;
				1817	def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
				1818	(AND16rm GR16:$src1, addr:$src2)>;
				1819	def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
				1820	(AND32rm GR32:$src1, addr:$src2)>;
				1821	def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
				1822	(AND64rm GR64:$src1, addr:$src2)>;
				1823
				1824	// and reg/imm
				1825	def : Pat<(and GR8:$src1, imm:$src2),
				1826	(AND8ri GR8:$src1, imm:$src2)>;
				1827	def : Pat<(and GR16:$src1, imm:$src2),
				1828	(AND16ri GR16:$src1, imm:$src2)>;
				1829	def : Pat<(and GR32:$src1, imm:$src2),
				1830	(AND32ri GR32:$src1, imm:$src2)>;
				1831	def : Pat<(and GR16:$src1, i16immSExt8:$src2),
				1832	(AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1833	def : Pat<(and GR32:$src1, i32immSExt8:$src2),
				1834	(AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1835	def : Pat<(and GR64:$src1, i64immSExt8:$src2),
				1836	(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1837	def : Pat<(and GR64:$src1, i64immSExt32:$src2),
				1838	(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1839
				1840	// Bit scan instruction patterns to match explicit zero-undef behavior.
				1841	def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
				1842	def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
				1843	def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
				1844	def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
				1845	def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
				1846	def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
				1847
				1848	// When HasMOVBE is enabled it is possible to get a non-legalized
				1849	// register-register 16 bit bswap. This maps it to a ROL instruction.
				1850	let Predicates = [HasMOVBE] in {
				1851	def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
				1852	}