Blame - llvm/lib/Target/X86/X86InstrCompiler.td - toolchain/llvm-project

blob: 7f850d6830e1506eddd0ac707bf465e09ce4292d [file] [log] [blame]

Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1	//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -- tablegen --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file describes the various pseudo instructions used by the compiler,
				11	// as well as Pat patterns used during instruction selection.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	//===----------------------------------------------------------------------===//
				16	// Pattern Matching Support
				17
				18	def GetLo32XForm : SDNodeXForm<imm, [{
				19	// Transformation function: get the low 32 bits.
Sergey Dmitrouk	842a51b	2015-04-28 14:05:47 +0000	[diff] [blame]	20	return getI32Imm((unsigned)N->getZExtValue(), SDLoc(N));
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	21	}]>;
				22
				23	def GetLo8XForm : SDNodeXForm<imm, [{
				24	// Transformation function: get the low 8 bits.
Sergey Dmitrouk	842a51b	2015-04-28 14:05:47 +0000	[diff] [blame]	25	return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	26	}]>;
				27
				28
				29	//===----------------------------------------------------------------------===//
				30	// Random Pseudo Instructions.
				31
				32	// PIC base construction. This expands to code that looks like this:
				33	// call $next_inst
				34	// popl %destreg
				35	let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in
				36	def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
				37	"", []>;
				38
				39
				40	// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
				41	// a stack adjustment and the codegen must know that they may modify the stack
				42	// pointer before prolog-epilog rewriting occurs.
				43	// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
				44	// sub / add which can clobber EFLAGS.
				45	let Defs = [ESP, EFLAGS], Uses = [ESP] in {
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	46	def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	47	"#ADJCALLSTACKDOWN",
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	48	[]>,
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	49	Requires<[NotLP64]>;
				50	def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
				51	"#ADJCALLSTACKUP",
				52	[(X86callseq_end timm:$amt1, timm:$amt2)]>,
				53	Requires<[NotLP64]>;
				54	}
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	55	def : Pat<(X86callseq_start timm:$amt1),
				56	(ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>;
				57
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	58
				59	// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
				60	// a stack adjustment and the codegen must know that they may modify the stack
				61	// pointer before prolog-epilog rewriting occurs.
				62	// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
				63	// sub / add which can clobber EFLAGS.
				64	let Defs = [RSP, EFLAGS], Uses = [RSP] in {
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	65	def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	66	"#ADJCALLSTACKDOWN",
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	67	[]>,
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	68	Requires<[IsLP64]>;
				69	def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
				70	"#ADJCALLSTACKUP",
				71	[(X86callseq_end timm:$amt1, timm:$amt2)]>,
				72	Requires<[IsLP64]>;
				73	}
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	74	def : Pat<(X86callseq_start timm:$amt1),
				75	(ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	76
				77
				78	// x86-64 va_start lowering magic.
				79	let usesCustomInserter = 1, Defs = [EFLAGS] in {
				80	def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
				81	(outs),
				82	(ins GR8:$al,
				83	i64imm:$regsavefi, i64imm:$offset,
				84	variable_ops),
				85	"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
				86	[(X86vastart_save_xmm_regs GR8:$al,
				87	imm:$regsavefi,
				88	imm:$offset),
				89	(implicit EFLAGS)]>;
				90
				91	// The VAARG_64 pseudo-instruction takes the address of the va_list,
				92	// and places the address of the next argument into a register.
				93	let Defs = [EFLAGS] in
				94	def VAARG_64 : I<0, Pseudo,
				95	(outs GR64:$dst),
				96	(ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
				97	"#VAARG_64 $dst, $ap, $size, $mode, $align",
				98	[(set GR64:$dst,
				99	(X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
				100	(implicit EFLAGS)]>;
				101
				102	// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
				103	// targets. These calls are needed to probe the stack when allocating more than
				104	// 4k bytes in one go. Touching the stack at 4K increments is necessary to
				105	// ensure that the guard pages used by the OS virtual memory manager are
				106	// allocated in correct sequence.
				107	// The main reason for having a separate instruction is the extra unmodelled
				108	// effects (compared to ordinary calls), such as the stack pointer change.
				109
				110	let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
				111	def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
				112	"# dynamic stack allocation",
				113	[(X86WinAlloca)]>;
				114
				115	// When using segmented stacks these are lowered into instructions which first
				116	// check if the current stacklet has enough free memory. If it does, memory is
				117	// allocated by bumping the stack pointer. Otherwise memory is allocated from
				118	// the heap.
				119
				120	let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
				121	def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
				122	"# variable sized alloca for segmented stacks",
				123	[(set GR32:$dst,
				124	(X86SegAlloca GR32:$size))]>,
				125	Requires<[NotLP64]>;
				126
				127	let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
				128	def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
				129	"# variable sized alloca for segmented stacks",
				130	[(set GR64:$dst,
				131	(X86SegAlloca GR64:$size))]>,
				132	Requires<[In64BitMode]>;
				133	}
				134
				135	// The MSVC runtime contains an _ftol2 routine for converting floating-point
				136	// to integer values. It has a strange calling convention: the input is
				137	// popped from the x87 stack, and the return value is given in EDX:EAX. ECX is
				138	// used as a temporary register. No other registers (aside from flags) are
				139	// touched.
				140	// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
				141	// variant is unnecessary.
				142
				143	let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in {
				144	def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
				145	"# win32 fptoui",
				146	[(X86WinFTOL RFP32:$src)]>,
				147	Requires<[Not64BitMode]>;
				148
				149	def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
				150	"# win32 fptoui",
				151	[(X86WinFTOL RFP64:$src)]>,
				152	Requires<[Not64BitMode]>;
				153	}
				154
				155	//===----------------------------------------------------------------------===//
				156	// EH Pseudo Instructions
				157	//
				158	let SchedRW = [WriteSystem] in {
				159	let isTerminator = 1, isReturn = 1, isBarrier = 1,
				160	hasCtrlDep = 1, isCodeGenOnly = 1 in {
				161	def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
				162	"ret\t#eh_return, addr: $addr",
				163	[(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
				164
				165	}
				166
				167	let isTerminator = 1, isReturn = 1, isBarrier = 1,
				168	hasCtrlDep = 1, isCodeGenOnly = 1 in {
				169	def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
				170	"ret\t#eh_return, addr: $addr",
				171	[(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
				172
				173	}
				174
				175	let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
				176	usesCustomInserter = 1 in {
				177	def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
				178	"#EH_SJLJ_SETJMP32",
				179	[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
				180	Requires<[Not64BitMode]>;
				181	def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
				182	"#EH_SJLJ_SETJMP64",
				183	[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
				184	Requires<[In64BitMode]>;
				185	let isTerminator = 1 in {
				186	def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
				187	"#EH_SJLJ_LONGJMP32",
				188	[(X86eh_sjlj_longjmp addr:$buf)]>,
				189	Requires<[Not64BitMode]>;
				190	def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
				191	"#EH_SJLJ_LONGJMP64",
				192	[(X86eh_sjlj_longjmp addr:$buf)]>,
				193	Requires<[In64BitMode]>;
				194	}
				195	}
				196	} // SchedRW
				197
				198	let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
				199	def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
				200	"#EH_SjLj_Setup\t$dst", []>;
				201	}
				202
				203	//===----------------------------------------------------------------------===//
				204	// Pseudo instructions used by unwind info.
				205	//
				206	let isPseudo = 1 in {
				207	def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
				208	"#SEH_PushReg $reg", []>;
				209	def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
				210	"#SEH_SaveReg $reg, $dst", []>;
				211	def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
				212	"#SEH_SaveXMM $reg, $dst", []>;
				213	def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
				214	"#SEH_StackAlloc $size", []>;
				215	def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
				216	"#SEH_SetFrame $reg, $offset", []>;
				217	def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
				218	"#SEH_PushFrame $mode", []>;
				219	def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
				220	"#SEH_EndPrologue", []>;
				221	def SEH_Epilogue : I<0, Pseudo, (outs), (ins),
				222	"#SEH_Epilogue", []>;
				223	}
				224
				225	//===----------------------------------------------------------------------===//
				226	// Pseudo instructions used by segmented stacks.
				227	//
				228
				229	// This is lowered into a RET instruction by MCInstLower. We need
				230	// this so that we don't have to have a MachineBasicBlock which ends
				231	// with a RET and also has successors.
				232	let isPseudo = 1 in {
				233	def MORESTACK_RET: I<0, Pseudo, (outs), (ins),
				234	"", []>;
				235
				236	// This instruction is lowered to a RET followed by a MOV. The two
				237	// instructions are not generated on a higher level since then the
				238	// verifier sees a MachineBasicBlock ending with a non-terminator.
				239	def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
				240	"", []>;
				241	}
				242
				243	//===----------------------------------------------------------------------===//
				244	// Alias Instructions
				245	//===----------------------------------------------------------------------===//
				246
				247	// Alias instruction mapping movr0 to xor.
				248	// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
				249	let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
				250	isPseudo = 1 in
				251	def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
				252	[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
				253
				254	// Other widths can also make use of the 32-bit xor, which may have a smaller
				255	// encoding and avoid partial register updates.
				256	def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
				257	def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
				258	def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
				259	let AddedComplexity = 20;
				260	}
				261
				262	// Materialize i64 constant where top 32 bits are zero. This could theoretically
				263	// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
				264	// that would make it more difficult to rematerialize.
				265	let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
				266	isCodeGenOnly = 1, hasSideEffects = 0 in
				267	def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
				268	"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;
				269
				270	// This 64-bit pseudo-move can be used for both a 64-bit constant that is
				271	// actually the zero-extension of a 32-bit constant, and for labels in the
				272	// x86-64 small code model.
				273	def mov64imm32 : ComplexPattern<i64, 1, "SelectMOV64Imm32", [imm, X86Wrapper]>;
				274
				275	let AddedComplexity = 1 in
				276	def : Pat<(i64 mov64imm32:$src),
				277	(SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>;
				278
				279	// Use sbb to materialize carry bit.
				280	let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
				281	// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
				282	// However, Pat<> can't replicate the destination reg into the inputs of the
				283	// result.
				284	def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",
				285	[(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				286	def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
				287	[(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				288	def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
				289	[(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				290	def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
				291	[(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				292	} // Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU]
				293
				294
				295	def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				296	(SETB_C16r)>;
				297	def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				298	(SETB_C32r)>;
				299	def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				300	(SETB_C64r)>;
				301
				302	def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				303	(SETB_C16r)>;
				304	def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				305	(SETB_C32r)>;
				306	def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				307	(SETB_C64r)>;
				308
				309	// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and
				310	// will be eliminated and that the sbb can be extended up to a wider type. When
				311	// this happens, it is great. However, if we are left with an 8-bit sbb and an
				312	// and, we might as well just match it as a setb.
				313	def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
				314	(SETBr)>;
				315
				316	// (add OP, SETB) -> (adc OP, 0)
				317	def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
				318	(ADC8ri GR8:$op, 0)>;
				319	def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
				320	(ADC32ri8 GR32:$op, 0)>;
				321	def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
				322	(ADC64ri8 GR64:$op, 0)>;
				323
				324	// (sub OP, SETB) -> (sbb OP, 0)
				325	def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				326	(SBB8ri GR8:$op, 0)>;
				327	def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				328	(SBB32ri8 GR32:$op, 0)>;
				329	def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				330	(SBB64ri8 GR64:$op, 0)>;
				331
				332	// (sub OP, SETCC_CARRY) -> (adc OP, 0)
				333	def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
				334	(ADC8ri GR8:$op, 0)>;
				335	def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
				336	(ADC32ri8 GR32:$op, 0)>;
				337	def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
				338	(ADC64ri8 GR64:$op, 0)>;
				339
				340	//===----------------------------------------------------------------------===//
				341	// String Pseudo Instructions
				342	//
				343	let SchedRW = [WriteMicrocoded] in {
				344	let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
				345	def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
				346	[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
				347	Requires<[Not64BitMode]>;
				348	def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
				349	[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,
				350	Requires<[Not64BitMode]>;
				351	def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
				352	[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,
				353	Requires<[Not64BitMode]>;
				354	} // Defs/Uses = [ECX,EDI,ESI], isCodeGenOnly = 1
				355
				356	let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
				357	def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
				358	[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
				359	Requires<[In64BitMode]>;
				360	def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
				361	[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,
				362	Requires<[In64BitMode]>;
				363	def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
				364	[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,
				365	Requires<[In64BitMode]>;
				366	def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
				367	[(X86rep_movs i64)], IIC_REP_MOVS>, REP,
				368	Requires<[In64BitMode]>;
				369	} // Defs/Uses = [RCX,RDI,RSI], isCodeGenOnly = 1
				370
				371	// FIXME: Should use "(X86rep_stos AL)" as the pattern.
				372	let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
				373	let Uses = [AL,ECX,EDI] in
				374	def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
				375	[(X86rep_stos i8)], IIC_REP_STOS>, REP,
				376	Requires<[Not64BitMode]>;
				377	let Uses = [AX,ECX,EDI] in
				378	def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
				379	[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,
				380	Requires<[Not64BitMode]>;
				381	let Uses = [EAX,ECX,EDI] in
				382	def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
				383	[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
				384	Requires<[Not64BitMode]>;
				385	} // Defs = [ECX,EDI], isCodeGenOnly = 1
				386
				387	let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
				388	let Uses = [AL,RCX,RDI] in
				389	def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
				390	[(X86rep_stos i8)], IIC_REP_STOS>, REP,
				391	Requires<[In64BitMode]>;
				392	let Uses = [AX,RCX,RDI] in
				393	def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
				394	[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,
				395	Requires<[In64BitMode]>;
				396	let Uses = [RAX,RCX,RDI] in
				397	def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
				398	[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
				399	Requires<[In64BitMode]>;
				400
				401	let Uses = [RAX,RCX,RDI] in
				402	def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
				403	[(X86rep_stos i64)], IIC_REP_STOS>, REP,
				404	Requires<[In64BitMode]>;
				405	} // Defs = [RCX,RDI], isCodeGenOnly = 1
				406	} // SchedRW
				407
				408	//===----------------------------------------------------------------------===//
				409	// Thread Local Storage Instructions
				410	//
				411
				412	// ELF TLS Support
				413	// All calls clobber the non-callee saved registers. ESP is marked as
				414	// a use to prevent stack-pointer assignments that appear immediately
				415	// before calls from potentially appearing dead.
				416	let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
				417	ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
				418	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
				419	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
				420	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
				421	Uses = [ESP] in {
				422	def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				423	"# TLS_addr32",
				424	[(X86tlsaddr tls32addr:$sym)]>,
				425	Requires<[Not64BitMode]>;
				426	def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				427	"# TLS_base_addr32",
				428	[(X86tlsbaseaddr tls32baseaddr:$sym)]>,
				429	Requires<[Not64BitMode]>;
				430	}
				431
				432	// All calls clobber the non-callee saved registers. RSP is marked as
				433	// a use to prevent stack-pointer assignments that appear immediately
				434	// before calls from potentially appearing dead.
				435	let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
				436	FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
				437	ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
				438	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
				439	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
				440	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
				441	Uses = [RSP] in {
				442	def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				443	"# TLS_addr64",
				444	[(X86tlsaddr tls64addr:$sym)]>,
				445	Requires<[In64BitMode]>;
				446	def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				447	"# TLS_base_addr64",
				448	[(X86tlsbaseaddr tls64baseaddr:$sym)]>,
				449	Requires<[In64BitMode]>;
				450	}
				451
				452	// Darwin TLS Support
				453	// For i386, the address of the thunk is passed on the stack, on return the
				454	// address of the variable is in %eax. %ecx is trashed during the function
				455	// call. All other registers are preserved.
				456	let Defs = [EAX, ECX, EFLAGS],
				457	Uses = [ESP],
				458	usesCustomInserter = 1 in
				459	def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				460	"# TLSCall_32",
				461	[(X86TLSCall addr:$sym)]>,
				462	Requires<[Not64BitMode]>;
				463
				464	// For x86_64, the address of the thunk is passed in %rdi, on return
				465	// the address of the variable is in %rax. All other registers are preserved.
				466	let Defs = [RAX, EFLAGS],
				467	Uses = [RSP, RDI],
				468	usesCustomInserter = 1 in
				469	def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				470	"# TLSCall_64",
				471	[(X86TLSCall addr:$sym)]>,
				472	Requires<[In64BitMode]>;
				473
				474
				475	//===----------------------------------------------------------------------===//
				476	// Conditional Move Pseudo Instructions
				477
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	478	// CMOV* - Used to implement the SELECT DAG operation. Expanded after
				479	// instruction selection into a branch sequence.
				480	multiclass CMOVrr_PSEUDO<RegisterClass RC, ValueType VT> {
				481	def CMOV#NAME : I<0, Pseudo,
				482	(outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond),
				483	"#CMOV_"#NAME#" PSEUDO!",
				484	[(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond,
				485	EFLAGS)))]>;
				486	}
				487
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	488	let usesCustomInserter = 1, Uses = [EFLAGS] in {
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	489	// X86 doesn't have 8-bit conditional moves. Use a customInserter to
				490	// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
				491	// however that requires promoting the operands, and can induce additional
				492	// i8 register pressure.
				493	defm _GR8 : CMOVrr_PSEUDO<GR8, i8>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	494
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	495	let Predicates = [NoCMov] in {
				496	defm _GR32 : CMOVrr_PSEUDO<GR32, i32>;
				497	defm _GR16 : CMOVrr_PSEUDO<GR16, i16>;
				498	} // Predicates = [NoCMov]
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	499
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	500	// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
				501	// SSE1/SSE2.
				502	let Predicates = [FPStackf32] in
				503	defm _RFP32 : CMOVrr_PSEUDO<RFP32, f32>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	504
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	505	let Predicates = [FPStackf64] in
				506	defm _RFP64 : CMOVrr_PSEUDO<RFP64, f64>;
				507
				508	defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;
				509
				510	defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
				511	defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
				512	defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
				513	defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
				514	defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
				515	defm _V8F32 : CMOVrr_PSEUDO<VR256, v8f32>;
				516	defm _V4F64 : CMOVrr_PSEUDO<VR256, v4f64>;
				517	defm _V4I64 : CMOVrr_PSEUDO<VR256, v4i64>;
				518	defm _V8I64 : CMOVrr_PSEUDO<VR512, v8i64>;
				519	defm _V8F64 : CMOVrr_PSEUDO<VR512, v8f64>;
				520	defm _V16F32 : CMOVrr_PSEUDO<VR512, v16f32>;
Elena Demikhovsky	c1ac5d7	2015-05-12 09:36:52 +0000	[diff] [blame]	521	defm _V8I1 : CMOVrr_PSEUDO<VK8, v8i1>;
				522	defm _V16I1 : CMOVrr_PSEUDO<VK16, v16i1>;
				523	defm _V32I1 : CMOVrr_PSEUDO<VK32, v32i1>;
				524	defm _V64I1 : CMOVrr_PSEUDO<VK64, v64i1>;
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	525	} // usesCustomInserter = 1, Uses = [EFLAGS]
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	526
				527	//===----------------------------------------------------------------------===//
				528	// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
				529	//===----------------------------------------------------------------------===//
				530
				531	// FIXME: Use normal instructions and add lock prefix dynamically.
				532
				533	// Memory barriers
				534
				535	// TODO: Get this to fold the constant into the instruction.
				536	let isCodeGenOnly = 1, Defs = [EFLAGS] in
				537	def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
				538	"or{l}\t{$zero, $dst|$dst, $zero}",
				539	[], IIC_ALU_MEM>, Requires<[Not64BitMode]>, LOCK,
				540	Sched<[WriteALULd, WriteRMW]>;
				541
				542	let hasSideEffects = 1 in
				543	def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
				544	"#MEMBARRIER",
				545	[(X86MemBarrier)]>, Sched<[WriteLoad]>;
				546
				547	// RegOpc corresponds to the mr version of the instruction
				548	// ImmOpc corresponds to the mi version of the instruction
				549	// ImmOpc8 corresponds to the mi8 version of the instruction
				550	// ImmMod corresponds to the instruction format of the mi and mi8 versions
				551	multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
				552	Format ImmMod, string mnemonic> {
				553	let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
				554	SchedRW = [WriteALULd, WriteRMW] in {
				555
				556	def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				557	RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
				558	MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
				559	!strconcat(mnemonic, "{b}\t",
				560	"{$src2, $dst|$dst, $src2}"),
				561	[], IIC_ALU_NONMEM>, LOCK;
				562	def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				563	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				564	MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
				565	!strconcat(mnemonic, "{w}\t",
				566	"{$src2, $dst|$dst, $src2}"),
				567	[], IIC_ALU_NONMEM>, OpSize16, LOCK;
				568	def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				569	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				570	MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
				571	!strconcat(mnemonic, "{l}\t",
				572	"{$src2, $dst|$dst, $src2}"),
				573	[], IIC_ALU_NONMEM>, OpSize32, LOCK;
				574	def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				575	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				576	MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
				577	!strconcat(mnemonic, "{q}\t",
				578	"{$src2, $dst|$dst, $src2}"),
				579	[], IIC_ALU_NONMEM>, LOCK;
				580
				581	def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				582	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
				583	ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
				584	!strconcat(mnemonic, "{b}\t",
				585	"{$src2, $dst|$dst, $src2}"),
				586	[], IIC_ALU_MEM>, LOCK;
				587
				588	def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				589	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				590	ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
				591	!strconcat(mnemonic, "{w}\t",
				592	"{$src2, $dst|$dst, $src2}"),
				593	[], IIC_ALU_MEM>, OpSize16, LOCK;
				594
				595	def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				596	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				597	ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
				598	!strconcat(mnemonic, "{l}\t",
				599	"{$src2, $dst|$dst, $src2}"),
				600	[], IIC_ALU_MEM>, OpSize32, LOCK;
				601
				602	def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				603	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				604	ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
				605	!strconcat(mnemonic, "{q}\t",
				606	"{$src2, $dst|$dst, $src2}"),
				607	[], IIC_ALU_MEM>, LOCK;
				608
				609	def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				610	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				611	ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
				612	!strconcat(mnemonic, "{w}\t",
				613	"{$src2, $dst|$dst, $src2}"),
				614	[], IIC_ALU_MEM>, OpSize16, LOCK;
				615	def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				616	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				617	ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
				618	!strconcat(mnemonic, "{l}\t",
				619	"{$src2, $dst|$dst, $src2}"),
				620	[], IIC_ALU_MEM>, OpSize32, LOCK;
				621	def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				622	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				623	ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
				624	!strconcat(mnemonic, "{q}\t",
				625	"{$src2, $dst|$dst, $src2}"),
				626	[], IIC_ALU_MEM>, LOCK;
				627
				628	} // Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, SchedRW
				629
				630	} // multiclass LOCK_ArithBinOp
				631
				632	defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
				633	defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
				634	defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
				635	defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
				636	defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
				637
				638	// Optimized codegen when the non-memory output is not used.
				639	multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
				640	string mnemonic> {
				641	let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
				642	SchedRW = [WriteALULd, WriteRMW] in {
				643
				644	def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
				645	!strconcat(mnemonic, "{b}\t$dst"),
				646	[], IIC_UNARY_MEM>, LOCK;
				647	def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
				648	!strconcat(mnemonic, "{w}\t$dst"),
				649	[], IIC_UNARY_MEM>, OpSize16, LOCK;
				650	def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
				651	!strconcat(mnemonic, "{l}\t$dst"),
				652	[], IIC_UNARY_MEM>, OpSize32, LOCK;
				653	def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
				654	!strconcat(mnemonic, "{q}\t$dst"),
				655	[], IIC_UNARY_MEM>, LOCK;
				656	}
				657	}
				658
				659	defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">;
				660	defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">;
				661
				662	// Atomic compare and swap.
				663	multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
				664	SDPatternOperator frag, X86MemOperand x86memop,
				665	InstrItinClass itin> {
				666	let isCodeGenOnly = 1 in {
				667	def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
				668	!strconcat(mnemonic, "\t$ptr"),
				669	[(frag addr:$ptr)], itin>, TB, LOCK;
				670	}
				671	}
				672
				673	multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
				674	string mnemonic, SDPatternOperator frag,
				675	InstrItinClass itin8, InstrItinClass itin> {
				676	let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
				677	let Defs = [AL, EFLAGS], Uses = [AL] in
				678	def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
				679	!strconcat(mnemonic, "{b}\t{$swap, $ptr\|$ptr, $swap}"),
				680	[(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
				681	let Defs = [AX, EFLAGS], Uses = [AX] in
				682	def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
				683	!strconcat(mnemonic, "{w}\t{$swap, $ptr\|$ptr, $swap}"),
				684	[(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK;
				685	let Defs = [EAX, EFLAGS], Uses = [EAX] in
				686	def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
				687	!strconcat(mnemonic, "{l}\t{$swap, $ptr\|$ptr, $swap}"),
				688	[(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK;
				689	let Defs = [RAX, EFLAGS], Uses = [RAX] in
				690	def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
				691	!strconcat(mnemonic, "{q}\t{$swap, $ptr\|$ptr, $swap}"),
				692	[(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
				693	}
				694	}
				695
				696	let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
				697	SchedRW = [WriteALULd, WriteRMW] in {
				698	defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
				699	X86cas8, i64mem,
				700	IIC_CMPX_LOCK_8B>;
				701	}
				702
				703	let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
				704	Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
				705	defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
				706	X86cas16, i128mem,
				707	IIC_CMPX_LOCK_16B>, REX_W;
				708	}
				709
				710	defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
				711	X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;
				712
				713	// Atomic exchange and add
				714	multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
				715	string frag,
				716	InstrItinClass itin8, InstrItinClass itin> {
				717	let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
				718	SchedRW = [WriteALULd, WriteRMW] in {
				719	def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
				720	(ins GR8:$val, i8mem:$ptr),
				721	!strconcat(mnemonic, "{b}\t{$val, $ptr\|$ptr, $val}"),
				722	[(set GR8:$dst,
				723	(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
				724	itin8>;
				725	def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
				726	(ins GR16:$val, i16mem:$ptr),
				727	!strconcat(mnemonic, "{w}\t{$val, $ptr\|$ptr, $val}"),
				728	[(set
				729	GR16:$dst,
				730	(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
				731	itin>, OpSize16;
				732	def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
				733	(ins GR32:$val, i32mem:$ptr),
				734	!strconcat(mnemonic, "{l}\t{$val, $ptr\|$ptr, $val}"),
				735	[(set
				736	GR32:$dst,
				737	(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
				738	itin>, OpSize32;
				739	def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
				740	(ins GR64:$val, i64mem:$ptr),
				741	!strconcat(mnemonic, "{q}\t{$val, $ptr\|$ptr, $val}"),
				742	[(set
				743	GR64:$dst,
				744	(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
				745	itin>;
				746	}
				747	}
				748
				749	defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
				750	IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
				751	TB, LOCK;
				752
				753	/* The following multiclass tries to make sure that in code like
				754	* x.store (immediate op x.load(acquire), release)
				755	* an operation directly on memory is generated instead of wasting a register.
				756	* It is not automatic as atomic_store/load are only lowered to MOV instructions
				757	* extremely late to prevent them from being accidentally reordered in the backend
				758	* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
				759	*/
				760	multiclass RELEASE_BINOP_MI<string op> {
				761	def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
				762	"#RELEASE_BINOP PSEUDO!",
				763	[(atomic_store_8 addr:$dst, (!cast<PatFrag>(op)
				764	(atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
				765	// NAME#16 is not generated as 16-bit arithmetic instructions are considered
				766	// costly and avoided as far as possible by this backend anyway
				767	def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
				768	"#RELEASE_BINOP PSEUDO!",
				769	[(atomic_store_32 addr:$dst, (!cast<PatFrag>(op)
				770	(atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
				771	def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
				772	"#RELEASE_BINOP PSEUDO!",
				773	[(atomic_store_64 addr:$dst, (!cast<PatFrag>(op)
				774	(atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
				775	}
				776	defm RELEASE_ADD : RELEASE_BINOP_MI<"add">;
				777	defm RELEASE_AND : RELEASE_BINOP_MI<"and">;
				778	defm RELEASE_OR : RELEASE_BINOP_MI<"or">;
				779	defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">;
				780	// Note: we don't deal with sub, because subtractions of constants are
				781	// optimized into additions before this code can run
				782
				783	multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
				784	def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
				785	"#RELEASE_UNOP PSEUDO!",
				786	[(atomic_store_8 addr:$dst, dag8)]>;
				787	def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
				788	"#RELEASE_UNOP PSEUDO!",
				789	[(atomic_store_16 addr:$dst, dag16)]>;
				790	def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
				791	"#RELEASE_UNOP PSEUDO!",
				792	[(atomic_store_32 addr:$dst, dag32)]>;
				793	def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
				794	"#RELEASE_UNOP PSEUDO!",
				795	[(atomic_store_64 addr:$dst, dag64)]>;
				796	}
				797
				798	defm RELEASE_INC : RELEASE_UNOP<
				799	(add (atomic_load_8 addr:$dst), (i8 1)),
				800	(add (atomic_load_16 addr:$dst), (i16 1)),
				801	(add (atomic_load_32 addr:$dst), (i32 1)),
				802	(add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
				803	defm RELEASE_DEC : RELEASE_UNOP<
				804	(add (atomic_load_8 addr:$dst), (i8 -1)),
				805	(add (atomic_load_16 addr:$dst), (i16 -1)),
				806	(add (atomic_load_32 addr:$dst), (i32 -1)),
				807	(add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
				808	/*
				809	TODO: These don't work because the type inference of TableGen fails.
				810	TODO: find a way to fix it.
				811	defm RELEASE_NEG : RELEASE_UNOP<
				812	(ineg (atomic_load_8 addr:$dst)),
				813	(ineg (atomic_load_16 addr:$dst)),
				814	(ineg (atomic_load_32 addr:$dst)),
				815	(ineg (atomic_load_64 addr:$dst))>;
				816	defm RELEASE_NOT : RELEASE_UNOP<
				817	(not (atomic_load_8 addr:$dst)),
				818	(not (atomic_load_16 addr:$dst)),
				819	(not (atomic_load_32 addr:$dst)),
				820	(not (atomic_load_64 addr:$dst))>;
				821	*/
				822
				823	def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
				824	"#RELEASE_MOV PSEUDO !",
				825	[(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
				826	def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
				827	"#RELEASE_MOV PSEUDO !",
				828	[(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
				829	def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
				830	"#RELEASE_MOV PSEUDO !",
				831	[(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
				832	def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
				833	"#RELEASE_MOV PSEUDO !",
				834	[(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
				835
				836	def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
				837	"#RELEASE_MOV PSEUDO!",
				838	[(atomic_store_8 addr:$dst, GR8 :$src)]>;
				839	def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
				840	"#RELEASE_MOV PSEUDO!",
				841	[(atomic_store_16 addr:$dst, GR16:$src)]>;
				842	def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
				843	"#RELEASE_MOV PSEUDO!",
				844	[(atomic_store_32 addr:$dst, GR32:$src)]>;
				845	def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
				846	"#RELEASE_MOV PSEUDO!",
				847	[(atomic_store_64 addr:$dst, GR64:$src)]>;
				848
				849	def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
				850	"#ACQUIRE_MOV PSEUDO!",
				851	[(set GR8:$dst, (atomic_load_8 addr:$src))]>;
				852	def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
				853	"#ACQUIRE_MOV PSEUDO!",
				854	[(set GR16:$dst, (atomic_load_16 addr:$src))]>;
				855	def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
				856	"#ACQUIRE_MOV PSEUDO!",
				857	[(set GR32:$dst, (atomic_load_32 addr:$src))]>;
				858	def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
				859	"#ACQUIRE_MOV PSEUDO!",
				860	[(set GR64:$dst, (atomic_load_64 addr:$src))]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	861
				862	//===----------------------------------------------------------------------===//
				863	// DAG Pattern Matching Rules
				864	//===----------------------------------------------------------------------===//
				865
				866	// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
				867	def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
				868	def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
				869	def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
				870	def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
				871	def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
Rafael Espindola	36b718f	2015-06-22 17:46:53 +0000	[diff] [blame^]	872	def : Pat<(i32 (X86Wrapper mcsym:$dst)), (MOV32ri mcsym:$dst)>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	873	def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
				874
				875	def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
				876	(ADD32ri GR32:$src1, tconstpool:$src2)>;
				877	def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
				878	(ADD32ri GR32:$src1, tjumptable:$src2)>;
				879	def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
				880	(ADD32ri GR32:$src1, tglobaladdr:$src2)>;
				881	def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
				882	(ADD32ri GR32:$src1, texternalsym:$src2)>;
Rafael Espindola	36b718f	2015-06-22 17:46:53 +0000	[diff] [blame^]	883	def : Pat<(add GR32:$src1, (X86Wrapper mcsym:$src2)),
				884	(ADD32ri GR32:$src1, mcsym:$src2)>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	885	def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
				886	(ADD32ri GR32:$src1, tblockaddress:$src2)>;
				887
				888	def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
				889	(MOV32mi addr:$dst, tglobaladdr:$src)>;
				890	def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
				891	(MOV32mi addr:$dst, texternalsym:$src)>;
Rafael Espindola	36b718f	2015-06-22 17:46:53 +0000	[diff] [blame^]	892	def : Pat<(store (i32 (X86Wrapper mcsym:$src)), addr:$dst),
				893	(MOV32mi addr:$dst, mcsym:$src)>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	894	def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
				895	(MOV32mi addr:$dst, tblockaddress:$src)>;
				896
				897	// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
				898	// code model mode, should use 'movabs'. FIXME: This is really a hack, the
				899	// 'movabs' predicate should handle this sort of thing.
				900	def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
				901	(MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
				902	def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
				903	(MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
				904	def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
				905	(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
				906	def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
				907	(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
Rafael Espindola	36b718f	2015-06-22 17:46:53 +0000	[diff] [blame^]	908	def : Pat<(i64 (X86Wrapper mcsym:$dst)),
				909	(MOV64ri mcsym:$dst)>, Requires<[FarData]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	910	def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
				911	(MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
				912
				913	// In kernel code model, we can get the address of a label
				914	// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
				915	// the MOV64ri32 should accept these.
				916	def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
				917	(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
				918	def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
				919	(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
				920	def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
				921	(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
				922	def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
				923	(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
Rafael Espindola	36b718f	2015-06-22 17:46:53 +0000	[diff] [blame^]	924	def : Pat<(i64 (X86Wrapper mcsym:$dst)),
				925	(MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	926	def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
				927	(MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
				928
				929	// If we have small model and -static mode, it is safe to store global addresses
				930	// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
				931	// for MOV64mi32 should handle this sort of thing.
				932	def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
				933	(MOV64mi32 addr:$dst, tconstpool:$src)>,
				934	Requires<[NearData, IsStatic]>;
				935	def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
				936	(MOV64mi32 addr:$dst, tjumptable:$src)>,
				937	Requires<[NearData, IsStatic]>;
				938	def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
				939	(MOV64mi32 addr:$dst, tglobaladdr:$src)>,
				940	Requires<[NearData, IsStatic]>;
				941	def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
				942	(MOV64mi32 addr:$dst, texternalsym:$src)>,
				943	Requires<[NearData, IsStatic]>;
Rafael Espindola	36b718f	2015-06-22 17:46:53 +0000	[diff] [blame^]	944	def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst),
				945	(MOV64mi32 addr:$dst, mcsym:$src)>,
				946	Requires<[NearData, IsStatic]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	947	def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
				948	(MOV64mi32 addr:$dst, tblockaddress:$src)>,
				949	Requires<[NearData, IsStatic]>;
				950
Rafael Espindola	36b718f	2015-06-22 17:46:53 +0000	[diff] [blame^]	951	def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>;
				952	def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	953
				954	// Calls
				955
				956	// tls has some funny stuff here...
				957	// This corresponds to movabs $foo@tpoff, %rax
				958	def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
				959	(MOV64ri32 tglobaltlsaddr :$dst)>;
				960	// This corresponds to add $foo@tpoff, %rax
				961	def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
				962	(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
				963
				964
				965	// Direct PC relative function call for small code model. 32-bit displacement
				966	// sign extended to 64-bit.
				967	def : Pat<(X86call (i64 tglobaladdr:$dst)),
				968	(CALL64pcrel32 tglobaladdr:$dst)>;
				969	def : Pat<(X86call (i64 texternalsym:$dst)),
				970	(CALL64pcrel32 texternalsym:$dst)>;
				971
				972	// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
				973	// can never use callee-saved registers. That is the purpose of the GR64_TC
				974	// register classes.
				975	//
				976	// The only volatile register that is never used by the calling convention is
				977	// %r11. This happens when calling a vararg function with 6 arguments.
				978	//
				979	// Match an X86tcret that uses less than 7 volatile registers.
				980	def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
				981	(X86tcret node:$ptr, node:$off), [{
				982	// X86tcret args: (*chain, ptr, imm, regs..., glue)
				983	unsigned NumRegs = 0; // number of RegisterSDNode operands seen so far
				984	for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i) // start at 3: skip chain, ptr, imm
				985	if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6) // a 7th register operand was found
				986	return false;
				987	return true; // at most 6 register operands: the fragment matches
				988	}]>;
				989
				990	def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
				991	(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
				992	Requires<[Not64BitMode]>;
				993
				994	// FIXME: This is disabled for 32-bit PIC mode because the global base
				995	// register which is part of the address mode may be assigned a
				996	// callee-saved register.
				997	def : Pat<(X86tcret (load addr:$dst), imm:$off),
				998	(TCRETURNmi addr:$dst, imm:$off)>,
				999	Requires<[Not64BitMode, IsNotPIC]>;
				1000
				1001	def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
				1002	(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
				1003	Requires<[NotLP64]>;
				1004
				1005	def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
				1006	(TCRETURNdi texternalsym:$dst, imm:$off)>,
				1007	Requires<[NotLP64]>;
				1008
				1009	def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
				1010	(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
				1011	Requires<[In64BitMode]>;
				1012
				1013	// Don't fold loads into X86tcret requiring more than 6 regs.
				1014	// There wouldn't be enough scratch registers for base+index.
				1015	def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
				1016	(TCRETURNmi64 addr:$dst, imm:$off)>,
				1017	Requires<[In64BitMode]>;
				1018
				1019	def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
				1020	(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
				1021	Requires<[IsLP64]>;
				1022
				1023	def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
				1024	(TCRETURNdi64 texternalsym:$dst, imm:$off)>,
				1025	Requires<[IsLP64]>;
				1026
				1027	// Normal calls, with various flavors of addresses.
				1028	def : Pat<(X86call (i32 tglobaladdr:$dst)),
				1029	(CALLpcrel32 tglobaladdr:$dst)>;
				1030	def : Pat<(X86call (i32 texternalsym:$dst)),
				1031	(CALLpcrel32 texternalsym:$dst)>;
				1032	def : Pat<(X86call (i32 imm:$dst)),
				1033	(CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
				1034
				1035	// Comparisons.
				1036
				1037	// TEST R,R is smaller than CMP R,0
				1038	def : Pat<(X86cmp GR8:$src1, 0),
				1039	(TEST8rr GR8:$src1, GR8:$src1)>;
				1040	def : Pat<(X86cmp GR16:$src1, 0),
				1041	(TEST16rr GR16:$src1, GR16:$src1)>;
				1042	def : Pat<(X86cmp GR32:$src1, 0),
				1043	(TEST32rr GR32:$src1, GR32:$src1)>;
				1044	def : Pat<(X86cmp GR64:$src1, 0),
				1045	(TEST64rr GR64:$src1, GR64:$src1)>;
				1046
				1047	// Conditional moves with folded loads with operands swapped and conditions
				1048	// inverted.
				1049	multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
				1050	Instruction Inst64> {
				1051	let Predicates = [HasCMov] in {
				1052	def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
				1053	(Inst16 GR16:$src2, addr:$src1)>;
				1054	def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
				1055	(Inst32 GR32:$src2, addr:$src1)>;
				1056	def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
				1057	(Inst64 GR64:$src2, addr:$src1)>;
				1058	}
				1059	}
				1060
				1061	defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
				1062	defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
				1063	defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
				1064	defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
				1065	defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
				1066	defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
				1067	defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
				1068	defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
				1069	defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
				1070	defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
				1071	defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
				1072	defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
				1073	defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
				1074	defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
				1075	defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
				1076	defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
				1077
				1078	// zextload bool -> zextload byte
Elena Demikhovsky	f61727d	2015-05-20 14:32:03 +0000	[diff] [blame]	1079	def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>;
				1080	def : Pat<(zextloadi16i1 addr:$src), (AND16ri (MOVZX16rm8 addr:$src), (i16 1))>;
				1081	def : Pat<(zextloadi32i1 addr:$src), (AND32ri (MOVZX32rm8 addr:$src), (i32 1))>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1082	def : Pat<(zextloadi64i1 addr:$src),
Elena Demikhovsky	f61727d	2015-05-20 14:32:03 +0000	[diff] [blame]	1083	(SUBREG_TO_REG (i64 0),
				1084	(AND32ri (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1085
				1086	// extload bool -> extload byte
				1087	// When extloading from 16-bit and smaller memory locations into 64-bit
				1088	// registers, use zero-extending loads so that the entire 64-bit register is
				1089	// defined, avoiding partial-register updates.
				1090
				1091	def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
				1092	def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
				1093	def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
				1094	def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
				1095	def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
				1096	def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
				1097
				1098	// For other extloads, use subregs, since the high contents of the register are
				1099	// defined after an extload.
				1100	def : Pat<(extloadi64i1 addr:$src),
				1101	(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
				1102	def : Pat<(extloadi64i8 addr:$src),
				1103	(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
				1104	def : Pat<(extloadi64i16 addr:$src),
				1105	(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
				1106	def : Pat<(extloadi64i32 addr:$src),
				1107	(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
				1108
				1109	// anyext. Define these to do an explicit zero-extend to
				1110	// avoid partial-register updates.
				1111	def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
				1112	(MOVZX32rr8 GR8 :$src), sub_16bit)>;
				1113	def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
				1114
				1115	// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
				1116	def : Pat<(i32 (anyext GR16:$src)),
				1117	(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
				1118
				1119	def : Pat<(i64 (anyext GR8 :$src)),
				1120	(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;
				1121	def : Pat<(i64 (anyext GR16:$src)),
				1122	(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
				1123	def : Pat<(i64 (anyext GR32:$src)),
				1124	(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
				1125
				1126
				1127	// Any instruction that defines a 32-bit result leaves the high half of the
				1128	// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
				1129	// be copying from a truncate. And x86's cmov doesn't do anything if the
				1130	// condition is false. But any other 32-bit operation will zero-extend
				1131	// up to 64 bits.
				1132	def def32 : PatLeaf<(i32 GR32:$src), [{
				1133	return N->getOpcode() != ISD::TRUNCATE &&
				1134	N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
				1135	N->getOpcode() != ISD::CopyFromReg &&
				1136	N->getOpcode() != ISD::AssertSext &&
				1137	N->getOpcode() != X86ISD::CMOV;
				1138	}]>;
				1139
				1140	// In the case of a 32-bit def that is known to implicitly zero-extend,
				1141	// we can use a SUBREG_TO_REG.
				1142	def : Pat<(i64 (zext def32:$src)),
				1143	(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
				1144
				1145	//===----------------------------------------------------------------------===//
				1146	// Pattern match OR as ADD
				1147	//===----------------------------------------------------------------------===//
				1148
				1149	// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
				1150	// 3-addressified into an LEA instruction to avoid copies. However, we also
				1151	// want to finally emit these instructions as an or at the end of the code
				1152	// generator to make the generated code easier to read. To do this, we select
				1153	// into "disjoint bits" pseudo ops.
				1154
				1155	// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
				1156	def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
				1157	if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) // constant RHS: use it as the mask
				1158	return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); // true iff every bit set in the constant is known zero in operand 0
				1159	// General case: compute the known bits of both operands.
				1160	APInt KnownZero0, KnownOne0;
				1161	CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
				1162	APInt KnownZero1, KnownOne1;
				1163	CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
				1164	return (~KnownZero0 & ~KnownZero1) == 0; // every bit is known zero in at least one operand; the KnownOne results are not used
				1165	}]>;
				1166
				1167
				1168	// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
				1169	// Try this before selecting to OR.
				1170	let AddedComplexity = 5, SchedRW = [WriteALU] in {
				1171
				1172	let isConvertibleToThreeAddress = 1,
				1173	Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
				1174	let isCommutable = 1 in {
				1175	def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
				1176	"", // orw/addw REG, REG
				1177	[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
				1178	def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
				1179	"", // orl/addl REG, REG
				1180	[(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
				1181	def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
				1182	"", // orq/addq REG, REG
				1183	[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
				1184	} // isCommutable
				1185
				1186	// NOTE: These are order specific, we want the ri8 forms to be listed
				1187	// first so that they are slightly preferred to the ri forms.
				1188
				1189	def ADD16ri8_DB : I<0, Pseudo,
				1190	(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
				1191	"", // orw/addw REG, imm8
				1192	[(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
				1193	def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
				1194	"", // orw/addw REG, imm
				1195	[(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
				1196
				1197	def ADD32ri8_DB : I<0, Pseudo,
				1198	(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
				1199	"", // orl/addl REG, imm8
				1200	[(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
				1201	def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
				1202	"", // orl/addl REG, imm
				1203	[(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
				1204
				1205
				1206	def ADD64ri8_DB : I<0, Pseudo,
				1207	(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
				1208	"", // orq/addq REG, imm8
				1209	[(set GR64:$dst, (or_is_add GR64:$src1,
				1210	i64immSExt8:$src2))]>;
				1211	def ADD64ri32_DB : I<0, Pseudo,
				1212	(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
				1213	"", // orq/addq REG, imm
				1214	[(set GR64:$dst, (or_is_add GR64:$src1,
				1215	i64immSExt32:$src2))]>;
				1216	}
				1217	} // AddedComplexity, SchedRW
				1218
				1219
				1220	//===----------------------------------------------------------------------===//
				1221	// Some peepholes
				1222	//===----------------------------------------------------------------------===//
				1223
				1224	// Odd encoding trick: -128 fits into an 8-bit immediate field while
				1225	// +128 doesn't, so in this special case use a sub instead of an add.
				1226	def : Pat<(add GR16:$src1, 128),
				1227	(SUB16ri8 GR16:$src1, -128)>;
				1228	def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
				1229	(SUB16mi8 addr:$dst, -128)>;
				1230
				1231	def : Pat<(add GR32:$src1, 128),
				1232	(SUB32ri8 GR32:$src1, -128)>;
				1233	def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
				1234	(SUB32mi8 addr:$dst, -128)>;
				1235
				1236	def : Pat<(add GR64:$src1, 128),
				1237	(SUB64ri8 GR64:$src1, -128)>;
				1238	def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
				1239	(SUB64mi8 addr:$dst, -128)>;
				1240
				1241	// The same trick applies for 32-bit immediate fields in 64-bit
				1242	// instructions: +2^31 is not a valid sign-extended imm32, but -2^31 is.
				1243	def : Pat<(add GR64:$src1, 0x0000000080000000),
				1244	(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
				1245	def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
				1246	(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
				1247
				1248	// To avoid needing to materialize an immediate in a register, use a 32-bit and
				1249	// with implicit zero-extension instead of a 64-bit and if the immediate has at
				1250	// least 32 bits of leading zeros. If in addition the last 32 bits can be
				1251	// represented with a sign extension of an 8-bit constant, use that.
Craig Topper	3d44178	2015-04-04 02:31:43 +0000	[diff] [blame]	1252	// This can also reduce instruction size by eliminating the need for the REX
				1253	// prefix.
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1254
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1255	// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
				1256	let AddedComplexity = 1 in {
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1257	def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
				1258	(SUBREG_TO_REG
				1259	(i64 0),
				1260	(AND32ri8
				1261	(EXTRACT_SUBREG GR64:$src, sub_32bit),
				1262	(i32 (GetLo8XForm imm:$imm))),
				1263	sub_32bit)>;
				1264
				1265	def : Pat<(and GR64:$src, i64immZExt32:$imm),
				1266	(SUBREG_TO_REG
				1267	(i64 0),
				1268	(AND32ri
				1269	(EXTRACT_SUBREG GR64:$src, sub_32bit),
				1270	(i32 (GetLo32XForm imm:$imm))),
				1271	sub_32bit)>;
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1272	} // AddedComplexity = 1
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1273
				1274
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1275	// AddedComplexity is needed due to the increased complexity on the
				1276	// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all
				1277	// the MOVZX patterns keeps them together in the DAGISel tables.
				1278	let AddedComplexity = 1 in {
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1279	// r & (2^16-1) ==> movz
				1280	def : Pat<(and GR32:$src1, 0xffff),
				1281	(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
				1282	// r & (2^8-1) ==> movz
				1283	def : Pat<(and GR32:$src1, 0xff),
				1284	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
				1285	GR32_ABCD)),
				1286	sub_8bit))>,
				1287	Requires<[Not64BitMode]>;
				1288	// r & (2^8-1) ==> movz
				1289	def : Pat<(and GR16:$src1, 0xff),
				1290	(EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
				1291	(i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
				1292	sub_16bit)>,
				1293	Requires<[Not64BitMode]>;
				1294
				1295	// r & (2^32-1) ==> movz
				1296	def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
				1297	(SUBREG_TO_REG (i64 0),
				1298	(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
				1299	sub_32bit)>;
				1300	// r & (2^16-1) ==> movz
Craig Topper	901202873	2015-04-04 02:08:20 +0000	[diff] [blame]	1301	// AddedComplexity = 1 is inherited from the enclosing let block.
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1302	def : Pat<(and GR64:$src, 0xffff),
				1303	(SUBREG_TO_REG (i64 0),
				1304	(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
				1305	sub_32bit)>;
				1306	// r & (2^8-1) ==> movz
				1307	def : Pat<(and GR64:$src, 0xff),
				1308	(SUBREG_TO_REG (i64 0),
				1309	(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
				1310	sub_32bit)>;
				1311	// r & (2^8-1) ==> movz
				1312	def : Pat<(and GR32:$src1, 0xff),
				1313	(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
				1314	Requires<[In64BitMode]>;
				1315	// r & (2^8-1) ==> movz
				1316	def : Pat<(and GR16:$src1, 0xff),
				1317	(EXTRACT_SUBREG (MOVZX32rr8 (i8
				1318	(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
				1319	Requires<[In64BitMode]>;
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1320	} // AddedComplexity = 1
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1321
				1322
				1323	// sext_inreg patterns
				1324	def : Pat<(sext_inreg GR32:$src, i16),
				1325	(MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
				1326	def : Pat<(sext_inreg GR32:$src, i8),
				1327	(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1328	GR32_ABCD)),
				1329	sub_8bit))>,
				1330	Requires<[Not64BitMode]>;
				1331
				1332	def : Pat<(sext_inreg GR16:$src, i8), // 32-bit-mode form; source is copied into GR16_ABCD first
				1333	(EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
				1334	(i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
				1335	sub_16bit)>, // MOVSX32rr8 yields i32; keep only the low 16 bits
				1336	Requires<[Not64BitMode]>;
				1337
				1338	def : Pat<(sext_inreg GR64:$src, i32),
				1339	(MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
				1340	def : Pat<(sext_inreg GR64:$src, i16),
				1341	(MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
				1342	def : Pat<(sext_inreg GR64:$src, i8),
				1343	(MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
				1344	def : Pat<(sext_inreg GR32:$src, i8),
				1345	(MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
				1346	Requires<[In64BitMode]>;
				1347	def : Pat<(sext_inreg GR16:$src, i8),
				1348	(EXTRACT_SUBREG (MOVSX32rr8
				1349	(EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
				1350	Requires<[In64BitMode]>;
				1351
				1352	// sext, sext_load, zext, zext_load
				1353	def: Pat<(i16 (sext GR8:$src)),
				1354	(EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
				1355	def: Pat<(sextloadi16i8 addr:$src),
				1356	(EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
				1357	def: Pat<(i16 (zext GR8:$src)),
				1358	(EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
				1359	def: Pat<(zextloadi16i8 addr:$src),
				1360	(EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
				1361
				1362	// trunc patterns
				1363	def : Pat<(i16 (trunc GR32:$src)),
				1364	(EXTRACT_SUBREG GR32:$src, sub_16bit)>;
				1365	def : Pat<(i8 (trunc GR32:$src)),
				1366	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1367	sub_8bit)>,
				1368	Requires<[Not64BitMode]>;
				1369	def : Pat<(i8 (trunc GR16:$src)),
				1370	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1371	sub_8bit)>,
				1372	Requires<[Not64BitMode]>;
				1373	def : Pat<(i32 (trunc GR64:$src)),
				1374	(EXTRACT_SUBREG GR64:$src, sub_32bit)>;
				1375	def : Pat<(i16 (trunc GR64:$src)),
				1376	(EXTRACT_SUBREG GR64:$src, sub_16bit)>;
				1377	def : Pat<(i8 (trunc GR64:$src)),
				1378	(EXTRACT_SUBREG GR64:$src, sub_8bit)>;
				1379	def : Pat<(i8 (trunc GR32:$src)),
				1380	(EXTRACT_SUBREG GR32:$src, sub_8bit)>,
				1381	Requires<[In64BitMode]>;
				1382	def : Pat<(i8 (trunc GR16:$src)),
				1383	(EXTRACT_SUBREG GR16:$src, sub_8bit)>,
				1384	Requires<[In64BitMode]>;
				1385
				1386	// h-register tricks
				1387	def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
				1388	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1389	sub_8bit_hi)>,
				1390	Requires<[Not64BitMode]>;
				1391	def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
				1392	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1393	sub_8bit_hi)>,
				1394	Requires<[Not64BitMode]>;
				1395	def : Pat<(srl GR16:$src, (i8 8)),
				1396	(EXTRACT_SUBREG
				1397	(MOVZX32rr8
				1398	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1399	sub_8bit_hi)),
				1400	sub_16bit)>,
				1401	Requires<[Not64BitMode]>;
				1402	def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
				1403	(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
				1404	GR16_ABCD)),
				1405	sub_8bit_hi))>,
				1406	Requires<[Not64BitMode]>;
				1407	def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
				1408	(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
				1409	GR16_ABCD)),
				1410	sub_8bit_hi))>,
				1411	Requires<[Not64BitMode]>;
				1412	def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
				1413	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1414	GR32_ABCD)),
				1415	sub_8bit_hi))>,
				1416	Requires<[Not64BitMode]>;
				1417	def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
				1418	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1419	GR32_ABCD)),
				1420	sub_8bit_hi))>,
				1421	Requires<[Not64BitMode]>;
				1422
				1423	// h-register tricks.
				1424	// For now, be conservative on x86-64 and use an h-register extract only if the
				1425	// value is immediately zero-extended or stored, which are somewhat common
				1426	// cases. This uses a bunch of code to prevent a register requiring a REX prefix
				1427	// from being allocated in the same instruction as the h register, as there's
				1428	// currently no way to describe this requirement to the register allocator.
				1429
				1430	// h-register extract and zero-extend.
				1431	def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
				1432	(SUBREG_TO_REG
				1433	(i64 0),
				1434	(MOVZX32_NOREXrr8
				1435	(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
				1436	sub_8bit_hi)),
				1437	sub_32bit)>;
				1438	def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
				1439	(MOVZX32_NOREXrr8
				1440	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1441	sub_8bit_hi))>,
				1442	Requires<[In64BitMode]>;
				1443	def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
				1444	(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1445	GR32_ABCD)),
				1446	sub_8bit_hi))>,
				1447	Requires<[In64BitMode]>;
				1448	def : Pat<(srl GR16:$src, (i8 8)),
				1449	(EXTRACT_SUBREG
				1450	(MOVZX32_NOREXrr8
				1451	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1452	sub_8bit_hi)),
				1453	sub_16bit)>,
				1454	Requires<[In64BitMode]>;
				1455	def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
				1456	(MOVZX32_NOREXrr8
				1457	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1458	sub_8bit_hi))>,
				1459	Requires<[In64BitMode]>;
				1460	def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
				1461	(MOVZX32_NOREXrr8
				1462	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1463	sub_8bit_hi))>,
				1464	Requires<[In64BitMode]>;
				1465	def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
				1466	(SUBREG_TO_REG
				1467	(i64 0),
				1468	(MOVZX32_NOREXrr8
				1469	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1470	sub_8bit_hi)),
				1471	sub_32bit)>;
				1472	def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
				1473	(SUBREG_TO_REG
				1474	(i64 0),
				1475	(MOVZX32_NOREXrr8
				1476	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1477	sub_8bit_hi)),
				1478	sub_32bit)>;
				1479
				1480	// h-register extract and store.
				1481	def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
				1482	(MOV8mr_NOREX
				1483	addr:$dst,
				1484	(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
				1485	sub_8bit_hi))>;
				1486	def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
				1487	(MOV8mr_NOREX
				1488	addr:$dst,
				1489	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1490	sub_8bit_hi))>,
				1491	Requires<[In64BitMode]>;
				1492	def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
				1493	(MOV8mr_NOREX
				1494	addr:$dst,
				1495	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1496	sub_8bit_hi))>,
				1497	Requires<[In64BitMode]>;
				1498
				1499
				1500	// (shl x, 1) ==> (add x, x)
				1501	// Note that if x is undef (immediate or otherwise), we could theoretically
				1502	// end up with the two uses of x getting different values, producing a result
				1503	// where the least significant bit is not 0. However, the probability of this
				1504	// happening is considered low enough that this is officially not a
				1505	// "real problem".
				1506	def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
				1507	def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
				1508	def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
				1509	def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
				1510
				1511	// Helper imms that check if a mask doesn't change significant shift bits.
Benjamin Kramer	5f6a907	2015-02-12 15:35:40 +0000	[diff] [blame]	1512	def immShift32 : ImmLeaf<i8, [{
				1513	return countTrailingOnes<uint64_t>(Imm) >= 5;
				1514	}]>;
				1515	def immShift64 : ImmLeaf<i8, [{
				1516	return countTrailingOnes<uint64_t>(Imm) >= 6;
				1517	}]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1518
				1519	// Shift amount is implicitly masked, so an explicit 'and' on CL can be dropped.
				1520	multiclass MaskedShiftAmountPats<SDNode frag, string name> {
				1521	// (shift x (and y, m)) ==> (shift x, y) when m has >= 5 trailing ones (immShift32)
				1522	def : Pat<(frag GR8:$src1, (and CL, immShift32)),
				1523	(!cast<Instruction>(name # "8rCL") GR8:$src1)>;
				1524	def : Pat<(frag GR16:$src1, (and CL, immShift32)),
				1525	(!cast<Instruction>(name # "16rCL") GR16:$src1)>;
				1526	def : Pat<(frag GR32:$src1, (and CL, immShift32)),
				1527	(!cast<Instruction>(name # "32rCL") GR32:$src1)>;
				1528	def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
				1529	(!cast<Instruction>(name # "8mCL") addr:$dst)>;
				1530	def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
				1531	(!cast<Instruction>(name # "16mCL") addr:$dst)>;
				1532	def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
				1533	(!cast<Instruction>(name # "32mCL") addr:$dst)>;
				1534
				1535	// (shift x (and y, m)) ==> (shift x, y) when m has >= 6 trailing ones (immShift64)
				1536	def : Pat<(frag GR64:$src1, (and CL, immShift64)),
				1537	(!cast<Instruction>(name # "64rCL") GR64:$src1)>;
				1538	def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
				1539	(!cast<Instruction>(name # "64mCL") addr:$dst)>;
				1540	}
				1541
				1542	defm : MaskedShiftAmountPats<shl, "SHL">;
				1543	defm : MaskedShiftAmountPats<srl, "SHR">;
				1544	defm : MaskedShiftAmountPats<sra, "SAR">;
				1545	defm : MaskedShiftAmountPats<rotl, "ROL">;
				1546	defm : MaskedShiftAmountPats<rotr, "ROR">;
				1547
				1548	// (anyext (setcc_carry)) -> (setcc_carry)
				1549	def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				1550	(SETB_C16r)>;
				1551	def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				1552	(SETB_C32r)>;
				1553	def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
				1554	(SETB_C32r)>;
				1555
				1556
				1557
				1558
				1559	//===----------------------------------------------------------------------===//
				1560	// EFLAGS-defining Patterns
				1561	//===----------------------------------------------------------------------===//
				1562
				1563	// add reg, reg
				1564	def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
				1565	def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
				1566	def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
				1567
				1568	// add reg, mem
				1569	def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
				1570	(ADD8rm GR8:$src1, addr:$src2)>;
				1571	def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
				1572	(ADD16rm GR16:$src1, addr:$src2)>;
				1573	def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
				1574	(ADD32rm GR32:$src1, addr:$src2)>;
				1575
				1576	// add reg, imm
				1577	def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
				1578	def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
				1579	def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
				1580	def : Pat<(add GR16:$src1, i16immSExt8:$src2),
				1581	(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1582	def : Pat<(add GR32:$src1, i32immSExt8:$src2),
				1583	(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1584
				1585	// sub reg, reg
				1586	def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
				1587	def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
				1588	def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
				1589
				1590	// sub reg, mem
				1591	def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
				1592	(SUB8rm GR8:$src1, addr:$src2)>;
				1593	def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
				1594	(SUB16rm GR16:$src1, addr:$src2)>;
				1595	def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
				1596	(SUB32rm GR32:$src1, addr:$src2)>;
				1597
				1598	// sub reg, imm
				1599	def : Pat<(sub GR8:$src1, imm:$src2),
				1600	(SUB8ri GR8:$src1, imm:$src2)>;
				1601	def : Pat<(sub GR16:$src1, imm:$src2),
				1602	(SUB16ri GR16:$src1, imm:$src2)>;
				1603	def : Pat<(sub GR32:$src1, imm:$src2),
				1604	(SUB32ri GR32:$src1, imm:$src2)>;
				1605	def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
				1606	(SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1607	def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
				1608	(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1609
				1610	// sub 0, reg
				1611	def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>;
				1612	def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
				1613	def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
				1614	def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
				1615
				1616	// mul reg, reg
				1617	def : Pat<(mul GR16:$src1, GR16:$src2),
				1618	(IMUL16rr GR16:$src1, GR16:$src2)>;
				1619	def : Pat<(mul GR32:$src1, GR32:$src2),
				1620	(IMUL32rr GR32:$src1, GR32:$src2)>;
				1621
				1622	// mul reg, mem
				1623	def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
				1624	(IMUL16rm GR16:$src1, addr:$src2)>;
				1625	def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
				1626	(IMUL32rm GR32:$src1, addr:$src2)>;
				1627
				1628	// mul reg, imm
				1629	def : Pat<(mul GR16:$src1, imm:$src2),
				1630	(IMUL16rri GR16:$src1, imm:$src2)>;
				1631	def : Pat<(mul GR32:$src1, imm:$src2),
				1632	(IMUL32rri GR32:$src1, imm:$src2)>;
				1633	def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
				1634	(IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
				1635	def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
				1636	(IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
				1637
				1638	// reg = mul mem, imm
				1639	def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
				1640	(IMUL16rmi addr:$src1, imm:$src2)>;
				1641	def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
				1642	(IMUL32rmi addr:$src1, imm:$src2)>;
				1643	def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
				1644	(IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
				1645	def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
				1646	(IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
				1647
				1648	// Patterns for nodes that do not produce flags, for instructions that do.
				1649
				1650	// addition
				1651	def : Pat<(add GR64:$src1, GR64:$src2),
				1652	(ADD64rr GR64:$src1, GR64:$src2)>;
				1653	def : Pat<(add GR64:$src1, i64immSExt8:$src2),
				1654	(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1655	def : Pat<(add GR64:$src1, i64immSExt32:$src2),
				1656	(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1657	def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
				1658	(ADD64rm GR64:$src1, addr:$src2)>;
				1659
				1660	// subtraction
				1661	def : Pat<(sub GR64:$src1, GR64:$src2),
				1662	(SUB64rr GR64:$src1, GR64:$src2)>;
				1663	def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
				1664	(SUB64rm GR64:$src1, addr:$src2)>;
				1665	def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
				1666	(SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1667	def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
				1668	(SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1669
				1670	// Multiply
				1671	def : Pat<(mul GR64:$src1, GR64:$src2),
				1672	(IMUL64rr GR64:$src1, GR64:$src2)>;
				1673	def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
				1674	(IMUL64rm GR64:$src1, addr:$src2)>;
				1675	def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
				1676	(IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
				1677	def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
				1678	(IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
				1679	def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
				1680	(IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
				1681	def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
				1682	(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
				1683
				1684	// Increment/Decrement reg.
				1685	// Do not make INC/DEC if it is slow
				1686	let Predicates = [NotSlowIncDec] in {
				1687	def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>;
				1688	def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>;
				1689	def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>;
				1690	def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
				1691	def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>;
				1692	def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
				1693	def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
				1694	def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
				1695	}
				1696
				1697	// or reg/reg.
				1698	def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
				1699	def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
				1700	def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
				1701	def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
				1702
				1703	// or reg/mem
				1704	def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
				1705	(OR8rm GR8:$src1, addr:$src2)>;
				1706	def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
				1707	(OR16rm GR16:$src1, addr:$src2)>;
				1708	def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
				1709	(OR32rm GR32:$src1, addr:$src2)>;
				1710	def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
				1711	(OR64rm GR64:$src1, addr:$src2)>;
				1712
				1713	// or reg/imm
				1714	def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
				1715	def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
				1716	def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
				1717	def : Pat<(or GR16:$src1, i16immSExt8:$src2),
				1718	(OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1719	def : Pat<(or GR32:$src1, i32immSExt8:$src2),
				1720	(OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1721	def : Pat<(or GR64:$src1, i64immSExt8:$src2),
				1722	(OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1723	def : Pat<(or GR64:$src1, i64immSExt32:$src2),
				1724	(OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1725
				1726	// xor reg/reg
				1727	def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
				1728	def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
				1729	def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
				1730	def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
				1731
				1732	// xor reg/mem
				1733	def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
				1734	(XOR8rm GR8:$src1, addr:$src2)>;
				1735	def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
				1736	(XOR16rm GR16:$src1, addr:$src2)>;
				1737	def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
				1738	(XOR32rm GR32:$src1, addr:$src2)>;
				1739	def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
				1740	(XOR64rm GR64:$src1, addr:$src2)>;
				1741
				1742	// xor reg/imm
				1743	def : Pat<(xor GR8:$src1, imm:$src2),
				1744	(XOR8ri GR8:$src1, imm:$src2)>;
				1745	def : Pat<(xor GR16:$src1, imm:$src2),
				1746	(XOR16ri GR16:$src1, imm:$src2)>;
				1747	def : Pat<(xor GR32:$src1, imm:$src2),
				1748	(XOR32ri GR32:$src1, imm:$src2)>;
				1749	def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
				1750	(XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1751	def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
				1752	(XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1753	def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
				1754	(XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1755	def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
				1756	(XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1757
				1758	// and reg/reg
				1759	def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
				1760	def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
				1761	def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
				1762	def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
				1763
				1764	// and reg/mem
				1765	def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
				1766	(AND8rm GR8:$src1, addr:$src2)>;
				1767	def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
				1768	(AND16rm GR16:$src1, addr:$src2)>;
				1769	def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
				1770	(AND32rm GR32:$src1, addr:$src2)>;
				1771	def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
				1772	(AND64rm GR64:$src1, addr:$src2)>;
				1773
				1774	// and reg/imm
				1775	def : Pat<(and GR8:$src1, imm:$src2),
				1776	(AND8ri GR8:$src1, imm:$src2)>;
				1777	def : Pat<(and GR16:$src1, imm:$src2),
				1778	(AND16ri GR16:$src1, imm:$src2)>;
				1779	def : Pat<(and GR32:$src1, imm:$src2),
				1780	(AND32ri GR32:$src1, imm:$src2)>;
				1781	def : Pat<(and GR16:$src1, i16immSExt8:$src2),
				1782	(AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1783	def : Pat<(and GR32:$src1, i32immSExt8:$src2),
				1784	(AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1785	def : Pat<(and GR64:$src1, i64immSExt8:$src2),
				1786	(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1787	def : Pat<(and GR64:$src1, i64immSExt32:$src2),
				1788	(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1789
				1790	// Bit scan instruction patterns to match explicit zero-undef behavior.
				1791	def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
				1792	def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
				1793	def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
				1794	def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
				1795	def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
				1796	def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
				1797
				1798	// When HasMOVBE is enabled it is possible to get a non-legalized
				1799	// register-register 16 bit bswap. This maps it to a ROL instruction.
				1800	let Predicates = [HasMOVBE] in {
				1801	def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
				1802	}