//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the various pseudo instructions used by the compiler,
// as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//

// PIC base construction.  This expands to code that looks like this:
//     call  $next_inst
//     popl  %destreg
let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
  def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
                      "", []>;


// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber EFLAGS.
let Defs = [ESP, EFLAGS], Uses = [ESP] in {
def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
                           "#ADJCALLSTACKDOWN",
                           [(X86callseq_start timm:$amt)]>,
                          Requires<[In32BitMode]>;
def ADJCALLSTACKUP32   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
                           "#ADJCALLSTACKUP",
                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
                          Requires<[In32BitMode]>;
}

// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber EFLAGS.
let Defs = [RSP, EFLAGS], Uses = [RSP] in {
def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
                           "#ADJCALLSTACKDOWN",
                           [(X86callseq_start timm:$amt)]>,
                          Requires<[In64BitMode]>;
def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
                           "#ADJCALLSTACKUP",
                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
                          Requires<[In64BitMode]>;
}


// x86-64 va_start lowering magic.
let usesCustomInserter = 1 in {
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
                              (outs),
                              (ins GR8:$al,
                                   i64imm:$regsavefi, i64imm:$offset,
                                   variable_ops),
                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
                              [(X86vastart_save_xmm_regs GR8:$al,
                                                         imm:$regsavefi,
                                                         imm:$offset)]>;

// Dynamic stack allocation yields an _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4K
// bytes in one go.  Touching the stack at 4K increments is necessary to
// ensure that the guard pages used by the OS virtual memory manager are
// allocated in the correct sequence.
// The main point of having a separate instruction is the extra unmodelled
// effects it has (compared to an ordinary call), such as the stack pointer
// change.

let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
  def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
                       "# dynamic stack allocation",
                       [(X86MingwAlloca)]>;
}


//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
let isTerminator = 1, isReturn = 1, isBarrier = 1,
    hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
                    "ret\t#eh_return, addr: $addr",
                    [(X86ehret GR32:$addr)]>;

}

let isTerminator = 1, isReturn = 1, isBarrier = 1,
    hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
                      "ret\t#eh_return, addr: $addr",
                      [(X86ehret GR64:$addr)]>;

}

//===----------------------------------------------------------------------===//
// Alias Instructions
//===----------------------------------------------------------------------===//

// Alias instructions that map movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// FIXME: Set encoding to pseudo.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
    isCodeGenOnly = 1 in {
def MOV8r0  : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
                [(set GR8:$dst, 0)]>;

// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
// encoding and avoids a partial-register update sometimes, but doing so
// at isel time interferes with rematerialization in the current register
// allocator. For now, this is rewritten when the instruction is lowered
// to an MCInst.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
                "",
                [(set GR16:$dst, 0)]>, OpSize;

// FIXME: Set encoding to pseudo.
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
                [(set GR32:$dst, 0)]>;
}

//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
//

// ELF TLS Support
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
    Uses = [ESP] in
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                   "leal\t$sym, %eax; "
                   "call\t___tls_get_addr@PLT",
                   [(X86tlsaddr tls32addr:$sym)]>,
                   Requires<[In32BitMode]>;

// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
            FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
    Uses = [RSP] in
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
                   ".byte\t0x66; "
                   "leaq\t$sym(%rip), %rdi; "
                   ".word\t0x6666; "
                   "rex64; "
                   "call\t__tls_get_addr@PLT",
                   [(X86tlsaddr tls64addr:$sym)]>,
                   Requires<[In64BitMode]>;
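// (The .byte/.word padding and the rex64 prefix above give this
// general-dynamic TLS sequence the fixed length the linker expects, so it
// can recognize the sequence and relax it, e.g. to initial-exec form.)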

// Darwin TLS Support
// For i386, the address of the thunk is passed on the stack; on return, the
// address of the variable is in %eax.  %ecx is trashed during the function
// call.  All other registers are preserved.
let Defs = [EAX, ECX],
    Uses = [ESP],
    usesCustomInserter = 1 in
def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                   "# TLSCall_32",
                   [(X86TLSCall addr:$sym)]>,
                   Requires<[In32BitMode]>;

// For x86_64, the address of the thunk is passed in %rdi; on return, the
// address of the variable is in %rax.  All other registers are preserved.
let Defs = [RAX],
    Uses = [RDI],
    usesCustomInserter = 1 in
def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
                   "# TLSCall_64",
                   [(X86TLSCall addr:$sym)]>,
                   Requires<[In64BitMode]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable
def : Pat<(i32 (X86Wrapper tconstpool  :$dst)), (MOV32ri tconstpool  :$dst)>;
def : Pat<(i32 (X86Wrapper tjumptable  :$dst)), (MOV32ri tjumptable  :$dst)>;
def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;

def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
          (ADD32ri GR32:$src1, tconstpool:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
          (ADD32ri GR32:$src1, tjumptable:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
          (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
          (ADD32ri GR32:$src1, texternalsym:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
          (ADD32ri GR32:$src1, tblockaddress:$src2)>;

def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
          (MOV32mi addr:$dst, tglobaladdr:$src)>;
def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
          (MOV32mi addr:$dst, texternalsym:$src)>;
def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
          (MOV32mi addr:$dst, tblockaddress:$src)>;


// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable references, when
// not in small code model, should use 'movabs'.  FIXME: This is really a
// hack, the 'movabs' predicate should handle this sort of thing.
def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
          (MOV64ri tconstpool  :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
          (MOV64ri tjumptable  :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
          (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
          (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
          (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;

// In static codegen with small code model, we can get the address of a label
// into a register with 'movl'.  FIXME: This is a hack, the 'imm' predicate of
// MOV64ri64i32 should accept these.
def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
          (MOV64ri64i32 tconstpool  :$dst)>, Requires<[SmallCode]>;
def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
          (MOV64ri64i32 tjumptable  :$dst)>, Requires<[SmallCode]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
          (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
          (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
          (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;

// In kernel code model, we can get the address of a label
// into a register with 'movq'.  FIXME: This is a hack, the 'imm' predicate of
// MOV64ri32 should accept these.
def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
          (MOV64ri32 tconstpool  :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
          (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
          (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
          (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
          (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;

// In small code model with -static, it is safe to store global addresses
// directly as immediates.  FIXME: This is really a hack, the 'imm' predicate
// for MOV64mi32 should handle this sort of thing.
def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
          (MOV64mi32 addr:$dst, tconstpool:$src)>,
          Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
          (MOV64mi32 addr:$dst, tjumptable:$src)>,
          Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
          (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
          Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
          (MOV64mi32 addr:$dst, texternalsym:$src)>,
          Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
          (MOV64mi32 addr:$dst, tblockaddress:$src)>,
          Requires<[NearData, IsStatic]>;


// Calls

// TLS has some funny stuff here...
// This corresponds to movabs $foo@tpoff, %rax
def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
          (MOV64ri tglobaltlsaddr :$dst)>;
// This corresponds to add $foo@tpoff, %rax
def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
          (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
// This corresponds to mov foo@tpoff(%rbx), %rax
def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
          (MOV64rm tglobaltlsaddr :$dst)>;


// Direct PC relative function call for small code model. 32-bit displacement
// sign extended to 64-bit.
def : Pat<(X86call (i64 tglobaladdr:$dst)),
          (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
def : Pat<(X86call (i64 texternalsym:$dst)),
          (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;

def : Pat<(X86call (i64 tglobaladdr:$dst)),
          (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
def : Pat<(X86call (i64 texternalsym:$dst)),
          (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;

// tailcall stuff
def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
          (TCRETURNri GR32_TC:$dst, imm:$off)>,
          Requires<[In32BitMode]>;

// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
          (TCRETURNmi addr:$dst, imm:$off)>,
          Requires<[In32BitMode, IsNotPIC]>;

def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
          (TCRETURNdi tglobaladdr:$dst, imm:$off)>,
          Requires<[In32BitMode]>;

def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
          (TCRETURNdi texternalsym:$dst, imm:$off)>,
          Requires<[In32BitMode]>;

def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
          (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
          Requires<[In64BitMode]>;

def : Pat<(X86tcret (load addr:$dst), imm:$off),
          (TCRETURNmi64 addr:$dst, imm:$off)>,
          Requires<[In64BitMode]>;

def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
          Requires<[In64BitMode]>;

def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
          (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
          Requires<[In64BitMode]>;

// Normal calls, with various flavors of addresses.
def : Pat<(X86call (i32 tglobaladdr:$dst)),
          (CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call (i32 texternalsym:$dst)),
          (CALLpcrel32 texternalsym:$dst)>;
def : Pat<(X86call (i32 imm:$dst)),
          (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;

// X86-specific add/sub nodes which also produce a carry flag.
def : Pat<(addc GR32:$src1, GR32:$src2),
          (ADD32rr GR32:$src1, GR32:$src2)>;
def : Pat<(addc GR32:$src1, (load addr:$src2)),
          (ADD32rm GR32:$src1, addr:$src2)>;
def : Pat<(addc GR32:$src1, imm:$src2),
          (ADD32ri GR32:$src1, imm:$src2)>;
def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;

def : Pat<(addc GR64:$src1, GR64:$src2),
          (ADD64rr GR64:$src1, GR64:$src2)>;
def : Pat<(addc GR64:$src1, (load addr:$src2)),
          (ADD64rm GR64:$src1, addr:$src2)>;
def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;

def : Pat<(subc GR32:$src1, GR32:$src2),
          (SUB32rr GR32:$src1, GR32:$src2)>;
def : Pat<(subc GR32:$src1, (load addr:$src2)),
          (SUB32rm GR32:$src1, addr:$src2)>;
def : Pat<(subc GR32:$src1, imm:$src2),
          (SUB32ri GR32:$src1, imm:$src2)>;
def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
          (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;

def : Pat<(subc GR64:$src1, GR64:$src2),
          (SUB64rr GR64:$src1, GR64:$src2)>;
def : Pat<(subc GR64:$src1, (load addr:$src2)),
          (SUB64rm GR64:$src1, addr:$src2)>;
def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
          (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(subc GR64:$src1, i64immSExt32:$src2),
          (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;

// Comparisons.

// TEST R,R is smaller than CMP R,0
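// For example, "testl %eax, %eax" is 2 bytes while "cmpl $0, %eax" is 3.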
def : Pat<(X86cmp GR8:$src1, 0),
          (TEST8rr GR8:$src1, GR8:$src1)>;
def : Pat<(X86cmp GR16:$src1, 0),
          (TEST16rr GR16:$src1, GR16:$src1)>;
def : Pat<(X86cmp GR32:$src1, 0),
          (TEST32rr GR32:$src1, GR32:$src1)>;
def : Pat<(X86cmp GR64:$src1, 0),
          (TEST64rr GR64:$src1, GR64:$src1)>;

// Conditional moves with folded loads, with operands swapped and conditions
// inverted.
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
          (CMOVAE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
          (CMOVAE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
          (CMOVB16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
          (CMOVB32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
          (CMOVNE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
          (CMOVNE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
          (CMOVE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
          (CMOVE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
          (CMOVA16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
          (CMOVA32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
          (CMOVBE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
          (CMOVBE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
          (CMOVGE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
          (CMOVGE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
          (CMOVL16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
          (CMOVL32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
          (CMOVG16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
          (CMOVG32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
          (CMOVLE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
          (CMOVLE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
          (CMOVNP16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
          (CMOVNP32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
          (CMOVP16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
          (CMOVP32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
          (CMOVNS16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
          (CMOVNS32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
          (CMOVS16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
          (CMOVS32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
          (CMOVNO16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
          (CMOVNO32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
          (CMOVO16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
          (CMOVO32rm GR32:$src2, addr:$src1)>;

def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS),
          (CMOVAE64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS),
          (CMOVB64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS),
          (CMOVNE64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS),
          (CMOVE64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS),
          (CMOVA64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS),
          (CMOVBE64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS),
          (CMOVGE64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS),
          (CMOVL64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS),
          (CMOVG64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS),
          (CMOVLE64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS),
          (CMOVNP64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS),
          (CMOVP64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS),
          (CMOVNS64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS),
          (CMOVS64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS),
          (CMOVNO64rm GR64:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS),
          (CMOVO64rm GR64:$src2, addr:$src1)>;


// zextload bool -> zextload byte
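// (These rely on i1 values being stored in memory with the upper seven bits
// zero, so a plain byte load already yields the zero-extended value.)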
def : Pat<(zextloadi8i1  addr:$src), (MOV8rm     addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;

// extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit
// registers, use zero-extending loads so that the entire 64-bit register is
// defined, avoiding partial-register updates.

def : Pat<(extloadi8i1 addr:$src),   (MOV8rm      addr:$src)>;
def : Pat<(extloadi16i1 addr:$src),  (MOVZX16rm8  addr:$src)>;
def : Pat<(extloadi32i1 addr:$src),  (MOVZX32rm8  addr:$src)>;
def : Pat<(extloadi16i8 addr:$src),  (MOVZX16rm8  addr:$src)>;
def : Pat<(extloadi32i8 addr:$src),  (MOVZX32rm8  addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;

def : Pat<(extloadi64i1 addr:$src),  (MOVZX64rm8  addr:$src)>;
def : Pat<(extloadi64i8 addr:$src),  (MOVZX64rm8  addr:$src)>;
def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
// For other extloads, use subregs, since the high contents of the register
// are defined after an extload.
def : Pat<(extloadi64i32 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
                         sub_32bit)>;
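// (A 32-bit load implicitly zeroes the upper 32 bits of the full 64-bit
// register, which is what makes the SUBREG_TO_REG above safe.)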

// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;

// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
def : Pat<(i32 (anyext GR16:$src)),
          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;

def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8  GR8 :$src)>;
def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16:$src)>;
def : Pat<(i64 (anyext GR32:$src)),
          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;

//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//

// Odd encoding trick: -128 fits into an 8-bit immediate field while
// +128 doesn't, so in this special case use a sub instead of an add.
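// For example, "addl $128, %eax" needs a 4-byte immediate (5 bytes total),
// while "subl $-128, %eax" fits the sign-extended imm8 form (3 bytes total).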
def : Pat<(add GR16:$src1, 128),
          (SUB16ri8 GR16:$src1, -128)>;
def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
          (SUB16mi8 addr:$dst, -128)>;

def : Pat<(add GR32:$src1, 128),
          (SUB32ri8 GR32:$src1, -128)>;
def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
          (SUB32mi8 addr:$dst, -128)>;

def : Pat<(add GR64:$src1, 128),
          (SUB64ri8 GR64:$src1, -128)>;
def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
          (SUB64mi8 addr:$dst, -128)>;

// The same trick applies for 32-bit immediate fields in 64-bit
// instructions.
def : Pat<(add GR64:$src1, 0x0000000080000000),
          (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
          (SUB64mi32 addr:$dst, 0xffffffff80000000)>;

// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it
// has an immediate with at least 32 bits of leading zeros, to avoid needing to
// materialize that immediate in a register first.
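// For example, "andq $0x7fffffff, %rax" can be selected as
// "andl $0x7fffffff, %eax": writing %eax implicitly clears the upper 32 bits
// of %rax, and the result's upper bits are known zero anyway.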
def : Pat<(and GR64:$src, i64immZExt32:$imm),
          (SUBREG_TO_REG
            (i64 0),
            (AND32ri
              (EXTRACT_SUBREG GR64:$src, sub_32bit),
              (i32 (GetLo32XForm imm:$imm))),
            sub_32bit)>;


// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
          (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
                                                             GR32_ABCD)),
                                      sub_8bit))>,
      Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
          (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
                                                             GR16_ABCD)),
                                      sub_8bit))>,
      Requires<[In32BitMode]>;

// r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
          (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
// r & (2^16-1) ==> movz
def : Pat<(and GR64:$src, 0xffff),
          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR64:$src, 0xff),
          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
          (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
      Requires<[In64BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
          (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
      Requires<[In64BitMode]>;


// sext_inreg patterns
def : Pat<(sext_inreg GR32:$src, i16),
          (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
          (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
                                                             GR32_ABCD)),
                                      sub_8bit))>,
      Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
          (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
                                                             GR16_ABCD)),
                                      sub_8bit))>,
      Requires<[In32BitMode]>;

def : Pat<(sext_inreg GR64:$src, i32),
          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
def : Pat<(sext_inreg GR64:$src, i16),
          (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
def : Pat<(sext_inreg GR64:$src, i8),
          (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
      Requires<[In64BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
          (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
      Requires<[In64BitMode]>;


// trunc patterns
def : Pat<(i16 (trunc GR32:$src)),
          (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR32:$src)),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
                          sub_8bit)>,
      Requires<[In32BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
          (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                          sub_8bit)>,
      Requires<[In32BitMode]>;
def : Pat<(i32 (trunc GR64:$src)),
          (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
def : Pat<(i16 (trunc GR64:$src)),
          (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR64:$src)),
          (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
def : Pat<(i8 (trunc GR32:$src)),
          (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
      Requires<[In64BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
          (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
      Requires<[In64BitMode]>;

// h-register tricks
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
          (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                          sub_8bit_hi)>,
      Requires<[In32BitMode]>;
def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
                          sub_8bit_hi)>,
      Requires<[In32BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
          (EXTRACT_SUBREG
            (MOVZX32rr8
              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                              sub_8bit_hi)),
            sub_16bit)>,
      Requires<[In32BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
          (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
                                                             GR16_ABCD)),
                                      sub_8bit_hi))>,
      Requires<[In32BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
          (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
                                                             GR16_ABCD)),
                                      sub_8bit_hi))>,
      Requires<[In32BitMode]>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
                                                             GR32_ABCD)),
                                      sub_8bit_hi))>,
      Requires<[In32BitMode]>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
                                                             GR32_ABCD)),
                                      sub_8bit_hi))>,
      Requires<[In32BitMode]>;

// h-register tricks.
// For now, be conservative on x86-64 and use an h-register extract only if
// the value is immediately zero-extended or stored, which are somewhat common
// cases. This uses a bunch of code to prevent a register requiring a REX
// prefix from being allocated in the same instruction as the h register, as
// there's currently no way to describe this requirement to the register
// allocator.
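// (An instruction carrying a REX prefix cannot encode AH/BH/CH/DH at all;
// e.g. "movzbl %ah, %ecx" is encodable but "movzbl %ah, %r8d" is not.  The
// _NOREX instruction forms constrain register allocation accordingly.)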

// h-register extract and zero-extend.
def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
          (SUBREG_TO_REG
            (i64 0),
            (MOVZX32_NOREXrr8
              (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
                              sub_8bit_hi)),
            sub_32bit)>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
          (MOVZX32_NOREXrr8
            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
                            sub_8bit_hi))>,
      Requires<[In64BitMode]>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
          (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
                                                                   GR32_ABCD)),
                                            sub_8bit_hi))>,
      Requires<[In64BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
          (EXTRACT_SUBREG
            (MOVZX32_NOREXrr8
              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                              sub_8bit_hi)),
            sub_16bit)>,
      Requires<[In64BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
          (MOVZX32_NOREXrr8
            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                            sub_8bit_hi))>,
      Requires<[In64BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
          (MOVZX32_NOREXrr8
            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                            sub_8bit_hi))>,
      Requires<[In64BitMode]>;
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
          (SUBREG_TO_REG
            (i64 0),
            (MOVZX32_NOREXrr8
              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                              sub_8bit_hi)),
            sub_32bit)>;
def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
          (SUBREG_TO_REG
            (i64 0),
            (MOVZX32_NOREXrr8
              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                              sub_8bit_hi)),
            sub_32bit)>;

// h-register extract and store.
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
                            sub_8bit_hi))>;
def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
                            sub_8bit_hi))>,
      Requires<[In64BitMode]>;
def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                            sub_8bit_hi))>,
      Requires<[In64BitMode]>;


// (shl x, 1) ==> (add x, x)
def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;

// (shl x (and y, 31)) ==> (shl x, y)
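// The hardware masks the count of 8/16/32-bit shifts to 5 bits (and of
// 64-bit shifts to 6 bits) anyway, so an explicit "and" of the count with
// 31 (or 63, below) is redundant and can be dropped.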
def : Pat<(shl GR8:$src1, (and CL, 31)),
          (SHL8rCL GR8:$src1)>;
def : Pat<(shl GR16:$src1, (and CL, 31)),
          (SHL16rCL GR16:$src1)>;
def : Pat<(shl GR32:$src1, (and CL, 31)),
          (SHL32rCL GR32:$src1)>;
def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
          (SHL8mCL addr:$dst)>;
def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
          (SHL16mCL addr:$dst)>;
def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
          (SHL32mCL addr:$dst)>;

def : Pat<(srl GR8:$src1, (and CL, 31)),
          (SHR8rCL GR8:$src1)>;
def : Pat<(srl GR16:$src1, (and CL, 31)),
          (SHR16rCL GR16:$src1)>;
def : Pat<(srl GR32:$src1, (and CL, 31)),
          (SHR32rCL GR32:$src1)>;
def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
          (SHR8mCL addr:$dst)>;
def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
          (SHR16mCL addr:$dst)>;
def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
          (SHR32mCL addr:$dst)>;

def : Pat<(sra GR8:$src1, (and CL, 31)),
          (SAR8rCL GR8:$src1)>;
def : Pat<(sra GR16:$src1, (and CL, 31)),
          (SAR16rCL GR16:$src1)>;
def : Pat<(sra GR32:$src1, (and CL, 31)),
          (SAR32rCL GR32:$src1)>;
def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
          (SAR8mCL addr:$dst)>;
def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
          (SAR16mCL addr:$dst)>;
def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
          (SAR32mCL addr:$dst)>;

// (shl x (and y, 63)) ==> (shl x, y)
def : Pat<(shl GR64:$src1, (and CL, 63)),
          (SHL64rCL GR64:$src1)>;
def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
          (SHL64mCL addr:$dst)>;

def : Pat<(srl GR64:$src1, (and CL, 63)),
          (SHR64rCL GR64:$src1)>;
def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
          (SHR64mCL addr:$dst)>;

def : Pat<(sra GR64:$src1, (and CL, 63)),
          (SAR64rCL GR64:$src1)>;
def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
          (SAR64mCL addr:$dst)>;


// (anyext (setcc_carry)) -> (setcc_carry)
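// (SETB_C expands to "sbb %reg, %reg", which materializes 0 or -1 from the
// carry flag; the value is identical at every width, so widening is free.)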
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
          (SETB_C16r)>;
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
          (SETB_C32r)>;
def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
          (SETB_C32r)>;

// (or x1, x2) -> (add x1, x2) if the two operands are known not to share bits.
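// (or_is_add only matches when it can be proven that no bit is set in both
// operands, so OR and ADD compute the same value; the add form is preferred
// because, among other things, it can be selected as a three-operand LEA.)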
let AddedComplexity = 5 in { // Try this before selecting to an OR.
def : Pat<(or_is_add GR16:$src1, imm:$src2),
          (ADD16ri GR16:$src1, imm:$src2)>;
def : Pat<(or_is_add GR32:$src1, imm:$src2),
          (ADD32ri GR32:$src1, imm:$src2)>;
def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
def : Pat<(or_is_add GR16:$src1, GR16:$src2),
          (ADD16rr GR16:$src1, GR16:$src2)>;
def : Pat<(or_is_add GR32:$src1, GR32:$src2),
          (ADD32rr GR32:$src1, GR32:$src2)>;
def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
def : Pat<(or_is_add GR64:$src1, GR64:$src2),
          (ADD64rr GR64:$src1, GR64:$src2)>;
} // AddedComplexity

//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//

// add reg, reg
def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr  GR8 :$src1, GR8 :$src2)>;
def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;

// add reg, mem
def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
          (ADD8rm GR8:$src1, addr:$src2)>;
def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
          (ADD16rm GR16:$src1, addr:$src2)>;
def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
          (ADD32rm GR32:$src1, addr:$src2)>;

// add reg, imm
def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri  GR8 :$src1, imm:$src2)>;
def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
def : Pat<(add GR16:$src1, i16immSExt8:$src2),
          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(add GR32:$src1, i32immSExt8:$src2),
          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;

// sub reg, reg
def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr  GR8 :$src1, GR8 :$src2)>;
def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;

// sub reg, mem
def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
          (SUB8rm GR8:$src1, addr:$src2)>;
def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
          (SUB16rm GR16:$src1, addr:$src2)>;
def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
          (SUB32rm GR32:$src1, addr:$src2)>;

// sub reg, imm
def : Pat<(sub GR8:$src1, imm:$src2),
          (SUB8ri GR8:$src1, imm:$src2)>;
def : Pat<(sub GR16:$src1, imm:$src2),
          (SUB16ri GR16:$src1, imm:$src2)>;
def : Pat<(sub GR32:$src1, imm:$src2),
          (SUB32ri GR32:$src1, imm:$src2)>;
def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
          (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
          (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;

// mul reg, reg
def : Pat<(mul GR16:$src1, GR16:$src2),
          (IMUL16rr GR16:$src1, GR16:$src2)>;
def : Pat<(mul GR32:$src1, GR32:$src2),
          (IMUL32rr GR32:$src1, GR32:$src2)>;

// mul reg, mem
def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
          (IMUL16rm GR16:$src1, addr:$src2)>;
def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
          (IMUL32rm GR32:$src1, addr:$src2)>;

// mul reg, imm
def : Pat<(mul GR16:$src1, imm:$src2),
          (IMUL16rri GR16:$src1, imm:$src2)>;
def : Pat<(mul GR32:$src1, imm:$src2),
          (IMUL32rri GR32:$src1, imm:$src2)>;
def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
          (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
          (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;

// reg = mul mem, imm
def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
          (IMUL16rmi addr:$src1, imm:$src2)>;
def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
          (IMUL32rmi addr:$src1, imm:$src2)>;
def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
          (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
          (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;

// Optimize multiply by 2 with EFLAGS result.
let AddedComplexity = 2 in {
def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
}

// Patterns for nodes that do not produce flags, for instructions that do.

// addition
def : Pat<(add GR64:$src1, GR64:$src2),
          (ADD64rr GR64:$src1, GR64:$src2)>;
def : Pat<(add GR64:$src1, i64immSExt8:$src2),
          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(add GR64:$src1, i64immSExt32:$src2),
          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
          (ADD64rm GR64:$src1, addr:$src2)>;

// subtraction
def : Pat<(sub GR64:$src1, GR64:$src2),
          (SUB64rr GR64:$src1, GR64:$src2)>;
def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
          (SUB64rm GR64:$src1, addr:$src2)>;
def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
          (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
          (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;

// Multiply
def : Pat<(mul GR64:$src1, GR64:$src2),
          (IMUL64rr GR64:$src1, GR64:$src2)>;
def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
          (IMUL64rm GR64:$src1, addr:$src2)>;
def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
          (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
          (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
          (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
          (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;

// Increment reg.
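// In 64-bit mode the one-byte inc/dec opcodes (0x40-0x4F) are repurposed as
// REX prefixes, so the In64BitMode patterns below select the INC64_*/DEC64_*
// forms, which use the two-byte "FF /0" and "FF /1" encodings instead.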
def : Pat<(add GR8 :$src, 1), (INC8r     GR8 :$src)>;
def : Pat<(add GR16:$src, 1), (INC16r    GR16:$src)>, Requires<[In32BitMode]>;
def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
def : Pat<(add GR32:$src, 1), (INC32r    GR32:$src)>, Requires<[In32BitMode]>;
def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
def : Pat<(add GR64:$src, 1), (INC64r    GR64:$src)>;

// Decrement reg.
def : Pat<(add GR8 :$src, -1), (DEC8r     GR8 :$src)>;
def : Pat<(add GR16:$src, -1), (DEC16r    GR16:$src)>, Requires<[In32BitMode]>;
def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
def : Pat<(add GR32:$src, -1), (DEC32r    GR32:$src)>, Requires<[In32BitMode]>;
def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
def : Pat<(add GR64:$src, -1), (DEC64r    GR64:$src)>;

// or reg/reg.
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr  GR8 :$src1, GR8 :$src2)>;
def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;

// or reg/mem
def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
          (OR8rm GR8:$src1, addr:$src2)>;
def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
          (OR16rm GR16:$src1, addr:$src2)>;
def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
          (OR32rm GR32:$src1, addr:$src2)>;
def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
          (OR64rm GR64:$src1, addr:$src2)>;

// or reg/imm
def : Pat<(or GR8 :$src1, imm:$src2), (OR8ri  GR8 :$src1, imm:$src2)>;
def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
def : Pat<(or GR16:$src1, i16immSExt8:$src2),
          (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(or GR32:$src1, i32immSExt8:$src2),
          (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
def : Pat<(or GR64:$src1, i64immSExt8:$src2),
          (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(or GR64:$src1, i64immSExt32:$src2),
          (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;

// xor reg/reg
def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr  GR8 :$src1, GR8 :$src2)>;
def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;

// xor reg/mem
def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
          (XOR8rm GR8:$src1, addr:$src2)>;
def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
          (XOR16rm GR16:$src1, addr:$src2)>;
def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
          (XOR32rm GR32:$src1, addr:$src2)>;
def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
          (XOR64rm GR64:$src1, addr:$src2)>;

// xor reg/imm
def : Pat<(xor GR8:$src1, imm:$src2),
          (XOR8ri GR8:$src1, imm:$src2)>;
def : Pat<(xor GR16:$src1, imm:$src2),
          (XOR16ri GR16:$src1, imm:$src2)>;
def : Pat<(xor GR32:$src1, imm:$src2),
          (XOR32ri GR32:$src1, imm:$src2)>;
def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
          (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
          (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
          (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
          (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;

// and reg/reg
def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr  GR8 :$src1, GR8 :$src2)>;
def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;

// and reg/mem
def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
          (AND8rm GR8:$src1, addr:$src2)>;
def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
          (AND16rm GR16:$src1, addr:$src2)>;
def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
          (AND32rm GR32:$src1, addr:$src2)>;
def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
          (AND64rm GR64:$src1, addr:$src2)>;

// and reg/imm
def : Pat<(and GR8:$src1, imm:$src2),
          (AND8ri GR8:$src1, imm:$src2)>;
def : Pat<(and GR16:$src1, imm:$src2),
          (AND16ri GR16:$src1, imm:$src2)>;
def : Pat<(and GR32:$src1, imm:$src2),
          (AND32ri GR32:$src1, imm:$src2)>;
def : Pat<(and GR16:$src1, i16immSExt8:$src2),
          (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(and GR32:$src1, i32immSExt8:$src2),
          (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
def : Pat<(and GR64:$src1, i64immSExt8:$src2),
          (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(and GR64:$src1, i64immSExt32:$src2),
          (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
