Blame - llvm/lib/Target/X86/X86InstrCompiler.td - toolchain/llvm-project

2015-02-01 16:15:07 +0000

[diff] [blame]

1

//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//

2

//

3

// The LLVM Compiler Infrastructure

4

//

5

// This file is distributed under the University of Illinois Open Source

6

// License. See LICENSE.TXT for details.

7

//

8

//===----------------------------------------------------------------------===//

9

//

10

// This file describes the various pseudo instructions used by the compiler,

11

// as well as Pat patterns used during instruction selection.

12

//

13

//===----------------------------------------------------------------------===//

14

15

//===----------------------------------------------------------------------===//

16

// Pattern Matching Support

17

18

// Immediate transform: keep only the low 32 bits of the matched constant.
def GetLo32XForm : SDNodeXForm<imm, [{
  // Truncate the zero-extended value to 'unsigned' and rebuild it as an i32
  // immediate located at SDLoc(N).
  const unsigned Lo32 = static_cast<unsigned>(N->getZExtValue());
  return getI32Imm(Lo32, SDLoc(N));
}]>;

22

23

// Immediate transform: keep only the low 8 bits of the matched constant.
def GetLo8XForm : SDNodeXForm<imm, [{
  // Truncate the zero-extended value to uint8_t and rebuild it as an i8
  // immediate located at SDLoc(N).
  const uint8_t Lo8 = static_cast<uint8_t>(N->getZExtValue());
  return getI8Imm(Lo8, SDLoc(N));
}]>;

//===----------------------------------------------------------------------===//

30

// Random Pseudo Instructions.

31

32

// PIC base construction.  This pseudo expands to code of the form:
//    call  $next_inst
//    popl  %destreg
let Uses = [ESP], isNotDuplicable = 1, hasSideEffects = 0 in {
  def MOVPC32r : Ii32<0xE8, Pseudo,
                      (outs GR32:$reg), (ins i32imm:$label),
                      "", []>;
}

// The 32-bit call-frame pseudos implicitly use and define ESP: they may be
// expanded into a stack adjustment, and codegen must know that the stack
// pointer can change before prolog-epilog rewriting occurs.  EFLAGS is listed
// as a def on the pessimistic assumption that they become sub / add.
let Uses = [ESP], Defs = [ESP, EFLAGS] in {
  def ADJCALLSTACKDOWN32 : I<0, Pseudo,
                             (outs), (ins i32imm:$amt1, i32imm:$amt2),
                             "#ADJCALLSTACKDOWN", []>,
                           Requires<[NotLP64]>;

  def ADJCALLSTACKUP32 : I<0, Pseudo,
                           (outs), (ins i32imm:$amt1, i32imm:$amt2),
                           "#ADJCALLSTACKUP",
                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
                         Requires<[NotLP64]>;
}

Michael Kuperstein

2015-02-01 16:56:04 +0000

[diff] [blame]

55

// Select X86callseq_start to ADJCALLSTACKDOWN32 with a zero second operand
// when not targeting LP64.
let Predicates = [NotLP64] in
def : Pat<(X86callseq_start timm:$amt1),
          (ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>;

57

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

58

59

// The 64-bit call-frame pseudos implicitly use and define RSP: they may be
// expanded into a stack adjustment, and codegen must know that the stack
// pointer can change before prolog-epilog rewriting occurs.  EFLAGS is listed
// as a def on the pessimistic assumption that they become sub / add.
let Uses = [RSP], Defs = [RSP, EFLAGS] in {
  def ADJCALLSTACKDOWN64 : I<0, Pseudo,
                             (outs), (ins i32imm:$amt1, i32imm:$amt2),
                             "#ADJCALLSTACKDOWN", []>,
                           Requires<[IsLP64]>;

  def ADJCALLSTACKUP64 : I<0, Pseudo,
                           (outs), (ins i32imm:$amt1, i32imm:$amt2),
                           "#ADJCALLSTACKUP",
                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
                         Requires<[IsLP64]>;
}

Michael Kuperstein

2015-02-01 16:56:04 +0000

[diff] [blame]

74

// Select X86callseq_start to ADJCALLSTACKDOWN64 with a zero second operand
// when targeting LP64.
let Predicates = [IsLP64] in
def : Pat<(X86callseq_start timm:$amt1),
          (ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

76

77

78

// x86-64 va_start lowering magic.  Everything in this group uses a custom
// inserter; Defs defaults to [EFLAGS] unless a nested 'let' replaces it.
let Defs = [EFLAGS], usesCustomInserter = 1 in {
  def VASTART_SAVE_XMM_REGS
      : I<0, Pseudo,
          (outs),
          (ins GR8:$al, i64imm:$regsavefi, i64imm:$offset, variable_ops),
          "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
          [(X86vastart_save_xmm_regs GR8:$al, imm:$regsavefi, imm:$offset),
           (implicit EFLAGS)]>;

  // VAARG_64 takes the address of the va_list and places the address of the
  // next argument into a register.
  let Defs = [EFLAGS] in {
    def VAARG_64
        : I<0, Pseudo,
            (outs GR64:$dst),
            (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
            "#VAARG_64 $dst, $ap, $size, $mode, $align",
            [(set GR64:$dst,
                  (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
             (implicit EFLAGS)]>;
  }

  // With segmented stacks these are lowered into instructions that first
  // check whether the current stacklet has enough free memory.  If it does,
  // memory is allocated by bumping the stack pointer; otherwise it is
  // allocated from the heap.
  let Uses = [ESP], Defs = [EAX, ESP, EFLAGS] in {
    def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
                          "# variable sized alloca for segmented stacks",
                          [(set GR32:$dst, (X86SegAlloca GR32:$size))]>,
                        Requires<[NotLP64]>;
  }

  let Uses = [RSP], Defs = [RAX, RSP, EFLAGS] in {
    def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
                          "# variable sized alloca for segmented stacks",
                          [(set GR64:$dst, (X86SegAlloca GR64:$size))]>,
                        Requires<[In64BitMode]>;
  }
}

122

Hans Wennborg

8eb336c

2016-05-18 16:10:17 +0000

[diff] [blame]

123

// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
// targets.  These calls are needed to probe the stack when allocating more
// than 4k bytes in one go: touching the stack at 4K increments is necessary
// to ensure that the guard pages used by the OS virtual memory manager are
// allocated in the correct sequence.
// The main point of having separate instructions is to capture the extra
// unmodelled effects (compared to ordinary calls), such as the stack pointer
// change.
let Uses = [ESP], Defs = [EAX, ESP, EFLAGS] in {
  def WIN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size),
                        "# dynamic stack allocation",
                        [(X86WinAlloca GR32:$size)]>,
                      Requires<[NotLP64]>;
}

136

137

// 64-bit dynamic stack allocation pseudo; modelled as using RSP and defining
// RAX, RSP and EFLAGS.
let Uses = [RSP], Defs = [RAX, RSP, EFLAGS] in {
  def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
                        "# dynamic stack allocation",
                        [(X86WinAlloca GR64:$size)]>,
                      Requires<[In64BitMode]>;
}

142

143

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

144

//===----------------------------------------------------------------------===//

145

// EH Pseudo Instructions

146

//

147

let SchedRW = [WriteSystem] in {

148

// EH return taking a 32-bit address; flagged as a terminator, return and
// barrier, and only used by codegen.
let isCodeGenOnly = 1, hasCtrlDep = 1,
    isBarrier = 1, isReturn = 1, isTerminator = 1 in
def EH_RETURN : I<0xC3, RawFrm,
                  (outs), (ins GR32:$addr),
                  "ret\t#eh_return, addr: $addr",
                  [(X86ehret GR32:$addr)], IIC_RET>,
                Sched<[WriteJumpLd]>;

// EH return taking a 64-bit address; flagged as a terminator, return and
// barrier, and only used by codegen.
let isCodeGenOnly = 1, hasCtrlDep = 1,
    isBarrier = 1, isReturn = 1, isTerminator = 1 in
def EH_RETURN64 : I<0xC3, RawFrm,
                    (outs), (ins GR64:$addr),
                    "ret\t#eh_return, addr: $addr",
                    [(X86ehret GR64:$addr)], IIC_RET>,
                  Sched<[WriteJumpLd]>;

Reid Kleckner

2015-11-06 01:49:05 +0000

[diff] [blame]

164

// Funclet return pseudos: both are terminators, returns and barriers with
// side effects, and exist only for codegen.
let isReturn = 1, isCodeGenOnly = 1, hasCtrlDep = 1,
    isBarrier = 1, hasSideEffects = 1, isTerminator = 1 in {
  def CLEANUPRET : I<0, Pseudo, (outs), (ins),
                     "# CLEANUPRET",
                     [(cleanupret)]>;

  // CATCHRET needs a custom inserter for SEH.
  let usesCustomInserter = 1 in {
    def CATCHRET : I<0, Pseudo,
                     (outs), (ins brtarget32:$dst, brtarget32:$from),
                     "# CATCHRET",
                     [(catchret bb:$dst, bb:$from)]>;
  }
}

174

Reid Kleckner

420f054

2015-11-09 23:34:42 +0000

[diff] [blame]

175

// CATCHPAD pseudo: codegen-only, has side effects, and is expanded by a
// custom inserter.
let usesCustomInserter = 1, isCodeGenOnly = 1,
    hasCtrlDep = 1, hasSideEffects = 1 in {
  def CATCHPAD : I<0, Pseudo, (outs), (ins),
                   "# CATCHPAD",
                   [(catchpad)]>;
}

178

Reid Kleckner

51460c1

2015-11-06 01:49:05 +0000

[diff] [blame]

179

// EH_RESTORE re-establishes the stack pointers after an exception has been
// caught and control rejoins the normal flow of the parent function or
// funclet.  It generally sets ESP and EBP, and optionally ESI.  Only 32-bit
// WinEH needs it, because the runtime restores CSRs for us elsewhere.
let isCodeGenOnly = 1, hasCtrlDep = 1, hasSideEffects = 1 in {
  def EH_RESTORE : I<0, Pseudo, (outs), (ins), "# EH_RESTORE", []>;
}

186

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

187

// SjLj setjmp / longjmp pseudos.  All four are barriers with side effects,
// are codegen-only, and are expanded by a custom inserter; the longjmp
// variants are additionally terminators.
let usesCustomInserter = 1, isCodeGenOnly = 1,
    isBarrier = 1, hasSideEffects = 1 in {
  def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
                           "#EH_SJLJ_SETJMP32",
                           [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
                         Requires<[Not64BitMode]>;

  def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
                           "#EH_SJLJ_SETJMP64",
                           [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
                         Requires<[In64BitMode]>;

  let isTerminator = 1 in {
    def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
                              "#EH_SJLJ_LONGJMP32",
                              [(X86eh_sjlj_longjmp addr:$buf)]>,
                            Requires<[Not64BitMode]>;

    def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
                              "#EH_SJLJ_LONGJMP64",
                              [(X86eh_sjlj_longjmp addr:$buf)]>,
                            Requires<[In64BitMode]>;
  } // isTerminator = 1
}

} // SchedRW

// SjLj setup pseudo: a codegen-only terminating branch to $dst with no
// selection pattern.
let isCodeGenOnly = 1, isTerminator = 1, isBranch = 1 in
def EH_SjLj_Setup : I<0, Pseudo,
                      (outs), (ins brtarget:$dst),
                      "#EH_SjLj_Setup\t$dst",
                      []>;

214

215

//===----------------------------------------------------------------------===//

216

// Pseudo instructions used by unwind info.

217

//

218

// Unwind-info pseudos.  None has outputs or a selection pattern; each only
// carries the immediate operands named in its assembly string.
let isPseudo = 1 in {
  def SEH_PushReg     : I<0, Pseudo, (outs),
                          (ins i32imm:$reg),
                          "#SEH_PushReg $reg", []>;

  def SEH_SaveReg     : I<0, Pseudo, (outs),
                          (ins i32imm:$reg, i32imm:$dst),
                          "#SEH_SaveReg $reg, $dst", []>;

  def SEH_SaveXMM     : I<0, Pseudo, (outs),
                          (ins i32imm:$reg, i32imm:$dst),
                          "#SEH_SaveXMM $reg, $dst", []>;

  def SEH_StackAlloc  : I<0, Pseudo, (outs),
                          (ins i32imm:$size),
                          "#SEH_StackAlloc $size", []>;

  def SEH_SetFrame    : I<0, Pseudo, (outs),
                          (ins i32imm:$reg, i32imm:$offset),
                          "#SEH_SetFrame $reg, $offset", []>;

  def SEH_PushFrame   : I<0, Pseudo, (outs),
                          (ins i1imm:$mode),
                          "#SEH_PushFrame $mode", []>;

  def SEH_EndPrologue : I<0, Pseudo, (outs),
                          (ins),
                          "#SEH_EndPrologue", []>;

  def SEH_Epilogue    : I<0, Pseudo, (outs),
                          (ins),
                          "#SEH_Epilogue", []>;
} // isPseudo = 1

236

237

//===----------------------------------------------------------------------===//

238

// Pseudo instructions used by segmented stacks.

239

//

240

241

let isPseudo = 1 in {
  // Lowered into a RET instruction by MCInstLower.  It exists so that we do
  // not need a MachineBasicBlock that ends with a RET and also has
  // successors.
  def MORESTACK_RET : I<0, Pseudo, (outs), (ins), "", []>;

  // Lowered to a RET followed by a MOV.  The two instructions are not
  // generated at a higher level because the verifier would then see a
  // MachineBasicBlock ending with a non-terminator.
  def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), "", []>;
} // isPseudo = 1

//===----------------------------------------------------------------------===//

256

// Alias Instructions

257

//===----------------------------------------------------------------------===//

258

259

// Alias pseudo that maps "move zero into a register" onto xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let AddedComplexity = 20, isPseudo = 1,
    isAsCheapAsAMove = 1, isReMaterializable = 1,
    Defs = [EFLAGS] in {
  def MOV32r0 : I<0, Pseudo,
                  (outs GR32:$dst), (ins),
                  "",
                  [(set GR32:$dst, 0)], IIC_ALU_NONMEM>,
                Sched<[WriteZero]>;
}

265

266

// Zero of any other width is also produced through the 32-bit xor, which may
// have a smaller encoding and avoids partial register updates.
def : Pat<(i8 0),
          (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
def : Pat<(i16 0),
          (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
let AddedComplexity = 20 in
def : Pat<(i64 0),
          (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>;

273

Hans Wennborg

08d5905

2015-12-15 17:10:28 +0000

[diff] [blame]

274

let AddedComplexity = 15,
    Predicates = [OptForSize, NotSlowIncDec, Not64BitMode] in {
  // Pseudos that materialize 1 and -1 using XOR+INC/DEC, which only needs
  // 3 bytes compared to the 5 bytes required by MOV32ri.
  let isPseudo = 1, isReMaterializable = 1, Defs = [EFLAGS] in {
    def MOV32r1  : I<0, Pseudo, (outs GR32:$dst), (ins), "",
                     [(set GR32:$dst, 1)]>;
    def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
                     [(set GR32:$dst, -1)]>;
  }

  // MOV16ri is 4 bytes, so the pseudos above are smaller for i16 as well.
  def : Pat<(i16 1),
            (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;
  def : Pat<(i16 -1),
            (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
}

289

Hans Wennborg

4ae5119

2016-03-25 01:10:56 +0000

[diff] [blame]

290

// Pseudos for materializing sign-extended 8-bit immediates.
// AddedComplexity is higher than MOV64ri but lower than MOV32r0 and MOV32r1.
// FIXME: Add itinerary class and Schedule.
let AddedComplexity = 10, isPseudo = 1, isReMaterializable = 1 in {
  def MOV32ImmSExti8 : I<0, Pseudo,
                         (outs GR32:$dst), (ins i32i8imm:$src),
                         "",
                         [(set GR32:$dst, i32immSExt8:$src)]>,
                       Requires<[OptForMinSize, NotWin64WithoutFP]>;

  def MOV64ImmSExti8 : I<0, Pseudo,
                         (outs GR64:$dst), (ins i64i8imm:$src),
                         "",
                         [(set GR64:$dst, i64immSExt8:$src)]>,
                       Requires<[OptForMinSize, NotWin64WithoutFP]>;
}

300

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

301

// Materializes an i64 constant whose top 32 bits are zero.  In theory this
// could be MOV32ri wrapped in a SUBREG_TO_REG to represent the
// zero-extension, but that would make rematerialization more difficult.
let hasSideEffects = 0, isPseudo = 1,
    isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def MOV32ri64 : I<0, Pseudo,
                    (outs GR32:$dst), (ins i64i32imm:$src),
                    "", []>;
}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

307

308

// Complex pattern for the 64-bit pseudo-move.  It covers both a 64-bit
// constant that is really the zero-extension of a 32-bit constant and labels
// in the x86-64 small code model.
def mov64imm32
    : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

312

313

// Select an i64 mov64imm32 operand to MOV32ri64 placed in the low 32 bits of
// the 64-bit result via SUBREG_TO_REG.
let AddedComplexity = 1 in {
  def : Pat<(i64 mov64imm32:$src),
            (SUBREG_TO_REG (i64 0),
                           (MOV32ri64 mov64imm32:$src),
                           sub_32bit)>;
}

316

317

// Use sbb to materialize the carry bit.
let SchedRW = [WriteALU], isPseudo = 1,
    Defs = [EFLAGS], Uses = [EFLAGS] in {
  // FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
  // However, Pat<> can't replicate the destination reg into the inputs of
  // the result.
  def SETB_C8r  : I<0, Pseudo, (outs GR8:$dst), (ins), "",
                    [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

  def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
                    [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

  def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
                    [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

  def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
                    [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
} // Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU]

// An any-extended i8 carry materialization is selected directly to the wider
// SETB_C pseudo.
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>;
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>;
def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C64r)>;

// A sign-extended i8 carry materialization is selected the same way.
def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>;
def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>;
def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C64r)>;

346

347

// 'setb' is canonicalized to "(and (sbb reg,reg), 1)" in the hope that the
// 'and' gets eliminated and the sbb can be extended to a wider type.  That is
// great when it happens; but if an 8-bit sbb plus an 'and' is all that is
// left, we might as well match it as a plain setb.
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), (SETBr)>;

353

354

// (add OP, SETB) -> (adc OP, 0), for 8-, 32- and 64-bit operands.
def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
               GR8:$op),
          (ADC8ri GR8:$op, 0)>;
def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1),
               GR32:$op),
          (ADC32ri8 GR32:$op, 0)>;
def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1),
               GR64:$op),
          (ADC64ri8 GR64:$op, 0)>;

361

362

// (sub OP, SETB) -> (sbb OP, 0), for 8-, 32- and 64-bit operands.
def : Pat<(sub GR8:$op,
               (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
          (SBB8ri GR8:$op, 0)>;
def : Pat<(sub GR32:$op,
               (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
          (SBB32ri8 GR32:$op, 0)>;
def : Pat<(sub GR64:$op,
               (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
          (SBB64ri8 GR64:$op, 0)>;

369

370

// (sub OP, SETCC_CARRY) -> (adc OP, 0), for 8-, 32- and 64-bit operands.
def : Pat<(sub GR8:$op,
               (i8 (X86setcc_c X86_COND_B, EFLAGS))),
          (ADC8ri GR8:$op, 0)>;
def : Pat<(sub GR32:$op,
               (i32 (X86setcc_c X86_COND_B, EFLAGS))),
          (ADC32ri8 GR32:$op, 0)>;
def : Pat<(sub GR64:$op,
               (i64 (X86setcc_c X86_COND_B, EFLAGS))),
          (ADC64ri8 GR64:$op, 0)>;

377

378

//===----------------------------------------------------------------------===//

379

// String Pseudo Instructions

380

//

381

let SchedRW = [WriteMicrocoded] in {

382

// rep movs for non-64-bit mode: each form both uses and defines ECX, EDI and
// ESI.
let isCodeGenOnly = 1, Uses = [ECX,EDI,ESI], Defs = [ECX,EDI,ESI] in {
  def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins),
                       "{rep;movsb|rep movsb}",
                       [(X86rep_movs i8)], IIC_REP_MOVS>,
                     REP, Requires<[Not64BitMode]>;

  def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins),
                       "{rep;movsw|rep movsw}",
                       [(X86rep_movs i16)], IIC_REP_MOVS>,
                     REP, OpSize16, Requires<[Not64BitMode]>;

  def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins),
                       "{rep;movsl|rep movsd}",
                       [(X86rep_movs i32)], IIC_REP_MOVS>,
                     REP, OpSize32, Requires<[Not64BitMode]>;
}

393

394

// rep movs for 64-bit mode: each form both uses and defines RCX, RDI and RSI.
let isCodeGenOnly = 1, Uses = [RCX,RDI,RSI], Defs = [RCX,RDI,RSI] in {
  def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins),
                       "{rep;movsb|rep movsb}",
                       [(X86rep_movs i8)], IIC_REP_MOVS>,
                     REP, Requires<[In64BitMode]>;

  def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins),
                       "{rep;movsw|rep movsw}",
                       [(X86rep_movs i16)], IIC_REP_MOVS>,
                     REP, OpSize16, Requires<[In64BitMode]>;

  def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins),
                       "{rep;movsl|rep movsd}",
                       [(X86rep_movs i32)], IIC_REP_MOVS>,
                     REP, OpSize32, Requires<[In64BitMode]>;

  def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins),
                        "{rep;movsq|rep movsq}",
                        [(X86rep_movs i64)], IIC_REP_MOVS>,
                     REP, Requires<[In64BitMode]>;
}

408

409

// rep stos for non-64-bit mode.  All forms define ECX and EDI; the implicit
// uses differ only in the accumulator sub-register that is stored.
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
let isCodeGenOnly = 1, Defs = [ECX,EDI] in {
  let Uses = [AL,ECX,EDI] in {
    def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins),
                         "{rep;stosb|rep stosb}",
                         [(X86rep_stos i8)], IIC_REP_STOS>,
                       REP, Requires<[Not64BitMode]>;
  }

  let Uses = [AX,ECX,EDI] in {
    def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins),
                         "{rep;stosw|rep stosw}",
                         [(X86rep_stos i16)], IIC_REP_STOS>,
                       REP, OpSize16, Requires<[Not64BitMode]>;
  }

  let Uses = [EAX,ECX,EDI] in {
    def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins),
                         "{rep;stosl|rep stosd}",
                         [(X86rep_stos i32)], IIC_REP_STOS>,
                       REP, OpSize32, Requires<[Not64BitMode]>;
  }
}

424

425

// rep stos for 64-bit mode.  All forms define RCX and RDI; each one lists
// its own implicit uses.
let isCodeGenOnly = 1, Defs = [RCX,RDI] in {
  let Uses = [AL,RCX,RDI] in {
    def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins),
                         "{rep;stosb|rep stosb}",
                         [(X86rep_stos i8)], IIC_REP_STOS>,
                       REP, Requires<[In64BitMode]>;
  }

  let Uses = [AX,RCX,RDI] in {
    def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins),
                         "{rep;stosw|rep stosw}",
                         [(X86rep_stos i16)], IIC_REP_STOS>,
                       REP, OpSize16, Requires<[In64BitMode]>;
  }

  let Uses = [RAX,RCX,RDI] in {
    def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins),
                         "{rep;stosl|rep stosd}",
                         [(X86rep_stos i32)], IIC_REP_STOS>,
                       REP, OpSize32, Requires<[In64BitMode]>;
  }

  let Uses = [RAX,RCX,RDI] in {
    def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins),
                          "{rep;stosq|rep stosq}",
                          [(X86rep_stos i64)], IIC_REP_STOS>,
                       REP, Requires<[In64BitMode]>;
  }
}

} // SchedRW

//===----------------------------------------------------------------------===//

447

// Thread Local Storage Instructions

//

// ELF TLS Support

// 32-bit ELF TLS pseudos.  All calls clobber the non-callee saved registers.
// ESP is marked as a use so that stack-pointer assignments appearing
// immediately before a call do not potentially look dead.
let Uses = [ESP], usesCustomInserter = 1,
    Defs = [EAX, ECX, EDX,
            FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
            EFLAGS] in {
  def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                     "# TLS_addr32",
                     [(X86tlsaddr tls32addr:$sym)]>,
                   Requires<[Not64BitMode]>;

  def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                          "# TLS_base_addr32",
                          [(X86tlsbaseaddr tls32baseaddr:$sym)]>,
                        Requires<[Not64BitMode]>;
}

469

470

// 64-bit ELF TLS pseudos.  All calls clobber the non-callee saved registers.
// RSP is marked as a use so that stack-pointer assignments appearing
// immediately before a call do not potentially look dead.
let Uses = [RSP], usesCustomInserter = 1,
    Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
            FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
            EFLAGS] in {
  def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
                     "# TLS_addr64",
                     [(X86tlsaddr tls64addr:$sym)]>,
                   Requires<[In64BitMode]>;

  def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
                          "# TLS_base_addr64",
                          [(X86tlsbaseaddr tls64baseaddr:$sym)]>,
                        Requires<[In64BitMode]>;
}

489

490

// Darwin TLS Support

491

// On i386 the address of the thunk is passed on the stack and, on return,
// the address of the variable is in %eax.  %ecx is trashed during the
// function call.  All other registers are preserved.
let usesCustomInserter = 1, Uses = [ESP], Defs = [EAX, ECX, EFLAGS] in {
  def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                     "# TLSCall_32",
                     [(X86TLSCall addr:$sym)]>,
                   Requires<[Not64BitMode]>;
}

501

Quentin Colombet

d6dbec4

2016-04-27 21:37:37 +0000

[diff] [blame]

502

// On x86_64 the address of the thunk is passed in %rdi, but the pseudo uses
// the symbol directly, so no implicit use of %rdi is added here; the lowering
// will do the right thing with RDI.  On return the address of the variable is
// in %rax.  All other registers are preserved.
let usesCustomInserter = 1, Uses = [RSP], Defs = [RAX, EFLAGS] in {
  def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
                     "# TLSCall_64",
                     [(X86TLSCall addr:$sym)]>,
                   Requires<[In64BitMode]>;
}

514

515

516

//===----------------------------------------------------------------------===//

517

// Conditional Move Pseudo Instructions

518

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

519

// CMOV* pseudos implement the SELECT DAG operation and are expanded into a
// branch sequence after instruction selection.
//   RC - register class of the result and of both selected values.
//   VT - value type given to the X86cmov node in the pattern.
multiclass CMOVrr_PSEUDO<RegisterClass RC, ValueType VT> {
  def CMOV#NAME
      : I<0, Pseudo,
          (outs RC:$dst),
          (ins RC:$t, RC:$f, i8imm:$cond),
          "#CMOV_"#NAME#" PSEUDO!",
          [(set RC:$dst,
                (VT (X86cmov RC:$t, RC:$f, imm:$cond, EFLAGS)))]>;
}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

529

// Instantiations of CMOVrr_PSEUDO.  Every pseudo reads EFLAGS and is expanded
// by a custom inserter.
let Uses = [EFLAGS], usesCustomInserter = 1 in {
  // X86 doesn't have 8-bit conditional moves, so a custom inserter emits
  // control flow instead.  The alternative would be to mark i8 SELECT as
  // Promote, but that requires promoting the operands and can induce
  // additional i8 register pressure.
  defm _GR8   : CMOVrr_PSEUDO<GR8, i8>;

  let Predicates = [NoCMov] in {
    defm _GR32 : CMOVrr_PSEUDO<GR32, i32>;
    defm _GR16 : CMOVrr_PSEUDO<GR16, i16>;
  } // Predicates = [NoCMov]

  // fcmov doesn't handle all possible EFLAGS, so provide a fallback for when
  // there is no SSE1/SSE2.
  let Predicates = [FPStackf32] in {
    defm _RFP32 : CMOVrr_PSEUDO<RFP32, f32>;
  }

  let Predicates = [FPStackf64] in {
    defm _RFP64 : CMOVrr_PSEUDO<RFP64, f64>;
  }

  defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;

  defm _FR32   : CMOVrr_PSEUDO<FR32,  f32>;
  defm _FR64   : CMOVrr_PSEUDO<FR64,  f64>;
  defm _FR128  : CMOVrr_PSEUDO<FR128, f128>;
  defm _V4F32  : CMOVrr_PSEUDO<VR128, v4f32>;
  defm _V2F64  : CMOVrr_PSEUDO<VR128, v2f64>;
  defm _V2I64  : CMOVrr_PSEUDO<VR128, v2i64>;
  defm _V8F32  : CMOVrr_PSEUDO<VR256, v8f32>;
  defm _V4F64  : CMOVrr_PSEUDO<VR256, v4f64>;
  defm _V4I64  : CMOVrr_PSEUDO<VR256, v4i64>;
  defm _V8I64  : CMOVrr_PSEUDO<VR512, v8i64>;
  defm _V8F64  : CMOVrr_PSEUDO<VR512, v8f64>;
  defm _V16F32 : CMOVrr_PSEUDO<VR512, v16f32>;
  defm _V8I1   : CMOVrr_PSEUDO<VK8,   v8i1>;
  defm _V16I1  : CMOVrr_PSEUDO<VK16,  v16i1>;
  defm _V32I1  : CMOVrr_PSEUDO<VK32,  v32i1>;
  defm _V64I1  : CMOVrr_PSEUDO<VK64,  v64i1>;
} // usesCustomInserter = 1, Uses = [EFLAGS]

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

568

569

//===----------------------------------------------------------------------===//

570

// Normal-Instructions-With-Lock-Prefix Pseudo Instructions

571

//===----------------------------------------------------------------------===//

572

573

// FIXME: Use normal instructions and add lock prefix dynamically.

// Memory barriers

// TODO: Get this to fold the constant into the instruction.

578

// LOCK-prefixed 32-bit 'or' of a register into memory, codegen-only and
// restricted to non-64-bit mode.  It has no selection pattern.
let Defs = [EFLAGS], isCodeGenOnly = 1 in {
  def OR32mrLocked : I<0x09, MRMDestMem,
                       (outs), (ins i32mem:$dst, GR32:$zero),
                       "or{l}\t{$zero, $dst|$dst, $zero}",
                       [],
                       IIC_ALU_MEM>,
                     Requires<[Not64BitMode]>, OpSize32, LOCK,
                     Sched<[WriteALULd, WriteRMW]>;
}

583

584

// Memory barrier pseudo matched from X86MemBarrier; marked as having side
// effects.
let hasSideEffects = 1 in {
  def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
                         "#MEMBARRIER",
                         [(X86MemBarrier)]>,
                       Sched<[WriteLoad]>;
}

588

589

// RegOpc corresponds to the mr version of the instruction

590

// ImmOpc corresponds to the mi version of the instruction

591

// ImmOpc8 corresponds to the mi8 version of the instruction

592

// ImmMod corresponds to the instruction format of the mi and mi8 versions

593

multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

594

Format ImmMod, SDPatternOperator Op, string mnemonic> {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

595

let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,

596

SchedRW = [WriteALULd, WriteRMW] in {

597

598

def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

599

RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },

600

MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),

601

!strconcat(mnemonic, "{b}\t",

602

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

603

[(set EFLAGS, (Op addr:$dst, GR8:$src2))],

604

IIC_ALU_NONMEM>, LOCK;

605

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

606

def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

607

RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },

608

MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),

609

!strconcat(mnemonic, "{w}\t",

610

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

611

[(set EFLAGS, (Op addr:$dst, GR16:$src2))],

612

IIC_ALU_NONMEM>, OpSize16, LOCK;

613

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

614

def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

615

RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },

616

MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),

617

!strconcat(mnemonic, "{l}\t",

618

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

619

[(set EFLAGS, (Op addr:$dst, GR32:$src2))],

620

IIC_ALU_NONMEM>, OpSize32, LOCK;

621

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

622

def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

623

RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },

624

MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),

625

!strconcat(mnemonic, "{q}\t",

626

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

627

[(set EFLAGS, (Op addr:$dst, GR64:$src2))],

628

IIC_ALU_NONMEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

629

630

def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

631

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },

632

ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),

633

!strconcat(mnemonic, "{b}\t",

634

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

635

[(set EFLAGS, (Op addr:$dst, (i8 imm:$src2)))],

636

IIC_ALU_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

637

638

def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

639

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },

640

ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),

641

!strconcat(mnemonic, "{w}\t",

642

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

643

[(set EFLAGS, (Op addr:$dst, (i16 imm:$src2)))],

644

IIC_ALU_MEM>, OpSize16, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

645

646

def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

647

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },

648

ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),

649

!strconcat(mnemonic, "{l}\t",

650

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

651

[(set EFLAGS, (Op addr:$dst, (i32 imm:$src2)))],

652

IIC_ALU_MEM>, OpSize32, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

653

654

def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

655

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },

656

ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),

657

!strconcat(mnemonic, "{q}\t",

658

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

659

[(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))],

660

IIC_ALU_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

661

662

def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},

663

ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },

664

ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),

665

!strconcat(mnemonic, "{w}\t",

666

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

667

[(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))],

668

IIC_ALU_MEM>, OpSize16, LOCK;

669

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

670

def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},

671

ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },

672

ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),

673

!strconcat(mnemonic, "{l}\t",

674

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

675

[(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))],

676

IIC_ALU_MEM>, OpSize32, LOCK;

677

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

678

def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},

679

ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },

680

ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),

681

!strconcat(mnemonic, "{q}\t",

682

"{$src2, $dst|$dst, $src2}"),

Craig Topper

7b5925a

2016-05-02 05:44:21 +0000

[diff] [blame]

683

[(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))],

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

684

IIC_ALU_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

}

}

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

690

defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, X86lock_add, "add">;

691

defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, X86lock_sub, "sub">;

692

defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;

693

defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;

694

defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

695

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

696

multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

697

int Increment, string mnemonic> {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

698

let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

699

SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

700

def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),

701

!strconcat(mnemonic, "{b}\t$dst"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

702

[(set EFLAGS, (X86lock_add addr:$dst, (i8 Increment)))],

703

IIC_UNARY_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

704

def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),

705

!strconcat(mnemonic, "{w}\t$dst"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

706

[(set EFLAGS, (X86lock_add addr:$dst, (i16 Increment)))],

707

IIC_UNARY_MEM>, OpSize16, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

708

def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),

709

!strconcat(mnemonic, "{l}\t$dst"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

710

[(set EFLAGS, (X86lock_add addr:$dst, (i32 Increment)))],

711

IIC_UNARY_MEM>, OpSize32, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

712

def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),

713

!strconcat(mnemonic, "{q}\t$dst"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

714

[(set EFLAGS, (X86lock_add addr:$dst, (i64 Increment)))],

715

IIC_UNARY_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

}

}

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

719

defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, 1, "inc">;

720

defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

721

722

// Atomic compare and swap.

723

// Defines a single isCodeGenOnly instruction called NAME that takes one memory
// operand ($ptr, of operand type x86memop). Opc, Form and itin are forwarded to
// the I<> class, the assembly string is "<mnemonic>\t$ptr", and the selection
// pattern is (frag addr:$ptr). The TB and LOCK modifiers are applied to the def.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,

724

SDPatternOperator frag, X86MemOperand x86memop,

725

InstrItinClass itin> {

726

let isCodeGenOnly = 1 in {

727

def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),

728

!strconcat(mnemonic, "\t$ptr"),

729

[(frag addr:$ptr)], itin>, TB, LOCK;

}

}

multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,

734

string mnemonic, SDPatternOperator frag,

735

InstrItinClass itin8, InstrItinClass itin> {

736

let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {

737

let Defs = [AL, EFLAGS], Uses = [AL] in

738

def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),

739

!strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),

740

[(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;

741

let Defs = [AX, EFLAGS], Uses = [AX] in

742

def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),

743

!strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),

744

[(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK;

745

let Defs = [EAX, EFLAGS], Uses = [EAX] in

746

def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),

747

!strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),

748

[(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK;

749

let Defs = [RAX, EFLAGS], Uses = [RAX] in

750

def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),

751

!strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),

752

[(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;

}

}

let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],

757

SchedRW = [WriteALULd, WriteRMW] in {

758

defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",

X86cas8, i64mem,

IIC_CMPX_LOCK_8B>;

}

Quentin Colombet

2016-03-12 02:25:27 +0000

[diff] [blame]

763

// This pseudo must be used when the frame uses RBX as

764

// the base pointer. Indeed, in such situation RBX is a reserved

765

// register and the register allocator will ignore any use/def of

766

// it. In other words, the register allocator will not fix the clobbering of

767

// RBX that will happen when setting the arguments for the instruction.

768

//

769

// Unlike the actual related instruction, we mark that this one

770

// defines EBX (instead of using EBX).

771

// The rationale is that we will define RBX during the expansion of

772

// the pseudo. The argument feeding EBX is ebx_input.

773

//

774

// The additional argument, $ebx_save, is a temporary register used to

775

// save the value of RBX across the actual instruction.

776

//

777

// To make sure the register assigned to $ebx_save does not interfere with

778

// the definition of the actual instruction, we use a definition $dst which

779

// is tied to $ebx_save. That way, the live-range of $ebx_save spans across

780

// the instruction and we are sure we will have a valid register to restore

781

// the value of RBX.

782

let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],

783

SchedRW = [WriteALULd, WriteRMW], isCodeGenOnly = 1, isPseudo = 1,

784

Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in {

785

def LCMPXCHG8B_SAVE_EBX :

786

I<0, Pseudo, (outs GR32:$dst),

787

(ins i64mem:$ptr, GR32:$ebx_input, GR32:$ebx_save),

788

!strconcat("cmpxchg8b", "\t$ptr"),

789

[(set GR32:$dst, (X86cas8save_ebx addr:$ptr, GR32:$ebx_input,

GR32:$ebx_save))],

IIC_CMPX_LOCK_8B>;

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

795

let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],

796

Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {

797

defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",

798

X86cas16, i128mem,

799

IIC_CMPX_LOCK_16B>, REX_W;

800

}

801

Quentin Colombet

cf9732b

2016-03-12 02:25:27 +0000

[diff] [blame]

802

// Same as LCMPXCHG8B_SAVE_EBX but for the 16-byte variant.

803

let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],

804

Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW],

805

isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",

806

usesCustomInserter = 1 in {

807

def LCMPXCHG16B_SAVE_RBX :

808

I<0, Pseudo, (outs GR64:$dst),

809

(ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save),

810

!strconcat("cmpxchg16b", "\t$ptr"),

811

[(set GR64:$dst, (X86cas16save_rbx addr:$ptr, GR64:$rbx_input,

GR64:$rbx_save))],

IIC_CMPX_LOCK_16B>;

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

816

defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",

817

X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;

818

819

// Atomic exchange and add

820

multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,

821

string frag,

822

InstrItinClass itin8, InstrItinClass itin> {

823

let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,

824

SchedRW = [WriteALULd, WriteRMW] in {

825

def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),

826

(ins GR8:$val, i8mem:$ptr),

827

!strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),

828

[(set GR8:$dst,

829

(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],

830

itin8>;

831

def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),

832

(ins GR16:$val, i16mem:$ptr),

833

!strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),

834

[(set

835

GR16:$dst,

836

(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],

837

itin>, OpSize16;

838

def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),

839

(ins GR32:$val, i32mem:$ptr),

840

!strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),

841

[(set

842

GR32:$dst,

843

(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],

844

itin>, OpSize32;

845

def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),

846

(ins GR64:$val, i64mem:$ptr),

847

!strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),

848

[(set

849

GR64:$dst,

850

(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],

itin>;

}

}

defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",

856

IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,

857

TB, LOCK;

858

859

/* The following multiclass tries to make sure that in code like

860

* x.store (immediate op x.load(acquire), release)

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

861

* and

862

* x.store (register op x.load(acquire), release)

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

863

* an operation directly on memory is generated instead of wasting a register.

864

* It is not automatic as atomic_store/load are only lowered to MOV instructions

865

* extremely late to prevent them from being accidentally reordered in the backend

866

* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)

867

*/

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

868

multiclass RELEASE_BINOP_MI<SDNode op> {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

869

def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

870

"#BINOP "#NAME#"8mi PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

871

[(atomic_store_8 addr:$dst, (op

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

872

(atomic_load_8 addr:$dst), (i8 imm:$src)))]>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

873

def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src),

874

"#BINOP "#NAME#"8mr PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

875

[(atomic_store_8 addr:$dst, (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

876

(atomic_load_8 addr:$dst), GR8:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

877

// NAME#16 is not generated as 16-bit arithmetic instructions are considered

878

// costly and avoided as far as possible by this backend anyway

879

def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

880

"#BINOP "#NAME#"32mi PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

881

[(atomic_store_32 addr:$dst, (op

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

882

(atomic_load_32 addr:$dst), (i32 imm:$src)))]>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

883

def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),

884

"#BINOP "#NAME#"32mr PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

885

[(atomic_store_32 addr:$dst, (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

886

(atomic_load_32 addr:$dst), GR32:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

887

def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

888

"#BINOP "#NAME#"64mi32 PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

889

[(atomic_store_64 addr:$dst, (op

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

890

(atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

891

def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),

892

"#BINOP "#NAME#"64mr PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

893

[(atomic_store_64 addr:$dst, (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

894

(atomic_load_64 addr:$dst), GR64:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

895

}

JF Bastien

986ed68

2015-10-13 00:28:47 +0000

[diff] [blame]

896

let Defs = [EFLAGS] in {

897

defm RELEASE_ADD : RELEASE_BINOP_MI<add>;

898

defm RELEASE_AND : RELEASE_BINOP_MI<and>;

899

defm RELEASE_OR : RELEASE_BINOP_MI<or>;

900

defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;

901

// Note: we don't deal with sub, because subtractions of constants are

902

// optimized into additions before this code can run.

903

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

904

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

905

// Same as above, but for floating-point.

906

// FIXME: imm version.

907

// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.

908

// FIXME: This could also handle SIMD operations with *ps and *pd instructions.

909

let usesCustomInserter = 1 in {

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

910

multiclass RELEASE_FP_BINOP_MI<SDNode op> {

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

911

def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),

912

"#BINOP "#NAME#"32mr PSEUDO!",

913

[(atomic_store_32 addr:$dst,

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

914

(i32 (bitconvert (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

915

(f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),

916

FR32:$src))))]>, Requires<[HasSSE1]>;

917

def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),

918

"#BINOP "#NAME#"64mr PSEUDO!",

919

[(atomic_store_64 addr:$dst,

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

920

(i64 (bitconvert (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

921

(f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),

922

FR64:$src))))]>, Requires<[HasSSE2]>;

923

}

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

924

defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

925

// FIXME: Add fsub, fmul, fdiv, ...

926

}

927

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

928

multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {

929

def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

930

"#UNOP "#NAME#"8m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

931

[(atomic_store_8 addr:$dst, dag8)]>;

932

def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

933

"#UNOP "#NAME#"16m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

934

[(atomic_store_16 addr:$dst, dag16)]>;

935

def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

936

"#UNOP "#NAME#"32m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

937

[(atomic_store_32 addr:$dst, dag32)]>;

938

def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

939

"#UNOP "#NAME#"64m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

940

[(atomic_store_64 addr:$dst, dag64)]>;

941

}

942

JF Bastien

2cdd5e4

2015-10-15 18:24:52 +0000

[diff] [blame]

943

let Defs = [EFLAGS] in {

944

defm RELEASE_INC : RELEASE_UNOP<

945

(add (atomic_load_8 addr:$dst), (i8 1)),

946

(add (atomic_load_16 addr:$dst), (i16 1)),

947

(add (atomic_load_32 addr:$dst), (i32 1)),

948

(add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;

949

defm RELEASE_DEC : RELEASE_UNOP<

950

(add (atomic_load_8 addr:$dst), (i8 -1)),

951

(add (atomic_load_16 addr:$dst), (i16 -1)),

952

(add (atomic_load_32 addr:$dst), (i32 -1)),

953

(add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;

954

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

955

/*

956

TODO: These don't work because the type inference of TableGen fails.

957

TODO: find a way to fix it.

JF Bastien

2cdd5e4

2015-10-15 18:24:52 +0000

[diff] [blame]

958

let Defs = [EFLAGS] in {

959

defm RELEASE_NEG : RELEASE_UNOP<

960

(ineg (atomic_load_8 addr:$dst)),

961

(ineg (atomic_load_16 addr:$dst)),

962

(ineg (atomic_load_32 addr:$dst)),

963

(ineg (atomic_load_64 addr:$dst))>;

964

}

965

// NOT doesn't set flags.

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

966

defm RELEASE_NOT : RELEASE_UNOP<

967

(not (atomic_load_8 addr:$dst)),

968

(not (atomic_load_16 addr:$dst)),

969

(not (atomic_load_32 addr:$dst)),

970

(not (atomic_load_64 addr:$dst))>;

971

*/

972

973

def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

974

"#RELEASE_MOV8mi PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

975

[(atomic_store_8 addr:$dst, (i8 imm:$src))]>;

976

def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

977

"#RELEASE_MOV16mi PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

978

[(atomic_store_16 addr:$dst, (i16 imm:$src))]>;

979

def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

980

"#RELEASE_MOV32mi PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

981

[(atomic_store_32 addr:$dst, (i32 imm:$src))]>;

982

def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

983

"#RELEASE_MOV64mi32 PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

984

[(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;

985

986

def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

987

"#RELEASE_MOV8mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

988

[(atomic_store_8 addr:$dst, GR8 :$src)]>;

989

def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

990

"#RELEASE_MOV16mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

991

[(atomic_store_16 addr:$dst, GR16:$src)]>;

992

def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

993

"#RELEASE_MOV32mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

994

[(atomic_store_32 addr:$dst, GR32:$src)]>;

995

def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

996

"#RELEASE_MOV64mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

997

[(atomic_store_64 addr:$dst, GR64:$src)]>;

998

999

def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

1000

"#ACQUIRE_MOV8rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1001

[(set GR8:$dst, (atomic_load_8 addr:$src))]>;

1002

def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

1003

"#ACQUIRE_MOV16rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1004

[(set GR16:$dst, (atomic_load_16 addr:$src))]>;

1005

def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

1006

"#ACQUIRE_MOV32rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1007

[(set GR32:$dst, (atomic_load_32 addr:$src))]>;

1008

def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

1009

"#ACQUIRE_MOV64rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1010

[(set GR64:$dst, (atomic_load_64 addr:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1011

1012

//===----------------------------------------------------------------------===//

1013

// DAG Pattern Matching Rules

1014

//===----------------------------------------------------------------------===//

1015

Hans Wennborg

5f916d3

2016-03-25 18:11:31 +0000

[diff] [blame]

1016

// Use AND/OR to store 0/-1 in memory when optimizing for minsize. This saves

1017

// binary size compared to a regular MOV, but it introduces an unnecessary

1018

// load, so is not suitable for regular or optsize functions.

1019

let Predicates = [OptForMinSize] in {

1020

def : Pat<(store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;

1021

def : Pat<(store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;

1022

def : Pat<(store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;

1023

def : Pat<(store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;

1024

def : Pat<(store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;

1025

def : Pat<(store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;

1026

}

1027

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1028

// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable

1029

def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;

1030

def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;

1031

def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;

1032

def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;

1033

def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1034

def : Pat<(i32 (X86Wrapper mcsym:$dst)), (MOV32ri mcsym:$dst)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1035

def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;

1036

1037

def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),

1038

(ADD32ri GR32:$src1, tconstpool:$src2)>;

1039

def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),

1040

(ADD32ri GR32:$src1, tjumptable:$src2)>;

1041

def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),

1042

(ADD32ri GR32:$src1, tglobaladdr:$src2)>;

1043

def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),

1044

(ADD32ri GR32:$src1, texternalsym:$src2)>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1045

def : Pat<(add GR32:$src1, (X86Wrapper mcsym:$src2)),

1046

(ADD32ri GR32:$src1, mcsym:$src2)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1047

def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),

1048

(ADD32ri GR32:$src1, tblockaddress:$src2)>;

1049

1050

def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),

1051

(MOV32mi addr:$dst, tglobaladdr:$src)>;

1052

def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),

1053

(MOV32mi addr:$dst, texternalsym:$src)>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1054

def : Pat<(store (i32 (X86Wrapper mcsym:$src)), addr:$dst),

1055

(MOV32mi addr:$dst, mcsym:$src)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1056

def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),

1057

(MOV32mi addr:$dst, tblockaddress:$src)>;

1058

1059

// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable when not in small

1060

// code model mode, should use 'movabs'. FIXME: This is really a hack, the

1061

// 'movabs' predicate should handle this sort of thing.

1062

def : Pat<(i64 (X86Wrapper tconstpool :$dst)),

1063

(MOV64ri tconstpool :$dst)>, Requires<[FarData]>;

1064

def : Pat<(i64 (X86Wrapper tjumptable :$dst)),

1065

(MOV64ri tjumptable :$dst)>, Requires<[FarData]>;

1066

def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),

1067

(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;

1068

def : Pat<(i64 (X86Wrapper texternalsym:$dst)),

1069

(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1070

def : Pat<(i64 (X86Wrapper mcsym:$dst)),

1071

(MOV64ri mcsym:$dst)>, Requires<[FarData]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1072

def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),

1073

(MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;

1074

1075

// In kernel code model, we can get the address of a label

1076

// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of

1077

// the MOV64ri32 should accept these.

1078

def : Pat<(i64 (X86Wrapper tconstpool :$dst)),

1079

(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;

1080

def : Pat<(i64 (X86Wrapper tjumptable :$dst)),

1081

(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;

1082

def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),

1083

(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;

1084

def : Pat<(i64 (X86Wrapper texternalsym:$dst)),

1085

(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1086

def : Pat<(i64 (X86Wrapper mcsym:$dst)),

1087

(MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1088

def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),

1089

(MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;

1090

1091

// If we have small model and -static mode, it is safe to store global addresses

1092

// directly as immediates. FIXME: This is really a hack, the 'imm' predicate

1093

// for MOV64mi32 should handle this sort of thing.

1094

def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),

1095

(MOV64mi32 addr:$dst, tconstpool:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame^]

1096

Requires<[NearData, IsNotPIC]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1097

def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),

1098

(MOV64mi32 addr:$dst, tjumptable:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame^]

1099

Requires<[NearData, IsNotPIC]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1100

def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),

1101

(MOV64mi32 addr:$dst, tglobaladdr:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame^]

1102

Requires<[NearData, IsNotPIC]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1103

def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),

1104

(MOV64mi32 addr:$dst, texternalsym:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame^]

1105

Requires<[NearData, IsNotPIC]>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1106

def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst),

1107

(MOV64mi32 addr:$dst, mcsym:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame^]

1108

Requires<[NearData, IsNotPIC]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1109

def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),

1110

(MOV64mi32 addr:$dst, tblockaddress:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame^]

1111

Requires<[NearData, IsNotPIC]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1112

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1113

def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>;

1114

def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

// Calls

// tls has some funny stuff here...

1119

// This corresponds to movabs $foo@tpoff, %rax

1120

def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),

1121

(MOV64ri32 tglobaltlsaddr :$dst)>;

1122

// This corresponds to add $foo@tpoff, %rax

1123

def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),

1124

(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;

1125

1126

1127

// Direct PC relative function call for small code model. 32-bit displacement

1128

// sign extended to 64-bit.

1129

def : Pat<(X86call (i64 tglobaladdr:$dst)),

1130

(CALL64pcrel32 tglobaladdr:$dst)>;

1131

def : Pat<(X86call (i64 texternalsym:$dst)),

1132

(CALL64pcrel32 texternalsym:$dst)>;

1133

1134

// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they

1135

// can never use callee-saved registers. That is the purpose of the GR64_TC

1136

// register classes.

1137

//

1138

// The only volatile register that is never used by the calling convention is

1139

// %r11. This happens when calling a vararg function with 6 arguments.

1140

//

1141

// Match an X86tcret that uses fewer than 7 volatile registers.

1142

// PatFrag over X86tcret with a C++ predicate that caps the number of register
// operands attached to the tail-call node.
def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),

1143

(X86tcret node:$ptr, node:$off), [{

1144

// X86tcret args: (*chain, ptr, imm, regs..., glue)

1145

// Count RegisterSDNode operands, scanning from operand index 3 onward.
unsigned NumRegs = 0;

1146

for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)

1147

// Reject the match as soon as a seventh register operand is seen.
if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)

return false;

// Six or fewer register operands: the fragment matches.
return true;

}]>;

def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),

1153

(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,

1154

Requires<[Not64BitMode]>;

1155

1156

// FIXME: This is disabled for 32-bit PIC mode because the global base

1157

// register which is part of the address mode may be assigned a

1158

// callee-saved register.

1159

def : Pat<(X86tcret (load addr:$dst), imm:$off),

1160

(TCRETURNmi addr:$dst, imm:$off)>,

1161

Requires<[Not64BitMode, IsNotPIC]>;

1162

1163

def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),

1164

(TCRETURNdi tglobaladdr:$dst, imm:$off)>,

1165

Requires<[NotLP64]>;

1166

1167

def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),

1168

(TCRETURNdi texternalsym:$dst, imm:$off)>,

1169

Requires<[NotLP64]>;

1170

1171

def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),

1172

(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,

1173

Requires<[In64BitMode]>;

1174

1175

// Don't fold loads into X86tcret requiring more than 6 regs.

1176

// There wouldn't be enough scratch registers for base+index.

1177

def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),

1178

(TCRETURNmi64 addr:$dst, imm:$off)>,

1179

Requires<[In64BitMode]>;

1180

1181

def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),

1182

(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,

1183

Requires<[IsLP64]>;

1184

1185

def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),

1186

(TCRETURNdi64 texternalsym:$dst, imm:$off)>,

1187

Requires<[IsLP64]>;

1188

1189

// Normal calls, with various flavors of addresses.

1190

def : Pat<(X86call (i32 tglobaladdr:$dst)),

1191

(CALLpcrel32 tglobaladdr:$dst)>;

1192

def : Pat<(X86call (i32 texternalsym:$dst)),

1193

(CALLpcrel32 texternalsym:$dst)>;

1194

def : Pat<(X86call (i32 imm:$dst)),

1195

(CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;

// Comparisons.

// TEST R,R is smaller than CMP R,0

1200

def : Pat<(X86cmp GR8:$src1, 0),

1201

(TEST8rr GR8:$src1, GR8:$src1)>;

1202

def : Pat<(X86cmp GR16:$src1, 0),

1203

(TEST16rr GR16:$src1, GR16:$src1)>;

1204

def : Pat<(X86cmp GR32:$src1, 0),

1205

(TEST32rr GR32:$src1, GR32:$src1)>;

1206

def : Pat<(X86cmp GR64:$src1, 0),

1207

(TEST64rr GR64:$src1, GR64:$src1)>;

1208

1209

// Conditional moves with folded loads with operands swapped and conditions

1210

// inverted.

1211

// CMOVmr: for each of the 16-, 32- and 64-bit widths, match an X86cmov whose
// first value operand is a load and whose condition is InvertedCond, and
// select the given instruction with the register operand ($src2) first and
// the load address ($src1) as its memory operand.
//   InvertedCond         - condition-code leaf that the X86cmov node must carry.
//   Inst16/Inst32/Inst64 - instruction emitted for the matching width.
multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,

1212

Instruction Inst64> {

1213

// All three patterns are guarded by the HasCMov predicate.
let Predicates = [HasCMov] in {

1214

def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),

1215

(Inst16 GR16:$src2, addr:$src1)>;

1216

def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),

1217

(Inst32 GR32:$src2, addr:$src1)>;

1218

def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),

1219

(Inst64 GR64:$src2, addr:$src1)>;

} // Predicates = [HasCMov]

} // multiclass CMOVmr

defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;

1224

defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;

1225

defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;

1226

defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;

1227

defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;

1228

defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;

1229

defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;

1230

defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;

1231

defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;

1232

defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;

1233

defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;

1234

defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;

1235

defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;

1236

defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;

1237

defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;

1238

defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;

1239

1240

// zextload bool -> zextload byte

Elena Demikhovsky

e5bbca6

2016-02-25 07:05:12 +0000

[diff] [blame]

1241

// i1 stored in one byte in zero-extended form.

1242

// Upper bits cleanup should be executed before Store.

1243

def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;

1244

def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;

1245

def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1246

def : Pat<(zextloadi64i1 addr:$src),

Elena Demikhovsky

e5bbca6

2016-02-25 07:05:12 +0000

[diff] [blame]

1247

(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1248

1249

// extload bool -> extload byte

1250

// When extloading from 16-bit and smaller memory locations into 64-bit

1251

// registers, use zero-extending loads so that the entire 64-bit register is

1252

// defined, avoiding partial-register updates.

1253

1254

def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;

1255

def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;

1256

def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;

1257

def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;

1258

def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;

1259

def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;

1260

1261

// For other extloads, use subregs, since the high contents of the register are

1262

// defined after an extload.

1263

def : Pat<(extloadi64i1 addr:$src),

1264

(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

1265

def : Pat<(extloadi64i8 addr:$src),

1266

(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

1267

def : Pat<(extloadi64i16 addr:$src),

1268

(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;

1269

def : Pat<(extloadi64i32 addr:$src),

1270

(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;

1271

1272

// anyext. Define these to do an explicit zero-extend to

1273

// avoid partial-register updates.

1274

def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG

1275

(MOVZX32rr8 GR8 :$src), sub_16bit)>;

1276

def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;

1277

1278

// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.

1279

def : Pat<(i32 (anyext GR16:$src)),

1280

(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;

1281

1282

def : Pat<(i64 (anyext GR8 :$src)),

1283

(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;

1284

def : Pat<(i64 (anyext GR16:$src)),

1285

(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;

1286

def : Pat<(i64 (anyext GR32:$src)),

1287

(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;

1288

1289

1290

// Not every node that defines a 32-bit result zeroes the high half of the

1291

// register: Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may

1292

// be copying from a truncate. And x86's cmov doesn't do anything if the

1293

// condition is false. But any other 32-bit operation will zero-extend

1294

// up to 64 bits.

1295

// def32: an i32 GR32 leaf that is accepted only when the node producing it is
// not one of the opcodes rejected by the predicate below.
def def32 : PatLeaf<(i32 GR32:$src), [{

1296

// Rejected producers: TRUNCATE, EXTRACT_SUBREG, CopyFromReg, AssertSext and
// X86ISD::CMOV. Any other opcode is accepted.
return N->getOpcode() != ISD::TRUNCATE &&

1297

N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&

1298

N->getOpcode() != ISD::CopyFromReg &&

1299

N->getOpcode() != ISD::AssertSext &&

1300

N->getOpcode() != X86ISD::CMOV;

1301

}]>;

1302

1303

// In the case of a 32-bit def that is known to implicitly zero-extend,

1304

// we can use a SUBREG_TO_REG.

1305

def : Pat<(i64 (zext def32:$src)),

1306

(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;

1307

1308

//===----------------------------------------------------------------------===//

1309

// Pattern match OR as ADD

1310

//===----------------------------------------------------------------------===//

1311

1312

// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be

1313

// 3-addressified into an LEA instruction to avoid copies. However, we also

1314

// want to finally emit these instructions as an or at the end of the code

1315

// generator to make the generated code easier to read. To do this, we select

1316

// into "disjoint bits" pseudo ops.

1317

1318

// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.

1319

// or_is_add: matches (or lhs, rhs) only when the predicate below can show
// that the two operands have no set bit in common.
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{

1320

// Constant right-hand side: ask whether every bit set in the constant is
// known to be zero in operand 0.
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))

1321

return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());

1322

1323

// General case: compute known bits for both operands.
APInt KnownZero0, KnownOne0;

1324

CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0);

1325

APInt KnownZero1, KnownOne1;

1326

CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0);

1327

// True only if every bit position is known zero in at least one operand.
return (~KnownZero0 & ~KnownZero1) == 0;

}]>;

// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.

1332

// Try this before selecting to OR.

1333

let AddedComplexity = 5, SchedRW = [WriteALU] in {

1334

1335

let isConvertibleToThreeAddress = 1,

1336

Constraints = "$src1 = $dst", Defs = [EFLAGS] in {

1337

let isCommutable = 1 in {

1338

def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),

1339

"", // orw/addw REG, REG

1340

[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;

1341

def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),

1342

"", // orl/addl REG, REG

1343

[(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;

1344

def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),

1345

"", // orq/addq REG, REG

1346

[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;

1347

} // isCommutable

1348

1349

// NOTE: These are order specific, we want the ri8 forms to be listed

1350

// first so that they are slightly preferred to the ri forms.

1351

1352

def ADD16ri8_DB : I<0, Pseudo,

1353

(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),

1354

"", // orw/addw REG, imm8

1355

[(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;

1356

def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),

1357

"", // orw/addw REG, imm

1358

[(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;

1359

1360

def ADD32ri8_DB : I<0, Pseudo,

1361

(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),

1362

"", // orl/addl REG, imm8

1363

[(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;

1364

def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),

1365

"", // orl/addl REG, imm

1366

[(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;

1367

1368

1369

def ADD64ri8_DB : I<0, Pseudo,

1370

(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),

1371

"", // orq/addq REG, imm8

1372

[(set GR64:$dst, (or_is_add GR64:$src1,

1373

i64immSExt8:$src2))]>;

1374

def ADD64ri32_DB : I<0, Pseudo,

1375

(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),

1376

"", // orq/addq REG, imm

1377

[(set GR64:$dst, (or_is_add GR64:$src1,

1378

i64immSExt32:$src2))]>;

1379

}

1380

} // AddedComplexity, SchedRW

1381

1382

1383

//===----------------------------------------------------------------------===//

1384

// Some peepholes

1385

//===----------------------------------------------------------------------===//

1386

1387

// Odd encoding trick: -128 fits into an 8-bit immediate field while

1388

// +128 doesn't, so in this special case use a sub instead of an add.

1389

def : Pat<(add GR16:$src1, 128),

1390

(SUB16ri8 GR16:$src1, -128)>;

1391

def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),

1392

(SUB16mi8 addr:$dst, -128)>;

1393

1394

def : Pat<(add GR32:$src1, 128),

1395

(SUB32ri8 GR32:$src1, -128)>;

1396

def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),

1397

(SUB32mi8 addr:$dst, -128)>;

1398

1399

def : Pat<(add GR64:$src1, 128),

1400

(SUB64ri8 GR64:$src1, -128)>;

1401

def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),

1402

(SUB64mi8 addr:$dst, -128)>;

1403

1404

// The same trick applies for 32-bit immediate fields in 64-bit

1405

// instructions.

1406

def : Pat<(add GR64:$src1, 0x0000000080000000),

1407

(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;

1408

// Memory (load/add/store) form of the +2^31 trick. The matched immediate must
// be exactly 0x80000000 so that subtracting the sign-extended imm32
// 0xffffffff80000000 (-2^31) adds the same amount. The previous literal had
// an extra hex digit (0x800000000, i.e. 2^35), which would have rewritten an
// add of 2^35 into an add of 2^31.
def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),

1409

(SUB64mi32 addr:$dst, 0xffffffff80000000)>;

1410

1411

// To avoid needing to materialize an immediate in a register, use a 32-bit and

1412

// with implicit zero-extension instead of a 64-bit and if the immediate has at

1413

// least 32 bits of leading zeros. If in addition the last 32 bits can be

1414

// represented with a sign extension of an 8-bit constant, use that.

Craig Topper

3d44178

2015-04-04 02:31:43 +0000

[diff] [blame]

1415

// This can also reduce instruction size by eliminating the need for the REX

1416

// prefix.

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1417

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1418

// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.

1419

let AddedComplexity = 1 in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1420

def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),

(SUBREG_TO_REG

(i64 0),

(AND32ri8

(EXTRACT_SUBREG GR64:$src, sub_32bit),

1425

(i32 (GetLo8XForm imm:$imm))),

1426

sub_32bit)>;

1427

1428

def : Pat<(and GR64:$src, i64immZExt32:$imm),

(SUBREG_TO_REG

(i64 0),

(AND32ri

(EXTRACT_SUBREG GR64:$src, sub_32bit),

1433

(i32 (GetLo32XForm imm:$imm))),

1434

sub_32bit)>;

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1435

} // AddedComplexity = 1

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1436

1437

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1438

// AddedComplexity is needed due to the increased complexity on the

1439

// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all

1440

// the MOVZX patterns keeps them together in DAGIsel tables.

1441

let AddedComplexity = 1 in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1442

// r & (2^16-1) ==> movz

1443

def : Pat<(and GR32:$src1, 0xffff),

1444

(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;

1445

// r & (2^8-1) ==> movz

1446

def : Pat<(and GR32:$src1, 0xff),

1447

(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,

1448

GR32_ABCD)),

1449

sub_8bit))>,

1450

Requires<[Not64BitMode]>;

1451

// r & (2^8-1) ==> movz

1452

def : Pat<(and GR16:$src1, 0xff),

1453

(EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG

1454

(i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),

1455

sub_16bit)>,

1456

Requires<[Not64BitMode]>;

1457

1458

// r & (2^32-1) ==> movz

1459

def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),

1460

(SUBREG_TO_REG (i64 0),

1461

(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),

1462

sub_32bit)>;

1463

// r & (2^16-1) ==> movz

1464

def : Pat<(and GR64:$src, 0xffff),

1465

(SUBREG_TO_REG (i64 0),

1466

(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),

1467

sub_32bit)>;

1468

// r & (2^8-1) ==> movz

1469

def : Pat<(and GR64:$src, 0xff),

1470

(SUBREG_TO_REG (i64 0),

1471

(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),

1472

sub_32bit)>;

1473

// r & (2^8-1) ==> movz

1474

def : Pat<(and GR32:$src1, 0xff),

1475

(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,

1476

Requires<[In64BitMode]>;

1477

// r & (2^8-1) ==> movz

1478

def : Pat<(and GR16:$src1, 0xff),

1479

(EXTRACT_SUBREG (MOVZX32rr8 (i8

1480

(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,

1481

Requires<[In64BitMode]>;

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1482

} // AddedComplexity = 1

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1483

1484

1485

// sext_inreg patterns

1486

def : Pat<(sext_inreg GR32:$src, i16),

1487

(MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;

1488

def : Pat<(sext_inreg GR32:$src, i8),

1489

(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1490

GR32_ABCD)),

1491

sub_8bit))>,

1492

Requires<[Not64BitMode]>;

1493

1494

// sext_inreg i8 within a 16-bit register outside 64-bit mode: copy the value
// into GR16_ABCD, take its sub_8bit subregister, sign-extend with MOVSX32rr8
// and keep the low 16 bits of the result. The COPY_TO_REGCLASS result is
// typed i16 (it lives in GR16_ABCD), matching the other GR16_ABCD patterns in
// this file.
def : Pat<(sext_inreg GR16:$src, i8),

1495

(EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG

1496

(i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),

1497

sub_16bit)>,

1498

Requires<[Not64BitMode]>;

1499

1500

def : Pat<(sext_inreg GR64:$src, i32),

1501

(MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;

1502

def : Pat<(sext_inreg GR64:$src, i16),

1503

(MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;

1504

def : Pat<(sext_inreg GR64:$src, i8),

1505

(MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;

1506

def : Pat<(sext_inreg GR32:$src, i8),

1507

(MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,

1508

Requires<[In64BitMode]>;

1509

def : Pat<(sext_inreg GR16:$src, i8),

1510

(EXTRACT_SUBREG (MOVSX32rr8

1511

(EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,

1512

Requires<[In64BitMode]>;

1513

1514

// sext, sext_load, zext, zext_load

1515

def: Pat<(i16 (sext GR8:$src)),

1516

(EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;

1517

def: Pat<(sextloadi16i8 addr:$src),

1518

(EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;

1519

def: Pat<(i16 (zext GR8:$src)),

1520

(EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;

1521

def: Pat<(zextloadi16i8 addr:$src),

1522

(EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;

1523

1524

// trunc patterns

1525

def : Pat<(i16 (trunc GR32:$src)),

1526

(EXTRACT_SUBREG GR32:$src, sub_16bit)>;

1527

def : Pat<(i8 (trunc GR32:$src)),

1528

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1529

sub_8bit)>,

1530

Requires<[Not64BitMode]>;

1531

def : Pat<(i8 (trunc GR16:$src)),

1532

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1533

sub_8bit)>,

1534

Requires<[Not64BitMode]>;

1535

def : Pat<(i32 (trunc GR64:$src)),

1536

(EXTRACT_SUBREG GR64:$src, sub_32bit)>;

1537

def : Pat<(i16 (trunc GR64:$src)),

1538

(EXTRACT_SUBREG GR64:$src, sub_16bit)>;

1539

def : Pat<(i8 (trunc GR64:$src)),

1540

(EXTRACT_SUBREG GR64:$src, sub_8bit)>;

1541

def : Pat<(i8 (trunc GR32:$src)),

1542

(EXTRACT_SUBREG GR32:$src, sub_8bit)>,

1543

Requires<[In64BitMode]>;

1544

def : Pat<(i8 (trunc GR16:$src)),

1545

(EXTRACT_SUBREG GR16:$src, sub_8bit)>,

1546

Requires<[In64BitMode]>;

1547

1548

// h-register tricks

1549

def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),

1550

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1551

sub_8bit_hi)>,

1552

Requires<[Not64BitMode]>;

Kevin B. Smith

ed0b620

2016-05-31 22:00:12 +0000

[diff] [blame]

1553

def : Pat<(i8 (trunc (srl_su (i32 (anyext GR16:$src)), (i8 8)))),

1554

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1555

sub_8bit_hi)>,

1556

Requires<[Not64BitMode]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1557

def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),

1558

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1559

sub_8bit_hi)>,

1560

Requires<[Not64BitMode]>;

1561

def : Pat<(srl GR16:$src, (i8 8)),

1562

(EXTRACT_SUBREG

1563

(MOVZX32rr8

1564

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1565

sub_8bit_hi)),

1566

sub_16bit)>,

1567

Requires<[Not64BitMode]>;

1568

def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),

1569

(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,

1570

GR16_ABCD)),

1571

sub_8bit_hi))>,

1572

Requires<[Not64BitMode]>;

1573

def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),

1574

(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,

1575

GR16_ABCD)),

1576

sub_8bit_hi))>,

1577

Requires<[Not64BitMode]>;

1578

def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),

1579

(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1580

GR32_ABCD)),

1581

sub_8bit_hi))>,

1582

Requires<[Not64BitMode]>;

1583

def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),

1584

(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1585

GR32_ABCD)),

1586

sub_8bit_hi))>,

1587

Requires<[Not64BitMode]>;

1588

1589

// h-register tricks.

1590

// For now, be conservative on x86-64 and use an h-register extract only if the

1591

// value is immediately zero-extended or stored, which are somewhat common

1592

// cases. This uses a bunch of code to prevent a register requiring a REX prefix

1593

// from being allocated in the same instruction as the h register, as there's

1594

// currently no way to describe this requirement to the register allocator.

1595

1596

// h-register extract and zero-extend.

1597

def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),

(SUBREG_TO_REG

(i64 0),

(MOVZX32_NOREXrr8

(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),

1602

sub_8bit_hi)),

1603

sub_32bit)>;

1604

def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),

1605

(MOVZX32_NOREXrr8

1606

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1607

sub_8bit_hi))>,

1608

Requires<[In64BitMode]>;

1609

def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),

1610

(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1611

GR32_ABCD)),

1612

sub_8bit_hi))>,

1613

Requires<[In64BitMode]>;

1614

def : Pat<(srl GR16:$src, (i8 8)),

1615

(EXTRACT_SUBREG

1616

(MOVZX32_NOREXrr8

1617

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1618

sub_8bit_hi)),

1619

sub_16bit)>,

1620

Requires<[In64BitMode]>;

1621

def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),

1622

(MOVZX32_NOREXrr8

1623

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1624

sub_8bit_hi))>,

1625

Requires<[In64BitMode]>;

1626

def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),

1627

(MOVZX32_NOREXrr8

1628

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1629

sub_8bit_hi))>,

1630

Requires<[In64BitMode]>;

1631

def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),

(SUBREG_TO_REG

(i64 0),

(MOVZX32_NOREXrr8

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1636

sub_8bit_hi)),

1637

sub_32bit)>;

1638

def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),

(SUBREG_TO_REG

(i64 0),

(MOVZX32_NOREXrr8

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

sub_8bit_hi)),

sub_32bit)>;

// h-register extract and store.

1647

def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),

1648

(MOV8mr_NOREX

1649

addr:$dst,

1650

(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),

1651

sub_8bit_hi))>;

1652

def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),

1653

(MOV8mr_NOREX

1654

addr:$dst,

1655

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1656

sub_8bit_hi))>,

1657

Requires<[In64BitMode]>;

1658

def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),

1659

(MOV8mr_NOREX

1660

addr:$dst,

1661

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1662

sub_8bit_hi))>,

1663

Requires<[In64BitMode]>;

1664

1665

1666

// (shl x, 1) ==> (add x, x)

1667

// Note that if x is undef (immediate or otherwise), we could theoretically

1668

// end up with the two uses of x getting different values, producing a result

1669

// where the least significant bit is not 0. However, the probability of this

1670

// happening is considered low enough that this is officially not a

1671

// "real problem".

1672

def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;

1673

def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;

1674

def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;

1675

def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;

1676

1677

// Helper imms that check if a mask doesn't change significant shift bits.

Benjamin Kramer

5f6a907

2015-02-12 15:35:40 +0000

[diff] [blame]

1678

// immShift32: i8 immediate whose five least-significant bits are all set.
def immShift32 : ImmLeaf<i8, [{

1679

// Equivalent to "at least five trailing one bits".
return (Imm & 0x1f) == 0x1f;

1680

}]>;

1681

// immShift64: i8 immediate whose six least-significant bits are all set.
def immShift64 : ImmLeaf<i8, [{

1682

// Equivalent to "at least six trailing one bits".
return (Imm & 0x3f) == 0x3f;

1683

}]>;

Michael Kuperstein