Blame - lib/Target/PowerPC/PPCISelLowering.cpp - fp2-dev/platform/external/llvm

2007-07-18 16:29:46 +0000

[diff] [blame]

1

//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//

2

//

3

// The LLVM Compiler Infrastructure

4

//

Chris Lattner

081ce94

2007-12-29 20:36:04 +0000

[diff] [blame]

5

// This file is distributed under the University of Illinois Open Source

6

// License. See LICENSE.TXT for details.

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

7

//

8

//===----------------------------------------------------------------------===//

9

//

10

// This file implements the PPCISelLowering class.

11

//

12

//===----------------------------------------------------------------------===//

13

14

#include "PPCISelLowering.h"

15

#include "PPCMachineFunctionInfo.h"

16

#include "PPCPredicates.h"

17

#include "PPCTargetMachine.h"

18

#include "PPCPerfectShuffle.h"

Owen Anderson

2007-09-07 04:06:50 +0000

[diff] [blame]

19

#include "llvm/ADT/STLExtras.h"

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

20

#include "llvm/ADT/VectorExtras.h"

21

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

22

#include "llvm/CodeGen/CallingConvLower.h"

23

#include "llvm/CodeGen/MachineFrameInfo.h"

24

#include "llvm/CodeGen/MachineFunction.h"

25

#include "llvm/CodeGen/MachineInstrBuilder.h"

Chris Lattner

2007-12-31 04:13:23 +0000

[diff] [blame]

26

#include "llvm/CodeGen/MachineRegisterInfo.h"

Dan Gohman

2008-02-06 22:27:42 +0000

[diff] [blame]

27

#include "llvm/CodeGen/PseudoSourceValue.h"

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

28

#include "llvm/CodeGen/SelectionDAG.h"

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

29

#include "llvm/Constants.h"

30

#include "llvm/Function.h"

31

#include "llvm/Intrinsics.h"

32

#include "llvm/Support/MathExtras.h"

33

#include "llvm/Target/TargetOptions.h"

34

#include "llvm/Support/CommandLine.h"

35

using namespace llvm;

36

37

// Hidden command-line flag "-enable-ppc-preinc".  Its description marks the
// pre-increment load/store generation it controls as experimental.
static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
cl::desc("enable preincrement load/store generation on PPC (experimental)"),
                                     cl::Hidden);

40

41

/// PPCTargetLowering - Set up the PowerPC lowering tables: register the
/// register class used for each legal value type, record how every
/// (operation, value type) pair is to be legalized (Legal, Promote, Expand or
/// Custom), pick the stack/exception registers, register the target DAG
/// combines, and finally call computeRegisterProperties().
///
/// Several entries are first set for a whole range of types and then
/// overridden for specific types further down, so the order of the calls in
/// this function is significant.
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg)
  setConvertAction(MVT::ppcf128, MVT::f64, Expand);
  setConvertAction(MVT::ppcf128, MVT::f32, Expand);
  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // PowerPC has no intrinsics for these particular operations
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // FSQRT is only left at its default action when the subtarget reports a
  // hardware square root (hasFSQRT); otherwise it is expanded.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32, Expand);

  // PowerPC does not have Select
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // VAARG is custom lowered with ELF 32 ABI
  if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
  else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT.
    // Promote is the single, effective action for this pair on this path.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::SUB, (MVT::ValueType)VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Override the per-type defaults set in the loop above for v4i32.
    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  // Pick the 64-bit (X*) or 32-bit (R*) names of the stack pointer and of the
  // registers that carry the exception pointer and selector.
  if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
  }

  computeRegisterProperties();
}

356

Dale Johannesen

88945f8

2008-02-28 22:31:51 +0000

[diff] [blame]

357

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate

358

/// function arguments in the caller parameter area.

359

unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {

360

TargetMachine &TM = getTargetMachine();

361

// Darwin passes everything on 4 byte boundary.

362

if (TM.getSubtarget<PPCSubtarget>().isDarwin())

return 4;

// FIXME Elf TBD

return 4;

}

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

368

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {

369

switch (Opcode) {

370

default: return 0;

371

case PPCISD::FSEL: return "PPCISD::FSEL";

372

case PPCISD::FCFID: return "PPCISD::FCFID";

373

case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";

374

case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";

375

case PPCISD::STFIWX: return "PPCISD::STFIWX";

376

case PPCISD::VMADDFP: return "PPCISD::VMADDFP";

377

case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";

378

case PPCISD::VPERM: return "PPCISD::VPERM";

379

case PPCISD::Hi: return "PPCISD::Hi";

380

case PPCISD::Lo: return "PPCISD::Lo";

381

case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";

382

case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";

383

case PPCISD::SRL: return "PPCISD::SRL";

384

case PPCISD::SRA: return "PPCISD::SRA";

385

case PPCISD::SHL: return "PPCISD::SHL";

386

case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";

387

case PPCISD::STD_32: return "PPCISD::STD_32";

388

case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF";

389

case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho";

390

case PPCISD::MTCTR: return "PPCISD::MTCTR";

391

case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho";

392

case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF";

393

case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";

394

case PPCISD::MFCR: return "PPCISD::MFCR";

395

case PPCISD::VCMP: return "PPCISD::VCMP";

396

case PPCISD::VCMPo: return "PPCISD::VCMPo";

397

case PPCISD::LBRX: return "PPCISD::LBRX";

398

case PPCISD::STBRX: return "PPCISD::STBRX";

399

case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";

Chris Lattner

e2a6e9f

2008-01-18 18:51:16 +0000

[diff] [blame]

400

case PPCISD::MFFS: return "PPCISD::MFFS";

401

case PPCISD::MTFSB0: return "PPCISD::MTFSB0";

402

case PPCISD::MTFSB1: return "PPCISD::MTFSB1";

403

case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";

404

case PPCISD::MTFSF: return "PPCISD::MTFSF";

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

}

}

Scott Michel

2008-03-10 15:42:14 +0000

[diff] [blame]

408

409

/// getSetCCResultType - Return the value type produced by SETCC.  The operand
/// is ignored; the result is always MVT::i32.
MVT::ValueType
PPCTargetLowering::getSetCCResultType(const SDOperand &) const {
  return MVT::i32;
}

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

415

//===----------------------------------------------------------------------===//

416

// Node matching predicates, for use by the tblgen matching code.

417

//===----------------------------------------------------------------------===//

418

419

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.

420

static bool isFloatingPointZero(SDOperand Op) {

421

if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))

Dale Johannesen

2007-08-31 04:03:46 +0000

[diff] [blame]

422

return CFP->getValueAPF().isZero();

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

423

else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) {

424

// Maybe this has already been legalized into the constant pool?

425

if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))

426

if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))

Dale Johannesen

2007-08-31 04:03:46 +0000

[diff] [blame]

427

return CFP->getValueAPF().isZero();

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

}

return false;

}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return

433

/// true if Op is undef or if it matches the specified value.

434

static bool isConstantOrUndef(SDOperand Op, unsigned Val) {

435

return Op.getOpcode() == ISD::UNDEF ||

436

cast<ConstantSDNode>(Op)->getValue() == Val;

437

}

438

439

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a

440

/// VPKUHUM instruction.

441

bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {

442

if (!isUnary) {

443

for (unsigned i = 0; i != 16; ++i)

444

if (!isConstantOrUndef(N->getOperand(i), i*2+1))

445

return false;

446

} else {

447

for (unsigned i = 0; i != 8; ++i)

448

if (!isConstantOrUndef(N->getOperand(i), i*2+1) ||

449

!isConstantOrUndef(N->getOperand(i+8), i*2+1))

return false;

}

return true;

}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a

456

/// VPKUWUM instruction.

457

bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {

458

if (!isUnary) {

459

for (unsigned i = 0; i != 16; i += 2)

460

if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||

461

!isConstantOrUndef(N->getOperand(i+1), i*2+3))

462

return false;

463

} else {

464

for (unsigned i = 0; i != 8; i += 2)

465

if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||

466

!isConstantOrUndef(N->getOperand(i+1), i*2+3) ||

467

!isConstantOrUndef(N->getOperand(i+8), i*2+2) ||

468

!isConstantOrUndef(N->getOperand(i+9), i*2+3))

return false;

}

return true;

}

/// isVMerge - Common function, used to match vmrg* shuffles.

475

///

476

static bool isVMerge(SDNode *N, unsigned UnitSize,

477

unsigned LHSStart, unsigned RHSStart) {

478

assert(N->getOpcode() == ISD::BUILD_VECTOR &&

479

N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");

480

assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&

481

"Unsupported merge size!");

482

483

for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units

484

for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit

485

if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),

486

LHSStart+j+i*UnitSize) ||

487

!isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),

488

RHSStart+j+i*UnitSize))

return false;

}

return true;

}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for

495

/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).

496

bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {

497

if (!isUnary)

498

return isVMerge(N, UnitSize, 8, 24);

499

return isVMerge(N, UnitSize, 8, 8);

500

}

501

502

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for

503

/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).

504

bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {

505

if (!isUnary)

506

return isVMerge(N, UnitSize, 0, 16);

507

return isVMerge(N, UnitSize, 0, 0);

}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift

512

/// amount, otherwise return -1.

513

int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {

514

assert(N->getOpcode() == ISD::BUILD_VECTOR &&

515

N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");

516

// Find the first non-undef value in the shuffle mask.

517

unsigned i;

518

for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)

519

/*search*/;

520

521

if (i == 16) return -1; // all undef.

522

523

// Otherwise, check to see if the rest of the elements are consequtively

524

// numbered from this value.

525

unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();

526

if (ShiftAmt < i) return -1;

ShiftAmt -= i;

if (!isUnary) {

// Check the rest of the elements to see if they are consequtive.

531

for (++i; i != 16; ++i)

532

if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))

533

return -1;

534

} else {

535

// Check the rest of the elements to see if they are consequtive.

536

for (++i; i != 16; ++i)

537

if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))

return -1;

}

return ShiftAmt;

}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand

545

/// specifies a splat of a single element that is suitable for input to

546

/// VSPLTB/VSPLTH/VSPLTW.

547

bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {

548

assert(N->getOpcode() == ISD::BUILD_VECTOR &&

549

N->getNumOperands() == 16 &&

550

(EltSize == 1 || EltSize == 2 || EltSize == 4));

551

552

// This is a splat operation if each element of the permute is the same, and

553

// if the value doesn't reference the second vector.

554

unsigned ElementBase = 0;

555

SDOperand Elt = N->getOperand(0);

556

if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))

557

ElementBase = EltV->getValue();

558

else

559

return false; // FIXME: Handle UNDEF elements too!

560

561

if (cast<ConstantSDNode>(Elt)->getValue() >= 16)

562

return false;

563

564

// Check that they are consequtive.

565

for (unsigned i = 1; i != EltSize; ++i) {

566

if (!isa<ConstantSDNode>(N->getOperand(i)) ||

567

cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)

return false;

}

assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");

572

for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {

573

if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

574

assert(isa<ConstantSDNode>(N->getOperand(i)) &&

575

"Invalid VECTOR_SHUFFLE mask!");

576

for (unsigned j = 0; j != EltSize; ++j)

577

if (N->getOperand(i+j) != N->getOperand(j))

return false;

}

return true;

}

Evan Cheng

2007-07-30 07:51:22 +0000

[diff] [blame]

584

/// isAllNegativeZeroVector - Returns true if all elements of build_vector

585

/// are -0.0.

586

bool PPC::isAllNegativeZeroVector(SDNode *N) {

587

assert(N->getOpcode() == ISD::BUILD_VECTOR);

588

if (PPC::isSplatShuffleMask(N, N->getNumOperands()))

589

if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N))

Dale Johannesen

2007-08-31 04:03:46 +0000

[diff] [blame]

590

return CFP->getValueAPF().isNegZero();

Evan Cheng

c5912e3

2007-07-30 07:51:22 +0000

[diff] [blame]

return false;

}

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

594

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the

595

/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.

596

unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {

597

assert(isSplatShuffleMask(N, EltSize));

598

return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;

599

}

600

601

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed

602

/// by using a vspltis[bhw] instruction of the specified element size, return

603

/// the constant being splatted. The ByteSize field indicates the number of

604

/// bytes of each element [124] -> [bhw].

605

SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {

606

SDOperand OpVal(0, 0);

607

608

// If ByteSize of the splat is bigger than the element size of the

609

// build_vector, then we have a case where we are checking for a splat where

610

// multiple elements of the buildvector are folded together into a single

611

// logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).

612

unsigned EltSize = 16/N->getNumOperands();

613

if (EltSize < ByteSize) {

614

unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.

615

SDOperand UniquedVals[4];

616

assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

617

618

// See if all of the elements in the buildvector agree across.

619

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

620

if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

621

// If the element isn't a constant, bail fully out.

622

if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();

623

624

625

if (UniquedVals[i&(Multiple-1)].Val == 0)

626

UniquedVals[i&(Multiple-1)] = N->getOperand(i);

627

else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))

628

return SDOperand(); // no match.

629

}

630

631

// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains

632

// either constant or undef values that are identical for each chunk. See

633

// if these chunks can form into a larger vspltis*.

634

635

// Check to see if all of the leading entries are either 0 or -1. If

636

// neither, then this won't fit into the immediate field.

637

bool LeadingZero = true;

638

bool LeadingOnes = true;

639

for (unsigned i = 0; i != Multiple-1; ++i) {

640

if (UniquedVals[i].Val == 0) continue; // Must have been undefs.

641

642

LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();

643

LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();

644

}

645

// Finally, check the least significant entry.

646

if (LeadingZero) {

647

if (UniquedVals[Multiple-1].Val == 0)

648

return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef

649

int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();

650

if (Val < 16)

651

return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)

652

}

653

if (LeadingOnes) {

654

if (UniquedVals[Multiple-1].Val == 0)

655

return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef

656

int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();

657

if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)

658

return DAG.getTargetConstant(Val, MVT::i32);

}

return SDOperand();

}

// Check to see if this buildvec has a single non-undef value in its elements.

665

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

666

if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

667

if (OpVal.Val == 0)

668

OpVal = N->getOperand(i);

669

else if (OpVal != N->getOperand(i))

return SDOperand();

}

if (OpVal.Val == 0) return SDOperand(); // All UNDEF: use implicit def.

674

675

unsigned ValSizeInBytes = 0;

676

uint64_t Value = 0;

677

if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

678

Value = CN->getValue();

679

ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;

680

} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {

681

assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");

Dale Johannesen

2007-08-31 04:03:46 +0000

[diff] [blame]

682

Value = FloatToBits(CN->getValueAPF().convertToFloat());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

ValSizeInBytes = 4;

}

// If the splat value is larger than the element value, then we can never do

687

// this splat. The only case that we could fit the replicated bits into our

688

// immediate field for would be zero, and we prefer to use vxor for it.

689

if (ValSizeInBytes < ByteSize) return SDOperand();

690

691

// If the element value is larger than the splat value, cut it in half and

692

// check to see if the two halves are equal. Continue doing this until we

693

// get to ByteSize. This allows us to handle 0x01010101 as 0x01.

694

while (ValSizeInBytes > ByteSize) {

695

ValSizeInBytes >>= 1;

696

697

// If the top half equals the bottom half, we're still ok.

698

if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=

699

(Value & ((1 << (8*ValSizeInBytes))-1)))

return SDOperand();

}

// Properly sign extend the value.

704

int ShAmt = (4-ByteSize)*8;

705

int MaskVal = ((int)Value << ShAmt) >> ShAmt;

706

707

// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.

708

if (MaskVal == 0) return SDOperand();

709

710

// Finally, if this value fits in a 5 bit sext field, return it

711

if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)

712

return DAG.getTargetConstant(MaskVal, MVT::i32);

return SDOperand();

}

//===----------------------------------------------------------------------===//

717

// Addressing Mode Selection

718

//===----------------------------------------------------------------------===//

719

720

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit

721

/// or 64-bit immediate, and if the value can be accurately represented as a

722

/// sign extension from a 16-bit value. If so, this returns true and the

723

/// immediate.

724

static bool isIntS16Immediate(SDNode *N, short &Imm) {

725

if (N->getOpcode() != ISD::Constant)

726

return false;

727

728

Imm = (short)cast<ConstantSDNode>(N)->getValue();

729

if (N->getValueType(0) == MVT::i32)

730

return Imm == (int32_t)cast<ConstantSDNode>(N)->getValue();

731

else

732

return Imm == (int64_t)cast<ConstantSDNode>(N)->getValue();

733

}

734

/// Convenience overload: applies the SDNode form of isIntS16Immediate to the
/// node referenced by Op.
static bool isIntS16Immediate(SDOperand Op, short &Imm) {
  SDNode *Node = Op.Val;
  return isIntS16Immediate(Node, Imm);
}

/// SelectAddressRegReg - Given the specified addressed, check to see if it

740

/// can be represented as an indexed [r+r] operation. Returns false if it

741

/// can be more efficiently represented with [r+imm].

742

bool PPCTargetLowering::SelectAddressRegReg(SDOperand N, SDOperand &Base,

SDOperand &Index,

SelectionDAG &DAG) {

short imm = 0;

if (N.getOpcode() == ISD::ADD) {

747

if (isIntS16Immediate(N.getOperand(1), imm))

748

return false; // r+i

749

if (N.getOperand(1).getOpcode() == PPCISD::Lo)

750

return false; // r+i

751

752

Base = N.getOperand(0);

753

Index = N.getOperand(1);

754

return true;

755

} else if (N.getOpcode() == ISD::OR) {

756

if (isIntS16Immediate(N.getOperand(1), imm))

757

return false; // r+i can fold it if we can.

758

759

// If this is an or of disjoint bitfields, we can codegen this as an add

760

// (for better address arithmetic) if the LHS and RHS of the OR are provably

761

// disjoint.

Dan Gohman

2008-02-27 01:23:58 +0000

[diff] [blame]

762

APInt LHSKnownZero, LHSKnownOne;

763

APInt RHSKnownZero, RHSKnownOne;

764

DAG.ComputeMaskedBits(N.getOperand(0),

Dan Gohman

c9cd46f

2008-02-27 21:12:32 +0000

[diff] [blame]

765

APInt::getAllOnesValue(N.getOperand(0)

766

.getValueSizeInBits()),

Dan Gohman

2008-02-27 01:23:58 +0000

[diff] [blame]

767

LHSKnownZero, LHSKnownOne);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

768

Dan Gohman

2008-02-27 01:23:58 +0000

[diff] [blame]

769

if (LHSKnownZero.getBoolValue()) {

770

DAG.ComputeMaskedBits(N.getOperand(1),

Dan Gohman

c9cd46f

2008-02-27 21:12:32 +0000

[diff] [blame]

771

APInt::getAllOnesValue(N.getOperand(1)

772

.getValueSizeInBits()),

Dan Gohman

2008-02-27 01:23:58 +0000

[diff] [blame]

773

RHSKnownZero, RHSKnownOne);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

774

// If all of the bits are known zero on the LHS or RHS, the add won't

775

// carry.

Dan Gohman

c9cd46f

2008-02-27 21:12:32 +0000

[diff] [blame]

776

if (~(LHSKnownZero | RHSKnownZero) == 0) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

777

Base = N.getOperand(0);

778

Index = N.getOperand(1);

return true;

}

}

}

return false;

}

/// Returns true if the address N can be represented by a base register plus

788

/// a signed 16-bit displacement [r+imm], and if it is not better

789

/// represented as reg+reg.

790

bool PPCTargetLowering::SelectAddressRegImm(SDOperand N, SDOperand &Disp,

791

SDOperand &Base, SelectionDAG &DAG){

792

// If this can be more profitably realized as r+r, fail.

793

if (SelectAddressRegReg(N, Disp, Base, DAG))

794

return false;

795

796

if (N.getOpcode() == ISD::ADD) {

797

short imm = 0;

798

if (isIntS16Immediate(N.getOperand(1), imm)) {

799

Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);

800

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {

801

Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());

802

} else {

803

Base = N.getOperand(0);

804

}

805

return true; // [r+i]

806

} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {

807

// Match LOAD (ADD (X, Lo(G))).

808

assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()

809

&& "Cannot handle constant offsets yet!");

810

Disp = N.getOperand(1).getOperand(0); // The global address.

811

assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||

812

Disp.getOpcode() == ISD::TargetConstantPool ||

813

Disp.getOpcode() == ISD::TargetJumpTable);

814

Base = N.getOperand(0);

815

return true; // [&g+r]

816

}

817

} else if (N.getOpcode() == ISD::OR) {

818

short imm = 0;

819

if (isIntS16Immediate(N.getOperand(1), imm)) {

820

// If this is an or of disjoint bitfields, we can codegen this as an add

821

// (for better address arithmetic) if the LHS and RHS of the OR are

822

// provably disjoint.

Dan Gohman

2008-02-27 01:23:58 +0000

[diff] [blame]

823

APInt LHSKnownZero, LHSKnownOne;

824

DAG.ComputeMaskedBits(N.getOperand(0),

Bill Wendling

a77e9f0

2008-03-24 23:16:37 +0000

[diff] [blame]

825

APInt::getAllOnesValue(N.getOperand(0)

826

.getValueSizeInBits()),

Dan Gohman

2008-02-27 01:23:58 +0000

[diff] [blame]

827

LHSKnownZero, LHSKnownOne);

Bill Wendling

a77e9f0

2008-03-24 23:16:37 +0000

[diff] [blame]

828

Dan Gohman

2008-02-27 01:23:58 +0000

[diff] [blame]

829

if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

830

// If all of the bits are known zero on the LHS or RHS, the add won't

831

// carry.

832

Base = N.getOperand(0);

833

Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);

return true;

}

}

} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {

838

// Loading from a constant address.

839

840

// If this address fits entirely in a 16-bit sext immediate field, codegen

841

// this as "d, 0"

842

short Imm;

843

if (isIntS16Immediate(CN, Imm)) {

844

Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));

845

Base = DAG.getRegister(PPC::R0, CN->getValueType(0));

return true;

}

// Handle 32-bit sext immediates with LIS + addr mode.

850

if (CN->getValueType(0) == MVT::i32 ||

851

(int64_t)CN->getValue() == (int)CN->getValue()) {

852

int Addr = (int)CN->getValue();

853

854

// Otherwise, break this down into an LIS + disp.

855

Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

856

857

Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);

858

unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;

859

Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);

return true;

}

}

Disp = DAG.getTargetConstant(0, getPointerTy());

865

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))

866

Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());

867

else

868

Base = N;

869

return true; // [r+0]

870

}

871

872

/// SelectAddressRegRegOnly - Given the specified addressed, force it to be

873

/// represented as an indexed [r+r] operation.

874

bool PPCTargetLowering::SelectAddressRegRegOnly(SDOperand N, SDOperand &Base,

875

SDOperand &Index,

876

SelectionDAG &DAG) {

877

// Check to see if we can easily represent this as an [r+r] address. This

878

// will fail if it thinks that the address is more profitably represented as

879

// reg+imm, e.g. where imm = 0.

880

if (SelectAddressRegReg(N, Base, Index, DAG))

881

return true;

882

883

// If the operand is an addition, always emit this as [r+r], since this is

884

// better (for code size, and execution, as the memop does the add for free)

885

// than emitting an explicit add.

886

if (N.getOpcode() == ISD::ADD) {

887

Base = N.getOperand(0);

888

Index = N.getOperand(1);

return true;

}

// Otherwise, do it the hard way, using R0 as the base register.

893

Base = DAG.getRegister(PPC::R0, N.getValueType());

Index = N;

return true;

}

/// SelectAddressRegImmShift - Returns true if the address N can be

899

/// represented by a base register plus a signed 14-bit displacement

900

/// [r+imm*4]. Suitable for use by STD and friends.

901

bool PPCTargetLowering::SelectAddressRegImmShift(SDOperand N, SDOperand &Disp,

902

SDOperand &Base,

903

SelectionDAG &DAG) {

904

// If this can be more profitably realized as r+r, fail.

905

if (SelectAddressRegReg(N, Disp, Base, DAG))

906

return false;

907

908

if (N.getOpcode() == ISD::ADD) {

909

short imm = 0;

910

if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {

911

Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);

912

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {

913

Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());

914

} else {

915

Base = N.getOperand(0);

916

}

917

return true; // [r+i]

918

} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {

919

// Match LOAD (ADD (X, Lo(G))).

920

assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()

921

&& "Cannot handle constant offsets yet!");

922

Disp = N.getOperand(1).getOperand(0); // The global address.

923

assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||

924

Disp.getOpcode() == ISD::TargetConstantPool ||

925

Disp.getOpcode() == ISD::TargetJumpTable);

926

Base = N.getOperand(0);

927

return true; // [&g+r]

928

}

929

} else if (N.getOpcode() == ISD::OR) {

930

short imm = 0;

931

if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {

932

// If this is an or of disjoint bitfields, we can codegen this as an add

933

// (for better address arithmetic) if the LHS and RHS of the OR are

934

// provably disjoint.

Dan Gohman

2008-02-27 01:23:58 +0000

[diff] [blame]

935

APInt LHSKnownZero, LHSKnownOne;

936

DAG.ComputeMaskedBits(N.getOperand(0),

Bill Wendling

a77e9f0

2008-03-24 23:16:37 +0000

[diff] [blame]

937

APInt::getAllOnesValue(N.getOperand(0)

938

.getValueSizeInBits()),

Dan Gohman

2008-02-27 01:23:58 +0000

[diff] [blame]

939

LHSKnownZero, LHSKnownOne);

940

if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

941

// If all of the bits are known zero on the LHS or RHS, the add won't

942

// carry.

943

Base = N.getOperand(0);

944

Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);

return true;

}

}

} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {

949

// Loading from a constant address. Verify low two bits are clear.

950

if ((CN->getValue() & 3) == 0) {

951

// If this address fits entirely in a 14-bit sext immediate field, codegen

952

// this as "d, 0"

953

short Imm;

954

if (isIntS16Immediate(CN, Imm)) {

955

Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());

956

Base = DAG.getRegister(PPC::R0, CN->getValueType(0));

return true;

}

// Fold the low-part of 32-bit absolute addresses into addr mode.

961

if (CN->getValueType(0) == MVT::i32 ||

962

(int64_t)CN->getValue() == (int)CN->getValue()) {

963

int Addr = (int)CN->getValue();

964

965

// Otherwise, break this down into an LIS + disp.

966

Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);

967

968

Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);

969

unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;

970

Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);

return true;

}

}

}

Disp = DAG.getTargetConstant(0, getPointerTy());

977

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))

978

Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());

979

else

980

Base = N;

981

return true; // [r+0]

}

/// getPreIndexedAddressParts - returns true by value, base pointer and

986

/// offset pointer and addressing mode by reference if the node's address

987

/// can be legally represented as pre-indexed load / store address.

988

bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,

989

SDOperand &Offset,

990

ISD::MemIndexedMode &AM,

991

SelectionDAG &DAG) {

992

// Disabled by default for now.

993

if (!EnablePPCPreinc) return false;

SDOperand Ptr;

MVT::ValueType VT;

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

998

Ptr = LD->getBasePtr();

Dan Gohman

9a4c92c

2008-01-30 00:15:11 +0000

[diff] [blame]

999

VT = LD->getMemoryVT();

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1000

1001

} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {

1002

ST = ST;

1003

Ptr = ST->getBasePtr();

Dan Gohman

9a4c92c

2008-01-30 00:15:11 +0000

[diff] [blame]

1004

VT = ST->getMemoryVT();

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

} else

return false;

// PowerPC doesn't have preinc load/store instructions for vectors.

1009

if (MVT::isVector(VT))

1010

return false;

1011

1012

// TODO: Check reg+reg first.

1013

1014

// LDU/STU use reg+imm*4, others use reg+imm.

1015

if (VT != MVT::i64) {

1016

// reg + imm

1017

if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))

return false;

} else {

// reg + imm * 4.

if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))

return false;

}

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

1026

// PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of

1027

// sext i32 to i64 when addr mode is r+i.

Dan Gohman

9a4c92c

2008-01-30 00:15:11 +0000

[diff] [blame]

1028

if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1029

LD->getExtensionType() == ISD::SEXTLOAD &&

1030

isa<ConstantSDNode>(Offset))

return false;

}

AM = ISD::PRE_INC;

return true;

}

//===----------------------------------------------------------------------===//

1039

// LowerOperation implementation

1040

//===----------------------------------------------------------------------===//

1041

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1042

/// LowerConstantPool - Lower a constant pool reference into a Hi/Lo pair that
/// is added together, optionally offset by the global base register for PIC.
SDOperand PPCTargetLowering::LowerConstantPool(SDOperand Op,
                                             SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *PoolNode = cast<ConstantPoolSDNode>(Op);
  Constant *PoolConst = PoolNode->getConstVal();
  SDOperand Target =
    DAG.getTargetConstantPool(PoolConst, PtrVT, PoolNode->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand HiPart = DAG.getNode(PPCISD::Hi, PtrVT, Target, Zero);
  SDOperand LoPart = DAG.getNode(PPCISD::Lo, PtrVT, Target, Zero);

  // Non-darwin platforms don't support non-static relocation models yet, so
  // they share the static path: direct, non-pic access to the constant pool
  // where the address is just (hi(&g)+lo(&g)).
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin())
    return DAG.getNode(ISD::ADD, PtrVT, HiPart, LoPart);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (TM.getRelocationModel() == Reloc::PIC_)
    HiPart = DAG.getNode(ISD::ADD, PtrVT,
                         DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), HiPart);

  return DAG.getNode(ISD::ADD, PtrVT, HiPart, LoPart);
}

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1074

/// LowerJumpTable - Lower a jump table reference into a Hi/Lo pair that is
/// added together, optionally offset by the global base register for PIC.
SDOperand PPCTargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *TableNode = cast<JumpTableSDNode>(Op);
  SDOperand Target = DAG.getTargetJumpTable(TableNode->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand HiPart = DAG.getNode(PPCISD::Hi, PtrVT, Target, Zero);
  SDOperand LoPart = DAG.getNode(PPCISD::Lo, PtrVT, Target, Zero);

  // Non-darwin platforms don't support non-static relocation models yet, so
  // they share the static path: direct, non-pic access where the address is
  // just (hi(&g)+lo(&g)).
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin())
    return DAG.getNode(ISD::ADD, PtrVT, HiPart, LoPart);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (TM.getRelocationModel() == Reloc::PIC_)
    HiPart = DAG.getNode(ISD::ADD, PtrVT,
                         DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), HiPart);

  return DAG.getNode(ISD::ADD, PtrVT, HiPart, LoPart);
}

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1104

/// LowerGlobalTLSAddress - Thread-local storage lowering is not implemented
/// for PPC; reaching this function trips an assertion.
SDOperand PPCTargetLowering::LowerGlobalTLSAddress(SDOperand Op,
                                                   SelectionDAG &DAG) {
  assert(false && "TLS not implemented for PPC.");
  // Not reached; a null operand is returned for builds without assertions.
  return SDOperand();
}

1109

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1110

/// LowerGlobalAddress - Lower a global address into a Hi/Lo pair that is added
/// together, optionally offset by the global base register for PIC, and
/// loaded through the lazy resolver stub when the subtarget requires one.
SDOperand PPCTargetLowering::LowerGlobalAddress(SDOperand Op,
                                                SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GlobalNode = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GlobalNode->getGlobal();
  SDOperand Target =
    DAG.getTargetGlobalAddress(GV, PtrVT, GlobalNode->getOffset());

  // If it's a debug information descriptor, don't mess with it.
  if (DAG.isVerifiedDebugInfoDesc(Op))
    return Target;

  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand HiPart = DAG.getNode(PPCISD::Hi, PtrVT, Target, Zero);
  SDOperand LoPart = DAG.getNode(PPCISD::Lo, PtrVT, Target, Zero);

  // Non-darwin platforms don't support non-static relocation models yet, so
  // they share the static path: direct, non-pic access to globals where the
  // address is just (hi(&g)+lo(&g)).
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin())
    return DAG.getNode(ISD::ADD, PtrVT, HiPart, LoPart);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (TM.getRelocationModel() == Reloc::PIC_)
    HiPart = DAG.getNode(ISD::ADD, PtrVT,
                         DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), HiPart);

  SDOperand Address = DAG.getNode(ISD::ADD, PtrVT, HiPart, LoPart);

  // If the global is weak or external, we have to go through the lazy
  // resolution stub; otherwise the computed address is the result.
  if (TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
    return DAG.getLoad(PtrVT, DAG.getEntryNode(), Address, NULL, 0);
  return Address;
}

1150

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1151

/// LowerSETCC - Custom lowering for SETCC.  Returns a replacement node for the
/// cases handled here and a null SDOperand otherwise.
SDOperand PPCTargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  if (ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    const bool IsZero = RHSConst->isNullValue();

    // If we're comparing for equality to zero, expose the fact that this is
    // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
    // fold the new nodes.
    if (IsZero && CC == ISD::SETEQ) {
      MVT::ValueType VT = Op.getOperand(0).getValueType();
      SDOperand Widened = Op.getOperand(0);
      if (VT < MVT::i32) {
        VT = MVT::i32;
        Widened = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
      }
      unsigned ShiftAmt = Log2_32(MVT::getSizeInBits(VT));
      SDOperand LeadingZeros = DAG.getNode(ISD::CTLZ, VT, Widened);
      SDOperand Shifted = DAG.getNode(ISD::SRL, VT, LeadingZeros,
                                      DAG.getConstant(ShiftAmt, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Shifted);
    }

    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (RHSConst->isAllOnesValue() || IsZero)
      return SDOperand();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
  const bool IsEqualityCC = (CC == ISD::SETEQ || CC == ISD::SETNE);
  if (!MVT::isInteger(LHSVT) || !IsEqualityCC)
    return SDOperand();

  MVT::ValueType ResultVT = Op.getValueType();
  SDOperand Difference = DAG.getNode(ISD::XOR, LHSVT, Op.getOperand(0),
                                     Op.getOperand(1));
  return DAG.getSetCC(ResultVT, Difference, DAG.getConstant(0, LHSVT), CC);
}

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1193

/// LowerVAARG - VAARG lowering for the ELF32 ABI is not implemented yet;
/// reaching this function trips an assertion.
SDOperand PPCTargetLowering::LowerVAARG(SDOperand Op, SelectionDAG &DAG,
                                        int VarArgsFrameIndex,
                                        int VarArgsStackOffset,
                                        unsigned VarArgsNumGPR,
                                        unsigned VarArgsNumFPR,
                                        const PPCSubtarget &Subtarget) {
  assert(false && "VAARG in ELF32 ABI not implemented yet!");
  // Not reached; a null operand is returned for builds without assertions.
  return SDOperand();
}

1203

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1204

/// LowerVASTART - Lower va_start.  For the Mach-O ABI this is a single store
/// of the varargs frame slot address; otherwise the fields of the ELF32
/// va_list structure are written one after another.
SDOperand PPCTargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                                          int VarArgsFrameIndex,
                                          int VarArgsStackOffset,
                                          unsigned VarArgsNumGPR,
                                          unsigned VarArgsNumFPR,
                                          const PPCSubtarget &Subtarget) {
  SDOperand Chain = Op.getOperand(0);
  SDOperand VAListPtr = Op.getOperand(1);

  if (Subtarget.isMachoABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDOperand FrameSlot = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Chain, FrameSlot, VAListPtr, SV, 0);
  }

  // For ELF 32 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];

  SDOperand NumGPRVal = DAG.getConstant(VarArgsNumGPR, MVT::i8);
  SDOperand NumFPRVal = DAG.getConstant(VarArgsNumFPR, MVT::i8);

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDOperand OverflowAreaFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
  SDOperand RegSaveAreaFI = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

  // Distances between consecutive va_list fields, in bytes.
  const uint64_t PtrBytes = MVT::getSizeInBits(PtrVT)/8;
  uint64_t FrameOffset = PtrBytes;
  SDOperand FrameStep = DAG.getConstant(FrameOffset, PtrVT);

  uint64_t StackOffset = PtrBytes - 1;
  SDOperand StackStep = DAG.getConstant(StackOffset, PtrVT);

  uint64_t FPROffset = 1;
  SDOperand FPRStep = DAG.getConstant(FPROffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDOperand StoreGPR = DAG.getStore(Chain, NumGPRVal, VAListPtr, SV, 0);
  uint64_t CurOffset = FPROffset;
  SDOperand CurPtr = DAG.getNode(ISD::ADD, PtrVT, VAListPtr, FPRStep);

  // Store second byte : number of float regs
  SDOperand StoreFPR = DAG.getStore(StoreGPR, NumFPRVal, CurPtr, SV, CurOffset);
  CurOffset += StackOffset;
  CurPtr = DAG.getNode(ISD::ADD, PtrVT, CurPtr, StackStep);

  // Store second word : arguments given on stack
  SDOperand StoreOverflow =
    DAG.getStore(StoreFPR, OverflowAreaFI, CurPtr, SV, CurOffset);
  CurOffset += FrameOffset;
  CurPtr = DAG.getNode(ISD::ADD, PtrVT, CurPtr, FrameStep);

  // Store third word : arguments given in registers
  return DAG.getStore(StoreOverflow, RegSaveAreaFI, CurPtr, SV, CurOffset);
}

#include "PPCGenCallingConv.inc"

1290

1291

/// GetFPR - Get the set of FP registers that should be allocated for arguments,

1292

/// depending on which subtarget is selected.

1293

static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {

1294

if (Subtarget.isMachoABI()) {

1295

static const unsigned FPR[] = {

1296

PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,

1297

PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13

};

return FPR;

}

static const unsigned FPR[] = {

1304

PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,

PPC::F8

};

return FPR;

}

Bill Wendling

2008-03-07 20:49:02 +0000

[diff] [blame]

1310

/// LowerFORMAL_ARGUMENTS - Lower the incoming formal arguments of the current
/// function.  For every argument value of Op this produces either a copy out
/// of the physical argument register assigned to it or a load from a fixed
/// stack object; byval arguments are represented by the frame index of their
/// in-memory copy.  Any stores that are created along the way are joined with
/// a TokenFactor that becomes the outgoing chain.
///
/// \param Op                  the FORMAL_ARGUMENTS node; operand 0 is the
///                            chain, operand 2 the vararg flag, and operand
///                            ArgNo+3 carries the flags of argument ArgNo.
/// \param DAG                 the DAG the new nodes are added to.
/// \param VarArgsFrameIndex   set (vararg functions only) to the frame index
///                            created for the start of the vararg area.
/// \param VarArgsStackOffset  set (vararg + ELF32 only) to the frame index of
///                            the first stack-passed vararg.
/// \param VarArgsNumGPR       set (vararg + ELF32 only) to the number of GPRs
///                            consumed by the fixed arguments.
/// \param VarArgsNumFPR       set (vararg + ELF32 only) to the number of FPRs
///                            consumed by the fixed arguments.
/// \param Subtarget           selects the ABI (Macho / ELF32) in effect.
/// \returns a MERGE_VALUES node with one result per argument followed by the
///          chain.
SDOperand
PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
                                         SelectionDAG &DAG,
                                         int &VarArgsFrameIndex,
                                         int &VarArgsStackOffset,
                                         unsigned &VarArgsNumGPR,
                                         unsigned &VarArgsNumFPR,
                                         const PPCSubtarget &Subtarget) {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  bool isMachoABI = Subtarget.isMachoABI();
  bool isELF32_ABI = Subtarget.isELF32_ABI();
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // Arguments start right after the linkage area.
  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  // The FPR table depends on the subtarget's ABI, so it must be looked up on
  // every call.  It must not be a function-local static: that would cache the
  // table chosen for the first subtarget ever lowered, and a later call for a
  // Macho subtarget could then index past the end of the 8-entry table.
  const unsigned *FPR = GetFPR(Subtarget);

  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying
  // to handle Elf here.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e;
         ++ArgNo) {
      MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
      unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
      ISD::ArgFlagsTy Flags =
        cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += isPPC64 ? 8 : 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.
  //
  // In the ELF 32 ABI, GPRs and stack are double word align: an argument
  // represented with two words (long long or double) must be copied to an
  // even GPR_idx value or to an even ArgOffset value.  TODO: implement this.

  // Stores created while spilling register-passed pieces to memory; they are
  // merged into the chain with a TokenFactor at the end.
  SmallVector<SDOperand, 8> MemOps;

  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags =
      cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
    // See if next argument requires stack alignment in ELF
    bool Expand = false; // TODO: implement this.

    // Stack offset of this argument's slot, before ArgOffset is advanced.
    unsigned CurArgOffset = ArgOffset;

    // FIXME alignment for ELF may not be right
    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Double word align in ELF
      if (Expand && isELF32_ABI) GPR_idx += (GPR_idx % 2);
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgValues.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          // NOTE(review): GPRCRegClass is used here even when GPR points at
          // the 64-bit register table -- confirm this is intended on PPC64.
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
          SDOperand Store = DAG.getTruncStore(Val.getValue(1), Val, FIN,
                               NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
          MemOps.push_back(Store);
          ++GPR_idx;
          if (isMachoABI) ArgOffset += PtrByteSize;
        } else {
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgVal will be address of the beginning of
        // the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
          SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
          SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
          SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          if (isMachoABI) ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: skip over the remainder of the object on the stack.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i32:
      if (!isPPC64) {
        // Double word align in ELF
        if (Expand && isELF32_ABI) GPR_idx += (GPR_idx % 2);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // Stack align in ELF
        if (needsLoad && Expand && isELF32_ABI)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
        // All int arguments reserve stack space in Macho ABI.
        if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
        break;
      }
      // FALLTHROUGH
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
        RegInfo.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);

        if (ObjectVT == MVT::i32) {
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          if (Flags.isSExt())
            ArgVal = DAG.getNode(ISD::AssertSext, MVT::i64, ArgVal,
                                 DAG.getValueType(ObjectVT));
          else if (Flags.isZExt())
            ArgVal = DAG.getNode(ISD::AssertZext, MVT::i64, ArgVal,
                                 DAG.getValueType(ObjectVT));

          ArgVal = DAG.getNode(ISD::TRUNCATE, MVT::i32, ArgVal);
        }

        ++GPR_idx;
      } else {
        needsLoad = true;
      }
      // All int arguments reserve stack space in Macho ABI.
      if (isMachoABI || needsLoad) ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs && isMachoABI) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;
        if (ObjectVT == MVT::f32)
          VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // Stack align in ELF
      if (needsLoad && Expand && isELF32_ABI)
        ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
      // All FP arguments reserve stack space in Macho ABI.
      if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
        RegInfo.addLiveIn(VR[VR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        if (isVarArg) {
          // Pad the stack offset up to a 16-byte boundary, consuming one GPR
          // per pointer-sized pad word, then account for the vector itself.
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // When ArgSize exceeds ObjSize the object sits at the end of its slot.
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize));
      SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {

    int depth;
    if (isELF32_ABI) {
      VarArgsNumGPR = GPR_idx;
      VarArgsNumFPR = FPR_idx;

      // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
      // pointer.
      depth = -(Num_GPR_Regs * MVT::getSizeInBits(PtrVT)/8 +
                Num_FPR_Regs * MVT::getSizeInBits(MVT::f64)/8 +
                MVT::getSizeInBits(PtrVT)/8);

      VarArgsStackOffset = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                                  ArgOffset);

    }
    else
      depth = ArgOffset;

    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               depth);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

    // In ELF 32 ABI, the fixed integer arguments of a variadic function are
    // stored to the VarArgsFrameIndex on the stack.
    if (isELF32_ABI) {
      for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
        SDOperand Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
        SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by the pointer size for the next argument to
        // store
        SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
        FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
      }
    }

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;
      if (isPPC64)
        VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
      else
        VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);

      RegInfo.addLiveIn(GPR[GPR_idx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }

    // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    if (isELF32_ABI) {
      for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
        SDOperand Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
        SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by eight for the next argument to store
        SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8,
                                           PtrVT);
        FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
      }

      for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
        unsigned VReg;
        VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
        SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by eight for the next argument to store
        SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8,
                                           PtrVT);
        FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
      }
    }
  }

  // Make every spill store a predecessor of the outgoing chain.
  if (!MemOps.empty())
    Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

1712

1713

/// isCallCompatibleAddress - Return the immediate to use if the specified

1714

/// 32-bit value is representable in the immediate field of a BxA instruction.

1715

static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {

1716

ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);

1717

if (!C) return 0;

1718

1719

int Addr = C->getValue();

1720

if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.

1721

(Addr << 6 >> 6) != Addr)

1722

return 0; // Top 6 bits have to be sext of immediate.

1723

Evan Cheng

282c646

2007-10-22 19:46:19 +0000

[diff] [blame]

1724

return DAG.getConstant((int)C->getValue() >> 2,

1725

DAG.getTargetLoweringInfo().getPointerTy()).Val;

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1726

}

1727

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1728

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified

1729

/// by "Src" to address "Dst" of size "Size". Alignment information is

1730

/// specified by the specific parameter attribute. The copy will be passed as

1731

/// a byval function parameter.

1732

/// Sometimes what we are copying is the end of a larger object, the part that

1733

/// does not fit in registers.

1734

static SDOperand

1735

CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,

Duncan Sands

2008-03-21 09:14:45 +0000

[diff] [blame]

1736

ISD::ArgFlagsTy Flags, SelectionDAG &DAG,

1737

unsigned Size) {

1738

SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1739

SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);

Dale Johannesen

7a7aa10

2008-03-05 23:31:27 +0000

[diff] [blame]

1740

SDOperand AlwaysInline = DAG.getConstant(0, MVT::i32);

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1741

return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);

1742

}

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1743

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1744

SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,

Dan Gohman

2008-03-19 21:39:28 +0000

[diff] [blame]

1745

const PPCSubtarget &Subtarget,

1746

TargetMachine &TM) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1747

SDOperand Chain = Op.getOperand(0);

1748

bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

1749

SDOperand Callee = Op.getOperand(4);

1750

unsigned NumOps = (Op.getNumOperands() - 5) / 2;

1751

1752

bool isMachoABI = Subtarget.isMachoABI();

1753

bool isELF32_ABI = Subtarget.isELF32_ABI();

1754

1755

MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

1756

bool isPPC64 = PtrVT == MVT::i64;

1757

unsigned PtrByteSize = isPPC64 ? 8 : 4;

1758

1759

// args_to_use will accumulate outgoing args for the PPCISD::CALL case in

1760

// SelectExpr to use to put the arguments in the appropriate registers.

1761

std::vector<SDOperand> args_to_use;

1762

1763

// Count how many bytes are to be pushed on the stack, including the linkage

1764

// area, and parameter passing area. We start with 24/48 bytes, which is

1765

// prereserved space for [SP][CR][LR][3 x unused].

1766

unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);

Dale Johannesen

2008-03-12 00:22:17 +0000

[diff] [blame]

1767

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1768

// Add up all the space actually used.

Dale Johannesen

2008-03-14 17:41:26 +0000

[diff] [blame]

1769

// In 32-bit non-varargs calls, Altivec parameters all go at the end; usually

1770

// they all go in registers, but we must reserve stack space for them for

1771

// possible use by the caller. In varargs or 64-bit calls, parameters are

1772

// assigned stack space in order, with padding so Altivec parameters are

1773

// 16-byte aligned.

1774

unsigned nAltivecParamsAtEnd = 0;

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1775

for (unsigned i = 0; i != NumOps; ++i) {

Dale Johannesen

2008-03-12 00:22:17 +0000

[diff] [blame]

1776

SDOperand Arg = Op.getOperand(5+2*i);

1777

MVT::ValueType ArgVT = Arg.getValueType();

Dale Johannesen

2008-03-14 17:41:26 +0000

[diff] [blame]

1778

if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||

1779

ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {

1780

if (!isVarArg && !isPPC64) {

1781

// Non-varargs Altivec parameters go after all the non-Altivec parameters;

1782

// do those last so we know how much padding we need.

1783

nAltivecParamsAtEnd++;

1784

continue;

1785

} else {

1786

// Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.

1787

NumBytes = ((NumBytes+15)/16)*16;

1788

}

1789

}

Duncan Sands

2008-03-21 09:14:45 +0000

[diff] [blame]

1790

ISD::ArgFlagsTy Flags =

1791

cast<ARG_FLAGSSDNode>(Op.getOperand(5+2*i+1))->getArgFlags();

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1792

unsigned ArgSize =MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;

Duncan Sands

2008-03-21 09:14:45 +0000

[diff] [blame]

1793

if (Flags.isByVal())

1794

ArgSize = Flags.getByValSize();

Dale Johannesen

05b4dbc

2008-03-08 01:41:42 +0000

[diff] [blame]

1795

ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1796

NumBytes += ArgSize;

1797

}

Dale Johannesen

2008-03-14 17:41:26 +0000

[diff] [blame]

1798

// Allow for Altivec parameters at the end, if needed.

1799

if (nAltivecParamsAtEnd) {

1800

NumBytes = ((NumBytes+15)/16)*16;

1801

NumBytes += 16*nAltivecParamsAtEnd;

1802

}

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1803

1804

// The prolog code of the callee may store up to 8 GPR argument registers to

1805

// the stack, allowing va_start to index over them in memory if it's varargs.

1806

// Because we cannot tell if this is needed on the caller side, we have to

1807

// conservatively assume that it is needed. As such, make sure we have at

1808

// least enough stack space for the caller to store the 8 GPRs.

1809

NumBytes = std::max(NumBytes,

1810

PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));

1811

1812

// Adjust the stack pointer for the new arguments...

1813

// These operations are automatically eliminated by the prolog/epilog pass

1814

Chain = DAG.getCALLSEQ_START(Chain,

1815

DAG.getConstant(NumBytes, PtrVT));

Dale Johannesen

7a7aa10

2008-03-05 23:31:27 +0000

[diff] [blame]

1816

SDOperand CallSeqStart = Chain;

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1817

1818

// Set up a copy of the stack pointer for use loading and storing any

1819

// arguments that may not fit in the registers available for argument

// passing.

SDOperand StackPtr;

if (isPPC64)

StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

1824

else

1825

StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

1826

1827

// Figure out which arguments are going to go in registers, and which in

1828

// memory. Also, if this is a vararg function, floating point operations

1829

// must be stored to our stack, and loaded into integer regs as well, if

1830

// any integer regs are available for argument passing.

1831

unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);

1832

unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

1833

1834

static const unsigned GPR_32[] = { // 32-bit registers.

1835

PPC::R3, PPC::R4, PPC::R5, PPC::R6,

1836

PPC::R7, PPC::R8, PPC::R9, PPC::R10,

1837

};

1838

static const unsigned GPR_64[] = { // 64-bit registers.

1839

PPC::X3, PPC::X4, PPC::X5, PPC::X6,

1840

PPC::X7, PPC::X8, PPC::X9, PPC::X10,

1841

};

1842

static const unsigned *FPR = GetFPR(Subtarget);

1843

1844

static const unsigned VR[] = {

1845

PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,

1846

PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

1847

};

Owen Anderson

2007-09-07 04:06:50 +0000

[diff] [blame]

1848

const unsigned NumGPRs = array_lengthof(GPR_32);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1849

const unsigned NumFPRs = isMachoABI ? 13 : 8;

Owen Anderson

2007-09-07 04:06:50 +0000

[diff] [blame]

1850

const unsigned NumVRs = array_lengthof( VR);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1851

1852

const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

1853

1854

std::vector<std::pair<unsigned, SDOperand> > RegsToPass;

1855

SmallVector<SDOperand, 8> MemOpChains;

1856

for (unsigned i = 0; i != NumOps; ++i) {

1857

bool inMem = false;

1858

SDOperand Arg = Op.getOperand(5+2*i);

Duncan Sands

2008-03-21 09:14:45 +0000

[diff] [blame]

1859

ISD::ArgFlagsTy Flags =

1860

cast<ARG_FLAGSSDNode>(Op.getOperand(5+2*i+1))->getArgFlags();

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1861

// See if next argument requires stack alignment in ELF

Duncan Sands

2008-03-21 09:14:45 +0000

[diff] [blame]

1862

bool Expand = false; // TODO: implement this.

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1863

1864

// PtrOff will be used to store the current argument to the stack if a

1865

// register cannot be found for it.

1866

SDOperand PtrOff;

1867

1868

// Stack align in ELF 32

1869

if (isELF32_ABI && Expand)

1870

PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,

1871

StackPtr.getValueType());

1872

else

1873

PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

1874

1875

PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

1876

1877

// On PPC64, promote integers to 64-bit values.

1878

if (isPPC64 && Arg.getValueType() == MVT::i32) {

Duncan Sands

2008-03-21 09:14:45 +0000

[diff] [blame]

1879

// FIXME: Should this use ANY_EXTEND if neither sext nor zext?

1880

unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1881

Arg = DAG.getNode(ExtOp, MVT::i64, Arg);

1882

}

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1883

1884

// FIXME Elf untested, what are alignment rules?

Dale Johannesen

2008-03-07 20:27:40 +0000

[diff] [blame]

1885

// FIXME memcpy is used way more than necessary. Correctness first.

Duncan Sands

2008-03-21 09:14:45 +0000

[diff] [blame]

1886

if (Flags.isByVal()) {

1887

unsigned Size = Flags.getByValSize();

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1888

if (isELF32_ABI && Expand) GPR_idx += (GPR_idx % 2);

Dale Johannesen

2008-03-07 20:27:40 +0000

[diff] [blame]

1889

if (Size==1 || Size==2) {

1890

// Very small objects are passed right-justified.

1891

// Everything else is passed left-justified.

1892

MVT::ValueType VT = (Size==1) ? MVT::i8 : MVT::i16;

1893

if (GPR_idx != NumGPRs) {

1894

SDOperand Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, Chain, Arg,

1895

NULL, 0, VT);

1896

MemOpChains.push_back(Load.getValue(1));

1897

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

1898

if (isMachoABI)

1899

ArgOffset += PtrByteSize;

1900

} else {

1901

SDOperand Const = DAG.getConstant(4 - Size, PtrOff.getValueType());

1902

SDOperand AddPtr = DAG.getNode(ISD::ADD, PtrVT, PtrOff, Const);

1903

SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,

1904

CallSeqStart.Val->getOperand(0),

1905

Flags, DAG, Size);

1906

// This must go outside the CALLSEQ_START..END.

1907

SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,

1908

CallSeqStart.Val->getOperand(1));

1909

DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val);

1910

Chain = CallSeqStart = NewCallSeqStart;

1911

ArgOffset += PtrByteSize;

1912

}

1913

continue;

1914

}

Dale Johannesen

bfadf4b

2008-03-17 02:13:43 +0000

[diff] [blame]

1915

// Copy entire object into memory. There are cases where gcc-generated

1916

// code assumes it is there, even if it could be put entirely into

1917

// registers. (This is not what the doc says.)

1918

SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,

1919

CallSeqStart.Val->getOperand(0),

1920

Flags, DAG, Size);

1921

// This must go outside the CALLSEQ_START..END.

1922

SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,

1923

CallSeqStart.Val->getOperand(1));

1924

DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val);

1925

Chain = CallSeqStart = NewCallSeqStart;

1926

// And copy the pieces of it that fit into registers.

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1927

for (unsigned j=0; j<Size; j+=PtrByteSize) {

1928

SDOperand Const = DAG.getConstant(j, PtrOff.getValueType());

1929

SDOperand AddArg = DAG.getNode(ISD::ADD, PtrVT, Arg, Const);

1930

if (GPR_idx != NumGPRs) {

1931

SDOperand Load = DAG.getLoad(PtrVT, Chain, AddArg, NULL, 0);

Dale Johannesen

7a7aa10

2008-03-05 23:31:27 +0000

[diff] [blame]

1932

MemOpChains.push_back(Load.getValue(1));

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

1933

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

1934

if (isMachoABI)

1935

ArgOffset += PtrByteSize;

1936

} else {

Dale Johannesen

bfadf4b

2008-03-17 02:13:43 +0000

[diff] [blame]

1937

ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;

Dale Johannesen

2008-03-07 20:27:40 +0000

[diff] [blame]

1938

break;

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

}

}

continue;

}

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1944

switch (Arg.getValueType()) {

1945

default: assert(0 && "Unexpected ValueType for argument!");

1946

case MVT::i32:

1947

case MVT::i64:

1948

// Double word align in ELF

1949

if (isELF32_ABI && Expand) GPR_idx += (GPR_idx % 2);

1950

if (GPR_idx != NumGPRs) {

1951

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));

1952

} else {

1953

MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));

1954

inMem = true;

1955

}

1956

if (inMem || isMachoABI) {

1957

// Stack align in ELF

1958

if (isELF32_ABI && Expand)

1959

ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;

1960

1961

ArgOffset += PtrByteSize;

}

break;

case MVT::f32:

case MVT::f64:

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

1966

if (FPR_idx != NumFPRs) {

1967

RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

1968

1969

if (isVarArg) {

1970

SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);

1971

MemOpChains.push_back(Store);

1972

1973

// Float varargs are always shadowed in available integer registers

1974

if (GPR_idx != NumGPRs) {

1975

SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);

1976

MemOpChains.push_back(Load.getValue(1));

1977

if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],

1978

Load));

1979

}

1980

if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){

1981

SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());

1982

PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff, ConstFour);

1983

SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);

1984

MemOpChains.push_back(Load.getValue(1));

1985

if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],

Load));

}

} else {

// If we have any FPRs remaining, we may also have GPRs remaining.

1990

// Args passed in FPRs consume either 1 (f32) or 2 (f64) available

1991

// GPRs.

1992

if (isMachoABI) {

1993

if (GPR_idx != NumGPRs)

1994

++GPR_idx;

1995

if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&

1996

!isPPC64) // PPC64 has 64-bit GPR's obviously :)

++GPR_idx;

}

}

} else {

MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));

2002

inMem = true;

2003

}

2004

if (inMem || isMachoABI) {

2005

// Stack align in ELF

2006

if (isELF32_ABI && Expand)

2007

ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;

if (isPPC64)

ArgOffset += 8;

else

ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;

}

break;

case MVT::v4f32:

case MVT::v4i32:

case MVT::v8i16:

case MVT::v16i8:

Dale Johannesen

2008-03-12 00:22:17 +0000

[diff] [blame]

2018

if (isVarArg) {

2019

// These go aligned on the stack, or in the corresponding R registers

2020

// when within range. The Darwin PPC ABI doc claims they also go in

2021

// V registers; in fact gcc does this only for arguments that are

2022

// prototyped, not for those that match the ... We do it for all

2023

// arguments, seems to work.

2024

while (ArgOffset % 16 !=0) {

2025

ArgOffset += PtrByteSize;

2026

if (GPR_idx != NumGPRs)

2027

GPR_idx++;

2028

}

2029

// We could elide this store in the case where the object fits

2030

// entirely in R registers. Maybe later.

2031

PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,

2032

DAG.getConstant(ArgOffset, PtrVT));

2033

SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);

2034

MemOpChains.push_back(Store);

2035

if (VR_idx != NumVRs) {

2036

SDOperand Load = DAG.getLoad(MVT::v4f32, Store, PtrOff, NULL, 0);

2037

MemOpChains.push_back(Load.getValue(1));

2038

RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));

2039

}

2040

ArgOffset += 16;

2041

for (unsigned i=0; i<16; i+=PtrByteSize) {

2042

if (GPR_idx == NumGPRs)

2043

break;

2044

SDOperand Ix = DAG.getNode(ISD::ADD, PtrVT, PtrOff,

2045

DAG.getConstant(i, PtrVT));

2046

SDOperand Load = DAG.getLoad(PtrVT, Store, Ix, NULL, 0);

2047

MemOpChains.push_back(Load.getValue(1));

2048

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

2049

}

2050

break;

2051

}

Dale Johannesen

2008-03-14 17:41:26 +0000

[diff] [blame]

2052

// Non-varargs Altivec params generally go in registers, but have

2053

// stack space allocated at the end.

2054

if (VR_idx != NumVRs) {

2055

// Doesn't have GPR space allocated.

2056

RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));

2057

} else if (nAltivecParamsAtEnd==0) {

2058

// We are emitting Altivec params in order.

Dale Johannesen

2008-03-12 00:22:17 +0000

[diff] [blame]

2059

PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,

2060

DAG.getConstant(ArgOffset, PtrVT));

2061

SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);

2062

MemOpChains.push_back(Store);

2063

ArgOffset += 16;

Dale Johannesen

2008-03-12 00:22:17 +0000

[diff] [blame]

2064

}

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2065

break;

2066

}

2067

}

Dale Johannesen

2008-03-14 17:41:26 +0000

[diff] [blame]

2068

// If all Altivec parameters fit in registers, as they usually do,

2069

// they get stack space following the non-Altivec parameters. We

2070

// don't track this here because nobody below needs it.

2071

// If there are more Altivec parameters than fit in registers emit

2072

// the stores here.

2073

if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {

2074

unsigned j = 0;

2075

// Offset is aligned; skip 1st 12 params which go in V registers.

2076

ArgOffset = ((ArgOffset+15)/16)*16;

2077

ArgOffset += 12*16;

2078

for (unsigned i = 0; i != NumOps; ++i) {

2079

SDOperand Arg = Op.getOperand(5+2*i);

2080

MVT::ValueType ArgType = Arg.getValueType();

2081

if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||

2082

ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {

2083

if (++j > NumVRs) {

2084

SDOperand PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,

2085

DAG.getConstant(ArgOffset, PtrVT));

2086

SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);

2087

MemOpChains.push_back(Store);

ArgOffset += 16;

}

}

}

}

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2094

if (!MemOpChains.empty())

2095

Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,

2096

&MemOpChains[0], MemOpChains.size());

2097

2098

// Build a sequence of copy-to-reg nodes chained together with token chain

2099

// and flag operands which copy the outgoing args into the appropriate regs.

2100

SDOperand InFlag;

2101

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {

2102

Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,

2103

InFlag);

2104

InFlag = Chain.getValue(1);

2105

}

2106

2107

// With the ELF 32 ABI, set CR6 to true if this is a vararg call.

2108

if (isVarArg && isELF32_ABI) {

Nicolas Geoffray

d01feb2

2008-03-10 14:12:10 +0000

[diff] [blame]

2109

SDOperand SetCR(DAG.getTargetNode(PPC::CRSET, MVT::i32), 0);

2110

Chain = DAG.getCopyToReg(Chain, PPC::CR1EQ, SetCR, InFlag);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2111

InFlag = Chain.getValue(1);

2112

}

2113

2114

std::vector<MVT::ValueType> NodeTys;

2115

NodeTys.push_back(MVT::Other); // Returns a chain

2116

NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.

2117

2118

SmallVector<SDOperand, 8> Ops;

2119

unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF;

2120

2121

// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every

2122

// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol

2123

// node so that legalize doesn't hack it.

Nicolas Geoffray

455a2e0

2007-12-21 12:22:29 +0000

[diff] [blame]

2124

if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))

2125

Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());

2126

else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2127

Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());

2128

else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))

2129

// If this is an absolute destination address, use the munged value.

2130

Callee = SDOperand(Dest, 0);

2131

else {

2132

// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair

2133

// to do the call, we can't use PPCISD::CALL.

2134

SDOperand MTCTROps[] = {Chain, Callee, InFlag};

2135

Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, MTCTROps, 2+(InFlag.Val!=0));

2136

InFlag = Chain.getValue(1);

2137

Chris Lattner

6eae8c6

2008-03-09 20:49:33 +0000

[diff] [blame]

2138

// Copy the callee address into R12/X12 on darwin.

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2139

if (isMachoABI) {

Chris Lattner

6eae8c6

2008-03-09 20:49:33 +0000

[diff] [blame]

2140

unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;

2141

Chain = DAG.getCopyToReg(Chain, Reg, Callee, InFlag);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2142

InFlag = Chain.getValue(1);

}

NodeTys.clear();

NodeTys.push_back(MVT::Other);

2147

NodeTys.push_back(MVT::Flag);

2148

Ops.push_back(Chain);

2149

CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;

Callee.Val = 0;

}

// If this is a direct call, pass the chain and the callee.

2154

if (Callee.Val) {

2155

Ops.push_back(Chain);

2156

Ops.push_back(Callee);

2157

}

2158

2159

// Add argument registers to the end of the list so that they are known live

2160

// into the call.

2161

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)

2162

Ops.push_back(DAG.getRegister(RegsToPass[i].first,

2163

RegsToPass[i].second.getValueType()));

2164

2165

if (InFlag.Val)

2166

Ops.push_back(InFlag);

2167

Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());

2168

InFlag = Chain.getValue(1);

2169

Bill Wendling

22f8deb

2007-11-13 00:44:25 +0000

[diff] [blame]

2170

Chain = DAG.getCALLSEQ_END(Chain,

2171

DAG.getConstant(NumBytes, PtrVT),

2172

DAG.getConstant(0, PtrVT),

2173

InFlag);

2174

if (Op.Val->getValueType(0) != MVT::Other)

2175

InFlag = Chain.getValue(1);

2176

Dan Gohman

2008-03-19 21:39:28 +0000

[diff] [blame]

2177

SmallVector<SDOperand, 16> ResultVals;

2178

SmallVector<CCValAssign, 16> RVLocs;

2179

unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();

2180

CCState CCInfo(CC, isVarArg, TM, RVLocs);

2181

CCInfo.AnalyzeCallResult(Op.Val, RetCC_PPC);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2182

Dan Gohman

2008-03-19 21:39:28 +0000

[diff] [blame]

2183

// Copy all of the result registers out of their specified physreg.

2184

for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {

2185

CCValAssign &VA = RVLocs[i];

2186

MVT::ValueType VT = VA.getValVT();

2187

assert(VA.isRegLoc() && "Can only return in registers!");

2188

Chain = DAG.getCopyFromReg(Chain, VA.getLocReg(), VT, InFlag).getValue(1);

2189

ResultVals.push_back(Chain.getValue(0));

2190

InFlag = Chain.getValue(2);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2191

}

Dan Gohman

2008-03-19 21:39:28 +0000

[diff] [blame]

2192

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2193

// If the function returns void, just return the chain.

Dan Gohman

2008-03-19 21:39:28 +0000

[diff] [blame]

2194

if (RVLocs.empty())

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2195

return Chain;

2196

2197

// Otherwise, merge everything together with a MERGE_VALUES node.

Dan Gohman

2008-03-19 21:39:28 +0000

[diff] [blame]

2198

ResultVals.push_back(Chain);

2199

SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),

2200

&ResultVals[0], ResultVals.size());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2201

return Res.getValue(Op.ResNo);

2202

}

2203

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2204

/// LowerRET - Lower a return node.  The return calling convention
/// (RetCC_PPC) picks a physical register for every returned value; the values
/// are copied into those registers with a flag-linked chain of CopyToReg
/// nodes, and the whole thing is terminated by a PPCISD::RET_FLAG node.
SDOperand PPCTargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG,
                                      TargetMachine &TM) {
  // Run the return-value calling convention over the return node.
  SmallVector<CCValAssign, 16> RetLocs;
  unsigned CallConv = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool VarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState RetInfo(CallConv, VarArg, TM, RetLocs);
  RetInfo.AnalyzeReturn(Op.Val, RetCC_PPC);

  const unsigned NumRets = RetLocs.size();

  // The live-out set is populated only while it is still empty, i.e. by the
  // first return that gets lowered for this function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty())
    for (unsigned Idx = 0; Idx != NumRets; ++Idx)
      DAG.getMachineFunction().getRegInfo()
         .addLiveOut(RetLocs[Idx].getLocReg());

  SDOperand OutChain = Op.getOperand(0);
  SDOperand Glue;

  // Copy each result into its assigned register.  The value for location Idx
  // is read from operand 2*Idx+1 of the return node.
  for (unsigned Idx = 0; Idx != NumRets; ++Idx) {
    CCValAssign &Loc = RetLocs[Idx];
    assert(Loc.isRegLoc() && "Can only return in registers!");
    OutChain = DAG.getCopyToReg(OutChain, Loc.getLocReg(),
                                Op.getOperand(2*Idx + 1), Glue);
    Glue = OutChain.getValue(1);
  }

  // Only pass the flag along when at least one copy was emitted.
  return Glue.Val
    ? DAG.getNode(PPCISD::RET_FLAG, MVT::Other, OutChain, Glue)
    : DAG.getNode(PPCISD::RET_FLAG, MVT::Other, OutChain);
}

2235

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2236

/// LowerSTACKRESTORE - Lower a STACKRESTORE node.  When a dynamic allocation
/// is popped, the link word stored at the stack pointer has to be carried
/// over to the restored stack pointer: load it from the current SP, set SP to
/// the saved value, then store the word back through SP.
SDOperand PPCTargetLowering::LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG,
                                      const PPCSubtarget &Subtarget) {
  // Get the correct type for pointers.
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // The stack pointer register is X1 in 64-bit mode and R1 otherwise.
  unsigned SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  SDOperand SPNode = DAG.getRegister(SPReg, PtrVT);

  // Operand 0 is the incoming chain, operand 1 the stack pointer value to
  // restore.
  SDOperand InChain = Op.getOperand(0);
  SDOperand SavedSP = Op.getOperand(1);

  // Read the link word through the current stack pointer.
  SDOperand LinkWord = DAG.getLoad(PtrVT, InChain, SPNode, NULL, 0);

  // Switch the stack pointer back, ordered after the load.
  SDOperand AfterRestore =
    DAG.getCopyToReg(LinkWord.getValue(1), SPReg, SavedSP);

  // Write the link word through the restored stack pointer.
  return DAG.getStore(AfterRestore, LinkWord, SPNode, NULL, 0);
}

2261

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2262

/// LowerDYNAMIC_STACKALLOC - Lower a DYNAMIC_STACKALLOC node into a
/// PPCISD::DYNALLOC node taking the chain, the negated allocation size and
/// the frame index of the frame pointer save slot.  The save slot is created
/// on first use and remembered in PPCFunctionInfo.
SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
                                                     SelectionDAG &DAG,
                                         const PPCSubtarget &Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  const bool Is64Bit = Subtarget.isPPC64();
  const bool MachoABI = Subtarget.isMachoABI();

  // Fetch the frame pointer save index; a value of zero means it has not
  // been set up yet.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  int SaveIdx = FuncInfo->getFramePointerSaveIndex();
  if (SaveIdx == 0) {
    // Create a fixed object of pointer size at the ABI-defined save offset
    // and record it for later users.
    int SaveOffset =
      PPCFrameInfo::getFramePointerSaveOffset(Is64Bit, MachoABI);
    SaveIdx = MF.getFrameInfo()->CreateFixedObject(Is64Bit ? 8 : 4,
                                                   SaveOffset);
    FuncInfo->setFramePointerSaveIndex(SaveIdx);
  }

  // Operand 0 is the chain, operand 1 the requested size.
  SDOperand InChain = Op.getOperand(0);
  SDOperand AllocSize = Op.getOperand(1);

  // Get the correct type for pointers.
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // DYNALLOC is handed the negated size (0 - size).
  SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT,
                                  DAG.getConstant(0, PtrVT), AllocSize);

  // Frame index node for the frame pointer save slot.
  SDOperand SaveSlot = DAG.getFrameIndex(SaveIdx, PtrVT);

  // The node produces a pointer-typed value and a chain.
  SDOperand DynAllocOps[3] = { InChain, NegSize, SaveSlot };
  SDVTList ResultTys = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, ResultTys, DynAllocOps, 3);
}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when

2304

/// possible.

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2305

SDOperand PPCTargetLowering::LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2306

// Not FP? Not a fsel.

2307

if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||

2308

!MVT::isFloatingPoint(Op.getOperand(2).getValueType()))

2309

return SDOperand();

2310

2311

ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

2312

2313

// Cannot handle SETEQ/SETNE.

2314

if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

2315

2316

MVT::ValueType ResVT = Op.getValueType();

2317

MVT::ValueType CmpVT = Op.getOperand(0).getValueType();

2318

SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);

2319

SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);

2320

2321

// If the RHS of the comparison is a 0.0, we don't need to do the

2322

// subtraction at all.

2323

if (isFloatingPointZero(RHS))

2324

switch (CC) {

2325

default: break; // SETUO etc aren't handled by fsel.

case ISD::SETULT:

case ISD::SETOLT:

case ISD::SETLT:

std::swap(TV, FV); // fsel is natively setge, swap operands for setlt

case ISD::SETUGE:

case ISD::SETOGE:

case ISD::SETGE:

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

2334

LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);

2335

return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);

case ISD::SETUGT:

case ISD::SETOGT:

case ISD::SETGT:

std::swap(TV, FV); // fsel is natively setge, swap operands for setlt

case ISD::SETULE:

case ISD::SETOLE:

case ISD::SETLE:

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

2344

LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);

2345

return DAG.getNode(PPCISD::FSEL, ResVT,

2346

DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);

2347

}

2348

Chris Lattner

a216bee

2007-10-15 20:14:52 +0000

[diff] [blame]

2349

SDOperand Cmp;

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2350

switch (CC) {

2351

default: break; // SETUO etc aren't handled by fsel.

case ISD::SETULT:

case ISD::SETOLT:

case ISD::SETLT:

Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);

2356

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

2357

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

2358

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);

case ISD::SETUGE:

case ISD::SETOGE:

case ISD::SETGE:

Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);

2363

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

2364

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

2365

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);

case ISD::SETUGT:

case ISD::SETOGT:

case ISD::SETGT:

Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);

2370

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

2371

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

2372

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);

case ISD::SETULE:

case ISD::SETOLE:

case ISD::SETLE:

Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);

2377

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

2378

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

2379

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);

}

return SDOperand();

}

Chris Lattner

2007-11-28 18:44:47 +0000

[diff] [blame]

2384

// FIXME: Split this code up when LegalizeDAGTypes lands.

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2385

SDOperand PPCTargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2386

assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));

2387

SDOperand Src = Op.getOperand(0);

2388

if (Src.getValueType() == MVT::f32)

2389

Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

2390

2391

SDOperand Tmp;

2392

switch (Op.getValueType()) {

2393

default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");

2394

case MVT::i32:

2395

Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);

2396

break;

2397

case MVT::i64:

2398

Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);

break;

}

// Convert the FP value to an int value through memory.

Chris Lattner

a216bee

2007-10-15 20:14:52 +0000

[diff] [blame]

2403

SDOperand FIPtr = DAG.CreateStackTemporary(MVT::f64);

2404

2405

// Emit a store to the stack slot.

2406

SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Tmp, FIPtr, NULL, 0);

2407

2408

// Result is a load from the stack slot. If loading 4 bytes, make sure to

2409

// add in a bias.

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2410

if (Op.getValueType() == MVT::i32)

Chris Lattner

a216bee

2007-10-15 20:14:52 +0000

[diff] [blame]

2411

FIPtr = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr,

2412

DAG.getConstant(4, FIPtr.getValueType()));

2413

return DAG.getLoad(Op.getValueType(), Chain, FIPtr, NULL, 0);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2414

}

2415

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2416

/// LowerFP_ROUND_INREG - Lower FP_ROUND_INREG on a ppcf128 value whose operand
/// is a BUILD_PAIR of two f64 halves.  The halves are added with the FPSCR
/// temporarily switched to round-to-zero, and the sum is returned as both
/// halves of a new ppcf128 BUILD_PAIR.
SDOperand PPCTargetLowering::LowerFP_ROUND_INREG(SDOperand Op,
                                                 SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::ppcf128);
  SDNode *Node = Op.Val;
  assert(Node->getOperand(0).getValueType() == MVT::ppcf128);
  assert(Node->getOperand(0).Val->getOpcode() == ISD::BUILD_PAIR);
  SDOperand Lo = Node->getOperand(0).Val->getOperand(0);
  SDOperand Hi = Node->getOperand(0).Val->getOperand(1);

  // This sequence changes FPSCR to do round-to-zero, adds the two halves
  // of the long double, and puts FPSCR back the way it was.  We do not
  // actually model FPSCR; the nodes are kept in order by threading a flag
  // through them.
  std::vector<MVT::ValueType> NodeTys;
  SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg;

  // Read the current FPSCR so it can be restored afterwards.
  NodeTys.push_back(MVT::f64);    // Return register
  NodeTys.push_back(MVT::Flag);   // Returns a flag for later insns
  Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);
  MFFSreg = Result.getValue(0);
  InFlag = Result.getValue(1);

  // Set FPSCR bit 31 ...
  NodeTys.clear();
  NodeTys.push_back(MVT::Flag);   // Returns a flag
  Ops[0] = DAG.getConstant(31, MVT::i32);
  Ops[1] = InFlag;
  Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2);
  InFlag = Result.getValue(0);

  // ... and clear bit 30, which selects round-to-zero.
  NodeTys.clear();
  NodeTys.push_back(MVT::Flag);   // Returns a flag
  Ops[0] = DAG.getConstant(30, MVT::i32);
  Ops[1] = InFlag;
  Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2);
  InFlag = Result.getValue(0);

  // Add the two halves under the modified rounding mode.
  NodeTys.clear();
  NodeTys.push_back(MVT::f64);    // result of add
  NodeTys.push_back(MVT::Flag);   // Returns a flag
  Ops[0] = Lo;
  Ops[1] = Hi;
  Ops[2] = InFlag;
  Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3);
  FPreg = Result.getValue(0);
  InFlag = Result.getValue(1);

  // Write the saved FPSCR value back.  The sum is passed through this node
  // so that its users are ordered after the restore.
  NodeTys.clear();
  NodeTys.push_back(MVT::f64);
  Ops[0] = DAG.getConstant(1, MVT::i32);
  Ops[1] = MFFSreg;
  Ops[2] = FPreg;
  Ops[3] = InFlag;
  Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4);
  FPreg = Result.getValue(0);

  // We know the low half is about to be thrown away, so just use something
  // convenient.  The pair itself is the ppcf128 result of this operation, so
  // it must carry the ppcf128 type rather than the f64 type of its halves.
  return DAG.getNode(ISD::BUILD_PAIR, MVT::ppcf128, FPreg, FPreg);
}

2474

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2475

/// LowerSINT_TO_FP - Lower SINT_TO_FP from i32 or i64 to f32 or f64 using
/// FCFID.  An i64 source is bit-converted to f64 directly; an i32 source is
/// sign extended, stored to an 8-byte stack slot and reloaded as an f64.
/// Other result types are left alone (a null SDOperand is returned).
SDOperand PPCTargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDOperand();

  SDOperand FP;
  if (Op.getOperand(0).getValueType() == MVT::i64) {
    // The 64-bit integer can be reinterpreted as an f64 and converted.
    SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
    FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
  } else {
    assert(Op.getOperand(0).getValueType() == MVT::i32 &&
           "Unhandled SINT_TO_FP type in custom expander!");
    // Since we only generate this in 64-bit mode, we can take advantage of
    // 64-bit registers.  In particular, sign extend the input value into the
    // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
    // then lfd it and fcfid it.
    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
    int SlotIdx = FrameInfo->CreateStackObject(8, 8);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDOperand SlotAddr = DAG.getFrameIndex(SlotIdx, PtrVT);

    SDOperand Extended = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
                                     Op.getOperand(0));

    // STD the extended value into the stack slot.
    MemOperand SlotMO(PseudoSourceValue::getFixedStack(),
                      MemOperand::MOStore, SlotIdx, 8, 8);
    SDOperand Spill = DAG.getNode(PPCISD::STD_32, MVT::Other,
                                  DAG.getEntryNode(), Extended, SlotAddr,
                                  DAG.getMemOperand(SlotMO));

    // Load the value as a double, chained after the store.
    SDOperand Reload = DAG.getLoad(MVT::f64, Spill, SlotAddr, NULL, 0);

    // FCFID it.
    FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Reload);
  }

  // FCFID produces an f64; round it when an f32 result was requested.
  if (Op.getValueType() == MVT::f32)
    FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0));
  return FP;
}

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2518

/// LowerFLT_ROUNDS_ - Lower FLT_ROUNDS_ by reading the FPSCR with MFFS,
/// spilling it to a stack slot, reloading the word that holds the rounding
/// mode and remapping the PowerPC encoding to the FLT_ROUNDS encoding.
SDOperand PPCTargetLowering::LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG) {
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  std::vector<MVT::ValueType> NodeTys;
  SDOperand InFlag;

  // Save FP Control Word to register
  NodeTys.push_back(MVT::f64);      // return register
  NodeTys.push_back(MVT::Flag);     // unused in this context
  SDOperand FPSCRVal = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDOperand Store = DAG.getStore(DAG.getEntryNode(), FPSCRVal,
                                 StackSlot, NULL, 0);

  // Load FP Control Word from low 32 bits of stack slot.
  SDOperand Four = DAG.getConstant(4, PtrVT);
  SDOperand Addr = DAG.getNode(ISD::ADD, PtrVT, StackSlot, Four);
  SDOperand CWD = DAG.getLoad(MVT::i32, Store, Addr, NULL, 0);

  // Transform as necessary.  (CWD ^ 3) & 3 is the same as ~CWD & 3.
  SDOperand CWD1 =
    DAG.getNode(ISD::AND, MVT::i32,
                CWD, DAG.getConstant(3, MVT::i32));
  SDOperand CWD2 =
    DAG.getNode(ISD::SRL, MVT::i32,
                DAG.getNode(ISD::AND, MVT::i32,
                            DAG.getNode(ISD::XOR, MVT::i32,
                                        CWD, DAG.getConstant(3, MVT::i32)),
                            DAG.getConstant(3, MVT::i32)),
                DAG.getConstant(1, MVT::i8));

  SDOperand RetVal =
    DAG.getNode(ISD::XOR, MVT::i32, CWD1, CWD2);

  // RetVal is an i32, so the result type has to be compared against 32 bits:
  // anything narrower needs a truncate, anything else a (possibly no-op)
  // zero extension.
  return DAG.getNode((MVT::getSizeInBits(VT) < MVT::getSizeInBits(MVT::i32) ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
}

2578

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2579

/// LowerSHL_PARTS - Lower SHL_PARTS, a left shift of a value held as a
/// (Lo, Hi) pair of VT-sized parts, into shifts and ORs on the parts.  The
/// result is a MERGE_VALUES of the new low and high parts.
SDOperand PPCTargetLowering::LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  unsigned BitWidth = MVT::getSizeInBits(VT);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand InLo = Op.getOperand(0);
  SDOperand InHi = Op.getOperand(1);
  SDOperand ShAmt = Op.getOperand(2);
  MVT::ValueType ShAmtVT = ShAmt.getValueType();

  // BitWidth - Amt: how far Lo moves right to reach its place in Hi.
  SDOperand InvAmt = DAG.getNode(ISD::SUB, ShAmtVT,
                                 DAG.getConstant(BitWidth, ShAmtVT), ShAmt);
  SDOperand HiShifted = DAG.getNode(PPCISD::SHL, VT, InHi, ShAmt);
  SDOperand LoCarry = DAG.getNode(PPCISD::SRL, VT, InLo, InvAmt);
  SDOperand HiSmall = DAG.getNode(ISD::OR , VT, HiShifted, LoCarry);

  // Amt - BitWidth: used when the shift amount reaches past one part.
  SDOperand OverAmt = DAG.getNode(ISD::ADD, ShAmtVT, ShAmt,
                                  DAG.getConstant(-BitWidth, ShAmtVT));
  SDOperand HiLarge = DAG.getNode(PPCISD::SHL, VT, InLo, OverAmt);

  SDOperand OutHi = DAG.getNode(ISD::OR, VT, HiSmall, HiLarge);
  SDOperand OutLo = DAG.getNode(PPCISD::SHL, VT, InLo, ShAmt);

  SDOperand Parts[] = { OutLo, OutHi };
  return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT),
                     Parts, 2);
}

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2608

/// LowerSRL_PARTS - Lower SRL_PARTS, a logical right shift of a value held as
/// a (Lo, Hi) pair of VT-sized parts, into shifts and ORs on the parts.  The
/// result is a MERGE_VALUES of the new low and high parts.
SDOperand PPCTargetLowering::LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  unsigned BitWidth = MVT::getSizeInBits(VT);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand InLo = Op.getOperand(0);
  SDOperand InHi = Op.getOperand(1);
  SDOperand ShAmt = Op.getOperand(2);
  MVT::ValueType ShAmtVT = ShAmt.getValueType();

  // BitWidth - Amt: how far Hi moves left to reach its place in Lo.
  SDOperand InvAmt = DAG.getNode(ISD::SUB, ShAmtVT,
                                 DAG.getConstant(BitWidth, ShAmtVT), ShAmt);
  SDOperand LoShifted = DAG.getNode(PPCISD::SRL, VT, InLo, ShAmt);
  SDOperand HiCarry = DAG.getNode(PPCISD::SHL, VT, InHi, InvAmt);
  SDOperand LoSmall = DAG.getNode(ISD::OR , VT, LoShifted, HiCarry);

  // Amt - BitWidth: used when the shift amount reaches past one part.
  SDOperand OverAmt = DAG.getNode(ISD::ADD, ShAmtVT, ShAmt,
                                  DAG.getConstant(-BitWidth, ShAmtVT));
  SDOperand LoLarge = DAG.getNode(PPCISD::SRL, VT, InHi, OverAmt);

  SDOperand OutLo = DAG.getNode(ISD::OR, VT, LoSmall, LoLarge);
  SDOperand OutHi = DAG.getNode(PPCISD::SRL, VT, InHi, ShAmt);

  SDOperand Parts[] = { OutLo, OutHi };
  return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT),
                     Parts, 2);
}

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2637

/// LowerSRA_PARTS - Expand an arithmetic right shift of a two-part value
/// (operands: Lo, Hi, Amt) into per-part PPC shifts followed by a select_cc
/// that picks the correct low part.  Returns (OutLo, OutHi) as MERGE_VALUES.
SDOperand PPCTargetLowering::LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");
  unsigned BitWidth = MVT::getSizeInBits(VT);

  SDOperand InLo = Op.getOperand(0);
  SDOperand InHi = Op.getOperand(1);
  SDOperand ShAmt = Op.getOperand(2);
  MVT::ValueType AmtVT = ShAmt.getValueType();

  // BitWidth - Amt: how far Hi must be shifted left to spill into Lo.
  SDOperand WidthC = DAG.getConstant(BitWidth, AmtVT);
  SDOperand InvAmt = DAG.getNode(ISD::SUB, AmtVT, WidthC, ShAmt);

  // Candidate low part #1: (Lo >> Amt) | (Hi << (BitWidth - Amt)).
  SDOperand LoShifted = DAG.getNode(PPCISD::SRL, VT, InLo, ShAmt);
  SDOperand HiSpill = DAG.getNode(PPCISD::SHL, VT, InHi, InvAmt);
  SDOperand SmallAmtLo = DAG.getNode(ISD::OR, VT, LoShifted, HiSpill);

  // Candidate low part #2: Hi >>s (Amt - BitWidth).
  SDOperand NegWidthC = DAG.getConstant(-BitWidth, AmtVT);
  SDOperand AmtLessWidth = DAG.getNode(ISD::ADD, AmtVT, ShAmt, NegWidthC);
  SDOperand LargeAmtLo = DAG.getNode(PPCISD::SRA, VT, InHi, AmtLessWidth);

  SDOperand OutHi = DAG.getNode(PPCISD::SRA, VT, InHi, ShAmt);

  // Unlike the logical shifts, the two candidates cannot simply be OR'd
  // together; choose candidate #1 when (Amt - BitWidth) <= 0, else #2.
  SDOperand ZeroC = DAG.getConstant(0, AmtVT);
  SDOperand OutLo = DAG.getSelectCC(AmtLessWidth, ZeroC,
                                    SmallAmtLo, LargeAmtLo, ISD::SETLE);

  SDOperand Parts[] = { OutLo, OutHi };
  return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT), Parts, 2);
}

//===----------------------------------------------------------------------===//

2667

// Vector related lowering.

2668

//

2669

2670

// If this is a vector of constants or undefs, get the bits. A bit in

2671

// UndefBits is set if the corresponding element of the vector is an

2672

// ISD::UNDEF value. For undefs, the corresponding VectorBits values are

2673

// zero. Return true if this is not an array of constants, false if it is.

2674

//

2675

static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],

2676

uint64_t UndefBits[2]) {

2677

// Start with zero'd results.

2678

VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

2679

2680

unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());

2681

for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {

2682

SDOperand OpVal = BV->getOperand(i);

2683

2684

unsigned PartNo = i >= e/2; // In the upper 128 bits?

2685

unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

2686

2687

uint64_t EltBits = 0;

2688

if (OpVal.getOpcode() == ISD::UNDEF) {

2689

uint64_t EltUndefBits = ~0U >> (32-EltBitSize);

2690

UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);

2691

continue;

2692

} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

2693

EltBits = CN->getValue() & (~0U >> (32-EltBitSize));

2694

} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {

2695

assert(CN->getValueType(0) == MVT::f32 &&

2696

"Only one legal FP vector type!");

Dale Johannesen

2007-08-31 04:03:46 +0000

[diff] [blame]

2697

EltBits = FloatToBits(CN->getValueAPF().convertToFloat());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2698

} else {

2699

// Nonconstant element.

return true;

}

VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);

2704

}

2705

2706

//printf("%llx %llx %llx %llx\n",

2707

// VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);

return false;

}

// Decide whether a 128-bit constant (two 64-bit words plus matching undef
// masks) is a splat, i.e. one value repeated across the whole vector.  On
// success, return true and report the smallest element that splats it: for
// example "0x01010101010101..." is a splat of 0x01, 0x0101 and 0x01010101,
// and is reported as SplatBits = 0x01 with SplatSize = 1 byte.  SplatUndef
// receives the bits of that element that were undefined in every repetition.
// Returns false (outputs untouched) if the value is not even a 32-bit splat.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned &SplatBits, unsigned &SplatUndef,
                            unsigned &SplatSize) {
  // Undefs must not prevent a match: each half is only compared where the
  // *other* half is defined (bits of undef elements are zero).

  // 128 -> 64: do the two 64-bit words agree?
  const uint64_t Word0Seen = Bits128[0] & ~Undef128[1];
  const uint64_t Word1Seen = Bits128[1] & ~Undef128[0];
  if (Word0Seen != Word1Seen)
    return false;  // Can't be a splat if two pieces don't match.

  const uint64_t Merged64 = Bits128[0] | Bits128[1];
  const uint64_t BothUndef64 = Undef128[0] & Undef128[1];

  // 64 -> 32: do the two 32-bit halves agree?
  const uint64_t Lo32Seen = Merged64 & (~BothUndef64 >> 32);
  const uint64_t Hi32Seen = (Merged64 >> 32) & ~BothUndef64;
  if (Lo32Seen != Hi32Seen)
    return false;  // Can't be a splat if two pieces don't match.

  const uint32_t Merged32 = uint32_t(Merged64) | uint32_t(Merged64 >> 32);
  const uint32_t BothUndef32 =
    uint32_t(BothUndef64) & uint32_t(BothUndef64 >> 32);

  // 32 -> 16: if the halves disagree, the splat element is 4 bytes wide.
  const uint32_t Lo16Seen = Merged32 & (~BothUndef32 >> 16);
  const uint32_t Hi16Seen = (Merged32 >> 16) & ~BothUndef32;
  if (Lo16Seen != Hi16Seen) {
    SplatBits = Merged32;
    SplatUndef = BothUndef32;
    SplatSize = 4;
    return true;
  }

  const uint16_t Merged16 = uint16_t(Merged32) | uint16_t(Merged32 >> 16);
  const uint16_t BothUndef16 =
    uint16_t(BothUndef32) & uint16_t(BothUndef32 >> 16);

  // 16 -> 8: if the halves disagree, the splat element is 2 bytes wide.
  const int Lo8Seen = Merged16 & (uint16_t(~BothUndef16) >> 8);
  const int Hi8Seen = (Merged16 >> 8) & ~BothUndef16;
  if (Lo8Seen != Hi8Seen) {
    SplatBits = Merged16;
    SplatUndef = BothUndef16;
    SplatSize = 2;
    return true;
  }

  // Everything matched all the way down: a single byte splats the vector.
  SplatBits = uint8_t(Merged16) | uint8_t(Merged16 >> 8);
  SplatUndef = uint8_t(BothUndef16) & uint8_t(BothUndef16 >> 8);
  SplatSize = 1;
  return true;
}

/// BuildSplatI - Build a canonical splati of Val with an element size of

2765

/// SplatSize. Cast the result to VT.

2766

static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,

2767

SelectionDAG &DAG) {

2768

assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

2769

2770

static const MVT::ValueType VTys[] = { // canonical VT to use for each size.

2771

MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32

2772

};

2773

2774

MVT::ValueType ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

2775

2776

// Force vspltis[hw] -1 to vspltisb -1 to canonicalize.

if (Val == -1)

SplatSize = 1;

MVT::ValueType CanonicalVT = VTys[SplatSize-1];

2781

2782

// Build a canonical splat for this value.

2783

SDOperand Elt = DAG.getConstant(Val, MVT::getVectorElementType(CanonicalVT));

2784

SmallVector<SDOperand, 8> Ops;

2785

Ops.assign(MVT::getVectorNumElements(CanonicalVT), Elt);

2786

SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT,

2787

&Ops[0], Ops.size());

2788

return DAG.getNode(ISD::BIT_CONVERT, ReqVT, Res);

2789

}

2790

2791

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the

2792

/// specified intrinsic ID.

2793

static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,

2794

SelectionDAG &DAG,

2795

MVT::ValueType DestVT = MVT::Other) {

2796

if (DestVT == MVT::Other) DestVT = LHS.getValueType();

2797

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,

2798

DAG.getConstant(IID, MVT::i32), LHS, RHS);

2799

}

2800

2801

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the

2802

/// specified intrinsic ID.

2803

static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,

2804

SDOperand Op2, SelectionDAG &DAG,

2805

MVT::ValueType DestVT = MVT::Other) {

2806

if (DestVT == MVT::Other) DestVT = Op0.getValueType();

2807

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,

2808

DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);

}

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified

2813

/// amount. The result has the specified value type.

2814

static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,

2815

MVT::ValueType VT, SelectionDAG &DAG) {

2816

// Force LHS/RHS to be the right type.

2817

LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);

2818

RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);

2819

2820

SDOperand Ops[16];

2821

for (unsigned i = 0; i != 16; ++i)

2822

Ops[i] = DAG.getConstant(i+Amt, MVT::i32);

2823

SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,

2824

DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops,16));

2825

return DAG.getNode(ISD::BIT_CONVERT, VT, T);

2826

}

2827

2828

// If this is a case we can't handle, return null and let the default

2829

// expansion code take care of it. If we CAN select this case, and if it

2830

// selects to a single instruction, return Op. Otherwise, if we can codegen

2831

// this case more efficiently than a constant pool load, lower it to the

2832

// sequence of ops that should be used.

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

2833

SDOperand PPCTargetLowering::LowerBUILD_VECTOR(SDOperand Op,

2834

SelectionDAG &DAG) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2835

// If this is a vector of constants or undefs, get the bits. A bit in

2836

// UndefBits is set if the corresponding element of the vector is an

2837

// ISD::UNDEF value. For undefs, the corresponding VectorBits values are

2838

// zero.

2839

uint64_t VectorBits[2];

2840

uint64_t UndefBits[2];

2841

if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))

2842

return SDOperand(); // Not a constant vector.

2843

2844

// If this is a splat (repetition) of a value across the whole vector, return

2845

// the smallest size that splats it. For example, "0x01010101010101..." is a

2846

// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and

2847

// SplatSize = 1 byte.

2848

unsigned SplatBits, SplatUndef, SplatSize;

2849

if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){

2850

bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;

2851

2852

// First, handle single instruction cases.

2853

2854

// All zeros?

2855

if (SplatBits == 0) {

2856

// Canonicalize all zero vectors to be v4i32.

2857

if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {

2858

SDOperand Z = DAG.getConstant(0, MVT::i32);

2859

Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);

2860

Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);

}

return Op;

}

// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].

2866

int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);

2867

if (SextVal >= -16 && SextVal <= 15)

2868

return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);

2869

2870

2871

// Two instruction sequences.

2872

2873

// If this value is in the range [-32,30] and is even, use:

2874

// tmp = VSPLTI[bhw], result = add tmp, tmp

2875

if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {

2876

Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);

2877

return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);

2878

}

2879

2880

// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is

2881

// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important

2882

// for fneg/fabs.

2883

if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {

2884

// Make -1 and vspltisw -1:

2885

SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);

2886

2887

// Make the VSLW intrinsic, computing 0x8000_0000.

2888

SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,

2889

OnesV, DAG);

2890

2891

// xor by OnesV to invert it.

2892

Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);

2893

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);

2894

}

2895

2896

// Check to see if this is a wide variety of vsplti*, binop self cases.

2897

unsigned SplatBitSize = SplatSize*8;

2898

static const signed char SplatCsts[] = {

2899

-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,

2900

-8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16

2901

};

2902

Owen Anderson

2007-09-07 04:06:50 +0000

[diff] [blame]

2903

for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2904

// Indirect through the SplatCsts array so that we favor 'vsplti -1' for

2905

// cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'

2906

int i = SplatCsts[idx];

2907

2908

// Figure out what shift amount will be used by altivec if shifted by i in

2909

// this splat size.

2910

unsigned TypeShiftAmt = i & (SplatBitSize-1);

2911

2912

// vsplti + shl self.

2913

if (SextVal == (i << (int)TypeShiftAmt)) {

2914

SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);

2915

static const unsigned IIDs[] = { // Intrinsic to use for each size.

2916

Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,

2917

Intrinsic::ppc_altivec_vslw

2918

};

2919

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);

2920

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);

2921

}

2922

2923

// vsplti + srl self.

2924

if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {

2925

SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);

2926

static const unsigned IIDs[] = { // Intrinsic to use for each size.

2927

Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,

2928

Intrinsic::ppc_altivec_vsrw

2929

};

2930

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);

2931

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);

2932

}

2933

2934

// vsplti + sra self.

2935

if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {

2936

SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);

2937

static const unsigned IIDs[] = { // Intrinsic to use for each size.

2938

Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,

2939

Intrinsic::ppc_altivec_vsraw

2940

};

2941

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);

2942

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);

2943

}

2944

2945

// vsplti + rol self.

2946

if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |

2947

((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {

2948

SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);

2949

static const unsigned IIDs[] = { // Intrinsic to use for each size.

2950

Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,

2951

Intrinsic::ppc_altivec_vrlw

2952

};

2953

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);

2954

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);

2955

}

2956

2957

// t = vsplti c, result = vsldoi t, t, 1

2958

if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {

2959

SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);

2960

return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);

2961

}

2962

// t = vsplti c, result = vsldoi t, t, 2

2963

if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {

2964

SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);

2965

return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);

2966

}

2967

// t = vsplti c, result = vsldoi t, t, 3

2968

if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {

2969

SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);

2970

return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);

}

}

// Three instruction sequences.

2975

2976

// Odd, in range [17,31]: (vsplti C)-(vsplti -16).

2977

if (SextVal >= 0 && SextVal <= 31) {

2978

SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG);

2979

SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);

Dale Johannesen

6fdf931

2007-10-14 01:58:32 +0000

[diff] [blame]

2980

LHS = DAG.getNode(ISD::SUB, LHS.getValueType(), LHS, RHS);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2981

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);

2982

}

2983

// Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).

2984

if (SextVal >= -31 && SextVal <= 0) {

2985

SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG);

2986

SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);

Dale Johannesen

6fdf931

2007-10-14 01:58:32 +0000

[diff] [blame]

2987

LHS = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

2988

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);

}

}

return SDOperand();

}

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit

2996

/// the specified operations to build the shuffle.

2997

static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,

2998

SDOperand RHS, SelectionDAG &DAG) {

2999

unsigned OpNum = (PFEntry >> 26) & 0x0F;

3000

unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);

3001

unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

3002

3003

enum {

3004

OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>

OP_VMRGHW,

OP_VMRGLW,

OP_VSPLTISW0,

OP_VSPLTISW1,

OP_VSPLTISW2,

OP_VSPLTISW3,

OP_VSLDOI4,

OP_VSLDOI8,

OP_VSLDOI12

};

if (OpNum == OP_COPY) {

3017

if (LHSID == (1*9+2)*9+3) return LHS;

3018

assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");

return RHS;

}

SDOperand OpLHS, OpRHS;

3023

OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);

3024

OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);

3025

3026

unsigned ShufIdxs[16];

3027

switch (OpNum) {

3028

default: assert(0 && "Unknown i32 permute!");

3029

case OP_VMRGHW:

3030

ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;

3031

ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;

3032

ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;

3033

ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;

3034

break;

3035

case OP_VMRGLW:

3036

ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;

3037

ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;

3038

ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;

3039

ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;

3040

break;

3041

case OP_VSPLTISW0:

3042

for (unsigned i = 0; i != 16; ++i)

3043

ShufIdxs[i] = (i&3)+0;

3044

break;

3045

case OP_VSPLTISW1:

3046

for (unsigned i = 0; i != 16; ++i)

3047

ShufIdxs[i] = (i&3)+4;

3048

break;

3049

case OP_VSPLTISW2:

3050

for (unsigned i = 0; i != 16; ++i)

3051

ShufIdxs[i] = (i&3)+8;

3052

break;

3053

case OP_VSPLTISW3:

3054

for (unsigned i = 0; i != 16; ++i)

3055

ShufIdxs[i] = (i&3)+12;

3056

break;

3057

case OP_VSLDOI4:

3058

return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);

3059

case OP_VSLDOI8:

3060

return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);

3061

case OP_VSLDOI12:

3062

return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);

3063

}

3064

SDOperand Ops[16];

3065

for (unsigned i = 0; i != 16; ++i)

3066

Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i32);

3067

3068

return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,

3069

DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16));

3070

}

3071

3072

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this

3073

/// is a shuffle we can handle in a single instruction, return it. Otherwise,

3074

/// return the code it can be lowered into. Worst case, it can always be

3075

/// lowered into a vperm.

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

3076

SDOperand PPCTargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op,

3077

SelectionDAG &DAG) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3078

SDOperand V1 = Op.getOperand(0);

3079

SDOperand V2 = Op.getOperand(1);

3080

SDOperand PermMask = Op.getOperand(2);

3081

3082

// Cases that are handled by instructions that take permute immediates

3083

// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be

3084

// selected by the instruction selector.

3085

if (V2.getOpcode() == ISD::UNDEF) {

3086

if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||

3087

PPC::isSplatShuffleMask(PermMask.Val, 2) ||

3088

PPC::isSplatShuffleMask(PermMask.Val, 4) ||

3089

PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||

3090

PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||

3091

PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||

3092

PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||

3093

PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||

3094

PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||

3095

PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||

3096

PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||

3097

PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {

return Op;

}

}

// Altivec has a variety of "shuffle immediates" that take two vector inputs

3103

// and produce a fixed permutation. If any of these match, do not lower to

3104

// VPERM.

3105

if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||

3106

PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||

3107

PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||

3108

PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||

3109

PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||

3110

PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||

3111

PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||

3112

PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||

3113

PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))

3114

return Op;

3115

3116

// Check to see if this is a shuffle of 4-byte values. If so, we can use our

3117

// perfect shuffle table to emit an optimal matching sequence.

3118

unsigned PFIndexes[4];

3119

bool isFourElementShuffle = true;

3120

for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number

3121

unsigned EltNo = 8; // Start out undef.

3122

for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.

3123

if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)

3124

continue; // Undef, ignore it.

3125

3126

unsigned ByteSource =

3127

cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();

3128

if ((ByteSource & 3) != j) {

3129

isFourElementShuffle = false;

break;

}

if (EltNo == 8) {

EltNo = ByteSource/4;

3135

} else if (EltNo != ByteSource/4) {

3136

isFourElementShuffle = false;

break;

}

}

PFIndexes[i] = EltNo;

3141

}

3142

3143

// If this shuffle can be expressed as a shuffle of 4-byte elements, use the

3144

// perfect shuffle vector to determine if it is cost effective to do this as

3145

// discrete instructions, or whether we should use a vperm.

3146

if (isFourElementShuffle) {

3147

// Compute the index in the perfect shuffle table.

3148

unsigned PFTableIndex =

3149

PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

3150

3151

unsigned PFEntry = PerfectShuffleTable[PFTableIndex];

3152

unsigned Cost = (PFEntry >> 30);

3153

3154

// Determining when to avoid vperm is tricky. Many things affect the cost

3155

// of vperm, particularly how many times the perm mask needs to be computed.

3156

// For example, if the perm mask can be hoisted out of a loop or is already

3157

// used (perhaps because there are multiple permutes with the same shuffle

3158

// mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of

3159

// the loop requires an extra register.

3160

//

3161

// As a compromise, we only emit discrete instructions if the shuffle can be

3162

// generated in 3 or fewer operations. When we have loop information

3163

// available, if this block is within a loop, we should avoid using vperm

3164

// for 3-operation perms and use a constant pool load instead.

3165

if (Cost < 3)

3166

return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);

3167

}

3168

3169

// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant

3170

// vector that will get spilled to the constant pool.

3171

if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

3172

3173

// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except

3174

// that it is in input element units, not in bytes. Convert now.

3175

MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());

3176

unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

3177

3178

SmallVector<SDOperand, 16> ResultMask;

3179

for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {

3180

unsigned SrcElt;

3181

if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)

3182

SrcElt = 0;

3183

else

3184

SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

3185

3186

for (unsigned j = 0; j != BytesPerElement; ++j)

3187

ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,

MVT::i8));

}

SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,

3192

&ResultMask[0], ResultMask.size());

3193

return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);

3194

}

3195

3196

/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an

3197

/// altivec comparison. If it is, return true and fill in Opc/isDot with

3198

/// information about the intrinsic.

3199

static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,

3200

bool &isDot) {

3201

unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();

3202

CompareOpc = -1;

3203

isDot = false;

3204

switch (IntrinsicID) {

3205

default: return false;

3206

// Comparison predicates.

3207

case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;

3208

case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;

3209

case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;

3210

case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;

3211

case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;

3212

case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;

3213

case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;

3214

case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;

3215

case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;

3216

case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;

3217

case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;

3218

case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;

3219

case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;

3220

3221

// Normal Comparisons.

3222

case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;

3223

case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;

3224

case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;

3225

case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;

3226

case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;

3227

case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;

3228

case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;

3229

case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;

3230

case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;

3231

case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;

3232

case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;

3233

case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;

3234

case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;

}

return true;

}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom

3240

/// lower, do it, otherwise return null.

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

3241

SDOperand PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op,

3242

SelectionDAG &DAG) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3243

// If this is a lowered altivec predicate compare, CompareOpc is set to the

3244

// opcode number of the comparison.

3245

int CompareOpc;

3246

bool isDot;

3247

if (!getAltivecCompareInfo(Op, CompareOpc, isDot))

3248

return SDOperand(); // Don't custom lower most intrinsics.

3249

3250

// If this is a non-dot comparison, make the VCMP node and we are done.

3251

if (!isDot) {

3252

SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),

3253

Op.getOperand(1), Op.getOperand(2),

3254

DAG.getConstant(CompareOpc, MVT::i32));

3255

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);

3256

}

3257

3258

// Create the PPCISD altivec 'dot' comparison node.

3259

SDOperand Ops[] = {

3260

Op.getOperand(2), // LHS

3261

Op.getOperand(3), // RHS

3262

DAG.getConstant(CompareOpc, MVT::i32)

3263

};

3264

std::vector<MVT::ValueType> VTs;

3265

VTs.push_back(Op.getOperand(2).getValueType());

3266

VTs.push_back(MVT::Flag);

3267

SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);

3268

3269

// Now that we have the comparison, emit a copy from the CR to a GPR.

3270

// This is flagged to the above dot comparison.

3271

SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,

3272

DAG.getRegister(PPC::CR6, MVT::i32),

3273

CompNode.getValue(1));

3274

3275

// Unpack the result based on how the target uses it.

3276

unsigned BitNo; // Bit # of CR6.

3277

bool InvertBit; // Invert result?

3278

switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {

3279

default: // Can't happen, don't crash on invalid number though.

3280

case 0: // Return the value of the EQ bit of CR6.

3281

BitNo = 0; InvertBit = false;

3282

break;

3283

case 1: // Return the inverted value of the EQ bit of CR6.

3284

BitNo = 0; InvertBit = true;

3285

break;

3286

case 2: // Return the value of the LT bit of CR6.

3287

BitNo = 2; InvertBit = false;

3288

break;

3289

case 3: // Return the inverted value of the LT bit of CR6.

3290

BitNo = 2; InvertBit = true;

break;

}

// Shift the bit into the low position.

3295

Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,

3296

DAG.getConstant(8-(3-BitNo), MVT::i32));

3297

// Isolate the bit.

3298

Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,

3299

DAG.getConstant(1, MVT::i32));

3300

3301

// If we are supposed to, toggle the bit.

3302

if (InvertBit)

3303

Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,

3304

DAG.getConstant(1, MVT::i32));

return Flags;

}

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

3308

/// LowerSCALAR_TO_VECTOR - Lower a SCALAR_TO_VECTOR node by bouncing the scalar
/// operand through memory: it is stored to a fresh 16-byte aligned stack slot
/// and the slot is then loaded back as a value of the node's result type.
SDOperand PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op,
                                                   SelectionDAG &DAG) {
  // Allocate a 16-byte stack object with 16-byte alignment.
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  int SlotFI = MFI->CreateStackObject(16, 16);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand SlotAddr = DAG.getFrameIndex(SlotFI, PtrVT);

  // Write the scalar at the start of the slot ...
  SDOperand StoreChain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
                                      SlotAddr, NULL, 0);

  // ... and read the slot back, chained after the store.
  return DAG.getLoad(Op.getValueType(), StoreChain, SlotAddr, NULL, 0);
}

3322

Dale Johannesen

2008-03-04 23:17:14 +0000

[diff] [blame]

3323

/// LowerMUL - Custom lower a vector multiply of type v4i32, v8i16 or v16i8 into
/// AltiVec intrinsic nodes.  Any other result type asserts and then aborts.
SDOperand PPCTargetLowering::LowerMUL(SDOperand Op, SelectionDAG &DAG) {
  if (Op.getValueType() == MVT::v4i32) {
    SDOperand A = Op.getOperand(0), B = Op.getOperand(1);

    SDOperand ZeroVec = BuildSplatI(0, 1, MVT::v4i32, DAG);
    // Splat of -16, which serves as +16 when used as a shift amount.
    SDOperand Sixteen = BuildSplatI(-16, 4, MVT::v4i32, DAG);

    // BSwapped = vrlw B, 16
    SDOperand BSwapped =
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, B, Sixteen, DAG);

    // Reinterpret all three inputs as v8i16.
    A = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, A);
    B = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, B);
    BSwapped = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, BSwapped);

    // Product of the low halves, yielding 32-bit results (the top parts are
    // ignored).
    SDOperand LowProduct = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                            A, B, DAG, MVT::v4i32);

    // Cross products accumulated onto zero, then moved up by 16 bits.
    SDOperand HighProduct = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                             A, BSwapped, ZeroVec, DAG,
                                             MVT::v4i32);
    HighProduct = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HighProduct,
                                   Sixteen, DAG);

    return DAG.getNode(ISD::ADD, MVT::v4i32, LowProduct, HighProduct);
  }

  if (Op.getValueType() == MVT::v8i16) {
    SDOperand A = Op.getOperand(0), B = Op.getOperand(1);

    // A multiply-add with a zero addend is a plain multiply.
    SDOperand ZeroVec = BuildSplatI(0, 1, MVT::v8i16, DAG);
    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            A, B, ZeroVec, DAG);
  }

  if (Op.getValueType() == MVT::v16i8) {
    SDOperand A = Op.getOperand(0), B = Op.getOperand(1);

    // Multiply the even 8-bit elements into 16-bit products.
    SDOperand EvenProducts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                              A, B, DAG, MVT::v8i16);
    EvenProducts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenProducts);

    // Multiply the odd 8-bit elements into 16-bit products.
    SDOperand OddProducts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                             A, B, DAG, MVT::v8i16);
    OddProducts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddProducts);

    // Build the shuffle mask that interleaves the two results: slot Idx takes
    // byte Idx+1 of the first input and slot Idx+1 takes byte Idx+1 of the
    // second input (second-input indices are offset by 16).
    SDOperand Mask[16];
    for (unsigned Idx = 0; Idx != 16; Idx += 2) {
      Mask[Idx]     = DAG.getConstant(Idx + 1, MVT::i8);
      Mask[Idx + 1] = DAG.getConstant(Idx + 1 + 16, MVT::i8);
    }
    return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenProducts,
                       OddProducts,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Mask, 16));
  }

  assert(0 && "Unknown mul to lower!");
  abort();
}

/// LowerOperation - Provide custom lowering hooks for some operations.

3383

///

3384

SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {

3385

switch (Op.getOpcode()) {

3386

default: assert(0 && "Wasn't expecting to be able to lower this!");

3387

case ISD::ConstantPool: return LowerConstantPool(Op, DAG);

3388

case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);

3389

case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);

3390

case ISD::JumpTable: return LowerJumpTable(Op, DAG);

3391

case ISD::SETCC: return LowerSETCC(Op, DAG);

3392

case ISD::VASTART:

3393

return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,

3394

VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);

3395

3396

case ISD::VAARG:

3397

return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,

3398

VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);

3399

3400

case ISD::FORMAL_ARGUMENTS:

3401

return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,

3402

VarArgsStackOffset, VarArgsNumGPR,

3403

VarArgsNumFPR, PPCSubTarget);

3404

Dan Gohman

2008-03-19 21:39:28 +0000

[diff] [blame]

3405

case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget,

3406

getTargetMachine());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3407

case ISD::RET: return LowerRET(Op, DAG, getTargetMachine());

3408

case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);

3409

case ISD::DYNAMIC_STACKALLOC:

3410

return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);

3411

3412

case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);

3413

case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);

3414

case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);

Dale Johannesen

2007-10-10 01:01:31 +0000

[diff] [blame]

3415

case ISD::FP_ROUND_INREG: return LowerFP_ROUND_INREG(Op, DAG);

Dan Gohman

819574c

2008-01-31 00:41:03 +0000

[diff] [blame]

3416

case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3417

3418

// Lower 64-bit shifts.

3419

case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);

3420

case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);

3421

case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);

3422

3423

// Vector-related lowering.

3424

case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);

3425

case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);

3426

case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);

3427

case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);

3428

case ISD::MUL: return LowerMUL(Op, DAG);

3429

Chris Lattner

f8b9337

2007-12-08 06:59:59 +0000

[diff] [blame]

3430

// Frame & Return address.

3431

case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3432

case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);

}

return SDOperand();

}

Chris Lattner

2007-11-28 18:44:47 +0000

[diff] [blame]

3437

/// ExpandOperationResult - Produce the replacement node for a result that needs
/// expanding.  Only FP_TO_SINT is expected here; it is handed to
/// LowerFP_TO_SINT and the node behind the returned operand is passed back.
SDNode *PPCTargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
  switch (N->getOpcode()) {
  default:
    assert(0 && "Wasn't expecting to be able to lower this!");
    // When the assert is compiled out, control falls into the case below.
  case ISD::FP_TO_SINT: {
    SDOperand Lowered = LowerFP_TO_SINT(SDOperand(N, 0), DAG);
    return Lowered.Val;
  }
  }
}

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3445

//===----------------------------------------------------------------------===//

3446

// Other Lowering Code

3447

//===----------------------------------------------------------------------===//

3448

3449

/// EmitInstrWithCustomInserter - Expand a SELECT_CC_* pseudo instruction into
/// an explicit diamond: a conditional branch at the end of BB, a fallthrough
/// block for the false value, and a join block holding the PHI that defines
/// the pseudo's destination register.  Returns the join block.
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
          MI->getOpcode() == PPC::SELECT_CC_I8 ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8 ||
          MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
         "Unexpected instr type to insert");

  // The pseudo carries everything needed to build the diamond: the result
  // vreg (operand 0), the condition register (operand 1), the true and false
  // values (operands 2 and 3) and the branch predicate (operand 4).
  const BasicBlock *IRBlock = BB->getBasicBlock();
  ilist<MachineBasicBlock>::iterator InsertPt = BB;
  ++InsertPt;

  //  HeadMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC JoinMBB
  //   fallthrough --> FalseMBB
  MachineBasicBlock *HeadMBB = BB;
  MachineBasicBlock *FalseMBB = new MachineBasicBlock(IRBlock);
  MachineBasicBlock *JoinMBB = new MachineBasicBlock(IRBlock);
  unsigned BranchPred = MI->getOperand(4).getImm();
  BuildMI(HeadMBB, TII->get(PPC::BCC))
    .addImm(BranchPred).addReg(MI->getOperand(1).getReg()).addMBB(JoinMBB);

  // Place both new blocks right after the head block, false block first.
  MachineFunction *MF = HeadMBB->getParent();
  MF->getBasicBlockList().insert(InsertPt, FalseMBB);
  MF->getBasicBlockList().insert(InsertPt, JoinMBB);

  // The join block inherits every successor the head block had ...
  for (MachineBasicBlock::succ_iterator SI = HeadMBB->succ_begin(),
       SE = HeadMBB->succ_end(); SI != SE; ++SI)
    JoinMBB->addSuccessor(*SI);

  // ... and the head block is rewired to just the two new blocks.
  while (!HeadMBB->succ_empty())
    HeadMBB->removeSuccessor(HeadMBB->succ_begin());
  HeadMBB->addSuccessor(FalseMBB);
  HeadMBB->addSuccessor(JoinMBB);

  //  FalseMBB:
  //   %FalseValue = ...
  //   # fallthrough to JoinMBB
  FalseMBB->addSuccessor(JoinMBB);

  //  JoinMBB:
  //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, HeadMBB ]
  //  ...
  BuildMI(JoinMBB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(FalseMBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(HeadMBB);

  delete MI;   // The pseudo instruction is gone now.
  return JoinMBB;
}

//===----------------------------------------------------------------------===//

3516

// Target Optimization Hooks

3517

//===----------------------------------------------------------------------===//

3518

3519

/// PerformDAGCombine - Target-specific DAG combines for PowerPC.  When a
/// combine fires the replacement value is returned (the BSWAP-of-load combine
/// rewrites in place through DCI and returns N itself); otherwise a null
/// SDOperand is returned.
SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;

  case PPCISD::SHL:     // 0 << V  -> 0.
  case PPCISD::SRL: {   // 0 >>u V -> 0.
    ConstantSDNode *ShiftedCst = dyn_cast<ConstantSDNode>(N->getOperand(0));
    if (ShiftedCst && ShiftedCst->getValue() == 0)
      return N->getOperand(0);
    break;
  }

  case PPCISD::SRA: {
    ConstantSDNode *ShiftedCst = dyn_cast<ConstantSDNode>(N->getOperand(0));
    if (ShiftedCst &&
        (ShiftedCst->getValue() == 0 ||     //  0 >>s V -> 0.
         ShiftedCst->isAllOnesValue()))     // -1 >>s V -> -1.
      return N->getOperand(0);
    break;
  }

  case ISD::SINT_TO_FP: {
    // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
    // The src/dst may each be f32 or f64, but the intermediate type must be
    // i64 and the source must not be ppcf128.
    if (!TM.getSubtarget<PPCSubtarget>().has64BitSupport())
      break;
    SDOperand IntVal = N->getOperand(0);
    if (IntVal.getOpcode() != ISD::FP_TO_SINT)
      break;
    // Note: an i32 intermediate type could avoid the load/store as well, but
    // nothing is done for it here.
    if (IntVal.getValueType() != MVT::i64 ||
        IntVal.getOperand(0).getValueType() == MVT::ppcf128)
      break;

    SDOperand FPVal = IntVal.getOperand(0);
    if (FPVal.getValueType() == MVT::f32) {
      FPVal = DAG.getNode(ISD::FP_EXTEND, MVT::f64, FPVal);
      DCI.AddToWorklist(FPVal.Val);
    }

    FPVal = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, FPVal);
    DCI.AddToWorklist(FPVal.Val);
    FPVal = DAG.getNode(PPCISD::FCFID, MVT::f64, FPVal);
    DCI.AddToWorklist(FPVal.Val);
    if (N->getValueType(0) == MVT::f32) {
      FPVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, FPVal,
                          DAG.getIntPtrConstant(0));
      DCI.AddToWorklist(FPVal.Val);
    }
    return FPVal;
  }

  case ISD::STORE: {
    SDOperand StoredVal = N->getOperand(1);

    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        StoredVal.getOpcode() == ISD::FP_TO_SINT &&
        StoredVal.getValueType() == MVT::i32 &&
        StoredVal.getOperand(0).getValueType() != MVT::ppcf128) {
      SDOperand FPVal = StoredVal.getOperand(0);
      if (FPVal.getValueType() == MVT::f32) {
        FPVal = DAG.getNode(ISD::FP_EXTEND, MVT::f64, FPVal);
        DCI.AddToWorklist(FPVal.Val);
      }
      FPVal = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, FPVal);
      DCI.AddToWorklist(FPVal.Val);

      SDOperand NewStore = DAG.getNode(PPCISD::STFIWX, MVT::Other,
                                       N->getOperand(0), FPVal,
                                       N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(NewStore.Val);
      return NewStore;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (StoredVal.getOpcode() == ISD::BSWAP &&
        StoredVal.Val->hasOneUse() &&
        (StoredVal.getValueType() == MVT::i32 ||
         StoredVal.getValueType() == MVT::i16)) {
      SDOperand SwapSrc = StoredVal.getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (SwapSrc.getValueType() == MVT::i16)
        SwapSrc = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SwapSrc);

      return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), SwapSrc,
                         N->getOperand(2), N->getOperand(3),
                         DAG.getValueType(N->getOperand(1).getValueType()));
    }
    break;
  }

  case ISD::BSWAP: {
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (!(ISD::isNON_EXTLoad(N->getOperand(0).Val) &&
          N->getOperand(0).hasOneUse() &&
          (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)))
      break;

    SDOperand OldLoad = N->getOperand(0);
    LoadSDNode *LD = cast<LoadSDNode>(OldLoad);

    // Create the byte-swapping load, which yields an i32 plus a chain.
    std::vector<MVT::ValueType> ResultVTs;
    ResultVTs.push_back(MVT::i32);
    ResultVTs.push_back(MVT::Other);
    SDOperand MemOp = DAG.getMemOperand(LD->getMemOperand());
    SDOperand LoadOps[] = {
      LD->getChain(),                       // Chain
      LD->getBasePtr(),                     // Ptr
      MemOp,                                // MemOperand
      DAG.getValueType(N->getValueType(0))  // VT
    };
    SDOperand SwappedLoad = DAG.getNode(PPCISD::LBRX, ResultVTs, LoadOps, 4);

    // If this is an i16 load, insert the truncate.
    SDOperand Replacement = SwappedLoad;
    if (N->getValueType(0) == MVT::i16)
      Replacement = DAG.getNode(ISD::TRUNCATE, MVT::i16, SwappedLoad);

    // First, combine the bswap away.  This makes the value produced by the
    // load dead.
    DCI.CombineTo(N, Replacement);

    // Next, combine the load away: it gets a bogus result value but a real
    // chain result.  The result value is dead because the bswap is dead.
    DCI.CombineTo(OldLoad.Val, Replacement, SwappedLoad.getValue(1));

    // Return N so it doesn't get rechecked!
    return SDOperand(N, 0);
  }

  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).  That is only possible when every operand has another
    // user besides this node.
    if (N->getOperand(0).hasOneUse() ||
        N->getOperand(1).hasOneUse() ||
        N->getOperand(2).hasOneUse())
      break;

    // Scan all of the users of the LHS, looking for a VCMPo that matches.
    SDNode *MatchingVCMPo = 0;
    SDNode *LHSNode = N->getOperand(0).Val;
    for (SDNode::use_iterator UI = LHSNode->use_begin(),
         UE = LHSNode->use_end(); UI != UE; ++UI) {
      SDNode *Candidate = UI->getUser();
      if (Candidate->getOpcode() == PPCISD::VCMPo &&
          Candidate->getOperand(1) == N->getOperand(1) &&
          Candidate->getOperand(2) == N->getOperand(2) &&
          Candidate->getOperand(0) == N->getOperand(0)) {
        MatchingVCMPo = Candidate;
        break;
      }
    }

    // If there is no VCMPo node, or if its flag result (value #1) has no
    // uses, don't transform this.
    if (!MatchingVCMPo || MatchingVCMPo->hasNUsesOfValue(0, 1))
      break;

    // Find the user of the flag value.  Other users may consume only the
    // value result; those are skipped.
    SDNode *FlagUser = 0;
    for (SDNode::use_iterator UI = MatchingVCMPo->use_begin();
         FlagUser == 0; ++UI) {
      assert(UI != MatchingVCMPo->use_end() && "Didn't find user!");
      SDNode *Candidate = UI->getUser();
      for (unsigned OpNo = 0, NumOps = Candidate->getNumOperands();
           OpNo != NumOps; ++OpNo) {
        if (Candidate->getOperand(OpNo) == SDOperand(MatchingVCMPo, 1)) {
          FlagUser = Candidate;
          break;
        }
      }
    }

    // If the user is a MFCR instruction, we know this is safe.  Otherwise we
    // give up for right now.
    if (FlagUser->getOpcode() == PPCISD::MFCR)
      return SDOperand(MatchingVCMPo, 0);
    break;
  }

  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);
    int CompareOpc;
    bool isDot;

    if (!(LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
          isa<ConstantSDNode>(RHS) &&
          (CC == ISD::SETEQ || CC == ISD::SETNE) &&
          getAltivecCompareInfo(LHS, CompareOpc, isDot)))
      break;
    assert(isDot && "Can't compare against a vector result!");

    // If this is a comparison against something other than 0/1, then we know
    // that the condition is never/always true.
    unsigned Val = cast<ConstantSDNode>(RHS)->getValue();
    if (Val != 0 && Val != 1) {
      if (CC == ISD::SETEQ)      // Cond never true, remove branch.
        return N->getOperand(0);
      // Always !=, turn it into an unconditional branch.
      return DAG.getNode(ISD::BR, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

    // Create the PPCISD altivec 'dot' comparison node.
    SDOperand CmpOps[] = {
      LHS.getOperand(2),  // LHS of compare
      LHS.getOperand(3),  // RHS of compare
      DAG.getConstant(CompareOpc, MVT::i32)
    };
    std::vector<MVT::ValueType> CmpVTs;
    CmpVTs.push_back(LHS.getOperand(2).getValueType());
    CmpVTs.push_back(MVT::Flag);
    SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, CmpVTs, CmpOps, 3);

    // Pick the CR6 predicate to branch on: one for "predicate true" and its
    // inverse for "predicate false", based on how the intrinsic reads CR6.
    PPC::Predicate WhenTrue, WhenFalse;
    switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {
    default:  // Can't happen, don't crash on invalid number though.
    case 0:   // Branch on the value of the EQ bit of CR6.
      WhenTrue = PPC::PRED_EQ; WhenFalse = PPC::PRED_NE;
      break;
    case 1:   // Branch on the inverted value of the EQ bit of CR6.
      WhenTrue = PPC::PRED_NE; WhenFalse = PPC::PRED_EQ;
      break;
    case 2:   // Branch on the value of the LT bit of CR6.
      WhenTrue = PPC::PRED_LT; WhenFalse = PPC::PRED_GE;
      break;
    case 3:   // Branch on the inverted value of the LT bit of CR6.
      WhenTrue = PPC::PRED_GE; WhenFalse = PPC::PRED_LT;
      break;
    }
    PPC::Predicate CompOpc = BranchOnWhenPredTrue ? WhenTrue : WhenFalse;

    return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),
                       DAG.getConstant(CompOpc, MVT::i32),
                       DAG.getRegister(PPC::CR6, MVT::i32),
                       N->getOperand(4), CompNode.getValue(1));
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//

3771

// Inline Assembly Support

3772

//===----------------------------------------------------------------------===//

3773

3774

void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,

Dan Gohman

d0dfc77

2008-02-13 22:28:48 +0000

[diff] [blame]

3775

const APInt &Mask,

Dan Gohman

229fa05

2008-02-13 00:35:47 +0000

[diff] [blame]

3776

APInt &KnownZero,

3777

APInt &KnownOne,

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3778

const SelectionDAG &DAG,

3779

unsigned Depth) const {

Dan Gohman

229fa05

2008-02-13 00:35:47 +0000

[diff] [blame]

3780

KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3781

switch (Op.getOpcode()) {

3782

default: break;

3783

case PPCISD::LBRX: {

3784

// lhbrx is known to have the top bits cleared out.

3785

if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)

3786

KnownZero = 0xFFFF0000;

3787

break;

3788

}

3789

case ISD::INTRINSIC_WO_CHAIN: {

3790

switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {

3791

default: break;

3792

case Intrinsic::ppc_altivec_vcmpbfp_p:

3793

case Intrinsic::ppc_altivec_vcmpeqfp_p:

3794

case Intrinsic::ppc_altivec_vcmpequb_p:

3795

case Intrinsic::ppc_altivec_vcmpequh_p:

3796

case Intrinsic::ppc_altivec_vcmpequw_p:

3797

case Intrinsic::ppc_altivec_vcmpgefp_p:

3798

case Intrinsic::ppc_altivec_vcmpgtfp_p:

3799

case Intrinsic::ppc_altivec_vcmpgtsb_p:

3800

case Intrinsic::ppc_altivec_vcmpgtsh_p:

3801

case Intrinsic::ppc_altivec_vcmpgtsw_p:

3802

case Intrinsic::ppc_altivec_vcmpgtub_p:

3803

case Intrinsic::ppc_altivec_vcmpgtuh_p:

3804

case Intrinsic::ppc_altivec_vcmpgtuw_p:

3805

KnownZero = ~1U; // All bits but the low one are known to be zero.

break;

}

}

}

}

/// getConstraintType - Given a constraint, return the type of

3814

/// constraint it is for this target.

3815

PPCTargetLowering::ConstraintType

3816

PPCTargetLowering::getConstraintType(const std::string &Constraint) const {

3817

if (Constraint.size() == 1) {

3818

switch (Constraint[0]) {

default: break;

case 'b':

case 'r':

case 'f':

case 'v':

case 'y':

return C_RegisterClass;

3826

}

3827

}

3828

return TargetLowering::getConstraintType(Constraint);

3829

}

3830

3831

std::pair<unsigned, const TargetRegisterClass*>

3832

PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,

3833

MVT::ValueType VT) const {

3834

if (Constraint.size() == 1) {

3835

// GCC RS6000 Constraint Letters

3836

switch (Constraint[0]) {

3837

case 'b': // R1-R31

3838

case 'r': // R0-R31

3839

if (VT == MVT::i64 && PPCSubTarget.isPPC64())

3840

return std::make_pair(0U, PPC::G8RCRegisterClass);

3841

return std::make_pair(0U, PPC::GPRCRegisterClass);

3842

case 'f':

3843

if (VT == MVT::f32)

3844

return std::make_pair(0U, PPC::F4RCRegisterClass);

3845

else if (VT == MVT::f64)

3846

return std::make_pair(0U, PPC::F8RCRegisterClass);

3847

break;

3848

case 'v':

3849

return std::make_pair(0U, PPC::VRRCRegisterClass);

3850

case 'y': // crrc

3851

return std::make_pair(0U, PPC::CRRCRegisterClass);

}

}

return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

}

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3859

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops

3860

/// vector. If it is invalid, don't add anything to Ops.

3861

void PPCTargetLowering::LowerAsmOperandForConstraint(SDOperand Op, char Letter,

3862

std::vector<SDOperand>&Ops,

3863

SelectionDAG &DAG) {

3864

SDOperand Result(0,0);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

switch (Letter) {

default: break;

case 'I':

case 'J':

case 'K':

case 'L':

case 'M':

case 'N':

case 'O':

case 'P': {

ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3876

if (!CST) return; // Must be an immediate to match.

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3877

unsigned Value = CST->getValue();

3878

switch (Letter) {

3879

default: assert(0 && "Unknown constraint letter!");

3880

case 'I': // "I" is a signed 16-bit constant.

3881

if ((short)Value == (int)Value)

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3882

Result = DAG.getTargetConstant(Value, Op.getValueType());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3883

break;

3884

case 'J': // "J" is a constant with only the high-order 16 bits nonzero.

3885

case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.

3886

if ((short)Value == 0)

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3887

Result = DAG.getTargetConstant(Value, Op.getValueType());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3888

break;

3889

case 'K': // "K" is a constant with only the low-order 16 bits nonzero.

3890

if ((Value >> 16) == 0)

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3891

Result = DAG.getTargetConstant(Value, Op.getValueType());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3892

break;

3893

case 'M': // "M" is a constant that is greater than 31.

3894

if (Value > 31)

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3895

Result = DAG.getTargetConstant(Value, Op.getValueType());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3896

break;

3897

case 'N': // "N" is a positive constant that is an exact power of two.

3898

if ((int)Value > 0 && isPowerOf2_32(Value))

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3899

Result = DAG.getTargetConstant(Value, Op.getValueType());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3900

break;

3901

case 'O': // "O" is the constant zero.

3902

if (Value == 0)

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3903

Result = DAG.getTargetConstant(Value, Op.getValueType());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3904

break;

3905

case 'P': // "P" is a constant whose negation is a signed 16-bit constant.

3906

if ((short)-Value == (int)-Value)

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3907

Result = DAG.getTargetConstant(Value, Op.getValueType());

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

break;

}

break;

}

}

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3914

if (Result.Val) {

3915

Ops.push_back(Result);

return;

}

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3919

// Handle standard constraint letters.

Chris Lattner

2007-08-25 00:47:38 +0000

[diff] [blame]

3920

TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG);

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

3921

}

3922

3923

// isLegalAddressingMode - Return true if the addressing mode represented

3924

// by AM is legal for this target, for a load/store of the specified type.

3925

bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,

3926

const Type *Ty) const {

3927

// FIXME: PPC does not allow r+i addressing modes for vectors!

3928

3929

// PPC allows a sign-extended 16-bit immediate field.

3930

if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)

3931

return false;

3932

3933

// No global is ever allowed as a base.

if (AM.BaseGV)

return false;

// PPC only support r+r,

3938

switch (AM.Scale) {

3939

case 0: // "r+i" or just "i", depending on HasBaseReg.

3940

break;

3941

case 1:

3942

if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.

3943

return false;

3944

// Otherwise we have r+r or r+i.

3945

break;

3946

case 2:

3947

if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.

return false;

// Allow 2*r as r+r.

break;

default:

// No other scales are supported.

return false;

}

return true;

}

/// isLegalAddressImmediate - Return true if the integer value can be used

3960

/// as the offset of the target addressing mode for load / store of the

3961

/// given type.

3962

bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{

3963

// PPC allows a sign-extended 16-bit immediate field.

3964

return (V > -(1 << 16) && V < (1 << 16)-1);

3965

}

3966

3967

/// isLegalAddressImmediate - Overload for global values: always returns
/// false; GV is not inspected.
bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {

return false;

}

Chris Lattner

2007-12-08 06:59:59 +0000

[diff] [blame]

3971

/// LowerRETURNADDR - Lower a RETURNADDR node by loading the return address
/// from its save slot on the stack.  Only depth 0 is handled; for any larger
/// depth a null SDOperand is returned.
SDOperand PPCTargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
  // Depths > 0 not supported yet!
  ConstantSDNode *DepthNode = cast<ConstantSDNode>(Op.getOperand(0));
  if (DepthNode->getValue() > 0)
    return SDOperand();

  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *PPCFI = MF.getInfo<PPCFunctionInfo>();

  // A save index of 0 means no frame object has been recorded yet.
  int SaveIdx = PPCFI->getReturnAddrSaveIndex();
  if (SaveIdx == 0) {
    bool Is64Bit = PPCSubTarget.isPPC64();
    int SaveOffset =
      PPCFrameInfo::getReturnSaveOffset(Is64Bit, PPCSubTarget.isMachoABI());

    // Create a fixed frame object covering the return address slot and record
    // it so later calls reuse it.
    SaveIdx = MF.getFrameInfo()->CreateFixedObject(Is64Bit ? 8 : 4,
                                                   SaveOffset);
    PPCFI->setReturnAddrSaveIndex(SaveIdx);

    // Make sure the function really does not optimize away the store of the RA
    // to the stack.
    PPCFI->setLRStoreRequired();
  }

  // Just load the return address off the stack.
  SDOperand SlotAddr = DAG.getFrameIndex(SaveIdx, getPointerTy());
  return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), SlotAddr, NULL, 0);
}

3999

4000

SDOperand PPCTargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {

Dan Gohman

2007-07-18 16:29:46 +0000

[diff] [blame]

4001

// Depths > 0 not supported yet!

4002

if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)

4003

return SDOperand();

4004

4005

MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

4006

bool isPPC64 = PtrVT == MVT::i64;

4007

4008

MachineFunction &MF = DAG.getMachineFunction();

4009

MachineFrameInfo *MFI = MF.getFrameInfo();

4010

bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects())

4011

&& MFI->getStackSize();

4012

4013

if (isPPC64)

4014

return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::X31 : PPC::X1,

Bill Wendling

5e28ab1

2007-08-30 00:59:19 +0000

[diff] [blame]

4015

MVT::i64);

Dan Gohman