Blame - lib/Target/PowerPC/PPCISelLowering.cpp - fp2-dev/platform/external/llvm

2005-10-18 00:28:58 +0000

[diff] [blame]

1

//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

2

//

3

// The LLVM Compiler Infrastructure

4

//

5

// This file was developed by Chris Lattner and is distributed under

6

// the University of Illinois Open Source License. See LICENSE.TXT for details.

7

//

8

//===----------------------------------------------------------------------===//

9

//

Nate Begeman

2005-10-16 05:39:50 +0000

[diff] [blame]

10

// This file implements the PPCISelLowering class.

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

11

//

12

//===----------------------------------------------------------------------===//

13

Chris Lattner

16e71f2

2005-10-14 23:59:06 +0000

[diff] [blame]

14

#include "PPCISelLowering.h"

15

#include "PPCTargetMachine.h"

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

16

#include "PPCPerfectShuffle.h"

Nate Begeman

750ac1b

2006-02-01 07:19:44 +0000

[diff] [blame]

17

#include "llvm/ADT/VectorExtras.h"

Evan Cheng

c4c6257

2006-03-13 23:20:37 +0000

[diff] [blame]

18

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

19

#include "llvm/CodeGen/MachineFrameInfo.h"

20

#include "llvm/CodeGen/MachineFunction.h"

Chris Lattner

2005-08-26 21:23:58 +0000

[diff] [blame]

21

#include "llvm/CodeGen/MachineInstrBuilder.h"

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

22

#include "llvm/CodeGen/SelectionDAG.h"

Chris Lattner

7b73834

2005-09-13 19:33:40 +0000

[diff] [blame]

23

#include "llvm/CodeGen/SSARegMap.h"

Chris Lattner

0b1e4e5

2005-08-26 17:36:52 +0000

[diff] [blame]

24

#include "llvm/Constants.h"

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

25

#include "llvm/Function.h"

Chris Lattner

2006-03-26 10:06:40 +0000

[diff] [blame]

26

#include "llvm/Intrinsics.h"

Nate Begeman

750ac1b

2006-02-01 07:19:44 +0000

[diff] [blame]

27

#include "llvm/Support/MathExtras.h"

Evan Cheng

d2ee218

2006-02-18 00:08:58 +0000

[diff] [blame]

28

#include "llvm/Target/TargetOptions.h"

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

29

using namespace llvm;

30

Nate Begeman

2005-10-16 05:39:50 +0000

[diff] [blame]

31

PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  // Every subtarget feature query below goes through this one reference.
  const PPCSubtarget &Subtarget = TM.getSubtarget<PPCSubtarget>();

  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);

  // Register classes for the scalar integer and floating point types.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // Floating point constants are expanded rather than selected directly.
  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // PowerPC has no intrinsics for these particular operations.
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET,  MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY,  MVT::Other, Expand);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);

  // We don't support sin/cos/fmod.
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // Square root is only left alone when the subtarget reports hardware
  // support for it; otherwise it is expanded like the operations above.
  if (!Subtarget.hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Expand);

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32, Expand);

  // PowerPC does not have Select.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);

  // PowerPC does not have truncstore for i1.
  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION,  MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget.isDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // GlobalAddress, ConstantPool and JumpTable nodes are custom lowered into
  // the appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);

  // RET must be custom lowered, to meet ABI requirements.
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // The remaining varargs and stack operations use the default
  // implementation.
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.is64Bit()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register
    // values, and we don't model the fact that the top part is clobbered by
    // calls.  We need to flag these together so that the value isn't live
    // across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (Subtarget.has64BitRegs()) {
    // 64 bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  } else {
    // 32 bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL, MVT::i64, Custom);
    setOperationAction(ISD::SRL, MVT::i64, Custom);
    setOperationAction(ISD::SRA, MVT::i64, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // Operations that carry no element type are promoted to v4i32.
    static const ISD::NodeType PromotedToV4I32[] = {
      ISD::AND, ISD::OR, ISD::XOR, ISD::LOAD, ISD::SELECT, ISD::STORE
    };
    // Operations that are not legal on any vector type by default.
    static const ISD::NodeType ExpandedByDefault[] = {
      ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM,
      ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT, ISD::BUILD_VECTOR,
      ISD::SCALAR_TO_VECTOR
    };
    const unsigned NumPromoted =
      sizeof(PromotedToV4I32)/sizeof(PromotedToV4I32[0]);
    const unsigned NumExpanded =
      sizeof(ExpandedByDefault)/sizeof(ExpandedByDefault[0]);

    // Establish conservative defaults for every vector type first.  The
    // combinations that can be codegen'd effectively are re-enabled after
    // the loop.
    // NOTE(review): the loop bound is exclusive, so LAST_VECTOR_VALUETYPE
    // itself is never visited - confirm that this is intended.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      const MVT::ValueType VecVT = (MVT::ValueType)VT;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VecVT, Legal);
      setOperationAction(ISD::SUB, VecVT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VecVT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VecVT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      for (unsigned i = 0; i != NumPromoted; ++i) {
        setOperationAction(PromotedToV4I32[i], VecVT, Promote);
        AddPromotedToType (PromotedToV4I32[i], VecVT, MVT::v4i32);
      }

      // No other operations are legal.
      for (unsigned i = 0; i != NumExpanded; ++i)
        setOperationAction(ExpandedByDefault[i], VecVT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // v4i32 is the type the non-typed operations were promoted to above, so
    // give it its final actions here.
    setOperationAction(ISD::AND,    MVT::v4i32, Legal);
    setOperationAction(ISD::OR,     MVT::v4i32, Legal);
    setOperationAction(ISD::XOR,    MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD,   MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE,  MVT::v4i32, Legal);

    // All four vector types share the VRRC register class.
    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    // Multiplies: legal for v4f32, custom lowered for the integer types.
    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);

  computeRegisterProperties();
}

258

Chris Lattner

2006-01-09 23:52:17 +0000

[diff] [blame]

259

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {

260

switch (Opcode) {

261

default: return 0;

262

case PPCISD::FSEL: return "PPCISD::FSEL";

263

case PPCISD::FCFID: return "PPCISD::FCFID";

264

case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";

265

case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";

Chris Lattner

5126984

2006-03-01 05:50:56 +0000

[diff] [blame]

266

case PPCISD::STFIWX: return "PPCISD::STFIWX";

Chris Lattner

2006-01-09 23:52:17 +0000

[diff] [blame]

267

case PPCISD::VMADDFP: return "PPCISD::VMADDFP";

268

case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";

Chris Lattner

f1d0b2b

2006-03-20 01:53:53 +0000

[diff] [blame]

269

case PPCISD::VPERM: return "PPCISD::VPERM";

Chris Lattner

2006-01-09 23:52:17 +0000

[diff] [blame]

270

case PPCISD::Hi: return "PPCISD::Hi";

271

case PPCISD::Lo: return "PPCISD::Lo";

272

case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";

273

case PPCISD::SRL: return "PPCISD::SRL";

274

case PPCISD::SRA: return "PPCISD::SRA";

275

case PPCISD::SHL: return "PPCISD::SHL";

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

276

case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";

277

case PPCISD::STD_32: return "PPCISD::STD_32";

Chris Lattner

e00ebf0

2006-01-28 07:33:03 +0000

[diff] [blame]

278

case PPCISD::CALL: return "PPCISD::CALL";

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

279

case PPCISD::MTCTR: return "PPCISD::MTCTR";

280

case PPCISD::BCTRL: return "PPCISD::BCTRL";

Chris Lattner

2006-01-09 23:52:17 +0000

[diff] [blame]

281

case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";

Chris Lattner

2006-03-26 10:06:40 +0000

[diff] [blame]

282

case PPCISD::MFCR: return "PPCISD::MFCR";

Chris Lattner

a17b155

2006-03-31 05:13:27 +0000

[diff] [blame]

283

case PPCISD::VCMP: return "PPCISD::VCMP";

Chris Lattner

2006-03-26 10:06:40 +0000

[diff] [blame]

284

case PPCISD::VCMPo: return "PPCISD::VCMPo";

Chris Lattner

f70f8d9

2006-04-18 18:05:58 +0000

[diff] [blame]

285

case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";

Chris Lattner

2006-01-09 23:52:17 +0000

[diff] [blame]

}

}

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

289

//===----------------------------------------------------------------------===//

290

// Node matching predicates, for use by the tblgen matching code.

291

//===----------------------------------------------------------------------===//

292

Chris Lattner

0b1e4e5

2005-08-26 17:36:52 +0000

[diff] [blame]

293

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.

294

static bool isFloatingPointZero(SDOperand Op) {

295

if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))

296

return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);

297

else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {

298

// Maybe this has already been legalized into the constant pool?

299

if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))

300

if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))

301

return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);

}

return false;

}

Chris Lattner

2006-04-06 17:23:16 +0000

[diff] [blame]

306

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return

307

/// true if Op is undef or if it matches the specified value.

308

static bool isConstantOrUndef(SDOperand Op, unsigned Val) {

309

return Op.getOpcode() == ISD::UNDEF ||

310

cast<ConstantSDNode>(Op)->getValue() == Val;

311

}

312

313

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a

314

/// VPKUHUM instruction.

Chris Lattner

2006-04-06 22:28:36 +0000

[diff] [blame]

315

bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {

316

if (!isUnary) {

317

for (unsigned i = 0; i != 16; ++i)

318

if (!isConstantOrUndef(N->getOperand(i), i*2+1))

319

return false;

320

} else {

321

for (unsigned i = 0; i != 8; ++i)

322

if (!isConstantOrUndef(N->getOperand(i), i*2+1) ||

323

!isConstantOrUndef(N->getOperand(i+8), i*2+1))

324

return false;

325

}

Chris Lattner

2006-04-06 18:26:28 +0000

[diff] [blame]

326

return true;

Chris Lattner

ddb739e

2006-04-06 17:23:16 +0000

[diff] [blame]

327

}

328

329

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a

330

/// VPKUWUM instruction.

Chris Lattner

2006-04-06 22:28:36 +0000

[diff] [blame]

331

bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {

332

if (!isUnary) {

333

for (unsigned i = 0; i != 16; i += 2)

334

if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||

335

!isConstantOrUndef(N->getOperand(i+1), i*2+3))

336

return false;

337

} else {

338

for (unsigned i = 0; i != 8; i += 2)

339

if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||

340

!isConstantOrUndef(N->getOperand(i+1), i*2+3) ||

341

!isConstantOrUndef(N->getOperand(i+8), i*2+2) ||

342

!isConstantOrUndef(N->getOperand(i+9), i*2+3))

343

return false;

344

}

Chris Lattner

2006-04-06 18:26:28 +0000

[diff] [blame]

345

return true;

Chris Lattner

ddb739e

2006-04-06 17:23:16 +0000

[diff] [blame]

346

}

347

Chris Lattner

2006-04-06 22:02:42 +0000

[diff] [blame]

348

/// isVMerge - Common function, used to match vmrg* shuffles.

349

///

350

static bool isVMerge(SDNode *N, unsigned UnitSize,

351

unsigned LHSStart, unsigned RHSStart) {

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

352

assert(N->getOpcode() == ISD::BUILD_VECTOR &&

353

N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");

354

assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&

355

"Unsupported merge size!");

356

357

for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units

358

for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit

359

if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),

Chris Lattner

2006-04-06 22:02:42 +0000

[diff] [blame]

360

LHSStart+j+i*UnitSize) ||

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

361

!isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),

Chris Lattner

2006-04-06 22:02:42 +0000

[diff] [blame]

362

RHSStart+j+i*UnitSize))

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

363

return false;

364

}

Chris Lattner

2006-04-06 22:02:42 +0000

[diff] [blame]

return true;

}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for

369

/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).

370

bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {

371

if (!isUnary)

372

return isVMerge(N, UnitSize, 8, 24);

373

return isVMerge(N, UnitSize, 8, 8);

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

374

}

375

376

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for

377

/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).

Chris Lattner

2006-04-06 22:02:42 +0000

[diff] [blame]

378

bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {

379

if (!isUnary)

380

return isVMerge(N, UnitSize, 0, 16);

381

return isVMerge(N, UnitSize, 0, 0);

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

}

Chris Lattner

2006-04-06 18:26:28 +0000

[diff] [blame]

385

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift

386

/// amount, otherwise return -1.

Chris Lattner

2006-04-06 22:28:36 +0000

[diff] [blame]

387

int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

388

assert(N->getOpcode() == ISD::BUILD_VECTOR &&

389

N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");

Chris Lattner

2006-04-06 18:26:28 +0000

[diff] [blame]

390

// Find the first non-undef value in the shuffle mask.

391

unsigned i;

392

for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)

393

/*search*/;

394

395

if (i == 16) return -1; // all undef.

396

397

// Otherwise, check to see if the rest of the elements are consequtively

398

// numbered from this value.

399

unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();

400

if (ShiftAmt < i) return -1;

401

ShiftAmt -= i;

Chris Lattner

ddb739e

2006-04-06 17:23:16 +0000

[diff] [blame]

402

Chris Lattner

2006-04-06 22:28:36 +0000

[diff] [blame]

403

if (!isUnary) {

404

// Check the rest of the elements to see if they are consequtive.

405

for (++i; i != 16; ++i)

406

if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))

407

return -1;

408

} else {

409

// Check the rest of the elements to see if they are consequtive.

410

for (++i; i != 16; ++i)

411

if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))

412

return -1;

413

}

Chris Lattner

2006-04-06 18:26:28 +0000

[diff] [blame]

414

415

return ShiftAmt;

416

}

Chris Lattner

2006-03-20 06:33:01 +0000

[diff] [blame]

417

418

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand

419

/// specifies a splat of a single element that is suitable for input to

420

/// VSPLTB/VSPLTH/VSPLTW.

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

421

bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {

422

assert(N->getOpcode() == ISD::BUILD_VECTOR &&

423

N->getNumOperands() == 16 &&

424

(EltSize == 1 || EltSize == 2 || EltSize == 4));

Chris Lattner

dd4d2d0

2006-03-20 06:51:10 +0000

[diff] [blame]

425

Chris Lattner

2006-03-20 06:37:44 +0000

[diff] [blame]

426

// This is a splat operation if each element of the permute is the same, and

427

// if the value doesn't reference the second vector.

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

428

unsigned ElementBase = 0;

Chris Lattner

2006-03-20 06:37:44 +0000

[diff] [blame]

429

SDOperand Elt = N->getOperand(0);

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

430

if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))

431

ElementBase = EltV->getValue();

432

else

433

return false; // FIXME: Handle UNDEF elements too!

434

435

if (cast<ConstantSDNode>(Elt)->getValue() >= 16)

436

return false;

437

438

// Check that they are consequtive.

439

for (unsigned i = 1; i != EltSize; ++i) {

440

if (!isa<ConstantSDNode>(N->getOperand(i)) ||

441

cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)

return false;

}

Chris Lattner

2006-03-20 06:37:44 +0000

[diff] [blame]

445

assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

446

for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {

Chris Lattner

b097aa9

2006-04-14 23:19:08 +0000

[diff] [blame]

447

if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

Chris Lattner

2006-03-20 06:37:44 +0000

[diff] [blame]

448

assert(isa<ConstantSDNode>(N->getOperand(i)) &&

449

"Invalid VECTOR_SHUFFLE mask!");

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

450

for (unsigned j = 0; j != EltSize; ++j)

451

if (N->getOperand(i+j) != N->getOperand(j))

452

return false;

Chris Lattner

2006-03-20 06:37:44 +0000

[diff] [blame]

453

}

454

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

455

return true;

Chris Lattner

2006-03-20 06:33:01 +0000

[diff] [blame]

456

}

457

458

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the

459

/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

460

unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {

461

assert(isSplatShuffleMask(N, EltSize));

462

return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;

Chris Lattner

2006-03-20 06:33:01 +0000

[diff] [blame]

463

}

464

Chris Lattner

e87192a

2006-04-12 17:37:20 +0000

[diff] [blame]

465

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

466

/// by using a vspltis[bhw] instruction of the specified element size, return

467

/// the constant being splatted. The ByteSize field indicates the number of

468

/// bytes of each element [124] -> [bhw].

Chris Lattner

e87192a

2006-04-12 17:37:20 +0000

[diff] [blame]

469

SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

470

SDOperand OpVal(0, 0);

Chris Lattner

79d9a88

2006-04-08 07:14:26 +0000

[diff] [blame]

471

472

// If ByteSize of the splat is bigger than the element size of the

473

// build_vector, then we have a case where we are checking for a splat where

474

// multiple elements of the buildvector are folded together into a single

475

// logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).

476

unsigned EltSize = 16/N->getNumOperands();

477

if (EltSize < ByteSize) {

478

unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.

479

SDOperand UniquedVals[4];

480

assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

481

482

// See if all of the elements in the buildvector agree across.

483

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

484

if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

485

// If the element isn't a constant, bail fully out.

486

if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();

487

488

489

if (UniquedVals[i&(Multiple-1)].Val == 0)

490

UniquedVals[i&(Multiple-1)] = N->getOperand(i);

491

else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))

492

return SDOperand(); // no match.

493

}

494

495

// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains

496

// either constant or undef values that are identical for each chunk. See

497

// if these chunks can form into a larger vspltis*.

498

499

// Check to see if all of the leading entries are either 0 or -1. If

500

// neither, then this won't fit into the immediate field.

501

bool LeadingZero = true;

502

bool LeadingOnes = true;

503

for (unsigned i = 0; i != Multiple-1; ++i) {

504

if (UniquedVals[i].Val == 0) continue; // Must have been undefs.

505

506

LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();

507

LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();

508

}

509

// Finally, check the least significant entry.

510

if (LeadingZero) {

511

if (UniquedVals[Multiple-1].Val == 0)

512

return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef

513

int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();

514

if (Val < 16)

515

return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)

516

}

517

if (LeadingOnes) {

518

if (UniquedVals[Multiple-1].Val == 0)

519

return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef

520

int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();

521

if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)

522

return DAG.getTargetConstant(Val, MVT::i32);

}

return SDOperand();

}

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

528

// Check to see if this buildvec has a single non-undef value in its elements.

529

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

530

if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

531

if (OpVal.Val == 0)

532

OpVal = N->getOperand(i);

533

else if (OpVal != N->getOperand(i))

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

534

return SDOperand();

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

535

}

536

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

537

if (OpVal.Val == 0) return SDOperand(); // All UNDEF: use implicit def.

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

538

Nate Begeman

98e70cc

2006-03-28 04:15:58 +0000

[diff] [blame]

539

unsigned ValSizeInBytes = 0;

540

uint64_t Value = 0;

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

541

if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

542

Value = CN->getValue();

543

ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;

544

} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {

545

assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");

546

Value = FloatToBits(CN->getValue());

ValSizeInBytes = 4;

}

// If the splat value is larger than the element value, then we can never do

551

// this splat. The only case that we could fit the replicated bits into our

552

// immediate field for would be zero, and we prefer to use vxor for it.

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

553

if (ValSizeInBytes < ByteSize) return SDOperand();

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

554

555

// If the element value is larger than the splat value, cut it in half and

556

// check to see if the two halves are equal. Continue doing this until we

557

// get to ByteSize. This allows us to handle 0x01010101 as 0x01.

558

while (ValSizeInBytes > ByteSize) {

559

ValSizeInBytes >>= 1;

560

561

// If the top half equals the bottom half, we're still ok.

Chris Lattner

9b42bdd

2006-04-05 17:39:25 +0000

[diff] [blame]

562

if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=

563

(Value & ((1 << (8*ValSizeInBytes))-1)))

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

564

return SDOperand();

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

565

}

566

567

// Properly sign extend the value.

568

int ShAmt = (4-ByteSize)*8;

569

int MaskVal = ((int)Value << ShAmt) >> ShAmt;

570

Evan Cheng

5b6a01b

2006-03-26 09:52:32 +0000

[diff] [blame]

571

// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

572

if (MaskVal == 0) return SDOperand();

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

573

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

574

// Finally, if this value fits in a 5 bit sext field, return it

575

if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)

576

return DAG.getTargetConstant(MaskVal, MVT::i32);

577

return SDOperand();

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

578

}

579

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

580

//===----------------------------------------------------------------------===//

581

// LowerOperation implementation

582

//===----------------------------------------------------------------------===//

583

584

/// LowerConstantPool - Lower a constant pool reference into the sum of
/// PPCISD::Hi and PPCISD::Lo nodes over a target constant pool node, adding
/// the global base register to the high part for Darwin PIC code.
static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *PoolNode = cast<ConstantPoolSDNode>(Op);
  SDOperand PoolAddr = DAG.getTargetConstantPool(PoolNode->get(), MVT::i32,
                                                 PoolNode->getAlignment());
  SDOperand Zero = DAG.getConstant(0, MVT::i32);

  const TargetMachine &TM = DAG.getTarget();

  // Non-darwin platforms don't support non-static relocation models yet, so
  // they (like the static model) get non-pic code that addresses the constant
  // pool entry directly as hi(&cp)+lo(&cp).
  const bool UseDirectAddr =
    TM.getRelocationModel() == Reloc::Static ||
    !TM.getSubtarget<PPCSubtarget>().isDarwin();

  SDOperand HiPart = DAG.getNode(PPCISD::Hi, MVT::i32, PoolAddr, Zero);

  // With PIC, the first instruction is actually "GR+hi(&cp)".
  if (!UseDirectAddr && TM.getRelocationModel() == Reloc::PIC)
    HiPart = DAG.getNode(ISD::ADD, MVT::i32,
                         DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), HiPart);

  SDOperand LoPart = DAG.getNode(PPCISD::Lo, MVT::i32, PoolAddr, Zero);
  return DAG.getNode(ISD::ADD, MVT::i32, HiPart, LoPart);
}

Nate Begeman

2006-04-22 18:53:45 +0000

[diff] [blame]

615

/// LowerJumpTable - Lower a jump table reference into the sum of PPCISD::Hi
/// and PPCISD::Lo nodes over a target jump table node, adding the global base
/// register to the high part for Darwin PIC code.
static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *TableNode = cast<JumpTableSDNode>(Op);
  SDOperand TableAddr = DAG.getTargetJumpTable(TableNode->getIndex(), MVT::i32);
  SDOperand Zero = DAG.getConstant(0, MVT::i32);

  const TargetMachine &TM = DAG.getTarget();

  // Non-darwin platforms don't support non-static relocation models yet, so
  // they (like the static model) get non-pic code that addresses the jump
  // table directly as hi(&jt)+lo(&jt).
  const bool UseDirectAddr =
    TM.getRelocationModel() == Reloc::Static ||
    !TM.getSubtarget<PPCSubtarget>().isDarwin();

  SDOperand HiPart = DAG.getNode(PPCISD::Hi, MVT::i32, TableAddr, Zero);

  // With PIC, the first instruction is actually "GR+hi(&jt)".
  if (!UseDirectAddr && TM.getRelocationModel() == Reloc::PIC)
    HiPart = DAG.getNode(ISD::ADD, MVT::i32,
                         DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), HiPart);

  SDOperand LoPart = DAG.getNode(PPCISD::Lo, MVT::i32, TableAddr, Zero);
  return DAG.getNode(ISD::ADD, MVT::i32, HiPart, LoPart);
}

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

645

/// LowerGlobalAddress - Lower a global address into the sum of PPCISD::Hi and
/// PPCISD::Lo nodes over a target global address node.  For Darwin non-static
/// code, weak, linkonce and unresolved external globals are additionally
/// loaded through their lazy resolution stub.
static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalAddressSDNode *GlobalNode = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GlobalNode->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32,
                                            GlobalNode->getOffset());
  SDOperand Zero = DAG.getConstant(0, MVT::i32);

  const TargetMachine &TM = DAG.getTarget();

  // Non-darwin platforms don't support non-static relocation models yet, so
  // they (like the static model) get non-pic code with direct accesses to
  // globals: the address is just hi(&g)+lo(&g).
  const bool UseDirectAddr =
    TM.getRelocationModel() == Reloc::Static ||
    !TM.getSubtarget<PPCSubtarget>().isDarwin();

  SDOperand HiPart = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (!UseDirectAddr && TM.getRelocationModel() == Reloc::PIC)
    HiPart = DAG.getNode(ISD::ADD, MVT::i32,
                         DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), HiPart);

  SDOperand LoPart = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
  SDOperand Addr = DAG.getNode(ISD::ADD, MVT::i32, HiPart, LoPart);

  if (UseDirectAddr)
    return Addr;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  const bool NeedsStub =
    GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
    (GV->isExternal() && !GV->hasNotBeenReadFromBytecode());
  if (!NeedsStub)
    return Addr;

  return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Addr, DAG.getSrcValue(0));
}

682

683

/// LowerSETCC - Custom lower a SETCC node.  Equality against constant zero
/// becomes a ctlz/srl pair, and other integer seteq/setne comparisons become
/// a subtract compared against zero.  Returns a null SDOperand for everything
/// else.
static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  SDOperand LHS = Op.getOperand(0);
  SDOperand RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    // If we're comparing for equality to zero, expose the fact that this is
    // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
    // fold the new nodes.
    if (RHSC->isNullValue() && CC == ISD::SETEQ) {
      MVT::ValueType VT = LHS.getValueType();
      SDOperand Widened = LHS;
      if (VT < MVT::i32) {
        VT = MVT::i32;
        Widened = DAG.getNode(ISD::ZERO_EXTEND, VT, LHS);
      }
      // ctlz yields the bit width only for a zero input, so shifting right by
      // log2(width) leaves 1 exactly when the input was zero.
      unsigned ShiftAmt = Log2_32(MVT::getSizeInBits(VT));
      SDOperand LeadingZeros = DAG.getNode(ISD::CTLZ, VT, Widened);
      SDOperand IsZero = DAG.getNode(ISD::SRL, VT, LeadingZeros,
                                     DAG.getConstant(ShiftAmt, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, IsZero);
    }

    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (RHSC->isAllOnesValue() || RHSC->isNullValue())
      return SDOperand();
  }

  // Only integer seteq/setne is handled beyond this point.
  MVT::ValueType LHSVT = LHS.getValueType();
  if (!MVT::isInteger(LHSVT) || (CC != ISD::SETEQ && CC != ISD::SETNE))
    return SDOperand();

  // Turn the comparison into a compare against zero by subtracting the rhs
  // from the lhs, which is faster than setting a condition register, reading
  // it back out, and masking the correct bit.
  MVT::ValueType ResultVT = Op.getValueType();
  SDOperand Diff = DAG.getNode(ISD::SUB, LHSVT, LHS, RHS);
  return DAG.getSetCC(ResultVT, Diff, DAG.getConstant(0, LHSVT), CC);
}

/// LowerVASTART - Lower a VASTART node into a store of the address of the
/// VarArgsFrameIndex stack slot to the memory location given by the node's
/// pointer operand.
static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                              unsigned VarArgsFrameIndex) {
  SDOperand VarArgsAddr = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);

  SDOperand Chain    = Op.getOperand(0);
  SDOperand DestPtr  = Op.getOperand(1);
  SDOperand SrcValue = Op.getOperand(2);
  return DAG.getNode(ISD::STORE, MVT::Other, Chain, VarArgsAddr, DestPtr,
                     SrcValue);
}

731

Chris Lattner

2006-05-16 18:18:50 +0000

[diff] [blame]

732

/// LowerFORMAL_ARGUMENTS - Produce one value per incoming argument, either
/// copied out of its argument register or loaded from its stack slot, followed
/// by the updated chain, and merge them into a MERGE_VALUES node.  For vararg
/// functions, VarArgsFrameIndex is set to the frame index of the first vararg
/// slot and the remaining integer argument registers are stored to the stack.
static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
                                       int &VarArgsFrameIndex) {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SDOperand Root = Op.getOperand(0);

  // Argument registers of each class, in the order they are handed out.
  static const unsigned GPR[] = {
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };
  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = sizeof(GPR)/sizeof(GPR[0]);
  const unsigned NumFPRs = sizeof(FPR)/sizeof(FPR[0]);
  const unsigned NumVRs  = sizeof( VR)/sizeof( VR[0]);

  std::vector<SDOperand> Results;
  unsigned NextArgOffset = 24;
  unsigned GPRsUsed = 0, FPRsUsed = 0, VRsUsed = 0;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start at offset 24, although the
  // first ones are often in registers.
  const unsigned NumArgs = Op.Val->getNumValues()-1;
  for (unsigned ArgNo = 0; ArgNo != NumArgs; ++ArgNo) {
    MVT::ValueType ArgVT = Op.getValue(ArgNo).getValueType();
    unsigned ArgBytes = MVT::getSizeInBits(ArgVT)/8;

    // Stack slot this argument would occupy, captured before the running
    // offset is advanced below.
    unsigned SlotOffset = NextArgOffset;

    SDOperand ArgVal;
    bool LoadFromStack = false;

    switch (ArgVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i32:
      // All int arguments reserve stack space.
      NextArgOffset += 4;

      if (GPRsUsed == NumGPRs) {
        LoadFromStack = true;
      } else {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
        MF.addLiveIn(GPR[GPRsUsed], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++GPRsUsed;
      }
      break;

    case MVT::f32:
    case MVT::f64:
      // All FP arguments reserve stack space.
      NextArgOffset += ArgBytes;

      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      for (unsigned Words = (ArgBytes == 8) ? 2 : 1;
           Words != 0 && GPRsUsed != NumGPRs; --Words)
        ++GPRsUsed;

      if (FPRsUsed == NumFPRs) {
        LoadFromStack = true;
      } else {
        unsigned VReg;
        if (ArgVT == MVT::f32)
          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
        MF.addLiveIn(FPR[FPRsUsed], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ArgVT);
        ++FPRsUsed;
      }
      break;

    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space.
      if (VRsUsed == NumVRs) {
        // This should be simple, but requires getting 16-byte aligned stack
        // values.
        assert(0 && "Loading VR argument not implemented yet!");
        LoadFromStack = true;
      } else {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::VRRCRegClass);
        MF.addLiveIn(VR[VRsUsed], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ArgVT);
        ++VRsUsed;
      }
      break;
    }

    // We ran out of physical registers of the appropriate type, so the
    // argument lives in memory.
    if (LoadFromStack) {
      if (Op.Val->hasNUsesOfValue(0, ArgNo)) {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ArgVT);
      } else {
        // The argument is actually used, emit a load from the right stack
        // slot.
        int FI = FrameInfo->CreateFixedObject(ArgBytes, SlotOffset);
        SDOperand SlotAddr = DAG.getFrameIndex(FI, MVT::i32);
        ArgVal = DAG.getLoad(ArgVT, Root, SlotAddr, DAG.getSrcValue(NULL));
      }
    }

    Results.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg) {
    VarArgsFrameIndex = FrameInfo->CreateFixedObject(4, NextArgOffset);
    SDOperand SpillAddr = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);

    // Store any remaining integer argument regs to their spots on the stack
    // so that they may be loaded by dereferencing the result of va_next.
    std::vector<SDOperand> SpillStores;
    while (GPRsUsed != NumGPRs) {
      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
      MF.addLiveIn(GPR[GPRsUsed], VReg);
      SDOperand RegVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
      SpillStores.push_back(DAG.getNode(ISD::STORE, MVT::Other,
                                        RegVal.getValue(1), RegVal, SpillAddr,
                                        DAG.getSrcValue(NULL)));
      // Increment the address by four for the next argument to store
      SpillAddr = DAG.getNode(ISD::ADD, MVT::i32, SpillAddr,
                              DAG.getConstant(4, MVT::i32));
      ++GPRsUsed;
    }
    if (!SpillStores.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other, SpillStores);
  }

  Results.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> ResultTys(Op.Val->value_begin(),
                                        Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, ResultTys, Results);
}

881

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

882

/// isCallCompatibleAddress - Return the immediate to use if the specified

883

/// 32-bit value is representable in the immediate field of a BxA instruction.

884

static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {

885

ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);

886

if (!C) return 0;

887

888

int Addr = C->getValue();

889

if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.

890

(Addr << 6 >> 6) != Addr)

891

return 0; // Top 6 bits have to be sext of immediate.

892

893

return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;

}

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

897

static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) {

898

SDOperand Chain = Op.getOperand(0);

899

unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();

900

bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

901

bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;

902

SDOperand Callee = Op.getOperand(4);

903

904

// args_to_use will accumulate outgoing args for the PPCISD::CALL case in

905

// SelectExpr to use to put the arguments in the appropriate registers.

906

std::vector<SDOperand> args_to_use;

907

908

// Count how many bytes are to be pushed on the stack, including the linkage

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

909

// area, and parameter passing area. We start with 24 bytes, which is

910

// prereserved space for [SP][CR][LR][3 x unused].

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

911

unsigned NumBytes = 24;

912

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

913

// Add up all the space actually used.

914

for (unsigned i = 5, e = Op.getNumOperands(); i != e; ++i)

915

NumBytes += MVT::getSizeInBits(Op.getOperand(i).getValueType())/8;

Chris Lattner

c04ba7a

2006-05-16 23:54:25 +0000

[diff] [blame]

916

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

917

// If we are calling what looks like a varargs function on the caller side,

918

// there are two cases:

919

// 1) The callee uses va_start.

920

// 2) The callee doesn't use va_start.

921

//

922

// In the case of #1, the prolog code will store up to 8 GPR argument

923

// registers to the stack, allowing va_start to index over them in memory.

924

// Because we cannot tell the difference (on the caller side) between #1/#2,

925

// we have to conservatively assume we have #1. As such, make sure we have

926

// at least enough stack space for the caller to store the 8 GPRs.

927

if (isVarArg && Op.getNumOperands() > 5 && NumBytes < 56)

928

NumBytes = 56;

929

930

// Adjust the stack pointer for the new arguments...

931

// These operations are automatically eliminated by the prolog/epilog pass

932

Chain = DAG.getCALLSEQ_START(Chain,

933

DAG.getConstant(NumBytes, MVT::i32));

934

935

// Set up a copy of the stack pointer for use loading and storing any

936

// arguments that may not fit in the registers available for argument

937

// passing.

938

SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

939

940

// Figure out which arguments are going to go in registers, and which in

941

// memory. Also, if this is a vararg function, floating point operations

942

// must be stored to our stack, and loaded into integer regs as well, if

943

// any integer regs are available for argument passing.

944

unsigned ArgOffset = 24;

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

945

unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

946

static const unsigned GPR[] = {

947

PPC::R3, PPC::R4, PPC::R5, PPC::R6,

948

PPC::R7, PPC::R8, PPC::R9, PPC::R10,

949

};

950

static const unsigned FPR[] = {

951

PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,

952

PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13

953

};

954

static const unsigned VR[] = {

955

PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,

956

PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

957

};

958

const unsigned NumGPRs = sizeof(GPR)/sizeof(GPR[0]);

959

const unsigned NumFPRs = sizeof(FPR)/sizeof(FPR[0]);

960

const unsigned NumVRs = sizeof( VR)/sizeof( VR[0]);

961

962

std::vector<std::pair<unsigned, SDOperand> > RegsToPass;

963

std::vector<SDOperand> MemOpChains;

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

964

for (unsigned i = 5, e = Op.getNumOperands(); i != e; ++i) {

965

SDOperand Arg = Op.getOperand(i);

966

967

// PtrOff will be used to store the current argument to the stack if a

968

// register cannot be found for it.

969

SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

970

PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);

971

switch (Arg.getValueType()) {

972

default: assert(0 && "Unexpected ValueType for argument!");

973

case MVT::i32:

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

974

if (GPR_idx != NumGPRs) {

975

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

976

} else {

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

977

MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,

978

Arg, PtrOff, DAG.getSrcValue(NULL)));

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

}

ArgOffset += 4;

break;

case MVT::f32:

case MVT::f64:

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

984

if (FPR_idx != NumFPRs) {

985

RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

986

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

987

if (isVarArg) {

988

SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,

989

Arg, PtrOff,

990

DAG.getSrcValue(NULL));

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

991

MemOpChains.push_back(Store);

992

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

993

// Float varargs are always shadowed in available integer registers

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

994

if (GPR_idx != NumGPRs) {

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

995

SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,

996

DAG.getSrcValue(NULL));

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

997

MemOpChains.push_back(Load.getValue(1));

998

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

999

}

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1000

if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64) {

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1001

SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());

1002

PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);

1003

SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,

1004

DAG.getSrcValue(NULL));

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1005

MemOpChains.push_back(Load.getValue(1));

1006

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1007

}

1008

} else {

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1009

// If we have any FPRs remaining, we may also have GPRs remaining.

1010

// Args passed in FPRs consume either 1 (f32) or 2 (f64) available

1011

// GPRs.

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1012

if (GPR_idx != NumGPRs)

1013

++GPR_idx;

1014

if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64)

1015

++GPR_idx;

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1016

}

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1017

} else {

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1018

MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,

1019

Arg, PtrOff, DAG.getSrcValue(NULL)));

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1020

}

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1021

ArgOffset += (Arg.getValueType() == MVT::f32) ? 4 : 8;

break;

case MVT::v4f32:

case MVT::v4i32:

case MVT::v8i16:

case MVT::v16i8:

assert(!isVarArg && "Don't support passing vectors to varargs yet!");

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1028

assert(VR_idx != NumVRs &&

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1029

"Don't support passing more than 12 vector args yet!");

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1030

RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1031

break;

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1032

}

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1033

}

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1034

if (!MemOpChains.empty())

1035

Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOpChains);

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1036

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1037

// Build a sequence of copy-to-reg nodes chained together with token chain

1038

// and flag operands which copy the outgoing args into the appropriate regs.

1039

SDOperand InFlag;

1040

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {

1041

Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,

1042

InFlag);

1043

InFlag = Chain.getValue(1);

1044

}

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1045

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1046

std::vector<MVT::ValueType> NodeTys;

1047

1048

// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every

1049

// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol

1050

// node so that legalize doesn't hack it.

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1051

if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1052

Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1053

else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))

1054

Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());

1055

else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))

1056

// If this is an absolute destination address, use the munged value.

1057

Callee = SDOperand(Dest, 0);

1058

else {

1059

// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair

1060

// to do the call, we can't use PPCISD::CALL.

1061

std::vector<SDOperand> Ops;

1062

Ops.push_back(Chain);

1063

Ops.push_back(Callee);

1064

NodeTys.push_back(MVT::Other);

1065

NodeTys.push_back(MVT::Flag);

1066

1067

if (InFlag.Val)

1068

Ops.push_back(InFlag);

1069

Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, Ops);

1070

InFlag = Chain.getValue(1);

1071

1072

// Copy the callee address into R12 on darwin.

1073

Chain = DAG.getCopyToReg(Chain, PPC::R12, Callee, InFlag);

1074

InFlag = Chain.getValue(1);

1075

1076

NodeTys.clear();

1077

NodeTys.push_back(MVT::Other);

1078

NodeTys.push_back(MVT::Flag);

1079

Ops.clear();

1080

Ops.push_back(Chain);

1081

Ops.push_back(InFlag);

1082

Chain = DAG.getNode(PPCISD::BCTRL, NodeTys, Ops);

1083

InFlag = Chain.getValue(1);

1084

Callee.Val = 0;

1085

}

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1086

1087

// Create the PPCISD::CALL node itself.

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1088

if (Callee.Val) {

1089

NodeTys.push_back(MVT::Other); // Returns a chain

1090

NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.

1091

std::vector<SDOperand> Ops;

1092

Ops.push_back(Chain);

1093

Ops.push_back(Callee);

1094

if (InFlag.Val)

1095

Ops.push_back(InFlag);

1096

Chain = DAG.getNode(PPCISD::CALL, NodeTys, Ops);

1097

InFlag = Chain.getValue(1);

1098

}

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1099

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1100

std::vector<SDOperand> ResultVals;

1101

NodeTys.clear();

1102

1103

// If the call has results, copy the values out of the ret val registers.

1104

switch (Op.Val->getValueType(0)) {

1105

default: assert(0 && "Unexpected ret value!");

1106

case MVT::Other: break;

1107

case MVT::i32:

1108

if (Op.Val->getValueType(1) == MVT::i32) {

1109

Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32, InFlag).getValue(1);

1110

ResultVals.push_back(Chain.getValue(0));

1111

Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32,

1112

Chain.getValue(2)).getValue(1);

1113

ResultVals.push_back(Chain.getValue(0));

1114

NodeTys.push_back(MVT::i32);

1115

} else {

1116

Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);

1117

ResultVals.push_back(Chain.getValue(0));

1118

}

1119

NodeTys.push_back(MVT::i32);

break;

case MVT::f32:

case MVT::f64:

Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),

1124

InFlag).getValue(1);

1125

ResultVals.push_back(Chain.getValue(0));

1126

NodeTys.push_back(Op.Val->getValueType(0));

break;

case MVT::v4f32:

case MVT::v4i32:

case MVT::v8i16:

case MVT::v16i8:

Chain = DAG.getCopyFromReg(Chain, PPC::V2, Op.Val->getValueType(0),

1133

InFlag).getValue(1);

1134

ResultVals.push_back(Chain.getValue(0));

1135

NodeTys.push_back(Op.Val->getValueType(0));

break;

}

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1139

Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,

1140

DAG.getConstant(NumBytes, MVT::i32));

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1141

NodeTys.push_back(MVT::Other);

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1142

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1143

// If the function returns void, just return the chain.

1144

if (ResultVals.empty())

1145

return Chain;

1146

1147

// Otherwise, merge everything together with a MERGE_VALUES node.

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1148

ResultVals.push_back(Chain);

1149

SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, ResultVals);

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1150

return Res.getValue(Op.ResNo);

1151

}

1152

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1153

/// LowerRET - Custom lowering for a return node.  Operand 0 is used as the
/// chain and the remaining operands are the values being returned; they are
/// copied into the fixed return registers and the copies are flagged to a
/// PPCISD::RET_FLAG node.  A null SDOperand is returned for "ret void".
static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
  const unsigned NumOperands = Op.getNumOperands();

  // Nothing but the chain: ret void is legal.
  if (NumOperands == 1)
    return SDOperand();

  SDOperand RegCopy;
  if (NumOperands == 2) {
    // A single returned value.  The register depends on the value's class:
    // vectors use V2, integers use R3 and floating point values use F1.
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
    unsigned RetReg;
    if (MVT::isVector(ArgVT)) {
      RetReg = PPC::V2;
    } else if (MVT::isInteger(ArgVT)) {
      RetReg = PPC::R3;
    } else {
      assert(MVT::isFloatingPoint(ArgVT));
      RetReg = PPC::F1;
    }

    RegCopy = DAG.getCopyToReg(Op.getOperand(0), RetReg, Op.getOperand(1),
                               SDOperand());

    // Record the return register as live out, unless live-outs have already
    // been noted for this function.
    if (DAG.getMachineFunction().liveout_empty())
      DAG.getMachineFunction().addLiveOut(RetReg);
  } else if (NumOperands == 3) {
    // Two returned values: operand 2 goes in R3 and operand 1 goes in R4.
    // The second copy is flagged to the first so the pair stays together.
    RegCopy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2),
                               SDOperand());
    RegCopy = DAG.getCopyToReg(RegCopy, PPC::R4, Op.getOperand(1),
                               RegCopy.getValue(1));

    // Record R3+R4 as live out, unless live-outs have already been noted.
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(PPC::R3);
      DAG.getMachineFunction().addLiveOut(PPC::R4);
    }
  } else {
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  }

  // Value 0 of the copy is its chain, value 1 is the flag.
  return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, RegCopy,
                     RegCopy.getValue(1));
}

1194

1195

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when

1196

/// possible.

1197

static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {

1198

// Not FP? Not a fsel.

1199

if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||

1200

!MVT::isFloatingPoint(Op.getOperand(2).getValueType()))

1201

return SDOperand();

1202

1203

ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

1204

1205

// Cannot handle SETEQ/SETNE.

1206

if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

1207

1208

MVT::ValueType ResVT = Op.getValueType();

1209

MVT::ValueType CmpVT = Op.getOperand(0).getValueType();

1210

SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);

1211

SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);

1212

1213

// If the RHS of the comparison is a 0.0, we don't need to do the

1214

// subtraction at all.

1215

if (isFloatingPointZero(RHS))

1216

switch (CC) {

1217

default: break; // SETUO etc aren't handled by fsel.

1218

case ISD::SETULT:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame^]

1219

case ISD::SETOLT:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1220

case ISD::SETLT:

1221

std::swap(TV, FV); // fsel is natively setge, swap operands for setlt

1222

case ISD::SETUGE:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame^]

1223

case ISD::SETOGE:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1224

case ISD::SETGE:

1225

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

1226

LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);

1227

return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);

1228

case ISD::SETUGT:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame^]

1229

case ISD::SETOGT:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1230

case ISD::SETGT:

1231

std::swap(TV, FV); // fsel is natively setge, swap operands for setlt

1232

case ISD::SETULE:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame^]

1233

case ISD::SETOLE:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1234

case ISD::SETLE:

1235

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

1236

LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);

1237

return DAG.getNode(PPCISD::FSEL, ResVT,

1238

DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);

}

SDOperand Cmp;

switch (CC) {

default: break; // SETUO etc aren't handled by fsel.

1244

case ISD::SETULT:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame^]

1245

case ISD::SETOLT:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1246

case ISD::SETLT:

1247

Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);

1248

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

1249

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

1250

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);

1251

case ISD::SETUGE:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame^]

1252

case ISD::SETOGE:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1253

case ISD::SETGE:

1254

Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);

1255

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

1256

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

1257

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);

1258

case ISD::SETUGT:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame^]

1259

case ISD::SETOGT:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1260

case ISD::SETGT:

1261

Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);

1262

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

1263

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

1264

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);

1265

case ISD::SETULE:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame^]

1266

case ISD::SETOLE:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1267

case ISD::SETLE:

1268

Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);

1269

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

1270

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

1271

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);

}

return SDOperand();

}

/// LowerFP_TO_SINT - Custom lower an FP -> signed integer conversion.  The
/// source is widened to f64 if necessary, converted with FCTIWZ (i32 result)
/// or FCTIDZ (i64 result), which leave the integer in an f64-typed value, and
/// the integer bits are then extracted with a BIT_CONVERT to i64, truncated
/// to i32 when that is the requested type.
static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));

  // The conversion nodes are built on f64 inputs; extend f32 sources first.
  SDOperand Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  SDOperand Converted;
  switch (Op.getValueType()) {
  case MVT::i64:
    Converted = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
    break;
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
    // With assertions disabled, unknown types are treated like i32.
  case MVT::i32:
    Converted = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
    break;
  }

  // Reinterpret the f64 as an i64 to get at the integer result.
  SDOperand IntBits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Converted);
  if (Op.getValueType() != MVT::i32)
    return IntBits;
  return DAG.getNode(ISD::TRUNCATE, MVT::i32, IntBits);
}

/// LowerSINT_TO_FP - Custom lower a signed integer -> FP conversion using
/// FCFID.  FCFID takes the 64-bit integer as an f64-typed bit pattern, so an
/// i64 source is simply bit-converted, while an i32 source is sign extended,
/// stored to a stack slot as 64 bits and reloaded as a double.  The f64 result
/// is rounded when an f32 was requested.
static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  SDOperand IntAsFP;   // f64 carrying the 64-bit integer bit pattern.

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    IntAsFP = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
  } else {
    assert(Op.getOperand(0).getValueType() == MVT::i32 &&
           "Unhandled SINT_TO_FP type in custom expander!");
    // Since we only generate this in 64-bit mode, we can take advantage of
    // 64-bit registers.  In particular, sign extend the input value into the
    // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
    // then lfd it and fcfid it.
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    int SlotIdx = MFI->CreateStackObject(8, 8);   // 8 bytes, 8-byte aligned.
    SDOperand Slot = DAG.getFrameIndex(SlotIdx, MVT::i32);

    SDOperand Extended = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
                                     Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDOperand StoreChain = DAG.getNode(PPCISD::STD_32, MVT::Other,
                                       DAG.getEntryNode(), Extended, Slot,
                                       DAG.getSrcValue(NULL));
    // Load the value as a double; chaining on the store orders the two.
    IntAsFP = DAG.getLoad(MVT::f64, StoreChain, Slot, DAG.getSrcValue(NULL));
  }

  // FCFID it and return it.
  SDOperand Result = DAG.getNode(PPCISD::FCFID, MVT::f64, IntAsFP);
  if (Op.getValueType() == MVT::f32)
    Result = DAG.getNode(ISD::FP_ROUND, MVT::f32, Result);
  return Result;
}

/// LowerSHL - Expand a 64-bit left shift by a variable i32 amount into
/// operations on the two 32-bit halves:
///   OutHi = (Hi << Amt) | (Lo >> (32-Amt)) | (Lo << (Amt-32))
///   OutLo =  Lo << Amt
/// Shifts by a constant return a null SDOperand so the generic expansion is
/// used instead.
static SDOperand LowerSHL(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand SrcLo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                Op.getOperand(0), DAG.getConstant(0, MVT::i32));
  SDOperand SrcHi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                Op.getOperand(0), DAG.getConstant(1, MVT::i32));
  SDOperand ShAmt = Op.getOperand(1);

  // 32 - Amt: how far the low word must move right to spill into the high.
  SDOperand InvAmt  = DAG.getNode(ISD::SUB, MVT::i32,
                                  DAG.getConstant(32, MVT::i32), ShAmt);
  SDOperand HiShl   = DAG.getNode(PPCISD::SHL, MVT::i32, SrcHi, ShAmt);
  SDOperand LoSpill = DAG.getNode(PPCISD::SRL, MVT::i32, SrcLo, InvAmt);
  SDOperand HiSmall = DAG.getNode(ISD::OR , MVT::i32, HiShl, LoSpill);
  // Amt - 32: the shift applied to the low word to form the high word's
  // third term.
  SDOperand AmtM32  = DAG.getNode(ISD::ADD, MVT::i32, ShAmt,
                                  DAG.getConstant(-32U, MVT::i32));
  SDOperand HiLarge = DAG.getNode(PPCISD::SHL, MVT::i32, SrcLo, AmtM32);

  SDOperand ResHi = DAG.getNode(ISD::OR, MVT::i32, HiSmall, HiLarge);
  SDOperand ResLo = DAG.getNode(PPCISD::SHL, MVT::i32, SrcLo, ShAmt);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ResLo, ResHi);
}

1362

1363

/// LowerSRL - Expand a 64-bit logical right shift by a variable i32 amount
/// into operations on the two 32-bit halves:
///   OutLo = (Lo >> Amt) | (Hi << (32-Amt)) | (Hi >> (Amt-32))
///   OutHi =  Hi >> Amt
/// Shifts by a constant return a null SDOperand so the generic expansion is
/// used instead.
static SDOperand LowerSRL(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  // Tmp1 = 32 - Amt: how far the high word must move left to spill into the
  // low word.
  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  // Tmp5 = Amt - 32: the shift applied to the high word to form the low
  // word's third term.
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
  SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
  SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

1389

1390

/// LowerSRA - Expand a 64-bit arithmetic right shift by a variable i32 amount
/// into operations on the two 32-bit halves:
///   OutHi = Hi >>s Amt
///   OutLo = (Amt-32 <= 0) ? (Lo >> Amt) | (Hi << (32-Amt))
///                         :  Hi >>s (Amt-32)
/// Shifts by a constant return a null SDOperand so the generic expansion is
/// used instead.
static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
  SDOperand SrcLo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                Op.getOperand(0), DAG.getConstant(0, MVT::i32));
  SDOperand SrcHi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                Op.getOperand(0), DAG.getConstant(1, MVT::i32));
  SDOperand ShAmt = Op.getOperand(1);

  SDOperand InvAmt  = DAG.getNode(ISD::SUB, MVT::i32,
                                  DAG.getConstant(32, MVT::i32), ShAmt);
  SDOperand LoSrl   = DAG.getNode(PPCISD::SRL, MVT::i32, SrcLo, ShAmt);
  SDOperand HiSpill = DAG.getNode(PPCISD::SHL, MVT::i32, SrcHi, InvAmt);
  // Low word when the Amt-32 <= 0 arm of the select is taken.
  SDOperand LoSmall = DAG.getNode(ISD::OR , MVT::i32, LoSrl, HiSpill);
  SDOperand AmtM32  = DAG.getNode(ISD::ADD, MVT::i32, ShAmt,
                                  DAG.getConstant(-32U, MVT::i32));
  // Low word otherwise: the high word shifted (with sign) by Amt-32.
  SDOperand LoLarge = DAG.getNode(PPCISD::SRA, MVT::i32, SrcHi, AmtM32);

  SDOperand ResHi = DAG.getNode(PPCISD::SRA, MVT::i32, SrcHi, ShAmt);
  SDOperand ResLo = DAG.getSelectCC(AmtM32, DAG.getConstant(0, MVT::i32),
                                    LoSmall, LoLarge, ISD::SETLE);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ResLo, ResHi);
}

1416

1417

//===----------------------------------------------------------------------===//

1418

// Vector related lowering.

1419

//

1420

Chris Lattner

ac225ca

2006-04-12 19:07:14 +0000

[diff] [blame]

1421

// If this is a vector of constants or undefs, get the bits. A bit in

1422

// UndefBits is set if the corresponding element of the vector is an

1423

// ISD::UNDEF value. For undefs, the corresponding VectorBits values are

1424

// zero. Return true if this is not an array of constants, false if it is.

1425

//

Chris Lattner

ac225ca

2006-04-12 19:07:14 +0000

[diff] [blame]

1426

static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],

1427

uint64_t UndefBits[2]) {

1428

// Start with zero'd results.

1429

VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

1430

1431

unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());

1432

for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {

1433

SDOperand OpVal = BV->getOperand(i);

1434

1435

unsigned PartNo = i >= e/2; // In the upper 128 bits?

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1436

unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

Chris Lattner

ac225ca

2006-04-12 19:07:14 +0000

[diff] [blame]

1437

1438

uint64_t EltBits = 0;

1439

if (OpVal.getOpcode() == ISD::UNDEF) {

1440

uint64_t EltUndefBits = ~0U >> (32-EltBitSize);

1441

UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);

1442

continue;

1443

} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

1444

EltBits = CN->getValue() & (~0U >> (32-EltBitSize));

1445

} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {

1446

assert(CN->getValueType(0) == MVT::f32 &&

1447

"Only one legal FP vector type!");

1448

EltBits = FloatToBits(CN->getValue());

1449

} else {

1450

// Nonconstant element.

return true;

}

VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);

1455

}

1456

1457

//printf("%llx %llx %llx %llx\n",

1458

// VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);

1459

return false;

1460

}

Chris Lattner

2006-03-20 06:33:01 +0000

[diff] [blame]

1461

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1462

// If this is a splat (repetition) of a value across the whole vector, return
// the smallest size that splats it.  For example, "0x01010101010101..." is a
// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
// SplatSize = 1 byte.
//
// The 128 input bits are folded in half repeatedly.  At each step the two
// halves must agree wherever neither is undefined; the folded value is the OR
// of the halves and a folded bit stays undefined only if it was undefined in
// both.  Returns false (leaving the outputs untouched) when the vector is not
// even a splat of a 32-bit value.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned &SplatBits, unsigned &SplatUndef,
                            unsigned &SplatSize) {
  // 128 -> 64 bits.  Don't let undefs prevent splats from matching.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.
  const uint64_t Bits64  = Bits128[0] | Bits128[1];
  const uint64_t Undef64 = Undef128[0] & Undef128[1];

  // 64 -> 32 bits.
  const uint32_t BitsLo32  = uint32_t(Bits64);
  const uint32_t BitsHi32  = uint32_t(Bits64 >> 32);
  const uint32_t UndefLo32 = uint32_t(Undef64);
  const uint32_t UndefHi32 = uint32_t(Undef64 >> 32);
  if ((BitsLo32 & ~UndefHi32) != (BitsHi32 & ~UndefLo32))
    return false;  // Can't be a splat if two pieces don't match.
  const uint32_t Bits32  = BitsLo32 | BitsHi32;
  const uint32_t Undef32 = UndefLo32 & UndefHi32;

  // 32 -> 16 bits.  If the halves disagree, the splat is of an i32.
  const uint16_t BitsLo16  = uint16_t(Bits32);
  const uint16_t BitsHi16  = uint16_t(Bits32 >> 16);
  const uint16_t UndefLo16 = uint16_t(Undef32);
  const uint16_t UndefHi16 = uint16_t(Undef32 >> 16);
  if (uint16_t(BitsLo16 & ~UndefHi16) != uint16_t(BitsHi16 & ~UndefLo16)) {
    SplatBits  = Bits32;
    SplatUndef = Undef32;
    SplatSize  = 4;
    return true;
  }
  const uint16_t Bits16  = BitsLo16 | BitsHi16;
  const uint16_t Undef16 = UndefLo16 & UndefHi16;

  // 16 -> 8 bits.  If the halves disagree, the splat is of an i16.
  const uint8_t BitsLo8  = uint8_t(Bits16);
  const uint8_t BitsHi8  = uint8_t(Bits16 >> 8);
  const uint8_t UndefLo8 = uint8_t(Undef16);
  const uint8_t UndefHi8 = uint8_t(Undef16 >> 8);
  if (uint8_t(BitsLo8 & ~UndefHi8) != uint8_t(BitsHi8 & ~UndefLo8)) {
    SplatBits  = Bits16;
    SplatUndef = Undef16;
    SplatSize  = 2;
    return true;
  }

  // Otherwise, we have an 8-bit splat.
  SplatBits  = BitsLo8 | BitsHi8;
  SplatUndef = UndefLo8 & UndefHi8;
  SplatSize  = 1;
  return true;
}

Chris Lattner

2006-04-17 06:00:21 +0000

[diff] [blame]

1515

/// BuildSplatI - Build a canonical splati of Val with an element size of

1516

/// SplatSize. Cast the result to VT.

1517

static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,

1518

SelectionDAG &DAG) {

1519

assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1520

1521

// Force vspltis[hw] -1 to vspltisb -1.

1522

if (Val == -1) SplatSize = 1;

1523

Chris Lattner

4a998b9

2006-04-17 06:00:21 +0000

[diff] [blame]

1524

static const MVT::ValueType VTys[] = { // canonical VT to use for each size.

1525

MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32

1526

};

1527

MVT::ValueType CanonicalVT = VTys[SplatSize-1];

1528

1529

// Build a canonical splat for this value.

1530

SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));

1531

std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);

1532

SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);

1533

return DAG.getNode(ISD::BIT_CONVERT, VT, Res);

1534

}

1535

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1536

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1537

/// specified intrinsic ID.

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1538

static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,

1539

SelectionDAG &DAG,

1540

MVT::ValueType DestVT = MVT::Other) {

1541

if (DestVT == MVT::Other) DestVT = LHS.getValueType();

1542

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1543

DAG.getConstant(IID, MVT::i32), LHS, RHS);

1544

}

1545

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1546

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the

1547

/// specified intrinsic ID.

1548

static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,

1549

SDOperand Op2, SelectionDAG &DAG,

1550

MVT::ValueType DestVT = MVT::Other) {

1551

if (DestVT == MVT::Other) DestVT = Op0.getValueType();

1552

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,

1553

DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);

}

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1557

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified

1558

/// amount. The result has the specified value type.

1559

static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,

1560

MVT::ValueType VT, SelectionDAG &DAG) {

1561

// Force LHS/RHS to be the right type.

1562

LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);

1563

RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);

1564

1565

std::vector<SDOperand> Ops;

1566

for (unsigned i = 0; i != 16; ++i)

1567

Ops.push_back(DAG.getConstant(i+Amt, MVT::i32));

1568

SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,

1569

DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));

1570

return DAG.getNode(ISD::BIT_CONVERT, VT, T);

1571

}

1572

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1573

// If this is a case we can't handle, return null and let the default

1574

// expansion code take care of it. If we CAN select this case, and if it

1575

// selects to a single instruction, return Op. Otherwise, if we can codegen

1576

// this case more efficiently than a constant pool load, lower it to the

1577

// sequence of ops that should be used.

1578

static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {

1579

// If this is a vector of constants or undefs, get the bits. A bit in

1580

// UndefBits is set if the corresponding element of the vector is an

1581

// ISD::UNDEF value. For undefs, the corresponding VectorBits values are

1582

// zero.

1583

uint64_t VectorBits[2];

1584

uint64_t UndefBits[2];

1585

if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))

1586

return SDOperand(); // Not a constant vector.

1587

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1588

// If this is a splat (repetition) of a value across the whole vector, return

1589

// the smallest size that splats it. For example, "0x01010101010101..." is a

1590

// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and

1591

// SplatSize = 1 byte.

1592

unsigned SplatBits, SplatUndef, SplatSize;

1593

if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){

1594

bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;

1595

1596

// First, handle single instruction cases.

1597

1598

// All zeros?

1599

if (SplatBits == 0) {

1600

// Canonicalize all zero vectors to be v4i32.

1601

if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {

1602

SDOperand Z = DAG.getConstant(0, MVT::i32);

1603

Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);

1604

Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);

1605

}

1606

return Op;

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1607

}

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1608

1609

// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].

1610

int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);

Chris Lattner

4a998b9

2006-04-17 06:00:21 +0000

[diff] [blame]

1611

if (SextVal >= -16 && SextVal <= 15)

1612

return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1613

Chris Lattner

dbce85d

2006-04-17 18:09:22 +0000

[diff] [blame]

1614

1615

// Two instruction sequences.

1616

Chris Lattner

4a998b9

2006-04-17 06:00:21 +0000

[diff] [blame]

1617

// If this value is in the range [-32,30] and is even, use:

1618

// tmp = VSPLTI[bhw], result = add tmp, tmp

1619

if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {

1620

Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);

1621

return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);

1622

}

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1623

1624

// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is

1625

// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important

1626

// for fneg/fabs.

1627

if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {

1628

// Make -1 and vspltisw -1:

1629

SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);

1630

1631

// Make the VSLW intrinsic, computing 0x8000_0000.

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1632

SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,

1633

OnesV, DAG);

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1634

1635

// xor by OnesV to invert it.

1636

Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);

1637

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);

1638

}

1639

1640

// Check to see if this is a wide variety of vsplti*, binop self cases.

1641

unsigned SplatBitSize = SplatSize*8;

1642

static const char SplatCsts[] = {

1643

-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,

Chris Lattner

dbce85d

2006-04-17 18:09:22 +0000

[diff] [blame]

1644

-8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1645

};

1646

for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){

1647

// Indirect through the SplatCsts array so that we favor 'vsplti -1' for

1648

// cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'

1649

int i = SplatCsts[idx];

1650

1651

// Figure out what shift amount will be used by altivec if shifted by i in

1652

// this splat size.

1653

unsigned TypeShiftAmt = i & (SplatBitSize-1);

1654

1655

// vsplti + shl self.

1656

if (SextVal == (i << (int)TypeShiftAmt)) {

1657

Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);

1658

static const unsigned IIDs[] = { // Intrinsic to use for each size.

1659

Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,

1660

Intrinsic::ppc_altivec_vslw

1661

};

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1662

return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1663

}

1664

1665

// vsplti + srl self.

1666

if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {

1667

Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);

1668

static const unsigned IIDs[] = { // Intrinsic to use for each size.

1669

Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,

1670

Intrinsic::ppc_altivec_vsrw

1671

};

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1672

return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1673

}

1674

1675

// vsplti + sra self.

1676

if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {

1677

Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);

1678

static const unsigned IIDs[] = { // Intrinsic to use for each size.

1679

Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,

1680

Intrinsic::ppc_altivec_vsraw

1681

};

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1682

return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1683

}

1684

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1685

// vsplti + rol self.

1686

if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |

1687

((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {

1688

Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);

1689

static const unsigned IIDs[] = { // Intrinsic to use for each size.

1690

Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,

1691

Intrinsic::ppc_altivec_vrlw

1692

};

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1693

return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1694

}

1695

1696

// t = vsplti c, result = vsldoi t, t, 1

1697

if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {

1698

SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);

1699

return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);

1700

}

1701

// t = vsplti c, result = vsldoi t, t, 2

1702

if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {

1703

SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);

1704

return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);

1705

}

1706

// t = vsplti c, result = vsldoi t, t, 3

1707

if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {

1708

SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);

1709

return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);

1710

}

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1711

}

1712

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1713

// Three instruction sequences.

1714

Chris Lattner

dbce85d

2006-04-17 18:09:22 +0000

[diff] [blame]

1715

// Odd, in range [17,31]: (vsplti C)-(vsplti -16).

1716

if (SextVal >= 0 && SextVal <= 31) {

1717

SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, Op.getValueType(),DAG);

1718

SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);

1719

return DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);

1720

}

1721

// Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).

1722

if (SextVal >= -31 && SextVal <= 0) {

1723

SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, Op.getValueType(),DAG);

1724

SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);

Chris Lattner

c408382

2006-04-17 06:07:44 +0000

[diff] [blame]

1725

return DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1726

}

1727

}

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1728

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

return SDOperand();

}

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1732

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit

1733

/// the specified operations to build the shuffle.

1734

static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,

1735

SDOperand RHS, SelectionDAG &DAG) {

1736

unsigned OpNum = (PFEntry >> 26) & 0x0F;

1737

unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);

1738

unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

1739

1740

enum {

Chris Lattner

00402c7

2006-05-16 04:20:24 +0000

[diff] [blame]

1741

OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

OP_VMRGHW,

OP_VMRGLW,

OP_VSPLTISW0,

OP_VSPLTISW1,

OP_VSPLTISW2,

OP_VSPLTISW3,

OP_VSLDOI4,

OP_VSLDOI8,

OP_VSLDOI12,

};

if (OpNum == OP_COPY) {

1754

if (LHSID == (1*9+2)*9+3) return LHS;

1755

assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");

return RHS;

}

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1759

SDOperand OpLHS, OpRHS;

1760

OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);

1761

OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);

1762

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1763

unsigned ShufIdxs[16];

1764

switch (OpNum) {

1765

default: assert(0 && "Unknown i32 permute!");

1766

case OP_VMRGHW:

1767

ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;

1768

ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;

1769

ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;

1770

ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;

1771

break;

1772

case OP_VMRGLW:

1773

ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;

1774

ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;

1775

ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;

1776

ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;

1777

break;

1778

case OP_VSPLTISW0:

1779

for (unsigned i = 0; i != 16; ++i)

1780

ShufIdxs[i] = (i&3)+0;

1781

break;

1782

case OP_VSPLTISW1:

1783

for (unsigned i = 0; i != 16; ++i)

1784

ShufIdxs[i] = (i&3)+4;

1785

break;

1786

case OP_VSPLTISW2:

1787

for (unsigned i = 0; i != 16; ++i)

1788

ShufIdxs[i] = (i&3)+8;

1789

break;

1790

case OP_VSPLTISW3:

1791

for (unsigned i = 0; i != 16; ++i)

1792

ShufIdxs[i] = (i&3)+12;

1793

break;

1794

case OP_VSLDOI4:

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1795

return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1796

case OP_VSLDOI8:

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1797

return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1798

case OP_VSLDOI12:

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1799

return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1800

}

1801

std::vector<SDOperand> Ops;

1802

for (unsigned i = 0; i != 16; ++i)

1803

Ops.push_back(DAG.getConstant(ShufIdxs[i], MVT::i32));

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1804

1805

return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,

1806

DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));

1807

}

1808

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1809

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this

1810

/// is a shuffle we can handle in a single instruction, return it. Otherwise,

1811

/// return the code it can be lowered into. Worst case, it can always be

1812

/// lowered into a vperm.

1813

static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {

1814

SDOperand V1 = Op.getOperand(0);

1815

SDOperand V2 = Op.getOperand(1);

1816

SDOperand PermMask = Op.getOperand(2);

1817

1818

// Cases that are handled by instructions that take permute immediates

1819

// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be

1820

// selected by the instruction selector.

1821

if (V2.getOpcode() == ISD::UNDEF) {

1822

if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||

1823

PPC::isSplatShuffleMask(PermMask.Val, 2) ||

1824

PPC::isSplatShuffleMask(PermMask.Val, 4) ||

1825

PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||

1826

PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||

1827

PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||

1828

PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||

1829

PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||

1830

PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||

1831

PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||

1832

PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||

1833

PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {

return Op;

}

}

// Altivec has a variety of "shuffle immediates" that take two vector inputs

1839

// and produce a fixed permutation. If any of these match, do not lower to

1840

// VPERM.

1841

if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||

1842

PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||

1843

PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||

1844

PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||

1845

PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||

1846

PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||

1847

PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||

1848

PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||

1849

PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))

1850

return Op;

1851

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1852

// Check to see if this is a shuffle of 4-byte values. If so, we can use our

1853

// perfect shuffle table to emit an optimal matching sequence.

1854

unsigned PFIndexes[4];

1855

bool isFourElementShuffle = true;

1856

for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number

1857

unsigned EltNo = 8; // Start out undef.

1858

for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.

1859

if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)

1860

continue; // Undef, ignore it.

1861

1862

unsigned ByteSource =

1863

cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();

1864

if ((ByteSource & 3) != j) {

1865

isFourElementShuffle = false;

break;

}

if (EltNo == 8) {

EltNo = ByteSource/4;

1871

} else if (EltNo != ByteSource/4) {

1872

isFourElementShuffle = false;

break;

}

}

PFIndexes[i] = EltNo;

1877

}

1878

1879

// If this shuffle can be expressed as a shuffle of 4-byte elements, use the

1880

// perfect shuffle vector to determine if it is cost effective to do this as

1881

// discrete instructions, or whether we should use a vperm.

1882

if (isFourElementShuffle) {

1883

// Compute the index in the perfect shuffle table.

1884

unsigned PFTableIndex =

1885

PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

1886

1887

unsigned PFEntry = PerfectShuffleTable[PFTableIndex];

1888

unsigned Cost = (PFEntry >> 30);

1889

1890

// Determining when to avoid vperm is tricky. Many things affect the cost

1891

// of vperm, particularly how many times the perm mask needs to be computed.

1892

// For example, if the perm mask can be hoisted out of a loop or is already

1893

// used (perhaps because there are multiple permutes with the same shuffle

1894

// mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of

1895

// the loop requires an extra register.

1896

//

1897

// As a compromise, we only emit discrete instructions if the shuffle can be

1898

// generated in 3 or fewer operations. When we have loop information

1899

// available, if this block is within a loop, we should avoid using vperm

1900

// for 3-operation perms and use a constant pool load instead.

1901

if (Cost < 3)

1902

return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);

1903

}

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1904

1905

// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant

1906

// vector that will get spilled to the constant pool.

1907

if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

1908

1909

// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except

1910

// that it is in input element units, not in bytes. Convert now.

1911

MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());

1912

unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

1913

1914

std::vector<SDOperand> ResultMask;

1915

for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {

Chris Lattner

730b456

2006-04-15 23:48:05 +0000

[diff] [blame]

1916

unsigned SrcElt;

1917

if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)

1918

SrcElt = 0;

1919

else

1920

SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1921

1922

for (unsigned j = 0; j != BytesPerElement; ++j)

1923

ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,

MVT::i8));

}

SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);

1928

return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);

1929

}

1930

Chris Lattner

2006-04-18 17:59:36 +0000

[diff] [blame]

1931

/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an

1932

/// altivec comparison. If it is, return true and fill in Opc/isDot with

1933

/// information about the intrinsic.

1934

static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,

1935

bool &isDot) {

1936

unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();

1937

CompareOpc = -1;

1938

isDot = false;

1939

switch (IntrinsicID) {

1940

default: return false;

1941

// Comparison predicates.

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1942

case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;

1943

case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;

1944

case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;

1945

case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;

1946

case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;

1947

case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;

1948

case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;

1949

case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;

1950

case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;

1951

case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;

1952

case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;

1953

case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;

1954

case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;

1955

1956

// Normal Comparisons.

1957

case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;

1958

case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;

1959

case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;

1960

case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;

1961

case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;

1962

case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;

1963

case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;

1964

case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;

1965

case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;

1966

case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;

1967

case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;

1968

case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;

1969

case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;

1970

}

Chris Lattner

2006-04-18 17:59:36 +0000

[diff] [blame]

return true;

}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom

1975

/// lower, do it, otherwise return null.

1976

static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {

1977

// If this is a lowered altivec predicate compare, CompareOpc is set to the

1978

// opcode number of the comparison.

1979

int CompareOpc;

1980

bool isDot;

1981

if (!getAltivecCompareInfo(Op, CompareOpc, isDot))

1982

return SDOperand(); // Don't custom lower most intrinsics.

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1983

Chris Lattner

2006-04-18 17:59:36 +0000

[diff] [blame]

1984

// If this is a non-dot comparison, make the VCMP node and we are done.

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1985

if (!isDot) {

1986

SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),

1987

Op.getOperand(1), Op.getOperand(2),

1988

DAG.getConstant(CompareOpc, MVT::i32));

1989

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);

1990

}

1991

1992

// Create the PPCISD altivec 'dot' comparison node.

1993

std::vector<SDOperand> Ops;

1994

std::vector<MVT::ValueType> VTs;

1995

Ops.push_back(Op.getOperand(2)); // LHS

1996

Ops.push_back(Op.getOperand(3)); // RHS

1997

Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));

1998

VTs.push_back(Op.getOperand(2).getValueType());

1999

VTs.push_back(MVT::Flag);

2000

SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);

2001

2002

// Now that we have the comparison, emit a copy from the CR to a GPR.

2003

// This is flagged to the above dot comparison.

2004

SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,

2005

DAG.getRegister(PPC::CR6, MVT::i32),

2006

CompNode.getValue(1));

2007

2008

// Unpack the result based on how the target uses it.

2009

unsigned BitNo; // Bit # of CR6.

2010

bool InvertBit; // Invert result?

2011

switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {

2012

default: // Can't happen, don't crash on invalid number though.

2013

case 0: // Return the value of the EQ bit of CR6.

2014

BitNo = 0; InvertBit = false;

2015

break;

2016

case 1: // Return the inverted value of the EQ bit of CR6.

2017

BitNo = 0; InvertBit = true;

2018

break;

2019

case 2: // Return the value of the LT bit of CR6.

2020

BitNo = 2; InvertBit = false;

2021

break;

2022

case 3: // Return the inverted value of the LT bit of CR6.

2023

BitNo = 2; InvertBit = true;

break;

}

// Shift the bit into the low position.

2028

Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,

2029

DAG.getConstant(8-(3-BitNo), MVT::i32));

2030

// Isolate the bit.

2031

Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,

2032

DAG.getConstant(1, MVT::i32));

2033

2034

// If we are supposed to, toggle the bit.

2035

if (InvertBit)

2036

Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,

2037

DAG.getConstant(1, MVT::i32));

return Flags;

}

/// LowerSCALAR_TO_VECTOR - Lower SCALAR_TO_VECTOR through memory: store the
/// scalar operand at the start of a fresh 16-byte, 16-byte-aligned stack slot
/// and load the whole slot back as a value of the vector result type.
static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // Allocate the stack slot (size 16, alignment 16) and form its address.
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  int SlotIdx = MFI->CreateStackObject(16, 16);
  SDOperand SlotAddr = DAG.getFrameIndex(SlotIdx, MVT::i32);

  // Spill the scalar into the slot, chained to the entry node.
  SDOperand Scalar = Op.getOperand(0);
  SDOperand Spill = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
                                Scalar, SlotAddr, DAG.getSrcValue(NULL));

  // Reload the slot as a vector; the load is chained to the store.
  return DAG.getLoad(Op.getValueType(), Spill, SlotAddr,
                     DAG.getSrcValue(NULL));
}

2053

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

2054

/// LowerMUL - Custom lower a vector multiply of type v4i32, v8i16 or v16i8
/// into altivec intrinsics.  Any other type asserts and aborts.
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    // Built from 16-bit multiplies: the product of the low halves plus the
    // summed cross products shifted up by 16 bits.
    SDOperand A = Op.getOperand(0), B = Op.getOperand(1);

    SDOperand Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG);
    SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt.

    // BSwapped = vrlw B, 16
    SDOperand BSwapped =
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, B, Neg16, DAG);

    // Shrinkify inputs to v8i16.
    A = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, A);
    B = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, B);
    BSwapped = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, BSwapped);

    // Low parts multiplied together, generating 32-bit results (we ignore
    // the top parts).
    SDOperand LowProduct = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                            A, B, DAG, MVT::v4i32);

    // Cross products, accumulated onto zero by vmsumuhm.
    SDOperand CrossProduct = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                              A, BSwapped, Zero, DAG,
                                              MVT::v4i32);
    // Shift the high parts up 16 bits.
    CrossProduct = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, CrossProduct,
                                    Neg16, DAG);
    return DAG.getNode(ISD::ADD, MVT::v4i32, LowProduct, CrossProduct);
  }

  case MVT::v8i16: {
    // A single multiply-add (vmladduhm) with a zero addend.
    SDOperand A = Op.getOperand(0), B = Op.getOperand(1);
    SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG);
    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, A, B, Zero, DAG);
  }

  case MVT::v16i8: {
    SDOperand A = Op.getOperand(0), B = Op.getOperand(1);

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDOperand EvenProducts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                              A, B, DAG, MVT::v8i16);
    EvenProducts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenProducts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDOperand OddProducts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                             A, B, DAG, MVT::v8i16);
    OddProducts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddProducts);

    // Merge the results together: take the odd-numbered byte of each 16-bit
    // product, alternating between the even and the odd products.
    std::vector<SDOperand> MergeMask;
    for (unsigned Pair = 0; Pair != 8; ++Pair) {
      MergeMask.push_back(DAG.getConstant(2*Pair+1, MVT::i8));
      MergeMask.push_back(DAG.getConstant(2*Pair+1+16, MVT::i8));
    }

    return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenProducts,
                       OddProducts,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, MergeMask));
  }

  default:
    assert(0 && "Unknown mul to lower!");
    abort();
  }
}

2113

Chris Lattner

2005-08-26 00:52:45 +0000

[diff] [blame]

2114

/// LowerOperation - Provide custom lowering hooks for some operations.

2115

///

Nate Begeman

2005-10-16 05:39:50 +0000

[diff] [blame]

2116

SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {

Chris Lattner

2005-08-26 00:52:45 +0000

[diff] [blame]

2117

switch (Op.getOpcode()) {

2118

default: assert(0 && "Wasn't expecting to be able to lower this!");

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2119

case ISD::ConstantPool: return LowerConstantPool(Op, DAG);

2120

case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);

Nate Begeman

37efe67

2006-04-22 18:53:45 +0000

[diff] [blame]

2121

case ISD::JumpTable: return LowerJumpTable(Op, DAG);

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2122

case ISD::SETCC: return LowerSETCC(Op, DAG);

2123

case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);

Chris Lattner

2006-05-16 18:18:50 +0000

[diff] [blame]

2124

case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG,

2125

VarArgsFrameIndex);

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

2126

case ISD::CALL: return LowerCALL(Op, DAG);

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2127

case ISD::RET: return LowerRET(Op, DAG);

Chris Lattner

7c0d664

2005-10-02 06:37:13 +0000

[diff] [blame]

2128

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2129

case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);

2130

case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);

2131

case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

2132

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2133

// Lower 64-bit shifts.

2134

case ISD::SHL: return LowerSHL(Op, DAG);

2135

case ISD::SRL: return LowerSRL(Op, DAG);

2136

case ISD::SRA: return LowerSRA(Op, DAG);

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

2137

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2138

// Vector-related lowering.

2139

case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);

2140

case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);

2141

case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);

2142

case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

2143

case ISD::MUL: return LowerMUL(Op, DAG);

Chris Lattner

bc11c34

2005-08-31 20:23:54 +0000

[diff] [blame]

2144

}

Chris Lattner

2005-08-26 00:52:45 +0000

[diff] [blame]

return SDOperand();

}

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2148

//===----------------------------------------------------------------------===//

2149

// Other Lowering Code

2150

//===----------------------------------------------------------------------===//

2151

Chris Lattner

2005-08-26 21:23:58 +0000

[diff] [blame]

2152

/// InsertAtEndOfBasicBlock - Expand a SELECT_CC pseudo instruction into the
/// diamond control-flow pattern that implements it.  MI is the pseudo (it is
/// deleted here) and BB is the block it is being inserted at the end of.
/// Returns the new block that holds the PHI producing the select's result.
MachineBasicBlock *
PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8 ||
          MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
         "Unexpected instr type to insert");

  // The pseudo's operands, as used below: 0 is the destination vreg, 1 the
  // condition code register to branch on, 2 and 3 the true and false values,
  // and 4 the branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();

  // The new blocks are placed immediately after the current one.
  ilist<MachineBasicBlock>::iterator InsertPos = BB;
  ++InsertPos;

  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC copy1MBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
  BuildMI(thisMBB, MI->getOperand(4).getImmedValue(), 2)
    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
  MachineFunction *F = thisMBB->getParent();
  F->getBasicBlockList().insert(InsertPos, copy0MBB);
  F->getBasicBlockList().insert(InsertPos, sinkMBB);

  // Update machine-CFG edges by first adding all successors of the current
  // block to the new block which will contain the Phi node for the select.
  for (MachineBasicBlock::succ_iterator SI = thisMBB->succ_begin(),
         SE = thisMBB->succ_end(); SI != SE; ++SI)
    sinkMBB->addSuccessor(*SI);

  // Next, remove all successors of the current block, and add the true
  // and fallthrough blocks as its successors.
  while (!thisMBB->succ_empty())
    thisMBB->removeSuccessor(thisMBB->succ_begin());
  thisMBB->addSuccessor(copy0MBB);
  thisMBB->addSuccessor(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  // No instructions are needed here, only the machine-CFG edge.
  copy0MBB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BuildMI(sinkMBB, PPC::PHI, 4, MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  delete MI;   // The pseudo instruction is gone now.
  return sinkMBB;
}

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2215

//===----------------------------------------------------------------------===//

2216

// Target Optimization Hooks

2217

//===----------------------------------------------------------------------===//

2218

Chris Lattner

2006-03-01 04:57:39 +0000

[diff] [blame]

2219

SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,

2220

DAGCombinerInfo &DCI) const {

2221

TargetMachine &TM = getTargetMachine();

2222

SelectionDAG &DAG = DCI.DAG;

2223

switch (N->getOpcode()) {

2224

default: break;

2225

case ISD::SINT_TO_FP:

2226

if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

2227

if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {

2228

// Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.

2229

// We allow the src/dst to be either f32/f64, but the intermediate

2230

// type must be i64.

2231

if (N->getOperand(0).getValueType() == MVT::i64) {

2232

SDOperand Val = N->getOperand(0).getOperand(0);

2233

if (Val.getValueType() == MVT::f32) {

2234

Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);

2235

DCI.AddToWorklist(Val.Val);

2236

}

2237

2238

Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);

Chris Lattner

2006-03-01 04:57:39 +0000

[diff] [blame]

2239

DCI.AddToWorklist(Val.Val);

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

2240

Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);

Chris Lattner

2006-03-01 04:57:39 +0000

[diff] [blame]

2241

DCI.AddToWorklist(Val.Val);

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

2242

if (N->getValueType(0) == MVT::f32) {

2243

Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);

2244

DCI.AddToWorklist(Val.Val);

2245

}

2246

return Val;

2247

} else if (N->getOperand(0).getValueType() == MVT::i32) {

2248

// If the intermediate type is i32, we can avoid the load/store here

2249

// too.

Chris Lattner

2006-03-01 04:57:39 +0000

[diff] [blame]

2250

}

Chris Lattner

2006-03-01 04:57:39 +0000

[diff] [blame]

2251

}

2252

}

2253

break;

Chris Lattner

5126984

2006-03-01 05:50:56 +0000

[diff] [blame]

2254

case ISD::STORE:

2255

// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).

2256

if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&

2257

N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&

2258

N->getOperand(1).getValueType() == MVT::i32) {

2259

SDOperand Val = N->getOperand(1).getOperand(0);

2260

if (Val.getValueType() == MVT::f32) {

2261

Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);

2262

DCI.AddToWorklist(Val.Val);

2263

}

2264

Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);

2265

DCI.AddToWorklist(Val.Val);

2266

2267

Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,

2268

N->getOperand(2), N->getOperand(3));

2269

DCI.AddToWorklist(Val.Val);

2270

return Val;

2271

}

2272

break;

Chris Lattner

4468c22

2006-03-31 06:02:07 +0000

[diff] [blame]

2273

case PPCISD::VCMP: {

2274

// If a VCMPo node already exists with exactly the same operands as this

2275

// node, use its result instead of this node (VCMPo computes both a CR6 and

2276

// a normal output).

2277

//

2278

if (!N->getOperand(0).hasOneUse() &&

2279

!N->getOperand(1).hasOneUse() &&

2280

!N->getOperand(2).hasOneUse()) {

2281

2282

// Scan all of the users of the LHS, looking for VCMPo's that match.

2283

SDNode *VCMPoNode = 0;

2284

2285

SDNode *LHSN = N->getOperand(0).Val;

2286

for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();

2287

UI != E; ++UI)

2288

if ((*UI)->getOpcode() == PPCISD::VCMPo &&

2289

(*UI)->getOperand(1) == N->getOperand(1) &&

2290

(*UI)->getOperand(2) == N->getOperand(2) &&

2291

(*UI)->getOperand(0) == N->getOperand(0)) {

VCMPoNode = *UI;

break;

}

Chris Lattner

2006-04-18 18:28:22 +0000

[diff] [blame]

2296

// If there is no VCMPo node, or if the flag value has no uses, don't

2297

// transform this.

2298

if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))

2299

break;

2300

2301

// Look at the (necessarily single) use of the flag value. If it has a

2302

// chain, this transformation is more complex. Note that multiple things

2303

// could use the value result, which we should ignore.

2304

SDNode *FlagUser = 0;

2305

for (SDNode::use_iterator UI = VCMPoNode->use_begin();

2306

FlagUser == 0; ++UI) {

2307

assert(UI != VCMPoNode->use_end() && "Didn't find user!");

2308

SDNode *User = *UI;

2309

for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {

2310

if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {

FlagUser = User;

break;

}

}

}

// If the user is a MFCR instruction, we know this is safe. Otherwise we

2318

// give up for right now.

2319

if (FlagUser->getOpcode() == PPCISD::MFCR)

Chris Lattner

4468c22

2006-03-31 06:02:07 +0000

[diff] [blame]

2320

return SDOperand(VCMPoNode, 0);

2321

}

2322

break;

2323

}

Chris Lattner

2006-04-18 17:59:36 +0000

[diff] [blame]

2324

case ISD::BR_CC: {

2325

// If this is a branch on an altivec predicate comparison, lower this so

2326

// that we don't have to do a MFCR: instead, branch directly on CR6. This

2327

// lowering is done pre-legalize, because the legalizer lowers the predicate

2328

// compare down to code that is difficult to reassemble.

2329

ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();

2330

SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);

int CompareOpc;

bool isDot;

if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&

2335

isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&

2336

getAltivecCompareInfo(LHS, CompareOpc, isDot)) {

2337

assert(isDot && "Can't compare against a vector result!");

2338

2339

// If this is a comparison against something other than 0/1, then we know

2340

// that the condition is never/always true.

2341

unsigned Val = cast<ConstantSDNode>(RHS)->getValue();

2342

if (Val != 0 && Val != 1) {

2343

if (CC == ISD::SETEQ) // Cond never true, remove branch.

2344

return N->getOperand(0);

2345

// Always !=, turn it into an unconditional branch.

2346

return DAG.getNode(ISD::BR, MVT::Other,

2347

N->getOperand(0), N->getOperand(4));

2348

}

2349

2350

bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

2351

2352

// Create the PPCISD altivec 'dot' comparison node.

2353

std::vector<SDOperand> Ops;

2354

std::vector<MVT::ValueType> VTs;

2355

Ops.push_back(LHS.getOperand(2)); // LHS of compare

2356

Ops.push_back(LHS.getOperand(3)); // RHS of compare

2357

Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));

2358

VTs.push_back(LHS.getOperand(2).getValueType());

2359

VTs.push_back(MVT::Flag);

2360

SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);

2361

2362

// Unpack the result based on how the target uses it.

2363

unsigned CompOpc;

2364

switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {

2365

default: // Can't happen, don't crash on invalid number though.

2366

case 0: // Branch on the value of the EQ bit of CR6.

2367

CompOpc = BranchOnWhenPredTrue ? PPC::BEQ : PPC::BNE;

2368

break;

2369

case 1: // Branch on the inverted value of the EQ bit of CR6.

2370

CompOpc = BranchOnWhenPredTrue ? PPC::BNE : PPC::BEQ;

2371

break;

2372

case 2: // Branch on the value of the LT bit of CR6.

2373

CompOpc = BranchOnWhenPredTrue ? PPC::BLT : PPC::BGE;

2374

break;

2375

case 3: // Branch on the inverted value of the LT bit of CR6.

2376

CompOpc = BranchOnWhenPredTrue ? PPC::BGE : PPC::BLT;

break;

}

return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),

2381

DAG.getRegister(PPC::CR6, MVT::i32),

2382

DAG.getConstant(CompOpc, MVT::i32),

2383

N->getOperand(4), CompNode.getValue(1));

2384

}

2385

break;

2386

}

Chris Lattner