Blame - lib/Target/PowerPC/PPCISelLowering.cpp - platform/external/llvm

2005-10-18 00:28:58 +0000

[diff] [blame]

1

//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

2

//

3

// The LLVM Compiler Infrastructure

4

//

5

// This file was developed by Chris Lattner and is distributed under

6

// the University of Illinois Open Source License. See LICENSE.TXT for details.

7

//

8

//===----------------------------------------------------------------------===//

9

//

Nate Begeman

2005-10-16 05:39:50 +0000

[diff] [blame]

10

// This file implements the PPCISelLowering class.

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

11

//

12

//===----------------------------------------------------------------------===//

13

Chris Lattner

16e71f2

2005-10-14 23:59:06 +0000

[diff] [blame]

14

#include "PPCISelLowering.h"

15

#include "PPCTargetMachine.h"

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

16

#include "PPCPerfectShuffle.h"

Nate Begeman

750ac1b

2006-02-01 07:19:44 +0000

[diff] [blame]

17

#include "llvm/ADT/VectorExtras.h"

Evan Cheng

c4c6257

2006-03-13 23:20:37 +0000

[diff] [blame]

18

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

19

#include "llvm/CodeGen/MachineFrameInfo.h"

20

#include "llvm/CodeGen/MachineFunction.h"

Chris Lattner

2005-08-26 21:23:58 +0000

[diff] [blame]

21

#include "llvm/CodeGen/MachineInstrBuilder.h"

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

22

#include "llvm/CodeGen/SelectionDAG.h"

Chris Lattner

7b73834

2005-09-13 19:33:40 +0000

[diff] [blame]

23

#include "llvm/CodeGen/SSARegMap.h"

Chris Lattner

0b1e4e5

2005-08-26 17:36:52 +0000

[diff] [blame]

24

#include "llvm/Constants.h"

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

25

#include "llvm/Function.h"

Chris Lattner

2006-03-26 10:06:40 +0000

[diff] [blame]

26

#include "llvm/Intrinsics.h"

Nate Begeman

750ac1b

2006-02-01 07:19:44 +0000

[diff] [blame]

27

#include "llvm/Support/MathExtras.h"

Evan Cheng

d2ee218

2006-02-18 00:08:58 +0000

[diff] [blame]

28

#include "llvm/Target/TargetOptions.h"

Chris Lattner

2005-08-16 17:14:42 +0000

[diff] [blame]

29

using namespace llvm;

30

Nate Begeman

2005-10-16 05:39:50 +0000

[diff] [blame]

31

/// PPCTargetLowering constructor - Describe to the target-independent
/// lowering code how each operation/value-type pair is handled on PowerPC
/// (Legal, Promote, Expand or Custom), register the register classes for the
/// supported value types, and list the nodes that have target-specific DAG
/// combines.  Several decisions depend on the PPCSubtarget obtained from TM.
PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {

  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // PowerPC has no intrinsics for these particular operations
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // FSQRT is only left at its default action when the subtarget reports
  // hardware square root support (hasFSQRT); otherwise it is expanded.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);

  // PowerPC does not have Select
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);

  // PowerPC does not have truncstore for i1.
  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET               , MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
    // 64-bit subtargets custom-lower the conversions between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
    // 64 bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  } else {
    // 32 bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL, MVT::i64, Custom);
    setOperationAction(ISD::SRL, MVT::i64, Custom);
    setOperationAction(ISD::SRA, MVT::i64, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First install default actions for every vector value type.  The
    // specific types we can codegen well are overridden after the loop.
    //
    // NOTE(review): the bound is inclusive on the assumption that
    // MVT::LAST_VECTOR_VALUETYPE names the last vector type itself (mirroring
    // FIRST_VECTOR_VALUETYPE) rather than one-past-the-end; the previous
    // '!=' bound left that final type without these defaults.  Confirm
    // against the MVT::ValueType enum.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::OR    , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR    , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD  , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);

      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // v4i32 is the type the loop above promotes the untyped operations to,
    // so give it concrete actions here.
    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);

  computeRegisterProperties();
}

259

Chris Lattner

2006-01-09 23:52:17 +0000

[diff] [blame]

260

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {

261

switch (Opcode) {

262

default: return 0;

263

case PPCISD::FSEL: return "PPCISD::FSEL";

264

case PPCISD::FCFID: return "PPCISD::FCFID";

265

case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";

266

case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";

Chris Lattner

5126984

2006-03-01 05:50:56 +0000

[diff] [blame]

267

case PPCISD::STFIWX: return "PPCISD::STFIWX";

Chris Lattner

2006-01-09 23:52:17 +0000

[diff] [blame]

268

case PPCISD::VMADDFP: return "PPCISD::VMADDFP";

269

case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";

Chris Lattner

f1d0b2b

2006-03-20 01:53:53 +0000

[diff] [blame]

270

case PPCISD::VPERM: return "PPCISD::VPERM";

Chris Lattner

2006-01-09 23:52:17 +0000

[diff] [blame]

271

case PPCISD::Hi: return "PPCISD::Hi";

272

case PPCISD::Lo: return "PPCISD::Lo";

273

case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";

274

case PPCISD::SRL: return "PPCISD::SRL";

275

case PPCISD::SRA: return "PPCISD::SRA";

276

case PPCISD::SHL: return "PPCISD::SHL";

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

277

case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";

278

case PPCISD::STD_32: return "PPCISD::STD_32";

Chris Lattner

e00ebf0

2006-01-28 07:33:03 +0000

[diff] [blame]

279

case PPCISD::CALL: return "PPCISD::CALL";

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

280

case PPCISD::MTCTR: return "PPCISD::MTCTR";

281

case PPCISD::BCTRL: return "PPCISD::BCTRL";

Chris Lattner

2006-01-09 23:52:17 +0000

[diff] [blame]

282

case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";

Chris Lattner

2006-03-26 10:06:40 +0000

[diff] [blame]

283

case PPCISD::MFCR: return "PPCISD::MFCR";

Chris Lattner

a17b155

2006-03-31 05:13:27 +0000

[diff] [blame]

284

case PPCISD::VCMP: return "PPCISD::VCMP";

Chris Lattner

2006-03-26 10:06:40 +0000

[diff] [blame]

285

case PPCISD::VCMPo: return "PPCISD::VCMPo";

Chris Lattner

f70f8d9

2006-04-18 18:05:58 +0000

[diff] [blame]

286

case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";

Chris Lattner

2006-01-09 23:52:17 +0000

[diff] [blame]

}

}

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

290

//===----------------------------------------------------------------------===//

291

// Node matching predicates, for use by the tblgen matching code.

292

//===----------------------------------------------------------------------===//

293

Chris Lattner

0b1e4e5

2005-08-26 17:36:52 +0000

[diff] [blame]

294

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.

295

static bool isFloatingPointZero(SDOperand Op) {

296

if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))

297

return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);

298

else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {

299

// Maybe this has already been legalized into the constant pool?

300

if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))

301

if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))

302

return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);

}

return false;

}

Chris Lattner

2006-04-06 17:23:16 +0000

[diff] [blame]

307

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return

308

/// true if Op is undef or if it matches the specified value.

309

static bool isConstantOrUndef(SDOperand Op, unsigned Val) {

310

return Op.getOpcode() == ISD::UNDEF ||

311

cast<ConstantSDNode>(Op)->getValue() == Val;

312

}

313

314

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a

315

/// VPKUHUM instruction.

Chris Lattner

2006-04-06 22:28:36 +0000

[diff] [blame]

316

bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {

317

if (!isUnary) {

318

for (unsigned i = 0; i != 16; ++i)

319

if (!isConstantOrUndef(N->getOperand(i), i*2+1))

320

return false;

321

} else {

322

for (unsigned i = 0; i != 8; ++i)

323

if (!isConstantOrUndef(N->getOperand(i), i*2+1) ||

324

!isConstantOrUndef(N->getOperand(i+8), i*2+1))

325

return false;

326

}

Chris Lattner

2006-04-06 18:26:28 +0000

[diff] [blame]

327

return true;

Chris Lattner

ddb739e

2006-04-06 17:23:16 +0000

[diff] [blame]

328

}

329

330

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a

331

/// VPKUWUM instruction.

Chris Lattner

2006-04-06 22:28:36 +0000

[diff] [blame]

332

bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {

333

if (!isUnary) {

334

for (unsigned i = 0; i != 16; i += 2)

335

if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||

336

!isConstantOrUndef(N->getOperand(i+1), i*2+3))

337

return false;

338

} else {

339

for (unsigned i = 0; i != 8; i += 2)

340

if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||

341

!isConstantOrUndef(N->getOperand(i+1), i*2+3) ||

342

!isConstantOrUndef(N->getOperand(i+8), i*2+2) ||

343

!isConstantOrUndef(N->getOperand(i+9), i*2+3))

344

return false;

345

}

Chris Lattner

2006-04-06 18:26:28 +0000

[diff] [blame]

346

return true;

Chris Lattner

ddb739e

2006-04-06 17:23:16 +0000

[diff] [blame]

347

}

348

Chris Lattner

2006-04-06 22:02:42 +0000

[diff] [blame]

349

/// isVMerge - Common function, used to match vmrg* shuffles.

350

///

351

static bool isVMerge(SDNode *N, unsigned UnitSize,

352

unsigned LHSStart, unsigned RHSStart) {

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

353

assert(N->getOpcode() == ISD::BUILD_VECTOR &&

354

N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");

355

assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&

356

"Unsupported merge size!");

357

358

for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units

359

for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit

360

if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),

Chris Lattner

2006-04-06 22:02:42 +0000

[diff] [blame]

361

LHSStart+j+i*UnitSize) ||

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

362

!isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),

Chris Lattner

2006-04-06 22:02:42 +0000

[diff] [blame]

363

RHSStart+j+i*UnitSize))

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

364

return false;

365

}

Chris Lattner

2006-04-06 22:02:42 +0000

[diff] [blame]

return true;

}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for

370

/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).

371

bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {

372

if (!isUnary)

373

return isVMerge(N, UnitSize, 8, 24);

374

return isVMerge(N, UnitSize, 8, 8);

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

375

}

376

377

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for

378

/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).

Chris Lattner

2006-04-06 22:02:42 +0000

[diff] [blame]

379

bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {

380

if (!isUnary)

381

return isVMerge(N, UnitSize, 0, 16);

382

return isVMerge(N, UnitSize, 0, 0);

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

}

Chris Lattner

2006-04-06 18:26:28 +0000

[diff] [blame]

386

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift

387

/// amount, otherwise return -1.

Chris Lattner

2006-04-06 22:28:36 +0000

[diff] [blame]

388

int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {

Chris Lattner

2006-04-06 21:11:54 +0000

[diff] [blame]

389

assert(N->getOpcode() == ISD::BUILD_VECTOR &&

390

N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");

Chris Lattner

2006-04-06 18:26:28 +0000

[diff] [blame]

391

// Find the first non-undef value in the shuffle mask.

392

unsigned i;

393

for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)

394

/*search*/;

395

396

if (i == 16) return -1; // all undef.

397

398

// Otherwise, check to see if the rest of the elements are consequtively

399

// numbered from this value.

400

unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();

401

if (ShiftAmt < i) return -1;

402

ShiftAmt -= i;

Chris Lattner

ddb739e

2006-04-06 17:23:16 +0000

[diff] [blame]

403

Chris Lattner

2006-04-06 22:28:36 +0000

[diff] [blame]

404

if (!isUnary) {

405

// Check the rest of the elements to see if they are consequtive.

406

for (++i; i != 16; ++i)

407

if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))

408

return -1;

409

} else {

410

// Check the rest of the elements to see if they are consequtive.

411

for (++i; i != 16; ++i)

412

if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))

413

return -1;

414

}

Chris Lattner

2006-04-06 18:26:28 +0000

[diff] [blame]

415

416

return ShiftAmt;

417

}

Chris Lattner

2006-03-20 06:33:01 +0000

[diff] [blame]

418

419

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand

420

/// specifies a splat of a single element that is suitable for input to

421

/// VSPLTB/VSPLTH/VSPLTW.

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

422

bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {

423

assert(N->getOpcode() == ISD::BUILD_VECTOR &&

424

N->getNumOperands() == 16 &&

425

(EltSize == 1 || EltSize == 2 || EltSize == 4));

Chris Lattner

dd4d2d0

2006-03-20 06:51:10 +0000

[diff] [blame]

426

Chris Lattner

2006-03-20 06:37:44 +0000

[diff] [blame]

427

// This is a splat operation if each element of the permute is the same, and

428

// if the value doesn't reference the second vector.

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

429

unsigned ElementBase = 0;

Chris Lattner

2006-03-20 06:37:44 +0000

[diff] [blame]

430

SDOperand Elt = N->getOperand(0);

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

431

if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))

432

ElementBase = EltV->getValue();

433

else

434

return false; // FIXME: Handle UNDEF elements too!

435

436

if (cast<ConstantSDNode>(Elt)->getValue() >= 16)

437

return false;

438

439

// Check that they are consequtive.

440

for (unsigned i = 1; i != EltSize; ++i) {

441

if (!isa<ConstantSDNode>(N->getOperand(i)) ||

442

cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)

return false;

}

Chris Lattner

2006-03-20 06:37:44 +0000

[diff] [blame]

446

assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

447

for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {

Chris Lattner

b097aa9

2006-04-14 23:19:08 +0000

[diff] [blame]

448

if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

Chris Lattner

2006-03-20 06:37:44 +0000

[diff] [blame]

449

assert(isa<ConstantSDNode>(N->getOperand(i)) &&

450

"Invalid VECTOR_SHUFFLE mask!");

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

451

for (unsigned j = 0; j != EltSize; ++j)

452

if (N->getOperand(i+j) != N->getOperand(j))

453

return false;

Chris Lattner

2006-03-20 06:37:44 +0000

[diff] [blame]

454

}

455

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

456

return true;

Chris Lattner

2006-03-20 06:33:01 +0000

[diff] [blame]

457

}

458

459

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the

460

/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.

Chris Lattner

2006-04-04 17:25:31 +0000

[diff] [blame]

461

unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {

462

assert(isSplatShuffleMask(N, EltSize));

463

return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;

Chris Lattner

2006-03-20 06:33:01 +0000

[diff] [blame]

464

}

465

Chris Lattner

e87192a

2006-04-12 17:37:20 +0000

[diff] [blame]

466

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

467

/// by using a vspltis[bhw] instruction of the specified element size, return

468

/// the constant being splatted. The ByteSize field indicates the number of

469

/// bytes of each element [124] -> [bhw].

Chris Lattner

e87192a

2006-04-12 17:37:20 +0000

[diff] [blame]

470

SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

471

SDOperand OpVal(0, 0);

Chris Lattner

79d9a88

2006-04-08 07:14:26 +0000

[diff] [blame]

472

473

// If ByteSize of the splat is bigger than the element size of the

474

// build_vector, then we have a case where we are checking for a splat where

475

// multiple elements of the buildvector are folded together into a single

476

// logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).

477

unsigned EltSize = 16/N->getNumOperands();

478

if (EltSize < ByteSize) {

479

unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.

480

SDOperand UniquedVals[4];

481

assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

482

483

// See if all of the elements in the buildvector agree across.

484

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

485

if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

486

// If the element isn't a constant, bail fully out.

487

if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();

488

489

490

if (UniquedVals[i&(Multiple-1)].Val == 0)

491

UniquedVals[i&(Multiple-1)] = N->getOperand(i);

492

else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))

493

return SDOperand(); // no match.

494

}

495

496

// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains

497

// either constant or undef values that are identical for each chunk. See

498

// if these chunks can form into a larger vspltis*.

499

500

// Check to see if all of the leading entries are either 0 or -1. If

501

// neither, then this won't fit into the immediate field.

502

bool LeadingZero = true;

503

bool LeadingOnes = true;

504

for (unsigned i = 0; i != Multiple-1; ++i) {

505

if (UniquedVals[i].Val == 0) continue; // Must have been undefs.

506

507

LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();

508

LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();

509

}

510

// Finally, check the least significant entry.

511

if (LeadingZero) {

512

if (UniquedVals[Multiple-1].Val == 0)

513

return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef

514

int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();

515

if (Val < 16)

516

return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)

517

}

518

if (LeadingOnes) {

519

if (UniquedVals[Multiple-1].Val == 0)

520

return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef

521

int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();

522

if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)

523

return DAG.getTargetConstant(Val, MVT::i32);

}

return SDOperand();

}

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

529

// Check to see if this buildvec has a single non-undef value in its elements.

530

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

531

if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

532

if (OpVal.Val == 0)

533

OpVal = N->getOperand(i);

534

else if (OpVal != N->getOperand(i))

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

535

return SDOperand();

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

536

}

537

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

538

if (OpVal.Val == 0) return SDOperand(); // All UNDEF: use implicit def.

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

539

Nate Begeman

98e70cc

2006-03-28 04:15:58 +0000

[diff] [blame]

540

unsigned ValSizeInBytes = 0;

541

uint64_t Value = 0;

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

542

if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

543

Value = CN->getValue();

544

ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;

545

} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {

546

assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");

547

Value = FloatToBits(CN->getValue());

ValSizeInBytes = 4;

}

// If the splat value is larger than the element value, then we can never do

552

// this splat. The only case that we could fit the replicated bits into our

553

// immediate field for would be zero, and we prefer to use vxor for it.

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

554

if (ValSizeInBytes < ByteSize) return SDOperand();

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

555

556

// If the element value is larger than the splat value, cut it in half and

557

// check to see if the two halves are equal. Continue doing this until we

558

// get to ByteSize. This allows us to handle 0x01010101 as 0x01.

559

while (ValSizeInBytes > ByteSize) {

560

ValSizeInBytes >>= 1;

561

562

// If the top half equals the bottom half, we're still ok.

Chris Lattner

9b42bdd

2006-04-05 17:39:25 +0000

[diff] [blame]

563

if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=

564

(Value & ((1 << (8*ValSizeInBytes))-1)))

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

565

return SDOperand();

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

566

}

567

568

// Properly sign extend the value.

569

int ShAmt = (4-ByteSize)*8;

570

int MaskVal = ((int)Value << ShAmt) >> ShAmt;

571

Evan Cheng

5b6a01b

2006-03-26 09:52:32 +0000

[diff] [blame]

572

// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

573

if (MaskVal == 0) return SDOperand();

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

574

Chris Lattner

2006-04-08 06:46:53 +0000

[diff] [blame]

575

// Finally, if this value fits in a 5 bit sext field, return it

576

if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)

577

return DAG.getTargetConstant(MaskVal, MVT::i32);

578

return SDOperand();

Chris Lattner

2006-03-25 06:12:06 +0000

[diff] [blame]

579

}

580

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

581

//===----------------------------------------------------------------------===//

582

// LowerOperation implementation

583

//===----------------------------------------------------------------------===//

584

585

/// LowerConstantPool - Lower a ConstantPool node into an explicit
/// hi(&cp)+lo(&cp) address computation on the matching target constant pool
/// node.
static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *PoolNode = cast<ConstantPoolSDNode>(Op);
  SDOperand Addr = DAG.getTargetConstantPool(PoolNode->get(), MVT::i32,
                                             PoolNode->getAlignment());
  SDOperand Zero = DAG.getConstant(0, MVT::i32);
  const TargetMachine &TM = DAG.getTarget();

  // Every path starts from the high part of the address.
  SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, Addr, Zero);

  // Non-darwin platforms don't support non-static relocation models yet, so
  // they share the static path: direct access, address = hi(&cp)+lo(&cp).
  const bool isStatic = TM.getRelocationModel() == Reloc::Static;
  if (isStatic || !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, Addr, Zero);
    return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  }

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (TM.getRelocationModel() == Reloc::PIC) {
    SDOperand GlobalBase = DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32);
    Hi = DAG.getNode(ISD::ADD, MVT::i32, GlobalBase, Hi);
  }

  SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, Addr, Zero);
  return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
}

Nate Begeman

2006-04-22 18:53:45 +0000

[diff] [blame]

616

/// LowerJumpTable - Lower a JumpTable node into an explicit
/// hi(&jt)+lo(&jt) address computation on the matching target jump table
/// node.
static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *TableNode = cast<JumpTableSDNode>(Op);
  SDOperand Addr = DAG.getTargetJumpTable(TableNode->getIndex(), MVT::i32);
  SDOperand Zero = DAG.getConstant(0, MVT::i32);
  const TargetMachine &TM = DAG.getTarget();

  // Every path starts from the high part of the address.
  SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, Addr, Zero);

  // Non-darwin platforms don't support non-static relocation models yet, so
  // they share the static path: direct access to the jump table, whose
  // address is just (hi(&jt)+lo(&jt)).
  const bool isStatic = TM.getRelocationModel() == Reloc::Static;
  if (isStatic || !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, Addr, Zero);
    return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  }

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (TM.getRelocationModel() == Reloc::PIC) {
    SDOperand GlobalBase = DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32);
    Hi = DAG.getNode(ISD::ADD, MVT::i32, GlobalBase, Hi);
  }

  SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, Addr, Zero);
  return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
}

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

646

/// LowerGlobalAddress - Lower a GlobalAddress node into an explicit
/// hi(&g)+lo(&g) address computation.  In the non-static darwin path, weak,
/// linkonce and (unread) external globals additionally get a load through the
/// computed address.
static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalAddressSDNode *GlobalNode = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GlobalNode->getGlobal();
  SDOperand Addr = DAG.getTargetGlobalAddress(GV, MVT::i32,
                                              GlobalNode->getOffset());
  SDOperand Zero = DAG.getConstant(0, MVT::i32);
  const TargetMachine &TM = DAG.getTarget();

  // Every path starts from the high part of the address.
  SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, Addr, Zero);

  // Non-darwin platforms don't support non-static relocation models yet, so
  // they share the static path: direct access to the global, whose address is
  // just (hi(&g)+lo(&g)).
  const bool isStatic = TM.getRelocationModel() == Reloc::Static;
  if (isStatic || !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, Addr, Zero);
    return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  }

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (TM.getRelocationModel() == Reloc::PIC) {
    SDOperand GlobalBase = DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32);
    Hi = DAG.getNode(ISD::ADD, MVT::i32, GlobalBase, Hi);
  }

  SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, Addr, Zero);
  SDOperand FullAddr = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);

  // If the global is weak or external, we have to go through the lazy
  // resolution stub; anything else can use the computed address directly.
  const bool NeedsStub =
    GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
    (GV->isExternal() && !GV->hasNotBeenReadFromBytecode());
  if (!NeedsStub)
    return FullAddr;

  return DAG.getLoad(MVT::i32, DAG.getEntryNode(), FullAddr,
                     DAG.getSrcValue(0));
}

683

684

/// LowerSETCC - Custom-lower selected SETCC nodes into cheaper sequences.
/// Returns a null SDOperand when no custom lowering applies.
static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  SDOperand LHS = Op.getOperand(0);
  SDOperand RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    const bool isZero = RHSC->isNullValue();

    // If we're comparing for equality to zero, expose the fact that this is
    // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
    // fold the new nodes.
    if (isZero && CC == ISD::SETEQ) {
      MVT::ValueType VT = LHS.getValueType();
      SDOperand Wide = LHS;
      if (VT < MVT::i32) {
        // Narrow operands are zero extended to i32 before counting.
        VT = MVT::i32;
        Wide = DAG.getNode(ISD::ZERO_EXTEND, VT, LHS);
      }
      // Shifting the leading-zero count right by log2(bit width) leaves the
      // count's bit for "width" itself, i.e. the all-zero input case.
      SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Wide);
      SDOperand ShAmt = DAG.getConstant(Log2_32(MVT::getSizeInBits(VT)),
                                        MVT::i32);
      SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz, ShAmt);
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
    }

    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (isZero || RHSC->isAllOnesValue())
      return SDOperand();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by subtracting the rhs from the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.
  MVT::ValueType LHSVT = LHS.getValueType();
  if (!MVT::isInteger(LHSVT) || (CC != ISD::SETEQ && CC != ISD::SETNE))
    return SDOperand();

  MVT::ValueType ResVT = Op.getValueType();
  SDOperand Diff = DAG.getNode(ISD::SUB, LHSVT, LHS, RHS);
  return DAG.getSetCC(ResVT, Diff, DAG.getConstant(0, LHSVT), CC);
}

/// LowerVASTART - Lower a VASTART node.  vastart just stores the address of
/// the VarArgsFrameIndex slot into the memory location argument.
static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                              unsigned VarArgsFrameIndex) {
  // The node's three operands are forwarded unchanged to the STORE, in order,
  // around the stored value (presumably chain, destination pointer and source
  // value -- confirm against the VASTART node definition).
  SDOperand Operand0 = Op.getOperand(0);
  SDOperand Operand1 = Op.getOperand(1);
  SDOperand Operand2 = Op.getOperand(2);

  SDOperand VarArgsAddr = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
  return DAG.getNode(ISD::STORE, MVT::Other, Operand0, VarArgsAddr,
                     Operand1, Operand2);
}

732

Chris Lattner

2006-05-16 18:18:50 +0000

[diff] [blame]

733

/// LowerFORMAL_ARGUMENTS - Lower a FORMAL_ARGUMENTS node: produce one value
/// per incoming argument (copied out of its argument register, loaded from
/// its stack slot, or undef if it is stack-passed and unused) followed by the
/// output chain, merged into a single MERGE_VALUES node.  For vararg
/// functions, VarArgsFrameIndex is set to the frame index just past the named
/// arguments and the remaining integer argument registers are spilled there.
static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
                                       int &VarArgsFrameIndex) {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &Fn = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = Fn.getFrameInfo();
  SSARegMap *VRegMap = Fn.getSSARegMap();
  std::vector<SDOperand> Results;
  SDOperand Root = Op.getOperand(0);

  // Argument registers, in the order they are handed out.
  static const unsigned GPR[] = {
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };
  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = sizeof(GPR)/sizeof(GPR[0]);   // 8
  const unsigned NumFPRs = sizeof(FPR)/sizeof(FPR[0]);   // 13
  const unsigned NumVRs  = sizeof( VR)/sizeof( VR[0]);   // 12

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start at offset 24, although the
  // first ones are often in registers.
  unsigned StackOffset = 24;
  unsigned NextGPR = 0, NextFPR = 0, NextVR = 0;

  // The node's last value is not an argument, hence the -1.
  const unsigned NumArgs = Op.Val->getNumValues()-1;
  for (unsigned Idx = 0; Idx != NumArgs; ++Idx) {
    const MVT::ValueType ArgVT = Op.getValue(Idx).getValueType();
    const unsigned ArgBytes = MVT::getSizeInBits(ArgVT)/8;
    // Stack slot of this argument, captured before the offset is advanced.
    const unsigned SlotOffset = StackOffset;

    SDOperand ArgVal;
    bool FromStack = false;

    switch (ArgVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i32:
      // All int arguments reserve stack space.
      StackOffset += 4;

      if (NextGPR == NumGPRs) {
        FromStack = true;
      } else {
        unsigned VReg = VRegMap->createVirtualRegister(&PPC::GPRCRegClass);
        Fn.addLiveIn(GPR[NextGPR++], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // All FP arguments reserve stack space.
      StackOffset += ArgBytes;

      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      for (unsigned Words = ArgBytes/4; Words != 0 && NextGPR != NumGPRs;
           --Words)
        ++NextGPR;

      if (NextFPR == NumFPRs) {
        FromStack = true;
      } else {
        unsigned VReg;
        if (ArgVT != MVT::f32)
          VReg = VRegMap->createVirtualRegister(&PPC::F8RCRegClass);
        else
          VReg = VRegMap->createVirtualRegister(&PPC::F4RCRegClass);
        Fn.addLiveIn(FPR[NextFPR++], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ArgVT);
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space.
      if (NextVR == NumVRs) {
        // This should be simple, but requires getting 16-byte aligned stack
        // values.
        assert(0 && "Loading VR argument not implemented yet!");
        FromStack = true;
      } else {
        unsigned VReg = VRegMap->createVirtualRegister(&PPC::VRRCRegClass);
        Fn.addLiveIn(VR[NextVR++], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ArgVT);
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type
    if (FromStack) {
      if (Op.Val->hasNUsesOfValue(0, Idx)) {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ArgVT);
      } else {
        // The argument is actually used: emit a load from the right stack
        // slot.
        int FI = FrameInfo->CreateFixedObject(ArgBytes, SlotOffset);
        SDOperand SlotAddr = DAG.getFrameIndex(FI, MVT::i32);
        ArgVal = DAG.getLoad(ArgVT, Root, SlotAddr,
                             DAG.getSrcValue(NULL));
      }
    }

    Results.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  const bool isVarArg =
    cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg) {
    VarArgsFrameIndex = FrameInfo->CreateFixedObject(4, StackOffset);
    SDOperand SpillAddr = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    std::vector<SDOperand> SpillStores;
    while (NextGPR != NumGPRs) {
      unsigned VReg = VRegMap->createVirtualRegister(&PPC::GPRCRegClass);
      Fn.addLiveIn(GPR[NextGPR], VReg);
      SDOperand RegVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
      SpillStores.push_back(DAG.getNode(ISD::STORE, MVT::Other,
                                        RegVal.getValue(1), RegVal, SpillAddr,
                                        DAG.getSrcValue(NULL)));
      // Increment the address by four for the next argument to store
      SDOperand Four = DAG.getConstant(4, MVT::i32);
      SpillAddr = DAG.getNode(ISD::ADD, MVT::i32, SpillAddr, Four);
      ++NextGPR;
    }
    if (!SpillStores.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other, SpillStores);
  }

  Results.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> ResultTys(Op.Val->value_begin(),
                                        Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, ResultTys, Results);
}

882

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

883

/// isCallCompatibleAddress - Return the immediate to use if the specified

884

/// 32-bit value is representable in the immediate field of a BxA instruction.

885

static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {

886

ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);

887

if (!C) return 0;

888

889

int Addr = C->getValue();

890

if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.

891

(Addr << 6 >> 6) != Addr)

892

return 0; // Top 6 bits have to be sext of immediate.

893

894

return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;

}

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

898

static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) {

899

SDOperand Chain = Op.getOperand(0);

900

unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();

901

bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

902

bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;

903

SDOperand Callee = Op.getOperand(4);

Evan Cheng

4360bdc

2006-05-25 00:57:32 +0000

[diff] [blame]

904

unsigned NumOps = (Op.getNumOperands() - 5) / 2;

905

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

906

// args_to_use will accumulate outgoing args for the PPCISD::CALL case in

907

// SelectExpr to use to put the arguments in the appropriate registers.

908

std::vector<SDOperand> args_to_use;

909

910

// Count how many bytes are to be pushed on the stack, including the linkage

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

911

// area, and parameter passing area. We start with 24 bytes, which is

912

// prereserved space for [SP][CR][LR][3 x unused].

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

913

unsigned NumBytes = 24;

914

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

915

// Add up all the space actually used.

Evan Cheng

4360bdc

2006-05-25 00:57:32 +0000

[diff] [blame]

916

for (unsigned i = 0; i != NumOps; ++i)

917

NumBytes += MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;

Chris Lattner

c04ba7a

2006-05-16 23:54:25 +0000

[diff] [blame]

918

Chris Lattner

7b05350

2006-05-30 21:21:04 +0000

[diff] [blame]

919

// The prolog code of the callee may store up to 8 GPR argument registers to

920

// the stack, allowing va_start to index over them in memory if its varargs.

921

// Because we cannot tell if this is needed on the caller side, we have to

922

// conservatively assume that it is needed. As such, make sure we have at

923

// least enough stack space for the caller to store the 8 GPRs.

924

if (NumBytes < 24+8*4)

925

NumBytes = 24+8*4;

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

926

927

// Adjust the stack pointer for the new arguments...

928

// These operations are automatically eliminated by the prolog/epilog pass

929

Chain = DAG.getCALLSEQ_START(Chain,

930

DAG.getConstant(NumBytes, MVT::i32));

931

932

// Set up a copy of the stack pointer for use loading and storing any

933

// arguments that may not fit in the registers available for argument

934

// passing.

935

SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

936

937

// Figure out which arguments are going to go in registers, and which in

938

// memory. Also, if this is a vararg function, floating point operations

939

// must be stored to our stack, and loaded into integer regs as well, if

940

// any integer regs are available for argument passing.

941

unsigned ArgOffset = 24;

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

942

unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

943

static const unsigned GPR[] = {

944

PPC::R3, PPC::R4, PPC::R5, PPC::R6,

945

PPC::R7, PPC::R8, PPC::R9, PPC::R10,

946

};

947

static const unsigned FPR[] = {

948

PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,

949

PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13

950

};

951

static const unsigned VR[] = {

952

PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,

953

PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

954

};

955

const unsigned NumGPRs = sizeof(GPR)/sizeof(GPR[0]);

956

const unsigned NumFPRs = sizeof(FPR)/sizeof(FPR[0]);

957

const unsigned NumVRs = sizeof( VR)/sizeof( VR[0]);

958

959

std::vector<std::pair<unsigned, SDOperand> > RegsToPass;

960

std::vector<SDOperand> MemOpChains;

Evan Cheng

4360bdc

2006-05-25 00:57:32 +0000

[diff] [blame]

961

for (unsigned i = 0; i != NumOps; ++i) {

962

SDOperand Arg = Op.getOperand(5+2*i);

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

963

964

// PtrOff will be used to store the current argument to the stack if a

965

// register cannot be found for it.

966

SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

967

PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);

968

switch (Arg.getValueType()) {

969

default: assert(0 && "Unexpected ValueType for argument!");

970

case MVT::i32:

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

971

if (GPR_idx != NumGPRs) {

972

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

973

} else {

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

974

MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,

975

Arg, PtrOff, DAG.getSrcValue(NULL)));

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

}

ArgOffset += 4;

break;

case MVT::f32:

case MVT::f64:

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

981

if (FPR_idx != NumFPRs) {

982

RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

983

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

984

if (isVarArg) {

985

SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,

986

Arg, PtrOff,

987

DAG.getSrcValue(NULL));

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

988

MemOpChains.push_back(Store);

989

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

990

// Float varargs are always shadowed in available integer registers

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

991

if (GPR_idx != NumGPRs) {

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

992

SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,

993

DAG.getSrcValue(NULL));

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

994

MemOpChains.push_back(Load.getValue(1));

995

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

996

}

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

997

if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64) {

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

998

SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());

999

PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);

1000

SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,

1001

DAG.getSrcValue(NULL));

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1002

MemOpChains.push_back(Load.getValue(1));

1003

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1004

}

1005

} else {

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1006

// If we have any FPRs remaining, we may also have GPRs remaining.

1007

// Args passed in FPRs consume either 1 (f32) or 2 (f64) available

1008

// GPRs.

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1009

if (GPR_idx != NumGPRs)

1010

++GPR_idx;

1011

if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64)

1012

++GPR_idx;

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1013

}

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1014

} else {

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1015

MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,

1016

Arg, PtrOff, DAG.getSrcValue(NULL)));

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1017

}

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1018

ArgOffset += (Arg.getValueType() == MVT::f32) ? 4 : 8;

break;

case MVT::v4f32:

case MVT::v4i32:

case MVT::v8i16:

case MVT::v16i8:

assert(!isVarArg && "Don't support passing vectors to varargs yet!");

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1025

assert(VR_idx != NumVRs &&

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1026

"Don't support passing more than 12 vector args yet!");

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1027

RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));

Chris Lattner

2006-05-17 00:15:40 +0000

[diff] [blame]

1028

break;

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1029

}

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1030

}

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1031

if (!MemOpChains.empty())

1032

Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOpChains);

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1033

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1034

// Build a sequence of copy-to-reg nodes chained together with token chain

1035

// and flag operands which copy the outgoing args into the appropriate regs.

1036

SDOperand InFlag;

1037

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {

1038

Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,

1039

InFlag);

1040

InFlag = Chain.getValue(1);

1041

}

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1042

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1043

std::vector<MVT::ValueType> NodeTys;

Chris Lattner

2006-06-10 01:14:28 +0000

[diff] [blame^]

1044

NodeTys.push_back(MVT::Other); // Returns a chain

1045

NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.

1046

1047

std::vector<SDOperand> Ops;

1048

unsigned CallOpc = PPCISD::CALL;

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1049

1050

// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every

1051

// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol

1052

// node so that legalize doesn't hack it.

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1053

if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1054

Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1055

else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))

1056

Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());

1057

else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))

1058

// If this is an absolute destination address, use the munged value.

1059

Callee = SDOperand(Dest, 0);

1060

else {

1061

// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair

1062

// to do the call, we can't use PPCISD::CALL.

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1063

Ops.push_back(Chain);

1064

Ops.push_back(Callee);

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1065

1066

if (InFlag.Val)

1067

Ops.push_back(InFlag);

1068

Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, Ops);

1069

InFlag = Chain.getValue(1);

1070

1071

// Copy the callee address into R12 on darwin.

1072

Chain = DAG.getCopyToReg(Chain, PPC::R12, Callee, InFlag);

1073

InFlag = Chain.getValue(1);

1074

1075

NodeTys.clear();

1076

NodeTys.push_back(MVT::Other);

1077

NodeTys.push_back(MVT::Flag);

1078

Ops.clear();

1079

Ops.push_back(Chain);

Chris Lattner

2006-06-10 01:14:28 +0000

[diff] [blame^]

1080

CallOpc = PPCISD::BCTRL;

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1081

Callee.Val = 0;

1082

}

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1083

Chris Lattner

2006-06-10 01:14:28 +0000

[diff] [blame^]

1084

// If this is a direct call, pass the chain and the callee.

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1085

if (Callee.Val) {

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1086

Ops.push_back(Chain);

1087

Ops.push_back(Callee);

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1088

}

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1089

Chris Lattner

2006-06-10 01:14:28 +0000

[diff] [blame^]

1090

// Add argument registers to the end of the list so that they are known live

1091

// into the call.

1092

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)

1093

Ops.push_back(DAG.getRegister(RegsToPass[i].first,

1094

RegsToPass[i].second.getValueType()));

1095

1096

if (InFlag.Val)

1097

Ops.push_back(InFlag);

1098

Chain = DAG.getNode(CallOpc, NodeTys, Ops);

1099

InFlag = Chain.getValue(1);

1100

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1101

std::vector<SDOperand> ResultVals;

1102

NodeTys.clear();

1103

1104

// If the call has results, copy the values out of the ret val registers.

1105

switch (Op.Val->getValueType(0)) {

1106

default: assert(0 && "Unexpected ret value!");

1107

case MVT::Other: break;

1108

case MVT::i32:

1109

if (Op.Val->getValueType(1) == MVT::i32) {

1110

Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32, InFlag).getValue(1);

1111

ResultVals.push_back(Chain.getValue(0));

1112

Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32,

1113

Chain.getValue(2)).getValue(1);

1114

ResultVals.push_back(Chain.getValue(0));

1115

NodeTys.push_back(MVT::i32);

1116

} else {

1117

Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);

1118

ResultVals.push_back(Chain.getValue(0));

1119

}

1120

NodeTys.push_back(MVT::i32);

break;

case MVT::f32:

case MVT::f64:

Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),

1125

InFlag).getValue(1);

1126

ResultVals.push_back(Chain.getValue(0));

1127

NodeTys.push_back(Op.Val->getValueType(0));

break;

case MVT::v4f32:

case MVT::v4i32:

case MVT::v8i16:

case MVT::v16i8:

Chain = DAG.getCopyFromReg(Chain, PPC::V2, Op.Val->getValueType(0),

1134

InFlag).getValue(1);

1135

ResultVals.push_back(Chain.getValue(0));

1136

NodeTys.push_back(Op.Val->getValueType(0));

break;

}

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1140

Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,

1141

DAG.getConstant(NumBytes, MVT::i32));

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1142

NodeTys.push_back(MVT::Other);

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1143

Chris Lattner

2006-05-17 19:00:46 +0000

[diff] [blame]

1144

// If the function returns void, just return the chain.

1145

if (ResultVals.empty())

1146

return Chain;

1147

1148

// Otherwise, merge everything together with a MERGE_VALUES node.

Chris Lattner

2006-05-17 06:01:33 +0000

[diff] [blame]

1149

ResultVals.push_back(Chain);

1150

SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, ResultVals);

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

1151

return Res.getValue(Op.ResNo);

1152

}

1153

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1154

/// LowerRET - Custom lowering for a return node, keyed on its operand count:
///   1 operand  - 'ret void'; already legal, so an empty SDOperand is returned.
///   3 operands - one value, copied into V2, R3 or F1 depending on its type.
///   5 operands - two values: operand 3 is copied to R3 and operand 1 to R4.
/// For the value-returning forms the result is a PPCISD::RET_FLAG node that
/// takes the final register copy and that copy's second result (value #1).
/// Any other operand count asserts and aborts.
static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
  const unsigned NumOps = Op.getNumOperands();

  // ret void is legal
  if (NumOps == 1)
    return SDOperand();

  SDOperand RetCopy;
  if (NumOps == 3) {
    // Single return value: choose the return register from its value type.
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
    unsigned RetReg;
    if (MVT::isVector(ArgVT)) {
      RetReg = PPC::V2;
    } else if (MVT::isInteger(ArgVT)) {
      RetReg = PPC::R3;
    } else {
      assert(MVT::isFloatingPoint(ArgVT));
      RetReg = PPC::F1;
    }

    RetCopy = DAG.getCopyToReg(Op.getOperand(0), RetReg, Op.getOperand(1),
                               SDOperand());

    // Record the return register as live-out if no live-outs have been
    // recorded for this function yet.
    if (DAG.getMachineFunction().liveout_empty())
      DAG.getMachineFunction().addLiveOut(RetReg);
  } else if (NumOps == 5) {
    // Two return values: operand 3 goes to R3, then operand 1 goes to R4.  The
    // second copy is chained on the first and takes its value #1 as well.
    SDOperand FirstCopy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3,
                                           Op.getOperand(3), SDOperand());
    RetCopy = DAG.getCopyToReg(FirstCopy, PPC::R4, Op.getOperand(1),
                               FirstCopy.getValue(1));

    // Record R3 and R4 as live-out if no live-outs have been recorded yet.
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(PPC::R3);
      DAG.getMachineFunction().addLiveOut(PPC::R4);
    }
  } else {
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  }

  return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, RetCopy,
                     RetCopy.getValue(1));
}

1195

1196

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when

1197

/// possible.

1198

static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {

1199

// Not FP? Not a fsel.

1200

if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||

1201

!MVT::isFloatingPoint(Op.getOperand(2).getValueType()))

1202

return SDOperand();

1203

1204

ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

1205

1206

// Cannot handle SETEQ/SETNE.

1207

if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

1208

1209

MVT::ValueType ResVT = Op.getValueType();

1210

MVT::ValueType CmpVT = Op.getOperand(0).getValueType();

1211

SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);

1212

SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);

1213

1214

// If the RHS of the comparison is a 0.0, we don't need to do the

1215

// subtraction at all.

1216

if (isFloatingPointZero(RHS))

1217

switch (CC) {

1218

default: break; // SETUO etc aren't handled by fsel.

1219

case ISD::SETULT:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame]

1220

case ISD::SETOLT:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1221

case ISD::SETLT:

1222

std::swap(TV, FV); // fsel is natively setge, swap operands for setlt

1223

case ISD::SETUGE:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame]

1224

case ISD::SETOGE:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1225

case ISD::SETGE:

1226

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

1227

LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);

1228

return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);

1229

case ISD::SETUGT:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame]

1230

case ISD::SETOGT:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1231

case ISD::SETGT:

1232

std::swap(TV, FV); // fsel is natively setge, swap operands for setlt

1233

case ISD::SETULE:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame]

1234

case ISD::SETOLE:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1235

case ISD::SETLE:

1236

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

1237

LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);

1238

return DAG.getNode(PPCISD::FSEL, ResVT,

1239

DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);

}

SDOperand Cmp;

switch (CC) {

default: break; // SETUO etc aren't handled by fsel.

1245

case ISD::SETULT:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame]

1246

case ISD::SETOLT:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1247

case ISD::SETLT:

1248

Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);

1249

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

1250

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

1251

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);

1252

case ISD::SETUGE:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame]

1253

case ISD::SETOGE:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1254

case ISD::SETGE:

1255

Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);

1256

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

1257

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

1258

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);

1259

case ISD::SETUGT:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame]

1260

case ISD::SETOGT:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1261

case ISD::SETGT:

1262

Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);

1263

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

1264

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

1265

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);

1266

case ISD::SETULE:

Chris Lattner

2006-05-24 00:06:44 +0000

[diff] [blame]

1267

case ISD::SETOLE:

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1268

case ISD::SETLE:

1269

Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);

1270

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

1271

Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);

1272

return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);

}

return SDOperand();

}

/// LowerFP_TO_SINT - Custom lowering of FP_TO_SINT with an i32 or i64 result.
/// The (f64) source is converted with PPCISD::FCTIWZ for i32 results or
/// PPCISD::FCTIDZ for i64 results; both nodes produce an f64, which is then
/// BIT_CONVERTed to i64 and, for i32 results, truncated.
static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));

  const MVT::ValueType DestVT = Op.getValueType();

  // The conversion nodes are built on an f64 operand; widen f32 sources.
  SDOperand Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  SDOperand Converted;
  if (DestVT == MVT::i64) {
    Converted = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
  } else {
    // Anything other than i32 is unexpected.  In builds without assertions
    // such a type is treated like i32 here.
    if (DestVT != MVT::i32)
      assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
    Converted = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
  }

  // Reinterpret the f64 result as an integer, narrowing it for i32 results.
  SDOperand IntBits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Converted);
  if (DestVT == MVT::i32)
    return DAG.getNode(ISD::TRUNCATE, MVT::i32, IntBits);
  return IntBits;
}

/// LowerSINT_TO_FP - Custom lowering of SINT_TO_FP from i64 or i32.  In both
/// cases the integer ends up as the bits of an f64 value that is fed to
/// PPCISD::FCFID; the result is rounded to f32 if that is the requested type.
///   i64: the operand is simply BIT_CONVERTed to f64.
///   i32: the operand is extended with PPCISD::EXTSW_32, stored to a fresh
///        8-byte stack slot with PPCISD::STD_32, and reloaded as an f64.
static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  SDOperand FCFIDInput;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    FCFIDInput = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
  } else {
    assert(Op.getOperand(0).getValueType() == MVT::i32 &&
           "Unhandled SINT_TO_FP type in custom expander!");

    // This path is only generated in 64-bit mode, so the full 64-bit register
    // can be used: sign extend the input into it (extsw), store all 64 bits to
    // the stack, then reload the slot as a double.
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    int SlotIdx = MFI->CreateStackObject(8, 8);
    SDOperand SlotAddr = DAG.getFrameIndex(SlotIdx, MVT::i32);

    SDOperand Extended = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
                                     Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDOperand StoreChain = DAG.getNode(PPCISD::STD_32, MVT::Other,
                                       DAG.getEntryNode(), Extended, SlotAddr,
                                       DAG.getSrcValue(NULL));

    // Load the slot back as a double, ordered after the store.
    FCFIDInput = DAG.getLoad(MVT::f64, StoreChain, SlotAddr,
                             DAG.getSrcValue(NULL));
  }

  // FCFID it, then round if a single precision result was requested.
  SDOperand Result = DAG.getNode(PPCISD::FCFID, MVT::f64, FCFIDInput);
  if (Op.getValueType() == MVT::f32)
    Result = DAG.getNode(ISD::FP_ROUND, MVT::f32, Result);
  return Result;
}

/// LowerSHL - Custom lowering of an i64 SHL by a non-constant i32 amount into
/// operations on the two i32 halves of the value.  Constant shift amounts
/// return an empty SDOperand so that the generic expansion is used instead.
static SDOperand LowerSHL(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");

  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1)))
    return SDOperand();

  // Split the input into halves.  The sequence below has no select: it relies
  // on how the PPC shift nodes behave for oversized shift amounts.
  SDOperand LoHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                 Op.getOperand(0),
                                 DAG.getConstant(0, MVT::i32));
  SDOperand HiHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                 Op.getOperand(0),
                                 DAG.getConstant(1, MVT::i32));
  SDOperand ShAmt = Op.getOperand(1);

  // 32 - amt
  SDOperand InvAmt = DAG.getNode(ISD::SUB, MVT::i32,
                                 DAG.getConstant(32, MVT::i32), ShAmt);
  // (hi << amt) | (lo >> (32 - amt))
  SDOperand HiShifted = DAG.getNode(PPCISD::SHL, MVT::i32, HiHalf, ShAmt);
  SDOperand LoCarry = DAG.getNode(PPCISD::SRL, MVT::i32, LoHalf, InvAmt);
  SDOperand HiNear = DAG.getNode(ISD::OR, MVT::i32, HiShifted, LoCarry);
  // lo << (amt - 32)
  SDOperand AmtLess32 = DAG.getNode(ISD::ADD, MVT::i32, ShAmt,
                                    DAG.getConstant(-32U, MVT::i32));
  SDOperand HiFar = DAG.getNode(PPCISD::SHL, MVT::i32, LoHalf, AmtLess32);

  SDOperand ResHi = DAG.getNode(ISD::OR, MVT::i32, HiNear, HiFar);
  SDOperand ResLo = DAG.getNode(PPCISD::SHL, MVT::i32, LoHalf, ShAmt);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ResLo, ResHi);
}

1363

1364

/// LowerSRL - Custom lowering of an i64 SRL by a non-constant i32 amount into
/// operations on the two i32 halves of the value.  Constant shift amounts
/// return an empty SDOperand so that the generic expansion is used instead.
static SDOperand LowerSRL(SDOperand Op, SelectionDAG &DAG) {
  // Note: the message names SRL (it previously said SHL, copied from the
  // sibling lowering, which made assertion failures misleading).
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");

  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  // 32 - amt
  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  // (lo >> amt) | (hi << (32 - amt))
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  // hi >> (amt - 32)
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);

  SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
  SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

1390

1391

/// LowerSRA - Custom lowering of an i64 SRA by a non-constant i32 amount into
/// operations on the two i32 halves of the value.  Unlike the logical shifts,
/// the low half of the result is picked with a select_cc on (amt - 32).
/// Constant shift amounts return an empty SDOperand so that the generic
/// expansion is used instead.
static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");

  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1)))
    return SDOperand();

  SDOperand LoHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                 Op.getOperand(0),
                                 DAG.getConstant(0, MVT::i32));
  SDOperand HiHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                 Op.getOperand(0),
                                 DAG.getConstant(1, MVT::i32));
  SDOperand ShAmt = Op.getOperand(1);

  // 32 - amt
  SDOperand InvAmt = DAG.getNode(ISD::SUB, MVT::i32,
                                 DAG.getConstant(32, MVT::i32), ShAmt);
  // Low-half candidate #1: (lo >> amt) | (hi << (32 - amt))
  SDOperand LoShifted = DAG.getNode(PPCISD::SRL, MVT::i32, LoHalf, ShAmt);
  SDOperand HiCarry = DAG.getNode(PPCISD::SHL, MVT::i32, HiHalf, InvAmt);
  SDOperand LoNear = DAG.getNode(ISD::OR, MVT::i32, LoShifted, HiCarry);
  // Low-half candidate #2: hi >>s (amt - 32)
  SDOperand AmtLess32 = DAG.getNode(ISD::ADD, MVT::i32, ShAmt,
                                    DAG.getConstant(-32U, MVT::i32));
  SDOperand LoFar = DAG.getNode(PPCISD::SRA, MVT::i32, HiHalf, AmtLess32);

  SDOperand ResHi = DAG.getNode(PPCISD::SRA, MVT::i32, HiHalf, ShAmt);
  // Candidate #1 when (amt - 32) <= 0, candidate #2 otherwise.
  SDOperand ResLo = DAG.getSelectCC(AmtLess32, DAG.getConstant(0, MVT::i32),
                                    LoNear, LoFar, ISD::SETLE);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ResLo, ResHi);
}

1417

1418

//===----------------------------------------------------------------------===//

1419

// Vector related lowering.

1420

//

1421

Chris Lattner

ac225ca

2006-04-12 19:07:14 +0000

[diff] [blame]

1422

// If this is a vector of constants or undefs, get the bits. A bit in

1423

// UndefBits is set if the corresponding element of the vector is an

1424

// ISD::UNDEF value. For undefs, the corresponding VectorBits values are

1425

// zero. Return true if this is not an array of constants, false if it is.

1426

//

Chris Lattner

ac225ca

2006-04-12 19:07:14 +0000

[diff] [blame]

1427

static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],

1428

uint64_t UndefBits[2]) {

1429

// Start with zero'd results.

1430

VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

1431

1432

unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());

1433

for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {

1434

SDOperand OpVal = BV->getOperand(i);

1435

1436

unsigned PartNo = i >= e/2; // In the upper 128 bits?

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1437

unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

Chris Lattner

ac225ca

2006-04-12 19:07:14 +0000

[diff] [blame]

1438

1439

uint64_t EltBits = 0;

1440

if (OpVal.getOpcode() == ISD::UNDEF) {

1441

uint64_t EltUndefBits = ~0U >> (32-EltBitSize);

1442

UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);

1443

continue;

1444

} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

1445

EltBits = CN->getValue() & (~0U >> (32-EltBitSize));

1446

} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {

1447

assert(CN->getValueType(0) == MVT::f32 &&

1448

"Only one legal FP vector type!");

1449

EltBits = FloatToBits(CN->getValue());

1450

} else {

1451

// Nonconstant element.

return true;

}

VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);

1456

}

1457

1458

//printf("%llx %llx %llx %llx\n",

1459

// VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);

1460

return false;

1461

}

Chris Lattner

2006-03-20 06:33:01 +0000

[diff] [blame]

1462

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1463

// isConstantSplat - Decide whether a 128-bit constant (given as two 64-bit
// words plus matching undef masks) is a repetition of a single 32-, 16- or
// 8-bit value, treating undefined bits as wildcards.
//
// Returns false if the value is not even a repetition of one 32-bit pattern.
// Otherwise returns true with SplatSize set to the smallest repeating element
// size in bytes (4, 2 or 1), SplatBits set to that element's bits, and
// SplatUndef set to the bits that are undefined in every repetition.  For
// example 0x01010101... repeats as 0x01010101, 0x0101 and 0x01; the result is
// SplatBits = 0x01 with SplatSize = 1.
//
// The checks rely on defined bits being zero in Bits128 wherever the matching
// Undef128 bit is set.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned &SplatBits, unsigned &SplatUndef,
                            unsigned &SplatSize) {
  const uint64_t BitsA = Bits128[0], BitsB = Bits128[1];
  const uint64_t UndefA = Undef128[0], UndefB = Undef128[1];

  // The two 64-bit words must agree wherever both are defined.
  if ((BitsA & ~UndefB) != (BitsB & ~UndefA))
    return false;

  // Fold to 64 bits: a bit is known if either word defines it.
  const uint64_t Bits64 = BitsA | BitsB;
  const uint64_t Undef64 = UndefA & UndefB;
  const uint64_t Known64 = ~Undef64;

  // Likewise the two 32-bit halves must agree wherever both are defined.
  if ((Bits64 & (Known64 >> 32)) != ((Bits64 >> 32) & Known64))
    return false;

  const uint32_t Bits32 = uint32_t(Bits64 | (Bits64 >> 32));
  const uint32_t Undef32 = uint32_t(Undef64 & (Undef64 >> 32));
  const uint32_t Known32 = ~Undef32;

  // If the 16-bit halves disagree, the smallest element is 32 bits wide.
  const bool Halves16Agree =
    (Bits32 & (Known32 >> 16)) == ((Bits32 >> 16) & Known32);
  if (!Halves16Agree) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  const uint16_t Bits16 = uint16_t(Bits32 | (Bits32 >> 16));
  const uint16_t Undef16 = uint16_t(Undef32 & (Undef32 >> 16));
  const uint16_t Known16 = uint16_t(~Undef16);

  // If the two bytes disagree, the smallest element is 16 bits wide.
  const bool BytesAgree =
    (Bits16 & (Known16 >> 8)) == ((Bits16 >> 8) & Known16);
  if (!BytesAgree) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // Everything agrees down to a single byte.
  SplatBits = uint8_t(Bits16 | (Bits16 >> 8));
  SplatUndef = uint8_t(Undef16 & (Undef16 >> 8));
  SplatSize = 1;
  return true;
}

Chris Lattner

2006-04-17 06:00:21 +0000

[diff] [blame]

1516

/// BuildSplatI - Build a canonical splati of Val with an element size of

1517

/// SplatSize. Cast the result to VT.

1518

static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,

1519

SelectionDAG &DAG) {

1520

assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1521

1522

// Force vspltis[hw] -1 to vspltisb -1.

1523

if (Val == -1) SplatSize = 1;

1524

Chris Lattner

4a998b9

2006-04-17 06:00:21 +0000

[diff] [blame]

1525

static const MVT::ValueType VTys[] = { // canonical VT to use for each size.

1526

MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32

1527

};

1528

MVT::ValueType CanonicalVT = VTys[SplatSize-1];

1529

1530

// Build a canonical splat for this value.

1531

SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));

1532

std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);

1533

SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);

1534

return DAG.getNode(ISD::BIT_CONVERT, VT, Res);

1535

}

1536

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1537

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1538

/// specified intrinsic ID.

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1539

static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,

1540

SelectionDAG &DAG,

1541

MVT::ValueType DestVT = MVT::Other) {

1542

if (DestVT == MVT::Other) DestVT = LHS.getValueType();

1543

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1544

DAG.getConstant(IID, MVT::i32), LHS, RHS);

1545

}

1546

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1547

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the

1548

/// specified intrinsic ID.

1549

static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,

1550

SDOperand Op2, SelectionDAG &DAG,

1551

MVT::ValueType DestVT = MVT::Other) {

1552

if (DestVT == MVT::Other) DestVT = Op0.getValueType();

1553

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,

1554

DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);

}

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1558

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified

1559

/// amount. The result has the specified value type.

1560

static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,

1561

MVT::ValueType VT, SelectionDAG &DAG) {

1562

// Force LHS/RHS to be the right type.

1563

LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);

1564

RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);

1565

1566

std::vector<SDOperand> Ops;

1567

for (unsigned i = 0; i != 16; ++i)

1568

Ops.push_back(DAG.getConstant(i+Amt, MVT::i32));

1569

SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,

1570

DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));

1571

return DAG.getNode(ISD::BIT_CONVERT, VT, T);

1572

}

1573

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1574

// If this is a case we can't handle, return null and let the default

1575

// expansion code take care of it. If we CAN select this case, and if it

1576

// selects to a single instruction, return Op. Otherwise, if we can codegen

1577

// this case more efficiently than a constant pool load, lower it to the

1578

// sequence of ops that should be used.

1579

static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {

1580

// If this is a vector of constants or undefs, get the bits. A bit in

1581

// UndefBits is set if the corresponding element of the vector is an

1582

// ISD::UNDEF value. For undefs, the corresponding VectorBits values are

1583

// zero.

1584

uint64_t VectorBits[2];

1585

uint64_t UndefBits[2];

1586

if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))

1587

return SDOperand(); // Not a constant vector.

1588

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1589

// If this is a splat (repetition) of a value across the whole vector, return

1590

// the smallest size that splats it. For example, "0x01010101010101..." is a

1591

// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and

1592

// SplatSize = 1 byte.

1593

unsigned SplatBits, SplatUndef, SplatSize;

1594

if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){

1595

bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;

1596

1597

// First, handle single instruction cases.

1598

1599

// All zeros?

1600

if (SplatBits == 0) {

1601

// Canonicalize all zero vectors to be v4i32.

1602

if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {

1603

SDOperand Z = DAG.getConstant(0, MVT::i32);

1604

Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);

1605

Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);

1606

}

1607

return Op;

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1608

}

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1609

1610

// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].

1611

int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);

Chris Lattner

4a998b9

2006-04-17 06:00:21 +0000

[diff] [blame]

1612

if (SextVal >= -16 && SextVal <= 15)

1613

return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1614

Chris Lattner

dbce85d

2006-04-17 18:09:22 +0000

[diff] [blame]

1615

1616

// Two instruction sequences.

1617

Chris Lattner

4a998b9

2006-04-17 06:00:21 +0000

[diff] [blame]

1618

// If this value is in the range [-32,30] and is even, use:

1619

// tmp = VSPLTI[bhw], result = add tmp, tmp

1620

if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {

1621

Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);

1622

return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);

1623

}

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1624

1625

// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is

1626

// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important

1627

// for fneg/fabs.

1628

if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {

1629

// Make -1 and vspltisw -1:

1630

SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);

1631

1632

// Make the VSLW intrinsic, computing 0x8000_0000.

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1633

SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,

1634

OnesV, DAG);

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1635

1636

// xor by OnesV to invert it.

1637

Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);

1638

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);

1639

}

1640

1641

// Check to see if this is a wide variety of vsplti*, binop self cases.

1642

unsigned SplatBitSize = SplatSize*8;

1643

static const char SplatCsts[] = {

1644

-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,

Chris Lattner

dbce85d

2006-04-17 18:09:22 +0000

[diff] [blame]

1645

-8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1646

};

1647

for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){

1648

// Indirect through the SplatCsts array so that we favor 'vsplti -1' for

1649

// cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'

1650

int i = SplatCsts[idx];

1651

1652

// Figure out what shift amount will be used by altivec if shifted by i in

1653

// this splat size.

1654

unsigned TypeShiftAmt = i & (SplatBitSize-1);

1655

1656

// vsplti + shl self.

1657

if (SextVal == (i << (int)TypeShiftAmt)) {

1658

Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);

1659

static const unsigned IIDs[] = { // Intrinsic to use for each size.

1660

Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,

1661

Intrinsic::ppc_altivec_vslw

1662

};

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1663

return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1664

}

1665

1666

// vsplti + srl self.

1667

if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {

1668

Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);

1669

static const unsigned IIDs[] = { // Intrinsic to use for each size.

1670

Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,

1671

Intrinsic::ppc_altivec_vsrw

1672

};

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1673

return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1674

}

1675

1676

// vsplti + sra self.

1677

if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {

1678

Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);

1679

static const unsigned IIDs[] = { // Intrinsic to use for each size.

1680

Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,

1681

Intrinsic::ppc_altivec_vsraw

1682

};

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1683

return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1684

}

1685

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1686

// vsplti + rol self.

1687

if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |

1688

((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {

1689

Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);

1690

static const unsigned IIDs[] = { // Intrinsic to use for each size.

1691

Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,

1692

Intrinsic::ppc_altivec_vrlw

1693

};

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

1694

return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1695

}

1696

1697

// t = vsplti c, result = vsldoi t, t, 1

1698

if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {

1699

SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);

1700

return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);

1701

}

1702

// t = vsplti c, result = vsldoi t, t, 2

1703

if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {

1704

SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);

1705

return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);

1706

}

1707

// t = vsplti c, result = vsldoi t, t, 3

1708

if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {

1709

SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);

1710

return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);

1711

}

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1712

}

1713

Chris Lattner

2006-04-17 06:58:41 +0000

[diff] [blame]

1714

// Three instruction sequences.

1715

Chris Lattner

dbce85d

2006-04-17 18:09:22 +0000

[diff] [blame]

1716

// Odd, in range [17,31]: (vsplti C)-(vsplti -16).

1717

if (SextVal >= 0 && SextVal <= 31) {

1718

SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, Op.getValueType(),DAG);

1719

SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);

1720

return DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);

1721

}

1722

// Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).

1723

if (SextVal >= -31 && SextVal <= 0) {

1724

SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, Op.getValueType(),DAG);

1725

SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);

Chris Lattner

c408382

2006-04-17 06:07:44 +0000

[diff] [blame]

1726

return DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1727

}

1728

}

Chris Lattner

2006-04-16 01:01:29 +0000

[diff] [blame]

1729

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

return SDOperand();

}

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1733

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit

1734

/// the specified operations to build the shuffle.

1735

static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,

1736

SDOperand RHS, SelectionDAG &DAG) {

1737

unsigned OpNum = (PFEntry >> 26) & 0x0F;

1738

unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);

1739

unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

1740

1741

enum {

Chris Lattner

00402c7

2006-05-16 04:20:24 +0000

[diff] [blame]

1742

OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

OP_VMRGHW,

OP_VMRGLW,

OP_VSPLTISW0,

OP_VSPLTISW1,

OP_VSPLTISW2,

OP_VSPLTISW3,

OP_VSLDOI4,

OP_VSLDOI8,

Chris Lattner

d74ea2b

2006-05-24 17:04:05 +0000

[diff] [blame]

1751

OP_VSLDOI12

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1752

};

1753

1754

if (OpNum == OP_COPY) {

1755

if (LHSID == (1*9+2)*9+3) return LHS;

1756

assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");

return RHS;

}

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1760

SDOperand OpLHS, OpRHS;

1761

OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);

1762

OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);

1763

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1764

unsigned ShufIdxs[16];

1765

switch (OpNum) {

1766

default: assert(0 && "Unknown i32 permute!");

1767

case OP_VMRGHW:

1768

ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;

1769

ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;

1770

ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;

1771

ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;

1772

break;

1773

case OP_VMRGLW:

1774

ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;

1775

ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;

1776

ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;

1777

ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;

1778

break;

1779

case OP_VSPLTISW0:

1780

for (unsigned i = 0; i != 16; ++i)

1781

ShufIdxs[i] = (i&3)+0;

1782

break;

1783

case OP_VSPLTISW1:

1784

for (unsigned i = 0; i != 16; ++i)

1785

ShufIdxs[i] = (i&3)+4;

1786

break;

1787

case OP_VSPLTISW2:

1788

for (unsigned i = 0; i != 16; ++i)

1789

ShufIdxs[i] = (i&3)+8;

1790

break;

1791

case OP_VSPLTISW3:

1792

for (unsigned i = 0; i != 16; ++i)

1793

ShufIdxs[i] = (i&3)+12;

1794

break;

1795

case OP_VSLDOI4:

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1796

return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1797

case OP_VSLDOI8:

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1798

return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1799

case OP_VSLDOI12:

Chris Lattner

2006-04-17 17:55:10 +0000

[diff] [blame]

1800

return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1801

}

1802

std::vector<SDOperand> Ops;

1803

for (unsigned i = 0; i != 16; ++i)

1804

Ops.push_back(DAG.getConstant(ShufIdxs[i], MVT::i32));

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1805

1806

return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,

1807

DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));

1808

}

1809

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1810

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this

1811

/// is a shuffle we can handle in a single instruction, return it. Otherwise,

1812

/// return the code it can be lowered into. Worst case, it can always be

1813

/// lowered into a vperm.

1814

static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {

1815

SDOperand V1 = Op.getOperand(0);

1816

SDOperand V2 = Op.getOperand(1);

1817

SDOperand PermMask = Op.getOperand(2);

1818

1819

// Cases that are handled by instructions that take permute immediates

1820

// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be

1821

// selected by the instruction selector.

1822

if (V2.getOpcode() == ISD::UNDEF) {

1823

if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||

1824

PPC::isSplatShuffleMask(PermMask.Val, 2) ||

1825

PPC::isSplatShuffleMask(PermMask.Val, 4) ||

1826

PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||

1827

PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||

1828

PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||

1829

PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||

1830

PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||

1831

PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||

1832

PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||

1833

PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||

1834

PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {

return Op;

}

}

// Altivec has a variety of "shuffle immediates" that take two vector inputs

1840

// and produce a fixed permutation. If any of these match, do not lower to

1841

// VPERM.

1842

if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||

1843

PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||

1844

PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||

1845

PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||

1846

PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||

1847

PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||

1848

PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||

1849

PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||

1850

PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))

1851

return Op;

1852

Chris Lattner

2006-04-17 05:28:54 +0000

[diff] [blame]

1853

// Check to see if this is a shuffle of 4-byte values. If so, we can use our

1854

// perfect shuffle table to emit an optimal matching sequence.

1855

unsigned PFIndexes[4];

1856

bool isFourElementShuffle = true;

1857

for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number

1858

unsigned EltNo = 8; // Start out undef.

1859

for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.

1860

if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)

1861

continue; // Undef, ignore it.

1862

1863

unsigned ByteSource =

1864

cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();

1865

if ((ByteSource & 3) != j) {

1866

isFourElementShuffle = false;

break;

}

if (EltNo == 8) {

EltNo = ByteSource/4;

1872

} else if (EltNo != ByteSource/4) {

1873

isFourElementShuffle = false;

break;

}

}

PFIndexes[i] = EltNo;

1878

}

1879

1880

// If this shuffle can be expressed as a shuffle of 4-byte elements, use the

1881

// perfect shuffle vector to determine if it is cost effective to do this as

1882

// discrete instructions, or whether we should use a vperm.

1883

if (isFourElementShuffle) {

1884

// Compute the index in the perfect shuffle table.

1885

unsigned PFTableIndex =

1886

PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

1887

1888

unsigned PFEntry = PerfectShuffleTable[PFTableIndex];

1889

unsigned Cost = (PFEntry >> 30);

1890

1891

// Determining when to avoid vperm is tricky. Many things affect the cost

1892

// of vperm, particularly how many times the perm mask needs to be computed.

1893

// For example, if the perm mask can be hoisted out of a loop or is already

1894

// used (perhaps because there are multiple permutes with the same shuffle

1895

// mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of

1896

// the loop requires an extra register.

1897

//

1898

// As a compromise, we only emit discrete instructions if the shuffle can be

1899

// generated in 3 or fewer operations. When we have loop information

1900

// available, if this block is within a loop, we should avoid using vperm

1901

// for 3-operation perms and use a constant pool load instead.

1902

if (Cost < 3)

1903

return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);

1904

}

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1905

1906

// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant

1907

// vector that will get spilled to the constant pool.

1908

if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

1909

1910

// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except

1911

// that it is in input element units, not in bytes. Convert now.

1912

MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());

1913

unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

1914

1915

std::vector<SDOperand> ResultMask;

1916

for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {

Chris Lattner

730b456

2006-04-15 23:48:05 +0000

[diff] [blame]

1917

unsigned SrcElt;

1918

if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)

1919

SrcElt = 0;

1920

else

1921

SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

Chris Lattner

2006-04-14 05:19:18 +0000

[diff] [blame]

1922

1923

for (unsigned j = 0; j != BytesPerElement; ++j)

1924

ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,

MVT::i8));

}

SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);

1929

return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);

1930

}

1931

Chris Lattner

2006-04-18 17:59:36 +0000

[diff] [blame]

1932

/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an

1933

/// altivec comparison. If it is, return true and fill in Opc/isDot with

1934

/// information about the intrinsic.

1935

static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,

1936

bool &isDot) {

1937

unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();

1938

CompareOpc = -1;

1939

isDot = false;

1940

switch (IntrinsicID) {

1941

default: return false;

1942

// Comparison predicates.

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1943

case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;

1944

case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;

1945

case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;

1946

case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;

1947

case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;

1948

case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;

1949

case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;

1950

case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;

1951

case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;

1952

case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;

1953

case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;

1954

case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;

1955

case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;

1956

1957

// Normal Comparisons.

1958

case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;

1959

case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;

1960

case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;

1961

case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;

1962

case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;

1963

case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;

1964

case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;

1965

case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;

1966

case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;

1967

case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;

1968

case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;

1969

case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;

1970

case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;

1971

}

Chris Lattner

2006-04-18 17:59:36 +0000

[diff] [blame]

return true;

}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom

1976

/// lower, do it, otherwise return null.

1977

static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {

1978

// If this is a lowered altivec predicate compare, CompareOpc is set to the

1979

// opcode number of the comparison.

1980

int CompareOpc;

1981

bool isDot;

1982

if (!getAltivecCompareInfo(Op, CompareOpc, isDot))

1983

return SDOperand(); // Don't custom lower most intrinsics.

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1984

Chris Lattner

2006-04-18 17:59:36 +0000

[diff] [blame]

1985

// If this is a non-dot comparison, make the VCMP node and we are done.

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

1986

if (!isDot) {

1987

SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),

1988

Op.getOperand(1), Op.getOperand(2),

1989

DAG.getConstant(CompareOpc, MVT::i32));

1990

return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);

1991

}

1992

1993

// Create the PPCISD altivec 'dot' comparison node.

1994

std::vector<SDOperand> Ops;

1995

std::vector<MVT::ValueType> VTs;

1996

Ops.push_back(Op.getOperand(2)); // LHS

1997

Ops.push_back(Op.getOperand(3)); // RHS

1998

Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));

1999

VTs.push_back(Op.getOperand(2).getValueType());

2000

VTs.push_back(MVT::Flag);

2001

SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);

2002

2003

// Now that we have the comparison, emit a copy from the CR to a GPR.

2004

// This is flagged to the above dot comparison.

2005

SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,

2006

DAG.getRegister(PPC::CR6, MVT::i32),

2007

CompNode.getValue(1));

2008

2009

// Unpack the result based on how the target uses it.

2010

unsigned BitNo; // Bit # of CR6.

2011

bool InvertBit; // Invert result?

2012

switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {

2013

default: // Can't happen, don't crash on invalid number though.

2014

case 0: // Return the value of the EQ bit of CR6.

2015

BitNo = 0; InvertBit = false;

2016

break;

2017

case 1: // Return the inverted value of the EQ bit of CR6.

2018

BitNo = 0; InvertBit = true;

2019

break;

2020

case 2: // Return the value of the LT bit of CR6.

2021

BitNo = 2; InvertBit = false;

2022

break;

2023

case 3: // Return the inverted value of the LT bit of CR6.

2024

BitNo = 2; InvertBit = true;

break;

}

// Shift the bit into the low position.

2029

Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,

2030

DAG.getConstant(8-(3-BitNo), MVT::i32));

2031

// Isolate the bit.

2032

Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,

2033

DAG.getConstant(1, MVT::i32));

2034

2035

// If we are supposed to, toggle the bit.

2036

if (InvertBit)

2037

Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,

2038

DAG.getConstant(1, MVT::i32));

return Flags;

}

/// LowerSCALAR_TO_VECTOR - Lower SCALAR_TO_VECTOR through memory: store the
/// scalar operand at the start of a fresh 16-byte stack object and load the
/// whole object back in the vector result type.
static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  SDOperand SlotAddr = DAG.getFrameIndex(MFI->CreateStackObject(16, 16),
                                         MVT::i32);

  // Store the input value into Value#0 of the stack slot.  The store hangs
  // off the entry node rather than any existing chain.
  SDOperand Scalar = Op.getOperand(0);
  SDOperand StoreSV = DAG.getSrcValue(NULL);
  SDOperand StoreChain = DAG.getNode(ISD::STORE, MVT::Other,
                                     DAG.getEntryNode(), Scalar, SlotAddr,
                                     StoreSV);

  // Load it back out as a vector, chained after the store.
  SDOperand LoadSV = DAG.getSrcValue(NULL);
  return DAG.getLoad(Op.getValueType(), StoreChain, SlotAddr, LoadSV);
}

2054

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

2055

/// LowerMUL - Custom lower a vector MUL of type v4i32, v8i16 or v16i8 into
/// altivec multiply intrinsics.  Any other type asserts and then aborts.
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
  const MVT::ValueType VT = Op.getValueType();

  if (VT == MVT::v4i32) {
    // Build the 32-bit product from 16-bit multiplies:
    //   result = lo(L)*lo(R) + ((lo(L)*hi(R) + hi(L)*lo(R)) << 16)
    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDOperand Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG);
    SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt.

    // RHS with the halfwords of every word swapped (= vrlw RHS, 16).
    SDOperand RHSSwap =
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, MVT::v4i32);

    // Cross products, summed per word by vmsumuhm with a zero addend.
    SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                        LHS, RHSSwap, Zero, DAG, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG);

    return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd);
  }

  if (VT == MVT::v8i16) {
    // A multiply-add with a zero addend is just a multiply.
    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG);
    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG);
  }

  if (VT == MVT::v16i8) {
    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts);

    // Merge the results together: take the odd-numbered byte of each 16-bit
    // product, alternating between the even and the odd products.
    std::vector<SDOperand> MergeMask;
    for (unsigned Pair = 0; Pair != 8; ++Pair) {
      const unsigned OddByte = 2*Pair+1;
      MergeMask.push_back(DAG.getConstant(OddByte,    MVT::i8));
      MergeMask.push_back(DAG.getConstant(OddByte+16, MVT::i8));
    }

    return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, MergeMask));
  }

  assert(0 && "Unknown mul to lower!");
  abort();
}

2114

Chris Lattner

2005-08-26 00:52:45 +0000

[diff] [blame]

2115

/// LowerOperation - Provide custom lowering hooks for some operations.

2116

///

Nate Begeman

2005-10-16 05:39:50 +0000

[diff] [blame]

2117

SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {

Chris Lattner

2005-08-26 00:52:45 +0000

[diff] [blame]

2118

switch (Op.getOpcode()) {

2119

default: assert(0 && "Wasn't expecting to be able to lower this!");

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2120

case ISD::ConstantPool: return LowerConstantPool(Op, DAG);

2121

case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);

Nate Begeman

37efe67

2006-04-22 18:53:45 +0000

[diff] [blame]

2122

case ISD::JumpTable: return LowerJumpTable(Op, DAG);

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2123

case ISD::SETCC: return LowerSETCC(Op, DAG);

2124

case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);

Chris Lattner

2006-05-16 18:18:50 +0000

[diff] [blame]

2125

case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG,

2126

VarArgsFrameIndex);

Chris Lattner

2006-05-16 22:56:08 +0000

[diff] [blame]

2127

case ISD::CALL: return LowerCALL(Op, DAG);

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2128

case ISD::RET: return LowerRET(Op, DAG);

Chris Lattner

7c0d664

2005-10-02 06:37:13 +0000

[diff] [blame]

2129

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2130

case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);

2131

case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);

2132

case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

2133

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2134

// Lower 64-bit shifts.

2135

case ISD::SHL: return LowerSHL(Op, DAG);

2136

case ISD::SRL: return LowerSRL(Op, DAG);

2137

case ISD::SRA: return LowerSRA(Op, DAG);

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

2138

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2139

// Vector-related lowering.

2140

case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);

2141

case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);

2142

case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);

2143

case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);

Chris Lattner

2006-04-18 03:24:30 +0000

[diff] [blame]

2144

case ISD::MUL: return LowerMUL(Op, DAG);

Chris Lattner

bc11c34

2005-08-31 20:23:54 +0000

[diff] [blame]

2145

}

Chris Lattner

2005-08-26 00:52:45 +0000

[diff] [blame]

return SDOperand();

}

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2149

//===----------------------------------------------------------------------===//

2150

// Other Lowering Code

2151

//===----------------------------------------------------------------------===//

2152

Chris Lattner

2005-08-26 21:23:58 +0000

[diff] [blame]

2153

/// InsertAtEndOfBasicBlock - Expand a SELECT_CC_* pseudo instruction into a
/// control-flow diamond.  MI supplies the destination vreg (operand 0), the
/// condition register to branch on (operand 1), the true and false values
/// (operands 2 and 3) and the branch opcode to use (operand 4).  MI is deleted
/// and the block that now holds the PHI is returned.
MachineBasicBlock *
PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8 ||
          MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
         "Unexpected instr type to insert");

  // The new blocks map to the same LLVM basic block as BB and are placed in
  // the function immediately after it.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  ilist<MachineBasicBlock>::iterator InsertPos = BB;
  ++InsertPos;

  //  headMBB:
  //   ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC sinkMBB
  //   fallthrough --> falseMBB
  MachineBasicBlock *headMBB  = BB;
  MachineBasicBlock *falseMBB = new MachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB  = new MachineBasicBlock(LLVM_BB);

  // Append the conditional branch to the end of the head block.
  unsigned BranchOpc = MI->getOperand(4).getImmedValue();
  BuildMI(headMBB, BranchOpc, 2)
    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);

  MachineFunction *MF = headMBB->getParent();
  MF->getBasicBlockList().insert(InsertPos, falseMBB);
  MF->getBasicBlockList().insert(InsertPos, sinkMBB);

  // Update the machine CFG: every successor of the head block moves over to
  // the sink block, which will contain the PHI node for the select.
  while (!headMBB->succ_empty()) {
    sinkMBB->addSuccessor(*headMBB->succ_begin());
    headMBB->removeSuccessor(headMBB->succ_begin());
  }
  // The head block now leads only to the fallthrough and sink blocks.
  headMBB->addSuccessor(falseMBB);
  headMBB->addSuccessor(sinkMBB);

  //  falseMBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  falseMBB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, falseMBB ], [ %TrueValue, headMBB ]
  //  ...
  unsigned ResultReg = MI->getOperand(0).getReg();
  unsigned TrueReg   = MI->getOperand(2).getReg();
  unsigned FalseReg  = MI->getOperand(3).getReg();
  BuildMI(sinkMBB, PPC::PHI, 4, ResultReg)
    .addReg(FalseReg).addMBB(falseMBB)
    .addReg(TrueReg).addMBB(headMBB);

  delete MI;   // The pseudo instruction is gone now.
  return sinkMBB;
}

Chris Lattner

2006-04-14 06:01:58 +0000

[diff] [blame]

2216

//===----------------------------------------------------------------------===//

2217

// Target Optimization Hooks

2218

//===----------------------------------------------------------------------===//

2219

Chris Lattner

2006-03-01 04:57:39 +0000

[diff] [blame]

2220

SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,

2221

DAGCombinerInfo &DCI) const {

2222

TargetMachine &TM = getTargetMachine();

2223

SelectionDAG &DAG = DCI.DAG;

2224

switch (N->getOpcode()) {

2225

default: break;

2226

case ISD::SINT_TO_FP:

2227

if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

2228

if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {

2229

// Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.

2230

// We allow the src/dst to be either f32/f64, but the intermediate

2231

// type must be i64.

2232

if (N->getOperand(0).getValueType() == MVT::i64) {

2233

SDOperand Val = N->getOperand(0).getOperand(0);

2234

if (Val.getValueType() == MVT::f32) {

2235

Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);

2236

DCI.AddToWorklist(Val.Val);

2237

}

2238

2239

Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);

Chris Lattner

2006-03-01 04:57:39 +0000

[diff] [blame]

2240

DCI.AddToWorklist(Val.Val);

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

2241

Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);

Chris Lattner

2006-03-01 04:57:39 +0000

[diff] [blame]

2242

DCI.AddToWorklist(Val.Val);

Chris Lattner

2006-03-22 05:30:33 +0000

[diff] [blame]

2243

if (N->getValueType(0) == MVT::f32) {

2244

Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);

2245

DCI.AddToWorklist(Val.Val);

2246

}

2247

return Val;

2248

} else if (N->getOperand(0).getValueType() == MVT::i32) {

2249

// If the intermediate type is i32, we can avoid the load/store here

2250

// too.

Chris Lattner

2006-03-01 04:57:39 +0000

[diff] [blame]

2251

}

Chris Lattner

2006-03-01 04:57:39 +0000

[diff] [blame]

2252

}

2253

}

2254

break;

Chris Lattner

5126984

2006-03-01 05:50:56 +0000

[diff] [blame]

2255

case ISD::STORE:

2256

// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).

2257

if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&

2258

N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&

2259

N->getOperand(1).getValueType() == MVT::i32) {

2260

SDOperand Val = N->getOperand(1).getOperand(0);

2261

if (Val.getValueType() == MVT::f32) {

2262

Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);

2263

DCI.AddToWorklist(Val.Val);

2264

}

2265

Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);

2266

DCI.AddToWorklist(Val.Val);

2267

2268

Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,

2269

N->getOperand(2), N->getOperand(3));

2270

DCI.AddToWorklist(Val.Val);

2271

return Val;

2272

}

2273

break;

Chris Lattner

4468c22

2006-03-31 06:02:07 +0000

[diff] [blame]

2274

case PPCISD::VCMP: {

2275

// If a VCMPo node already exists with exactly the same operands as this

2276

// node, use its result instead of this node (VCMPo computes both a CR6 and

2277

// a normal output).

2278

//

2279

if (!N->getOperand(0).hasOneUse() &&

2280

!N->getOperand(1).hasOneUse() &&

2281

!N->getOperand(2).hasOneUse()) {

2282

2283

// Scan all of the users of the LHS, looking for VCMPo's that match.

2284

SDNode *VCMPoNode = 0;

2285

2286

SDNode *LHSN = N->getOperand(0).Val;

2287

for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();

2288

UI != E; ++UI)

2289

if ((*UI)->getOpcode() == PPCISD::VCMPo &&

2290

(*UI)->getOperand(1) == N->getOperand(1) &&

2291

(*UI)->getOperand(2) == N->getOperand(2) &&

2292

(*UI)->getOperand(0) == N->getOperand(0)) {

VCMPoNode = *UI;

break;

}

Chris Lattner

2006-04-18 18:28:22 +0000

[diff] [blame]

2297

// If there is no VCMPo node, or if the flag value has a single use, don't

2298

// transform this.

2299

if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))

2300

break;

2301

2302

// Look at the (necessarily single) use of the flag value. If it has a

2303

// chain, this transformation is more complex. Note that multiple things

2304

// could use the value result, which we should ignore.

2305

SDNode *FlagUser = 0;

2306

for (SDNode::use_iterator UI = VCMPoNode->use_begin();

2307

FlagUser == 0; ++UI) {

2308

assert(UI != VCMPoNode->use_end() && "Didn't find user!");

2309

SDNode *User = *UI;

2310

for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {

2311

if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {

FlagUser = User;

break;

}

}

}

// If the user is a MFCR instruction, we know this is safe. Otherwise we

2319

// give up for right now.

2320

if (FlagUser->getOpcode() == PPCISD::MFCR)

Chris Lattner

4468c22

2006-03-31 06:02:07 +0000

[diff] [blame]

2321

return SDOperand(VCMPoNode, 0);

2322

}

2323

break;

2324

}

Chris Lattner

2006-04-18 17:59:36 +0000

[diff] [blame]

2325

case ISD::BR_CC: {

2326

// If this is a branch on an altivec predicate comparison, lower this so

2327

// that we don't have to do a MFCR: instead, branch directly on CR6. This

2328

// lowering is done pre-legalize, because the legalizer lowers the predicate

2329

// compare down to code that is difficult to reassemble.

2330

ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();

2331

SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);

int CompareOpc;

bool isDot;

if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&

2336

isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&

2337

getAltivecCompareInfo(LHS, CompareOpc, isDot)) {

2338

assert(isDot && "Can't compare against a vector result!");

2339

2340

// If this is a comparison against something other than 0/1, then we know

2341

// that the condition is never/always true.

2342

unsigned Val = cast<ConstantSDNode>(RHS)->getValue();

2343

if (Val != 0 && Val != 1) {

2344

if (CC == ISD::SETEQ) // Cond never true, remove branch.

2345

return N->getOperand(0);

2346

// Always !=, turn it into an unconditional branch.

2347

return DAG.getNode(ISD::BR, MVT::Other,

2348

N->getOperand(0), N->getOperand(4));

2349

}

2350

2351

bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

2352

2353

// Create the PPCISD altivec 'dot' comparison node.

2354

std::vector<SDOperand> Ops;

2355

std::vector<MVT::ValueType> VTs;

2356

Ops.push_back(LHS.getOperand(2)); // LHS of compare

2357

Ops.push_back(LHS.getOperand(3)); // RHS of compare

2358

Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));

2359

VTs.push_back(LHS.getOperand(2).getValueType());

2360

VTs.push_back(MVT::Flag);

2361

SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);

2362

2363

// Unpack the result based on how the target uses it.

2364

unsigned CompOpc;

2365

switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {

2366

default: // Can't happen, don't crash on invalid number though.

2367

case 0: // Branch on the value of the EQ bit of CR6.

2368

CompOpc = BranchOnWhenPredTrue ? PPC::BEQ : PPC::BNE;

2369

break;

2370

case 1: // Branch on the inverted value of the EQ bit of CR6.

2371

CompOpc = BranchOnWhenPredTrue ? PPC::BNE : PPC::BEQ;

2372

break;

2373

case 2: // Branch on the value of the LT bit of CR6.

2374

CompOpc = BranchOnWhenPredTrue ? PPC::BLT : PPC::BGE;

2375

break;

2376

case 3: // Branch on the inverted value of the LT bit of CR6.

2377

CompOpc = BranchOnWhenPredTrue ? PPC::BGE : PPC::BLT;

break;

}

return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),

2382

DAG.getRegister(PPC::CR6, MVT::i32),

2383

DAG.getConstant(CompOpc, MVT::i32),

2384

N->getOperand(4), CompNode.getValue(1));

2385

}

2386

break;

2387

}

Chris Lattner