blob: e333183fc958f2a3e5d04c2ae0d5ea297b36f5aa [file] [log] [blame]
//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PTXSelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//
13
14#define DEBUG_TYPE "ptx-selectiondag-info"
15#include "PTXTargetMachine.h"
16#include "llvm/DerivedTypes.h"
17#include "llvm/CodeGen/SelectionDAG.h"
18using namespace llvm;
19
20PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
21 : TargetSelectionDAGInfo(TM),
22 Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
23}
24
25PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
26}
27
28SDValue
29PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
30 SDValue Chain,
31 SDValue Dst, SDValue Src,
32 SDValue Size, unsigned Align,
33 bool isVolatile, bool AlwaysInline,
34 MachinePointerInfo DstPtrInfo,
35 MachinePointerInfo SrcPtrInfo) const {
36 // Do repeated 4-byte loads and stores. To be improved.
37 // This requires 4-byte alignment.
38 if ((Align & 3) != 0)
39 return SDValue();
40 // This requires the copy size to be a constant, preferably
41 // within a subtarget-specific limit.
42 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
43 if (!ConstantSize)
44 return SDValue();
45 uint64_t SizeVal = ConstantSize->getZExtValue();
46 // Always inline memcpys. In PTX, we do not have a C library that provides
47 // a memcpy function.
48 //if (!AlwaysInline)
49 // return SDValue();
50
51 unsigned BytesLeft = SizeVal & 3;
52 unsigned NumMemOps = SizeVal >> 2;
53 unsigned EmittedNumMemOps = 0;
54 EVT VT = MVT::i32;
55 unsigned VTSize = 4;
56 unsigned i = 0;
57 const unsigned MAX_LOADS_IN_LDM = 6;
58 SDValue TFOps[MAX_LOADS_IN_LDM];
59 SDValue Loads[MAX_LOADS_IN_LDM];
60 uint64_t SrcOff = 0, DstOff = 0;
61
62 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
63 // same number of stores. The loads and stores will get combined into
64 // ldm/stm later on.
65 while (EmittedNumMemOps < NumMemOps) {
66 for (i = 0;
67 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
68 Loads[i] = DAG.getLoad(VT, dl, Chain,
69 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
70 DAG.getConstant(SrcOff, MVT::i32)),
71 SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
72 false, 0);
73 TFOps[i] = Loads[i].getValue(1);
74 SrcOff += VTSize;
75 }
76 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
77
78 for (i = 0;
79 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
80 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
81 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
82 DAG.getConstant(DstOff, MVT::i32)),
83 DstPtrInfo.getWithOffset(DstOff),
84 isVolatile, false, 0);
85 DstOff += VTSize;
86 }
87 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
88
89 EmittedNumMemOps += i;
90 }
91
92 if (BytesLeft == 0)
93 return Chain;
94
95 // Issue loads / stores for the trailing (1 - 3) bytes.
96 unsigned BytesLeftSave = BytesLeft;
97 i = 0;
98 while (BytesLeft) {
99 if (BytesLeft >= 2) {
100 VT = MVT::i16;
101 VTSize = 2;
102 } else {
103 VT = MVT::i8;
104 VTSize = 1;
105 }
106
107 Loads[i] = DAG.getLoad(VT, dl, Chain,
108 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
109 DAG.getConstant(SrcOff, MVT::i32)),
110 SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
111 TFOps[i] = Loads[i].getValue(1);
112 ++i;
113 SrcOff += VTSize;
114 BytesLeft -= VTSize;
115 }
116 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
117
118 i = 0;
119 BytesLeft = BytesLeftSave;
120 while (BytesLeft) {
121 if (BytesLeft >= 2) {
122 VT = MVT::i16;
123 VTSize = 2;
124 } else {
125 VT = MVT::i8;
126 VTSize = 1;
127 }
128
129 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
130 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
131 DAG.getConstant(DstOff, MVT::i32)),
132 DstPtrInfo.getWithOffset(DstOff), false, false, 0);
133 ++i;
134 DstOff += VTSize;
135 BytesLeft -= VTSize;
136 }
137 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
138}
139
140SDValue PTXSelectionDAGInfo::
141EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
142 SDValue Chain, SDValue Dst,
143 SDValue Src, SDValue Size,
144 unsigned Align, bool isVolatile,
145 MachinePointerInfo DstPtrInfo) const {
146 llvm_unreachable("memset lowering not implemented for PTX yet");
147}
148