//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARMSelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
#include <cstring>
using namespace llvm;

// Name used to tag this file's debug output.
#define DEBUG_TYPE "arm-selectiondag-info"

John Brawn70605f72015-05-12 13:13:38 +000021// Emit, if possible, a specialized version of the given Libcall. Typically this
22// means selecting the appropriately aligned version, but we also convert memset
23// of 0 into memclr.
Benjamin Kramerbdc49562016-06-12 15:39:02 +000024SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
25 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
26 SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
John Brawn70605f72015-05-12 13:13:38 +000027 const ARMSubtarget &Subtarget =
28 DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
29 const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
30
31 // Only use a specialized AEABI function if the default version of this
32 // Libcall is an AEABI function.
33 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
34 return SDValue();
35
36 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
37 // able to translate memset to memclr and use the value to index the function
38 // name array.
39 enum {
40 AEABI_MEMCPY = 0,
41 AEABI_MEMMOVE,
42 AEABI_MEMSET,
43 AEABI_MEMCLR
44 } AEABILibcall;
45 switch (LC) {
46 case RTLIB::MEMCPY:
47 AEABILibcall = AEABI_MEMCPY;
48 break;
49 case RTLIB::MEMMOVE:
50 AEABILibcall = AEABI_MEMMOVE;
51 break;
52 case RTLIB::MEMSET:
53 AEABILibcall = AEABI_MEMSET;
54 if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
55 if (ConstantSrc->getZExtValue() == 0)
56 AEABILibcall = AEABI_MEMCLR;
57 break;
58 default:
59 return SDValue();
60 }
61
62 // Choose the most-aligned libcall variant that we can
63 enum {
64 ALIGN1 = 0,
65 ALIGN4,
66 ALIGN8
67 } AlignVariant;
68 if ((Align & 7) == 0)
69 AlignVariant = ALIGN8;
70 else if ((Align & 3) == 0)
71 AlignVariant = ALIGN4;
72 else
73 AlignVariant = ALIGN1;
74
75 TargetLowering::ArgListTy Args;
76 TargetLowering::ArgListEntry Entry;
Mehdi Aminia749f2a2015-07-09 02:09:52 +000077 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
John Brawn70605f72015-05-12 13:13:38 +000078 Entry.Node = Dst;
79 Args.push_back(Entry);
80 if (AEABILibcall == AEABI_MEMCLR) {
81 Entry.Node = Size;
82 Args.push_back(Entry);
83 } else if (AEABILibcall == AEABI_MEMSET) {
84 // Adjust parameters for memset, EABI uses format (ptr, size, value),
85 // GNU library uses (ptr, value, size)
86 // See RTABI section 4.3.4
87 Entry.Node = Size;
88 Args.push_back(Entry);
89
90 // Extend or truncate the argument to be an i32 value for the call.
91 if (Src.getValueType().bitsGT(MVT::i32))
92 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
93 else if (Src.getValueType().bitsLT(MVT::i32))
94 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
95
96 Entry.Node = Src;
97 Entry.Ty = Type::getInt32Ty(*DAG.getContext());
98 Entry.isSExt = false;
99 Args.push_back(Entry);
100 } else {
101 Entry.Node = Src;
102 Args.push_back(Entry);
103
104 Entry.Node = Size;
105 Args.push_back(Entry);
106 }
107
108 char const *FunctionNames[4][3] = {
109 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
110 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
111 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
112 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
113 };
114 TargetLowering::CallLoweringInfo CLI(DAG);
Mehdi Amini44ede332015-07-09 02:09:04 +0000115 CLI.setDebugLoc(dl)
116 .setChain(Chain)
117 .setCallee(
118 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
119 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
120 TLI->getPointerTy(DAG.getDataLayout())),
Krzysztof Parzyszeke116d5002016-06-22 12:54:25 +0000121 std::move(Args))
Mehdi Amini44ede332015-07-09 02:09:04 +0000122 .setDiscardResult();
John Brawn70605f72015-05-12 13:13:38 +0000123 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
124
125 return CallResult.second;
126}
127
Benjamin Kramerbdc49562016-06-12 15:39:02 +0000128SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
129 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
130 SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
131 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
Eric Christopher22b2ad22015-02-20 08:24:37 +0000132 const ARMSubtarget &Subtarget =
133 DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
Dan Gohmanbb919df2010-05-11 17:31:57 +0000134 // Do repeated 4-byte loads and stores. To be improved.
135 // This requires 4-byte alignment.
136 if ((Align & 3) != 0)
137 return SDValue();
Chris Lattner0ab5e2c2011-04-15 05:18:47 +0000138 // This requires the copy size to be a constant, preferably
Dan Gohmanbb919df2010-05-11 17:31:57 +0000139 // within a subtarget-specific limit.
140 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
141 if (!ConstantSize)
John Brawn70605f72015-05-12 13:13:38 +0000142 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
143 RTLIB::MEMCPY);
Dan Gohmanbb919df2010-05-11 17:31:57 +0000144 uint64_t SizeVal = ConstantSize->getZExtValue();
Eric Christopher70e005a2014-06-12 23:39:49 +0000145 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
John Brawn70605f72015-05-12 13:13:38 +0000146 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
147 RTLIB::MEMCPY);
Dan Gohmanbb919df2010-05-11 17:31:57 +0000148
149 unsigned BytesLeft = SizeVal & 3;
150 unsigned NumMemOps = SizeVal >> 2;
151 unsigned EmittedNumMemOps = 0;
152 EVT VT = MVT::i32;
153 unsigned VTSize = 4;
154 unsigned i = 0;
James Molloya70697e2014-05-16 14:24:22 +0000155 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
Scott Douglass953f9082015-10-05 14:49:54 +0000156 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
James Molloya70697e2014-05-16 14:24:22 +0000157 SDValue TFOps[6];
158 SDValue Loads[6];
Dan Gohmanbb919df2010-05-11 17:31:57 +0000159 uint64_t SrcOff = 0, DstOff = 0;
160
Scott Douglass953f9082015-10-05 14:49:54 +0000161 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
162 // VLDM/VSTM and make this code emit it when appropriate. This would reduce
163 // pressure on the general purpose registers. However this seems harder to map
164 // onto the register allocator's view of the world.
Dan Gohmanbb919df2010-05-11 17:31:57 +0000165
Scott Douglass953f9082015-10-05 14:49:54 +0000166 // The number of MEMCPY pseudo-instructions to emit. We use up to
167 // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
168 // later on. This is a lower bound on the number of MEMCPY operations we must
169 // emit.
170 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
Dan Gohmanbb919df2010-05-11 17:31:57 +0000171
Sjoerd Meijer9bc93f62016-06-03 15:38:55 +0000172 // Code size optimisation: do not inline memcpy if expansion results in
173 // more instructions than the libary call.
174 if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) {
175 return SDValue();
176 }
177
Scott Douglass953f9082015-10-05 14:49:54 +0000178 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
179
180 for (unsigned I = 0; I != NumMEMCPYs; ++I) {
181 // Evenly distribute registers among MEMCPY operations to reduce register
182 // pressure.
183 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
184 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
185
186 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
187 DAG.getConstant(NumRegs, dl, MVT::i32));
188 Src = Dst.getValue(1);
189 Chain = Dst.getValue(2);
190
191 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
192 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
193
194 EmittedNumMemOps = NextEmittedNumMemOps;
Dan Gohmanbb919df2010-05-11 17:31:57 +0000195 }
196
197 if (BytesLeft == 0)
198 return Chain;
199
200 // Issue loads / stores for the trailing (1 - 3) bytes.
201 unsigned BytesLeftSave = BytesLeft;
202 i = 0;
203 while (BytesLeft) {
204 if (BytesLeft >= 2) {
205 VT = MVT::i16;
206 VTSize = 2;
207 } else {
208 VT = MVT::i8;
209 VTSize = 1;
210 }
211
212 Loads[i] = DAG.getLoad(VT, dl, Chain,
213 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000214 DAG.getConstant(SrcOff, dl, MVT::i32)),
Justin Lebar9c375812016-07-15 18:27:10 +0000215 SrcPtrInfo.getWithOffset(SrcOff));
Dan Gohmanbb919df2010-05-11 17:31:57 +0000216 TFOps[i] = Loads[i].getValue(1);
217 ++i;
218 SrcOff += VTSize;
219 BytesLeft -= VTSize;
220 }
Craig Topper48d114b2014-04-26 18:35:24 +0000221 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Craig Topper2d2aa0c2014-04-30 07:17:30 +0000222 makeArrayRef(TFOps, i));
Dan Gohmanbb919df2010-05-11 17:31:57 +0000223
224 i = 0;
225 BytesLeft = BytesLeftSave;
226 while (BytesLeft) {
227 if (BytesLeft >= 2) {
228 VT = MVT::i16;
229 VTSize = 2;
230 } else {
231 VT = MVT::i8;
232 VTSize = 1;
233 }
234
235 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
236 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000237 DAG.getConstant(DstOff, dl, MVT::i32)),
Justin Lebar9c375812016-07-15 18:27:10 +0000238 DstPtrInfo.getWithOffset(DstOff));
Dan Gohmanbb919df2010-05-11 17:31:57 +0000239 ++i;
240 DstOff += VTSize;
241 BytesLeft -= VTSize;
242 }
Craig Topper48d114b2014-04-26 18:35:24 +0000243 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Craig Topper2d2aa0c2014-04-30 07:17:30 +0000244 makeArrayRef(TFOps, i));
Dan Gohmanbb919df2010-05-11 17:31:57 +0000245}
Renato Golin4cd51872011-05-22 21:41:23 +0000246
Benjamin Kramerbdc49562016-06-12 15:39:02 +0000247SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
248 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
249 SDValue Size, unsigned Align, bool isVolatile,
250 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
John Brawn70605f72015-05-12 13:13:38 +0000251 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
252 RTLIB::MEMMOVE);
253}
254
Benjamin Kramerbdc49562016-06-12 15:39:02 +0000255SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
256 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
257 SDValue Size, unsigned Align, bool isVolatile,
258 MachinePointerInfo DstPtrInfo) const {
John Brawn70605f72015-05-12 13:13:38 +0000259 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
260 RTLIB::MEMSET);
Renato Golin4cd51872011-05-22 21:41:23 +0000261}