Dan Gohman | 9becddd | 2010-04-16 23:04:22 +0000 | [diff] [blame] | 1 | //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file implements the ARMSelectionDAGInfo class. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 14 | #include "ARMTargetMachine.h" |
Renato Golin | 4cd5187 | 2011-05-22 21:41:23 +0000 | [diff] [blame] | 15 | #include "llvm/CodeGen/SelectionDAG.h" |
Chandler Carruth | 9fb823b | 2013-01-02 11:36:10 +0000 | [diff] [blame] | 16 | #include "llvm/IR/DerivedTypes.h" |
Dan Gohman | 9becddd | 2010-04-16 23:04:22 +0000 | [diff] [blame] | 17 | using namespace llvm; |
| 18 | |
Chandler Carruth | 84e68b2 | 2014-04-22 02:41:26 +0000 | [diff] [blame] | 19 | #define DEBUG_TYPE "arm-selectiondag-info" |
| 20 | |
John Brawn | 70605f7 | 2015-05-12 13:13:38 +0000 | [diff] [blame] | 21 | // Emit, if possible, a specialized version of the given Libcall. Typically this |
| 22 | // means selecting the appropriately aligned version, but we also convert memset |
| 23 | // of 0 into memclr. |
Benjamin Kramer | bdc4956 | 2016-06-12 15:39:02 +0000 | [diff] [blame] | 24 | SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( |
| 25 | SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, |
| 26 | SDValue Size, unsigned Align, RTLIB::Libcall LC) const { |
John Brawn | 70605f7 | 2015-05-12 13:13:38 +0000 | [diff] [blame] | 27 | const ARMSubtarget &Subtarget = |
| 28 | DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); |
| 29 | const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); |
| 30 | |
| 31 | // Only use a specialized AEABI function if the default version of this |
| 32 | // Libcall is an AEABI function. |
| 33 | if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) |
| 34 | return SDValue(); |
| 35 | |
| 36 | // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be |
| 37 | // able to translate memset to memclr and use the value to index the function |
| 38 | // name array. |
| 39 | enum { |
| 40 | AEABI_MEMCPY = 0, |
| 41 | AEABI_MEMMOVE, |
| 42 | AEABI_MEMSET, |
| 43 | AEABI_MEMCLR |
| 44 | } AEABILibcall; |
| 45 | switch (LC) { |
| 46 | case RTLIB::MEMCPY: |
| 47 | AEABILibcall = AEABI_MEMCPY; |
| 48 | break; |
| 49 | case RTLIB::MEMMOVE: |
| 50 | AEABILibcall = AEABI_MEMMOVE; |
| 51 | break; |
| 52 | case RTLIB::MEMSET: |
| 53 | AEABILibcall = AEABI_MEMSET; |
| 54 | if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) |
| 55 | if (ConstantSrc->getZExtValue() == 0) |
| 56 | AEABILibcall = AEABI_MEMCLR; |
| 57 | break; |
| 58 | default: |
| 59 | return SDValue(); |
| 60 | } |
| 61 | |
| 62 | // Choose the most-aligned libcall variant that we can |
| 63 | enum { |
| 64 | ALIGN1 = 0, |
| 65 | ALIGN4, |
| 66 | ALIGN8 |
| 67 | } AlignVariant; |
| 68 | if ((Align & 7) == 0) |
| 69 | AlignVariant = ALIGN8; |
| 70 | else if ((Align & 3) == 0) |
| 71 | AlignVariant = ALIGN4; |
| 72 | else |
| 73 | AlignVariant = ALIGN1; |
| 74 | |
| 75 | TargetLowering::ArgListTy Args; |
| 76 | TargetLowering::ArgListEntry Entry; |
Mehdi Amini | a749f2a | 2015-07-09 02:09:52 +0000 | [diff] [blame] | 77 | Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); |
John Brawn | 70605f7 | 2015-05-12 13:13:38 +0000 | [diff] [blame] | 78 | Entry.Node = Dst; |
| 79 | Args.push_back(Entry); |
| 80 | if (AEABILibcall == AEABI_MEMCLR) { |
| 81 | Entry.Node = Size; |
| 82 | Args.push_back(Entry); |
| 83 | } else if (AEABILibcall == AEABI_MEMSET) { |
| 84 | // Adjust parameters for memset, EABI uses format (ptr, size, value), |
| 85 | // GNU library uses (ptr, value, size) |
| 86 | // See RTABI section 4.3.4 |
| 87 | Entry.Node = Size; |
| 88 | Args.push_back(Entry); |
| 89 | |
| 90 | // Extend or truncate the argument to be an i32 value for the call. |
| 91 | if (Src.getValueType().bitsGT(MVT::i32)) |
| 92 | Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); |
| 93 | else if (Src.getValueType().bitsLT(MVT::i32)) |
| 94 | Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); |
| 95 | |
| 96 | Entry.Node = Src; |
| 97 | Entry.Ty = Type::getInt32Ty(*DAG.getContext()); |
| 98 | Entry.isSExt = false; |
| 99 | Args.push_back(Entry); |
| 100 | } else { |
| 101 | Entry.Node = Src; |
| 102 | Args.push_back(Entry); |
| 103 | |
| 104 | Entry.Node = Size; |
| 105 | Args.push_back(Entry); |
| 106 | } |
| 107 | |
| 108 | char const *FunctionNames[4][3] = { |
| 109 | { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, |
| 110 | { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, |
| 111 | { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, |
| 112 | { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } |
| 113 | }; |
| 114 | TargetLowering::CallLoweringInfo CLI(DAG); |
Mehdi Amini | 44ede33 | 2015-07-09 02:09:04 +0000 | [diff] [blame] | 115 | CLI.setDebugLoc(dl) |
| 116 | .setChain(Chain) |
| 117 | .setCallee( |
| 118 | TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), |
| 119 | DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], |
| 120 | TLI->getPointerTy(DAG.getDataLayout())), |
Krzysztof Parzyszek | e116d500 | 2016-06-22 12:54:25 +0000 | [diff] [blame] | 121 | std::move(Args)) |
Mehdi Amini | 44ede33 | 2015-07-09 02:09:04 +0000 | [diff] [blame] | 122 | .setDiscardResult(); |
John Brawn | 70605f7 | 2015-05-12 13:13:38 +0000 | [diff] [blame] | 123 | std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); |
| 124 | |
| 125 | return CallResult.second; |
| 126 | } |
| 127 | |
Benjamin Kramer | bdc4956 | 2016-06-12 15:39:02 +0000 | [diff] [blame] | 128 | SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( |
| 129 | SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, |
| 130 | SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, |
| 131 | MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { |
Eric Christopher | 22b2ad2 | 2015-02-20 08:24:37 +0000 | [diff] [blame] | 132 | const ARMSubtarget &Subtarget = |
| 133 | DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 134 | // Do repeated 4-byte loads and stores. To be improved. |
| 135 | // This requires 4-byte alignment. |
| 136 | if ((Align & 3) != 0) |
| 137 | return SDValue(); |
Chris Lattner | 0ab5e2c | 2011-04-15 05:18:47 +0000 | [diff] [blame] | 138 | // This requires the copy size to be a constant, preferably |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 139 | // within a subtarget-specific limit. |
| 140 | ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); |
| 141 | if (!ConstantSize) |
John Brawn | 70605f7 | 2015-05-12 13:13:38 +0000 | [diff] [blame] | 142 | return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, |
| 143 | RTLIB::MEMCPY); |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 144 | uint64_t SizeVal = ConstantSize->getZExtValue(); |
Eric Christopher | 70e005a | 2014-06-12 23:39:49 +0000 | [diff] [blame] | 145 | if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) |
John Brawn | 70605f7 | 2015-05-12 13:13:38 +0000 | [diff] [blame] | 146 | return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, |
| 147 | RTLIB::MEMCPY); |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 148 | |
| 149 | unsigned BytesLeft = SizeVal & 3; |
| 150 | unsigned NumMemOps = SizeVal >> 2; |
| 151 | unsigned EmittedNumMemOps = 0; |
| 152 | EVT VT = MVT::i32; |
| 153 | unsigned VTSize = 4; |
| 154 | unsigned i = 0; |
James Molloy | a70697e | 2014-05-16 14:24:22 +0000 | [diff] [blame] | 155 | // Emit a maximum of 4 loads in Thumb1 since we have fewer registers |
Scott Douglass | 953f908 | 2015-10-05 14:49:54 +0000 | [diff] [blame] | 156 | const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; |
James Molloy | a70697e | 2014-05-16 14:24:22 +0000 | [diff] [blame] | 157 | SDValue TFOps[6]; |
| 158 | SDValue Loads[6]; |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 159 | uint64_t SrcOff = 0, DstOff = 0; |
| 160 | |
Scott Douglass | 953f908 | 2015-10-05 14:49:54 +0000 | [diff] [blame] | 161 | // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to |
| 162 | // VLDM/VSTM and make this code emit it when appropriate. This would reduce |
| 163 | // pressure on the general purpose registers. However this seems harder to map |
| 164 | // onto the register allocator's view of the world. |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 165 | |
Scott Douglass | 953f908 | 2015-10-05 14:49:54 +0000 | [diff] [blame] | 166 | // The number of MEMCPY pseudo-instructions to emit. We use up to |
| 167 | // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm |
| 168 | // later on. This is a lower bound on the number of MEMCPY operations we must |
| 169 | // emit. |
| 170 | unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 171 | |
Sjoerd Meijer | 9bc93f6 | 2016-06-03 15:38:55 +0000 | [diff] [blame] | 172 | // Code size optimisation: do not inline memcpy if expansion results in |
| 173 | // more instructions than the libary call. |
| 174 | if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) { |
| 175 | return SDValue(); |
| 176 | } |
| 177 | |
Scott Douglass | 953f908 | 2015-10-05 14:49:54 +0000 | [diff] [blame] | 178 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); |
| 179 | |
| 180 | for (unsigned I = 0; I != NumMEMCPYs; ++I) { |
| 181 | // Evenly distribute registers among MEMCPY operations to reduce register |
| 182 | // pressure. |
| 183 | unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; |
| 184 | unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; |
| 185 | |
| 186 | Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src, |
| 187 | DAG.getConstant(NumRegs, dl, MVT::i32)); |
| 188 | Src = Dst.getValue(1); |
| 189 | Chain = Dst.getValue(2); |
| 190 | |
| 191 | DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); |
| 192 | SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); |
| 193 | |
| 194 | EmittedNumMemOps = NextEmittedNumMemOps; |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 195 | } |
| 196 | |
| 197 | if (BytesLeft == 0) |
| 198 | return Chain; |
| 199 | |
| 200 | // Issue loads / stores for the trailing (1 - 3) bytes. |
| 201 | unsigned BytesLeftSave = BytesLeft; |
| 202 | i = 0; |
| 203 | while (BytesLeft) { |
| 204 | if (BytesLeft >= 2) { |
| 205 | VT = MVT::i16; |
| 206 | VTSize = 2; |
| 207 | } else { |
| 208 | VT = MVT::i8; |
| 209 | VTSize = 1; |
| 210 | } |
| 211 | |
| 212 | Loads[i] = DAG.getLoad(VT, dl, Chain, |
| 213 | DAG.getNode(ISD::ADD, dl, MVT::i32, Src, |
Sergey Dmitrouk | 842a51b | 2015-04-28 14:05:47 +0000 | [diff] [blame] | 214 | DAG.getConstant(SrcOff, dl, MVT::i32)), |
Justin Lebar | 9c37581 | 2016-07-15 18:27:10 +0000 | [diff] [blame] | 215 | SrcPtrInfo.getWithOffset(SrcOff)); |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 216 | TFOps[i] = Loads[i].getValue(1); |
| 217 | ++i; |
| 218 | SrcOff += VTSize; |
| 219 | BytesLeft -= VTSize; |
| 220 | } |
Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 221 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
Craig Topper | 2d2aa0c | 2014-04-30 07:17:30 +0000 | [diff] [blame] | 222 | makeArrayRef(TFOps, i)); |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 223 | |
| 224 | i = 0; |
| 225 | BytesLeft = BytesLeftSave; |
| 226 | while (BytesLeft) { |
| 227 | if (BytesLeft >= 2) { |
| 228 | VT = MVT::i16; |
| 229 | VTSize = 2; |
| 230 | } else { |
| 231 | VT = MVT::i8; |
| 232 | VTSize = 1; |
| 233 | } |
| 234 | |
| 235 | TFOps[i] = DAG.getStore(Chain, dl, Loads[i], |
| 236 | DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, |
Sergey Dmitrouk | 842a51b | 2015-04-28 14:05:47 +0000 | [diff] [blame] | 237 | DAG.getConstant(DstOff, dl, MVT::i32)), |
Justin Lebar | 9c37581 | 2016-07-15 18:27:10 +0000 | [diff] [blame] | 238 | DstPtrInfo.getWithOffset(DstOff)); |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 239 | ++i; |
| 240 | DstOff += VTSize; |
| 241 | BytesLeft -= VTSize; |
| 242 | } |
Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 243 | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
Craig Topper | 2d2aa0c | 2014-04-30 07:17:30 +0000 | [diff] [blame] | 244 | makeArrayRef(TFOps, i)); |
Dan Gohman | bb919df | 2010-05-11 17:31:57 +0000 | [diff] [blame] | 245 | } |
Renato Golin | 4cd5187 | 2011-05-22 21:41:23 +0000 | [diff] [blame] | 246 | |
Benjamin Kramer | bdc4956 | 2016-06-12 15:39:02 +0000 | [diff] [blame] | 247 | SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove( |
| 248 | SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, |
| 249 | SDValue Size, unsigned Align, bool isVolatile, |
| 250 | MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { |
John Brawn | 70605f7 | 2015-05-12 13:13:38 +0000 | [diff] [blame] | 251 | return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, |
| 252 | RTLIB::MEMMOVE); |
| 253 | } |
| 254 | |
Benjamin Kramer | bdc4956 | 2016-06-12 15:39:02 +0000 | [diff] [blame] | 255 | SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset( |
| 256 | SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, |
| 257 | SDValue Size, unsigned Align, bool isVolatile, |
| 258 | MachinePointerInfo DstPtrInfo) const { |
John Brawn | 70605f7 | 2015-05-12 13:13:38 +0000 | [diff] [blame] | 259 | return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, |
| 260 | RTLIB::MEMSET); |
Renato Golin | 4cd5187 | 2011-05-22 21:41:23 +0000 | [diff] [blame] | 261 | } |