Richard Sandiford | d131ff8 | 2013-07-08 09:35:23 +0000 | [diff] [blame] | 1 | //===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file implements the SystemZSelectionDAGInfo class. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
Richard Sandiford | d131ff8 | 2013-07-08 09:35:23 +0000 | [diff] [blame] | 14 | #include "SystemZTargetMachine.h" |
| 15 | #include "llvm/CodeGen/SelectionDAG.h" |
| 16 | |
| 17 | using namespace llvm; |
| 18 | |
Chandler Carruth | 84e68b2 | 2014-04-22 02:41:26 +0000 | [diff] [blame] | 19 | #define DEBUG_TYPE "systemz-selectiondag-info" |
| 20 | |
Richard Sandiford | d131ff8 | 2013-07-08 09:35:23 +0000 | [diff] [blame] | 21 | SystemZSelectionDAGInfo:: |
| 22 | SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) |
| 23 | : TargetSelectionDAGInfo(TM) { |
| 24 | } |
| 25 | |
| 26 | SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { |
| 27 | } |
| 28 | |
Richard Sandiford | 4943bc3 | 2013-09-06 10:25:07 +0000 | [diff] [blame] | 29 | // Decide whether it is best to use a loop or straight-line code for |
| 30 | // a block operation of Size bytes with source address Src and destination |
| 31 | // address Dest. Sequence is the opcode to use for straight-line code |
| 32 | // (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). |
| 33 | // Return the chain for the completed operation. |
| 34 | static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, |
| 35 | unsigned Loop, SDValue Chain, SDValue Dst, |
| 36 | SDValue Src, uint64_t Size) { |
Richard Sandiford | 5e318f0 | 2013-08-27 09:54:29 +0000 | [diff] [blame] | 37 | EVT PtrVT = Src.getValueType(); |
| 38 | // The heuristic we use is to prefer loops for anything that would |
| 39 | // require 7 or more MVCs. With these kinds of sizes there isn't |
| 40 | // much to choose between straight-line code and looping code, |
| 41 | // since the time will be dominated by the MVCs themselves. |
| 42 | // However, the loop has 4 or 5 instructions (depending on whether |
| 43 | // the base addresses can be proved equal), so there doesn't seem |
| 44 | // much point using a loop for 5 * 256 bytes or fewer. Anything in |
| 45 | // the range (5 * 256, 6 * 256) will need another instruction after |
| 46 | // the loop, so it doesn't seem worth using a loop then either. |
| 47 | // The next value up, 6 * 256, can be implemented in the same |
| 48 | // number of straight-line MVCs as 6 * 256 - 1. |
| 49 | if (Size > 6 * 256) |
Richard Sandiford | 4943bc3 | 2013-09-06 10:25:07 +0000 | [diff] [blame] | 50 | return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, |
Richard Sandiford | 5e318f0 | 2013-08-27 09:54:29 +0000 | [diff] [blame] | 51 | DAG.getConstant(Size, PtrVT), |
| 52 | DAG.getConstant(Size / 256, PtrVT)); |
Richard Sandiford | 4943bc3 | 2013-09-06 10:25:07 +0000 | [diff] [blame] | 53 | return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, |
Richard Sandiford | 5e318f0 | 2013-08-27 09:54:29 +0000 | [diff] [blame] | 54 | DAG.getConstant(Size, PtrVT)); |
| 55 | } |
| 56 | |
Richard Sandiford | d131ff8 | 2013-07-08 09:35:23 +0000 | [diff] [blame] | 57 | SDValue SystemZSelectionDAGInfo:: |
| 58 | EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, |
| 59 | SDValue Dst, SDValue Src, SDValue Size, unsigned Align, |
| 60 | bool IsVolatile, bool AlwaysInline, |
| 61 | MachinePointerInfo DstPtrInfo, |
| 62 | MachinePointerInfo SrcPtrInfo) const { |
| 63 | if (IsVolatile) |
| 64 | return SDValue(); |
| 65 | |
Richard Sandiford | 21f5d68 | 2014-03-06 11:22:58 +0000 | [diff] [blame] | 66 | if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) |
Richard Sandiford | 4943bc3 | 2013-09-06 10:25:07 +0000 | [diff] [blame] | 67 | return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, |
| 68 | Chain, Dst, Src, CSize->getZExtValue()); |
Richard Sandiford | d131ff8 | 2013-07-08 09:35:23 +0000 | [diff] [blame] | 69 | return SDValue(); |
| 70 | } |
Richard Sandiford | 47660c1 | 2013-07-09 09:32:42 +0000 | [diff] [blame] | 71 | |
| 72 | // Handle a memset of 1, 2, 4 or 8 bytes with the operands given by |
| 73 | // Chain, Dst, ByteVal and Size. These cases are expected to use |
| 74 | // MVI, MVHHI, MVHI and MVGHI respectively. |
| 75 | static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, |
| 76 | SDValue Dst, uint64_t ByteVal, uint64_t Size, |
| 77 | unsigned Align, |
| 78 | MachinePointerInfo DstPtrInfo) { |
| 79 | uint64_t StoreVal = ByteVal; |
| 80 | for (unsigned I = 1; I < Size; ++I) |
| 81 | StoreVal |= ByteVal << (I * 8); |
| 82 | return DAG.getStore(Chain, DL, |
| 83 | DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)), |
| 84 | Dst, DstPtrInfo, false, false, Align); |
| 85 | } |
| 86 | |
| 87 | SDValue SystemZSelectionDAGInfo:: |
| 88 | EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, |
| 89 | SDValue Dst, SDValue Byte, SDValue Size, |
| 90 | unsigned Align, bool IsVolatile, |
| 91 | MachinePointerInfo DstPtrInfo) const { |
Richard Sandiford | 5e318f0 | 2013-08-27 09:54:29 +0000 | [diff] [blame] | 92 | EVT PtrVT = Dst.getValueType(); |
Richard Sandiford | 47660c1 | 2013-07-09 09:32:42 +0000 | [diff] [blame] | 93 | |
| 94 | if (IsVolatile) |
| 95 | return SDValue(); |
| 96 | |
Richard Sandiford | 21f5d68 | 2014-03-06 11:22:58 +0000 | [diff] [blame] | 97 | if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { |
Richard Sandiford | 47660c1 | 2013-07-09 09:32:42 +0000 | [diff] [blame] | 98 | uint64_t Bytes = CSize->getZExtValue(); |
| 99 | if (Bytes == 0) |
| 100 | return SDValue(); |
Richard Sandiford | 21f5d68 | 2014-03-06 11:22:58 +0000 | [diff] [blame] | 101 | if (auto *CByte = dyn_cast<ConstantSDNode>(Byte)) { |
Richard Sandiford | 47660c1 | 2013-07-09 09:32:42 +0000 | [diff] [blame] | 102 | // Handle cases that can be done using at most two of |
| 103 | // MVI, MVHI, MVHHI and MVGHI. The latter two can only be |
| 104 | // used if ByteVal is all zeros or all ones; in other casees, |
| 105 | // we can move at most 2 halfwords. |
| 106 | uint64_t ByteVal = CByte->getZExtValue(); |
| 107 | if (ByteVal == 0 || ByteVal == 255 ? |
| 108 | Bytes <= 16 && CountPopulation_64(Bytes) <= 2 : |
| 109 | Bytes <= 4) { |
| 110 | unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); |
| 111 | unsigned Size2 = Bytes - Size1; |
| 112 | SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, |
| 113 | Align, DstPtrInfo); |
| 114 | if (Size2 == 0) |
| 115 | return Chain1; |
Richard Sandiford | 5e318f0 | 2013-08-27 09:54:29 +0000 | [diff] [blame] | 116 | Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, |
| 117 | DAG.getConstant(Size1, PtrVT)); |
Richard Sandiford | 47660c1 | 2013-07-09 09:32:42 +0000 | [diff] [blame] | 118 | DstPtrInfo = DstPtrInfo.getWithOffset(Size1); |
| 119 | SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, |
| 120 | std::min(Align, Size1), DstPtrInfo); |
| 121 | return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); |
| 122 | } |
| 123 | } else { |
| 124 | // Handle one and two bytes using STC. |
| 125 | if (Bytes <= 2) { |
| 126 | SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, |
| 127 | false, false, Align); |
| 128 | if (Bytes == 1) |
| 129 | return Chain1; |
Richard Sandiford | 5e318f0 | 2013-08-27 09:54:29 +0000 | [diff] [blame] | 130 | SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, |
| 131 | DAG.getConstant(1, PtrVT)); |
Richard Sandiford | 47660c1 | 2013-07-09 09:32:42 +0000 | [diff] [blame] | 132 | SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, |
| 133 | DstPtrInfo.getWithOffset(1), |
| 134 | false, false, 1); |
| 135 | return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); |
| 136 | } |
| 137 | } |
| 138 | assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); |
Richard Sandiford | 4943bc3 | 2013-09-06 10:25:07 +0000 | [diff] [blame] | 139 | |
| 140 | // Handle the special case of a memset of 0, which can use XC. |
Richard Sandiford | 21f5d68 | 2014-03-06 11:22:58 +0000 | [diff] [blame] | 141 | auto *CByte = dyn_cast<ConstantSDNode>(Byte); |
Richard Sandiford | 4943bc3 | 2013-09-06 10:25:07 +0000 | [diff] [blame] | 142 | if (CByte && CByte->getZExtValue() == 0) |
| 143 | return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP, |
| 144 | Chain, Dst, Dst, Bytes); |
| 145 | |
Richard Sandiford | 5e318f0 | 2013-08-27 09:54:29 +0000 | [diff] [blame] | 146 | // Copy the byte to the first location and then use MVC to copy |
| 147 | // it to the rest. |
| 148 | Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, |
| 149 | false, false, Align); |
| 150 | SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, |
| 151 | DAG.getConstant(1, PtrVT)); |
Richard Sandiford | 4943bc3 | 2013-09-06 10:25:07 +0000 | [diff] [blame] | 152 | return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, |
| 153 | Chain, DstPlus1, Dst, Bytes - 1); |
Richard Sandiford | 47660c1 | 2013-07-09 09:32:42 +0000 | [diff] [blame] | 154 | } |
| 155 | return SDValue(); |
| 156 | } |
Richard Sandiford | 564681c | 2013-08-12 10:28:10 +0000 | [diff] [blame] | 157 | |
Richard Sandiford | be133a8 | 2013-08-28 09:01:51 +0000 | [diff] [blame] | 158 | // Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), |
| 159 | // deciding whether to use a loop or straight-line code. |
| 160 | static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, |
| 161 | SDValue Src1, SDValue Src2, uint64_t Size) { |
| 162 | SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); |
| 163 | EVT PtrVT = Src1.getValueType(); |
| 164 | // A two-CLC sequence is a clear win over a loop, not least because it |
| 165 | // needs only one branch. A three-CLC sequence needs the same number |
| 166 | // of branches as a loop (i.e. 2), but is shorter. That brings us to |
| 167 | // lengths greater than 768 bytes. It seems relatively likely that |
| 168 | // a difference will be found within the first 768 bytes, so we just |
| 169 | // optimize for the smallest number of branch instructions, in order |
| 170 | // to avoid polluting the prediction buffer too much. A loop only ever |
| 171 | // needs 2 branches, whereas a straight-line sequence would need 3 or more. |
| 172 | if (Size > 3 * 256) |
| 173 | return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, |
| 174 | DAG.getConstant(Size, PtrVT), |
| 175 | DAG.getConstant(Size / 256, PtrVT)); |
| 176 | return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, |
| 177 | DAG.getConstant(Size, PtrVT)); |
| 178 | } |
| 179 | |
Richard Sandiford | ca23271 | 2013-08-16 11:21:54 +0000 | [diff] [blame] | 180 | // Convert the current CC value into an integer that is 0 if CC == 0, |
| 181 | // less than zero if CC == 1 and greater than zero if CC >= 2. |
| 182 | // The sequence starts with IPM, which puts CC into bits 29 and 28 |
| 183 | // of an integer and clears bits 30 and 31. |
| 184 | static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { |
| 185 | SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); |
| 186 | SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, |
Richard Sandiford | f722a8e30 | 2013-10-16 11:10:55 +0000 | [diff] [blame] | 187 | DAG.getConstant(SystemZ::IPM_CC, MVT::i32)); |
Richard Sandiford | ca23271 | 2013-08-16 11:21:54 +0000 | [diff] [blame] | 188 | SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, |
| 189 | DAG.getConstant(31, MVT::i32)); |
| 190 | return ROTL; |
| 191 | } |
| 192 | |
Richard Sandiford | 564681c | 2013-08-12 10:28:10 +0000 | [diff] [blame] | 193 | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: |
| 194 | EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, |
| 195 | SDValue Src1, SDValue Src2, SDValue Size, |
| 196 | MachinePointerInfo Op1PtrInfo, |
| 197 | MachinePointerInfo Op2PtrInfo) const { |
Richard Sandiford | 21f5d68 | 2014-03-06 11:22:58 +0000 | [diff] [blame] | 198 | if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { |
Richard Sandiford | 564681c | 2013-08-12 10:28:10 +0000 | [diff] [blame] | 199 | uint64_t Bytes = CSize->getZExtValue(); |
Richard Sandiford | be133a8 | 2013-08-28 09:01:51 +0000 | [diff] [blame] | 200 | assert(Bytes > 0 && "Caller should have handled 0-size case"); |
| 201 | Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); |
| 202 | SDValue Glue = Chain.getValue(1); |
| 203 | return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); |
Richard Sandiford | 564681c | 2013-08-12 10:28:10 +0000 | [diff] [blame] | 204 | } |
| 205 | return std::make_pair(SDValue(), SDValue()); |
| 206 | } |
Richard Sandiford | ca23271 | 2013-08-16 11:21:54 +0000 | [diff] [blame] | 207 | |
| 208 | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: |
Richard Sandiford | 6f6d551 | 2013-08-20 09:38:48 +0000 | [diff] [blame] | 209 | EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, |
| 210 | SDValue Src, SDValue Char, SDValue Length, |
| 211 | MachinePointerInfo SrcPtrInfo) const { |
| 212 | // Use SRST to find the character. End is its address on success. |
| 213 | EVT PtrVT = Src.getValueType(); |
| 214 | SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); |
| 215 | Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); |
| 216 | Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); |
| 217 | Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, |
| 218 | DAG.getConstant(255, MVT::i32)); |
| 219 | SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); |
| 220 | SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, |
| 221 | Limit, Src, Char); |
| 222 | Chain = End.getValue(1); |
| 223 | SDValue Glue = End.getValue(2); |
| 224 | |
| 225 | // Now select between End and null, depending on whether the character |
| 226 | // was found. |
| 227 | SmallVector<SDValue, 5> Ops; |
| 228 | Ops.push_back(End); |
| 229 | Ops.push_back(DAG.getConstant(0, PtrVT)); |
| 230 | Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32)); |
| 231 | Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32)); |
| 232 | Ops.push_back(Glue); |
| 233 | VTs = DAG.getVTList(PtrVT, MVT::Glue); |
Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame^] | 234 | End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); |
Richard Sandiford | 6f6d551 | 2013-08-20 09:38:48 +0000 | [diff] [blame] | 235 | return std::make_pair(End, Chain); |
| 236 | } |
| 237 | |
| 238 | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: |
Richard Sandiford | bb83a50 | 2013-08-16 11:29:37 +0000 | [diff] [blame] | 239 | EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, |
| 240 | SDValue Dest, SDValue Src, |
| 241 | MachinePointerInfo DestPtrInfo, |
| 242 | MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { |
| 243 | SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); |
| 244 | SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, |
| 245 | DAG.getConstant(0, MVT::i32)); |
| 246 | return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); |
| 247 | } |
| 248 | |
| 249 | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: |
Richard Sandiford | ca23271 | 2013-08-16 11:21:54 +0000 | [diff] [blame] | 250 | EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, |
| 251 | SDValue Src1, SDValue Src2, |
| 252 | MachinePointerInfo Op1PtrInfo, |
| 253 | MachinePointerInfo Op2PtrInfo) const { |
| 254 | SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); |
| 255 | SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, |
| 256 | DAG.getConstant(0, MVT::i32)); |
| 257 | Chain = Unused.getValue(1); |
| 258 | SDValue Glue = Chain.getValue(2); |
| 259 | return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); |
| 260 | } |
Richard Sandiford | 0dec06a | 2013-08-16 11:41:43 +0000 | [diff] [blame] | 261 | |
| 262 | // Search from Src for a null character, stopping once Src reaches Limit. |
| 263 | // Return a pair of values, the first being the number of nonnull characters |
| 264 | // and the second being the out chain. |
| 265 | // |
| 266 | // This can be used for strlen by setting Limit to 0. |
| 267 | static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, |
| 268 | SDValue Chain, SDValue Src, |
| 269 | SDValue Limit) { |
| 270 | EVT PtrVT = Src.getValueType(); |
| 271 | SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); |
| 272 | SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, |
| 273 | Limit, Src, DAG.getConstant(0, MVT::i32)); |
| 274 | Chain = End.getValue(1); |
| 275 | SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); |
| 276 | return std::make_pair(Len, Chain); |
| 277 | } |
| 278 | |
| 279 | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: |
| 280 | EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, |
| 281 | SDValue Src, MachinePointerInfo SrcPtrInfo) const { |
| 282 | EVT PtrVT = Src.getValueType(); |
| 283 | return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT)); |
| 284 | } |
| 285 | |
| 286 | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: |
| 287 | EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, |
| 288 | SDValue Src, SDValue MaxLength, |
| 289 | MachinePointerInfo SrcPtrInfo) const { |
| 290 | EVT PtrVT = Src.getValueType(); |
| 291 | MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); |
| 292 | SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); |
| 293 | return getBoundedStrlen(DAG, DL, Chain, Src, Limit); |
| 294 | } |