Marcin Koscielnicki | cf7cc72 | 2016-07-10 14:41:22 +0000 | [diff] [blame] | 1 | //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This pass looks for instructions that can be replaced by a Test Data Class |
| 11 | // instruction, and replaces them when profitable. |
| 12 | // |
| 13 | // Roughly, the following rules are recognized: |
| 14 | // |
| 15 | // 1: fcmp pred X, 0 -> tdc X, mask |
| 16 | // 2: fcmp pred X, +-inf -> tdc X, mask |
| 17 | // 3: fcmp pred X, +-minnorm -> tdc X, mask |
| 18 | // 4: tdc (fabs X), mask -> tdc X, newmask |
| 19 | // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit] |
| 20 | // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask |
| 21 | // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask |
| 22 | // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2) |
| 23 | // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2) |
| 24 | // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2) |
| 25 | // |
| 26 | // The pass works in 4 steps: |
| 27 | // |
| 28 | // 1. All fcmp and icmp instructions in a function are checked for a match |
| 29 | // with rules 1-3 and 5-7. Their TDC equivalents are stored in |
| 30 | // the ConvertedInsts mapping. If the operand of a fcmp instruction is |
| 31 | // a fabs, it's also folded according to rule 4. |
| 32 | // 2. All and/or/xor i1 instructions whose both operands have been already |
| 33 | // mapped are mapped according to rules 8-10. LogicOpsWorklist is used |
| 34 | // as a queue of instructions to check. |
| 35 | // 3. All mapped instructions that are considered worthy of conversion (ie. |
| 36 | // replacing them will actually simplify the final code) are replaced |
| 37 | // with a call to the s390.tdc intrinsic. |
| 38 | // 4. All intermediate results of replaced instructions are removed if unused. |
| 39 | // |
| 40 | // Instructions that match rules 1-3 are considered unworthy of conversion |
| 41 | // on their own (since a comparison instruction is superior), but are mapped |
| 42 | // in the hopes of folding the result using rules 4 and 8-10 (likely removing |
| 43 | // the original comparison in the process). |
| 44 | // |
| 45 | //===----------------------------------------------------------------------===// |
| 46 | |
| 47 | #include "SystemZ.h" |
| 48 | #include "llvm/ADT/MapVector.h" |
| 49 | #include "llvm/IR/Constants.h" |
| 50 | #include "llvm/IR/Instructions.h" |
| 51 | #include "llvm/IR/InstIterator.h" |
| 52 | #include "llvm/IR/IntrinsicInst.h" |
| 53 | #include "llvm/IR/IRBuilder.h" |
| 54 | #include "llvm/IR/LegacyPassManager.h" |
| 55 | #include "llvm/IR/Module.h" |
| 56 | #include <deque> |
| 57 | #include <set> |
| 58 | |
| 59 | using namespace llvm; |
| 60 | |
| 61 | namespace llvm { |
| 62 | void initializeSystemZTDCPassPass(PassRegistry&); |
| 63 | } |
| 64 | |
| 65 | namespace { |
| 66 | |
| 67 | class SystemZTDCPass : public FunctionPass { |
| 68 | public: |
| 69 | static char ID; |
| 70 | SystemZTDCPass() : FunctionPass(ID) { |
| 71 | initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry()); |
| 72 | } |
| 73 | |
| 74 | bool runOnFunction(Function &F) override; |
| 75 | private: |
| 76 | // Maps seen instructions that can be mapped to a TDC, values are |
| 77 | // (TDC operand, TDC mask, worthy flag) triples. |
| 78 | MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts; |
| 79 | // The queue of and/or/xor i1 instructions to be potentially folded. |
| 80 | std::vector<BinaryOperator *> LogicOpsWorklist; |
| 81 | // Instructions matched while folding, to be removed at the end if unused. |
| 82 | std::set<Instruction *> PossibleJunk; |
| 83 | |
| 84 | // Tries to convert a fcmp instruction. |
| 85 | void convertFCmp(CmpInst &I); |
| 86 | |
| 87 | // Tries to convert an icmp instruction. |
| 88 | void convertICmp(CmpInst &I); |
| 89 | |
| 90 | // Tries to convert an i1 and/or/xor instruction, whose both operands |
| 91 | // have been already converted. |
| 92 | void convertLogicOp(BinaryOperator &I); |
| 93 | |
| 94 | // Marks an instruction as converted - adds it to ConvertedInsts and adds |
| 95 | // any and/or/xor i1 users to the queue. |
| 96 | void converted(Instruction *I, Value *V, int Mask, bool Worthy) { |
| 97 | ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy); |
| 98 | auto &M = *I->getFunction()->getParent(); |
| 99 | auto &Ctx = M.getContext(); |
| 100 | for (auto *U : I->users()) { |
| 101 | auto *LI = dyn_cast<BinaryOperator>(U); |
| 102 | if (LI && LI->getType() == Type::getInt1Ty(Ctx) && |
| 103 | (LI->getOpcode() == Instruction::And || |
| 104 | LI->getOpcode() == Instruction::Or || |
| 105 | LI->getOpcode() == Instruction::Xor)) { |
| 106 | LogicOpsWorklist.push_back(LI); |
| 107 | } |
| 108 | } |
| 109 | } |
| 110 | }; |
| 111 | |
| 112 | } // end anonymous namespace |
| 113 | |
| 114 | char SystemZTDCPass::ID = 0; |
| 115 | INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", |
| 116 | "SystemZ Test Data Class optimization", false, false) |
| 117 | |
| 118 | FunctionPass *llvm::createSystemZTDCPass() { |
| 119 | return new SystemZTDCPass(); |
| 120 | } |
| 121 | |
| 122 | void SystemZTDCPass::convertFCmp(CmpInst &I) { |
| 123 | Value *Op0 = I.getOperand(0); |
| 124 | auto *Const = dyn_cast<ConstantFP>(I.getOperand(1)); |
| 125 | auto Pred = I.getPredicate(); |
| 126 | // Only comparisons with consts are interesting. |
| 127 | if (!Const) |
| 128 | return; |
| 129 | // Compute the smallest normal number (and its negation). |
| 130 | auto &Sem = Op0->getType()->getFltSemantics(); |
| 131 | APFloat Smallest = APFloat::getSmallestNormalized(Sem); |
| 132 | APFloat NegSmallest = Smallest; |
| 133 | NegSmallest.changeSign(); |
| 134 | // Check if Const is one of our recognized consts. |
| 135 | int WhichConst; |
| 136 | if (Const->isZero()) { |
| 137 | // All comparisons with 0 can be converted. |
| 138 | WhichConst = 0; |
| 139 | } else if (Const->isInfinity()) { |
| 140 | // Likewise for infinities. |
| 141 | WhichConst = Const->isNegative() ? 2 : 1; |
| 142 | } else if (Const->isExactlyValue(Smallest)) { |
| 143 | // For Smallest, we cannot do EQ separately from GT. |
| 144 | if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && |
| 145 | (Pred & CmpInst::FCMP_OGE) != 0) |
| 146 | return; |
| 147 | WhichConst = 3; |
| 148 | } else if (Const->isExactlyValue(NegSmallest)) { |
| 149 | // Likewise for NegSmallest, we cannot do EQ separately from LT. |
| 150 | if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && |
| 151 | (Pred & CmpInst::FCMP_OLE) != 0) |
| 152 | return; |
| 153 | WhichConst = 4; |
| 154 | } else { |
| 155 | // Not one of our special constants. |
| 156 | return; |
| 157 | } |
| 158 | // Partial masks to use for EQ, GT, LT, UN comparisons, respectively. |
| 159 | static const int Masks[][4] = { |
| 160 | { // 0 |
| 161 | SystemZ::TDCMASK_ZERO, // eq |
| 162 | SystemZ::TDCMASK_POSITIVE, // gt |
| 163 | SystemZ::TDCMASK_NEGATIVE, // lt |
| 164 | SystemZ::TDCMASK_NAN, // un |
| 165 | }, |
| 166 | { // inf |
| 167 | SystemZ::TDCMASK_INFINITY_PLUS, // eq |
| 168 | 0, // gt |
| 169 | (SystemZ::TDCMASK_ZERO | |
| 170 | SystemZ::TDCMASK_NEGATIVE | |
| 171 | SystemZ::TDCMASK_NORMAL_PLUS | |
| 172 | SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt |
| 173 | SystemZ::TDCMASK_NAN, // un |
| 174 | }, |
| 175 | { // -inf |
| 176 | SystemZ::TDCMASK_INFINITY_MINUS, // eq |
| 177 | (SystemZ::TDCMASK_ZERO | |
| 178 | SystemZ::TDCMASK_POSITIVE | |
| 179 | SystemZ::TDCMASK_NORMAL_MINUS | |
| 180 | SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt |
| 181 | 0, // lt |
| 182 | SystemZ::TDCMASK_NAN, // un |
| 183 | }, |
| 184 | { // minnorm |
| 185 | 0, // eq (unsupported) |
| 186 | (SystemZ::TDCMASK_NORMAL_PLUS | |
| 187 | SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge) |
| 188 | (SystemZ::TDCMASK_ZERO | |
| 189 | SystemZ::TDCMASK_NEGATIVE | |
| 190 | SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt |
| 191 | SystemZ::TDCMASK_NAN, // un |
| 192 | }, |
| 193 | { // -minnorm |
| 194 | 0, // eq (unsupported) |
| 195 | (SystemZ::TDCMASK_ZERO | |
| 196 | SystemZ::TDCMASK_POSITIVE | |
| 197 | SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt |
| 198 | (SystemZ::TDCMASK_NORMAL_MINUS | |
| 199 | SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le) |
| 200 | SystemZ::TDCMASK_NAN, // un |
| 201 | } |
| 202 | }; |
| 203 | // Construct the mask as a combination of the partial masks. |
| 204 | int Mask = 0; |
| 205 | if (Pred & CmpInst::FCMP_OEQ) |
| 206 | Mask |= Masks[WhichConst][0]; |
| 207 | if (Pred & CmpInst::FCMP_OGT) |
| 208 | Mask |= Masks[WhichConst][1]; |
| 209 | if (Pred & CmpInst::FCMP_OLT) |
| 210 | Mask |= Masks[WhichConst][2]; |
| 211 | if (Pred & CmpInst::FCMP_UNO) |
| 212 | Mask |= Masks[WhichConst][3]; |
| 213 | // A lone fcmp is unworthy of tdc conversion on its own, but may become |
| 214 | // worthy if combined with fabs. |
| 215 | bool Worthy = false; |
| 216 | if (CallInst *CI = dyn_cast<CallInst>(Op0)) { |
| 217 | Function *F = CI->getCalledFunction(); |
| 218 | if (F && F->getIntrinsicID() == Intrinsic::fabs) { |
| 219 | // Fold with fabs - adjust the mask appropriately. |
| 220 | Mask &= SystemZ::TDCMASK_PLUS; |
| 221 | Mask |= Mask >> 1; |
| 222 | Op0 = CI->getArgOperand(0); |
| 223 | // A combination of fcmp with fabs is a win, unless the constant |
| 224 | // involved is 0 (which is handled by later passes). |
| 225 | Worthy = WhichConst != 0; |
| 226 | PossibleJunk.insert(CI); |
| 227 | } |
| 228 | } |
| 229 | converted(&I, Op0, Mask, Worthy); |
| 230 | } |
| 231 | |
| 232 | void SystemZTDCPass::convertICmp(CmpInst &I) { |
| 233 | Value *Op0 = I.getOperand(0); |
| 234 | auto *Const = dyn_cast<ConstantInt>(I.getOperand(1)); |
| 235 | auto Pred = I.getPredicate(); |
| 236 | // All our icmp rules involve comparisons with consts. |
| 237 | if (!Const) |
| 238 | return; |
| 239 | if (auto *Cast = dyn_cast<BitCastInst>(Op0)) { |
| 240 | // Check for icmp+bitcast used for signbit. |
| 241 | if (!Cast->getSrcTy()->isFloatTy() && |
| 242 | !Cast->getSrcTy()->isDoubleTy() && |
| 243 | !Cast->getSrcTy()->isFP128Ty()) |
| 244 | return; |
| 245 | Value *V = Cast->getOperand(0); |
| 246 | int Mask; |
| 247 | if (Pred == CmpInst::ICMP_SLT && Const->isZero()) { |
| 248 | // icmp slt (bitcast X), 0 - set if sign bit true |
| 249 | Mask = SystemZ::TDCMASK_MINUS; |
| 250 | } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) { |
| 251 | // icmp sgt (bitcast X), -1 - set if sign bit false |
| 252 | Mask = SystemZ::TDCMASK_PLUS; |
| 253 | } else { |
| 254 | // Not a sign bit check. |
| 255 | return; |
| 256 | } |
| 257 | PossibleJunk.insert(Cast); |
| 258 | converted(&I, V, Mask, true); |
| 259 | } else if (auto *CI = dyn_cast<CallInst>(Op0)) { |
| 260 | // Check if this is a pre-existing call of our tdc intrinsic. |
| 261 | Function *F = CI->getCalledFunction(); |
| 262 | if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc) |
| 263 | return; |
| 264 | if (!Const->isZero()) |
| 265 | return; |
| 266 | Value *V = CI->getArgOperand(0); |
| 267 | auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); |
| 268 | // Bail if the mask is not a constant. |
| 269 | if (!MaskC) |
| 270 | return; |
| 271 | int Mask = MaskC->getZExtValue(); |
| 272 | Mask &= SystemZ::TDCMASK_ALL; |
| 273 | if (Pred == CmpInst::ICMP_NE) { |
| 274 | // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC |
| 275 | } else if (Pred == CmpInst::ICMP_EQ) { |
| 276 | // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask |
| 277 | Mask ^= SystemZ::TDCMASK_ALL; |
| 278 | } else { |
| 279 | // An unknown comparison - ignore. |
| 280 | return; |
| 281 | } |
| 282 | PossibleJunk.insert(CI); |
| 283 | converted(&I, V, Mask, false); |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { |
| 288 | Value *Op0, *Op1; |
| 289 | int Mask0, Mask1; |
| 290 | bool Worthy0, Worthy1; |
| 291 | std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))]; |
| 292 | std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))]; |
| 293 | if (Op0 != Op1) |
| 294 | return; |
| 295 | int Mask; |
| 296 | switch (I.getOpcode()) { |
| 297 | case Instruction::And: |
| 298 | Mask = Mask0 & Mask1; |
| 299 | break; |
| 300 | case Instruction::Or: |
| 301 | Mask = Mask0 | Mask1; |
| 302 | break; |
| 303 | case Instruction::Xor: |
| 304 | Mask = Mask0 ^ Mask1; |
| 305 | break; |
| 306 | default: |
| 307 | llvm_unreachable("Unknown op in convertLogicOp"); |
| 308 | } |
| 309 | converted(&I, Op0, Mask, true); |
| 310 | } |
| 311 | |
| 312 | bool SystemZTDCPass::runOnFunction(Function &F) { |
| 313 | ConvertedInsts.clear(); |
| 314 | LogicOpsWorklist.clear(); |
| 315 | PossibleJunk.clear(); |
| 316 | |
| 317 | // Look for icmp+fcmp instructions. |
| 318 | for (auto &I : instructions(F)) { |
| 319 | if (I.getOpcode() == Instruction::FCmp) |
| 320 | convertFCmp(cast<CmpInst>(I)); |
| 321 | else if (I.getOpcode() == Instruction::ICmp) |
| 322 | convertICmp(cast<CmpInst>(I)); |
| 323 | } |
| 324 | |
| 325 | // If none found, bail already. |
| 326 | if (ConvertedInsts.empty()) |
| 327 | return false; |
| 328 | |
| 329 | // Process the queue of logic instructions. |
| 330 | while (!LogicOpsWorklist.empty()) { |
| 331 | BinaryOperator *Op = LogicOpsWorklist.back(); |
| 332 | LogicOpsWorklist.pop_back(); |
| 333 | // If both operands mapped, and the instruction itself not yet mapped, |
| 334 | // convert it. |
| 335 | if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) && |
| 336 | ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) && |
| 337 | !ConvertedInsts.count(Op)) |
| 338 | convertLogicOp(*Op); |
| 339 | } |
| 340 | |
| 341 | // Time to actually replace the instructions. Do it in the reverse order |
| 342 | // of finding them, since there's a good chance the earlier ones will be |
| 343 | // unused (due to being folded into later ones). |
| 344 | Module &M = *F.getParent(); |
| 345 | auto &Ctx = M.getContext(); |
| 346 | Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0); |
| 347 | bool MadeChange = false; |
| 348 | for (auto &It : reverse(ConvertedInsts)) { |
| 349 | Instruction *I = It.first; |
| 350 | Value *V; |
| 351 | int Mask; |
| 352 | bool Worthy; |
| 353 | std::tie(V, Mask, Worthy) = It.second; |
| 354 | if (!I->user_empty()) { |
| 355 | // If used and unworthy of conversion, skip it. |
| 356 | if (!Worthy) |
| 357 | continue; |
| 358 | // Call the intrinsic, compare result with 0. |
| 359 | Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, |
| 360 | V->getType()); |
| 361 | IRBuilder<> IRB(I); |
| 362 | Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask); |
| 363 | Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal}); |
| 364 | Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32); |
| 365 | I->replaceAllUsesWith(ICmp); |
| 366 | } |
| 367 | // If unused, or used and converted, remove it. |
| 368 | I->eraseFromParent(); |
| 369 | MadeChange = true; |
| 370 | } |
| 371 | |
| 372 | if (!MadeChange) |
| 373 | return false; |
| 374 | |
| 375 | // We've actually done something - now clear misc accumulated junk (fabs, |
| 376 | // bitcast). |
| 377 | for (auto *I : PossibleJunk) |
| 378 | if (I->user_empty()) |
| 379 | I->eraseFromParent(); |
| 380 | |
| 381 | return true; |
| 382 | } |