|  | //===- PoisonChecking.cpp - -----------------------------------------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // Implements a transform pass which instruments IR such that poison semantics | 
|  | // are made explicit.  That is, it provides a (possibly partial) executable | 
|  | // semantics for every instruction w.r.t. poison as specified in the LLVM | 
|  | // LangRef.  There are obvious parallels to the sanitizer tools, but this pass | 
|  | // is focused purely on the semantics of LLVM IR, not any particular source | 
|  | // language.   If you're looking for something to see if your C/C++ contains | 
|  | // UB, this is not it. | 
|  | // | 
|  | // The rewritten semantics of each instruction will include the following | 
|  | // components: | 
|  | // | 
|  | // 1) The original instruction, unmodified. | 
|  | // 2) A propagation rule which translates dynamic information about the poison | 
|  | //    state of each input to whether the dynamic output of the instruction | 
|  | //    produces poison. | 
|  | // 3) A flag validation rule which validates any poison producing flags on the | 
|  | //    instruction itself (e.g. checks for overflow on nsw). | 
|  | // 4) A check rule which traps (to a handler function) if this instruction must | 
|  | //    execute undefined behavior given the poison state of it's inputs. | 
|  | // | 
|  | // At the moment, the UB detection is done in a best effort manner; that is, | 
|  | // the resulting code may produce a false negative result (not report UB when | 
|  | // it actually exists according to the LangRef spec), but should never produce | 
|  | // a false positive (report UB where it doesn't exist).  The intention is to | 
|  | // eventually support a "strict" mode which never dynamically reports a false | 
|  | // negative at the cost of rejecting some valid inputs to translation. | 
|  | // | 
|  | // Use cases for this pass include: | 
|  | // - Understanding (and testing!) the implications of the definition of poison | 
|  | //   from the LangRef. | 
|  | // - Validating the output of a IR fuzzer to ensure that all programs produced | 
|  | //   are well defined on the specific input used. | 
|  | // - Finding/confirming poison specific miscompiles by checking the poison | 
|  | //   status of an input/IR pair is the same before and after an optimization | 
|  | //   transform. | 
|  | // - Checking that a bugpoint reduction does not introduce UB which didn't | 
|  | //   exist in the original program being reduced. | 
|  | // | 
|  | // The major sources of inaccuracy are currently: | 
|  | // - Most validation rules not yet implemented for instructions with poison | 
|  | //   relavant flags.  At the moment, only nsw/nuw on add/sub are supported. | 
|  | // - UB which is control dependent on a branch on poison is not yet | 
|  | //   reported. Currently, only data flow dependence is modeled. | 
|  | // - Poison which is propagated through memory is not modeled.  As such, | 
|  | //   storing poison to memory and then reloading it will cause a false negative | 
|  | //   as we consider the reloaded value to not be poisoned. | 
|  | // - Poison propagation across function boundaries is not modeled.  At the | 
|  | //   moment, all arguments and return values are assumed not to be poison. | 
|  | // - Undef is not modeled.  In particular, the optimizer's freedom to pick | 
|  | //   concrete values for undef bits so as to maximize potential for producing | 
|  | //   poison is not modeled. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/Transforms/Instrumentation/PoisonChecking.h" | 
|  | #include "llvm/ADT/DenseMap.h" | 
|  | #include "llvm/ADT/Statistic.h" | 
|  | #include "llvm/Analysis/MemoryBuiltins.h" | 
|  | #include "llvm/Analysis/ValueTracking.h" | 
|  | #include "llvm/IR/InstVisitor.h" | 
|  | #include "llvm/IR/IntrinsicInst.h" | 
|  | #include "llvm/IR/IRBuilder.h" | 
|  | #include "llvm/IR/PatternMatch.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "poison-checking" | 
|  |  | 
|  | static cl::opt<bool> | 
|  | LocalCheck("poison-checking-function-local", | 
|  | cl::init(false), | 
|  | cl::desc("Check that returns are non-poison (for testing)")); | 
|  |  | 
|  |  | 
|  | static bool isConstantFalse(Value* V) { | 
|  | assert(V->getType()->isIntegerTy(1)); | 
|  | if (auto *CI = dyn_cast<ConstantInt>(V)) | 
|  | return CI->isZero(); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { | 
|  | if (Ops.size() == 0) | 
|  | return B.getFalse(); | 
|  | unsigned i = 0; | 
|  | for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {} | 
|  | if (i == Ops.size()) | 
|  | return B.getFalse(); | 
|  | Value *Accum = Ops[i++]; | 
|  | for (; i < Ops.size(); i++) | 
|  | if (!isConstantFalse(Ops[i])) | 
|  | Accum = B.CreateOr(Accum, Ops[i]); | 
|  | return Accum; | 
|  | } | 
|  |  | 
|  | static void generatePoisonChecksForBinOp(Instruction &I, | 
|  | SmallVector<Value*, 2> &Checks) { | 
|  | assert(isa<BinaryOperator>(I)); | 
|  |  | 
|  | IRBuilder<> B(&I); | 
|  | Value *LHS = I.getOperand(0); | 
|  | Value *RHS = I.getOperand(1); | 
|  | switch (I.getOpcode()) { | 
|  | default: | 
|  | return; | 
|  | case Instruction::Add: { | 
|  | if (I.hasNoSignedWrap()) { | 
|  | auto *OverflowOp = | 
|  | B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); | 
|  | Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); | 
|  | } | 
|  | if (I.hasNoUnsignedWrap()) { | 
|  | auto *OverflowOp = | 
|  | B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); | 
|  | Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); | 
|  | } | 
|  | break; | 
|  | } | 
|  | case Instruction::Sub: { | 
|  | if (I.hasNoSignedWrap()) { | 
|  | auto *OverflowOp = | 
|  | B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); | 
|  | Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); | 
|  | } | 
|  | if (I.hasNoUnsignedWrap()) { | 
|  | auto *OverflowOp = | 
|  | B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); | 
|  | Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); | 
|  | } | 
|  | break; | 
|  | } | 
|  | case Instruction::Mul: { | 
|  | if (I.hasNoSignedWrap()) { | 
|  | auto *OverflowOp = | 
|  | B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS); | 
|  | Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); | 
|  | } | 
|  | if (I.hasNoUnsignedWrap()) { | 
|  | auto *OverflowOp = | 
|  | B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS); | 
|  | Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); | 
|  | } | 
|  | break; | 
|  | } | 
|  | case Instruction::UDiv: { | 
|  | if (I.isExact()) { | 
|  | auto *Check = | 
|  | B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS), | 
|  | ConstantInt::get(LHS->getType(), 0)); | 
|  | Checks.push_back(Check); | 
|  | } | 
|  | break; | 
|  | } | 
|  | case Instruction::SDiv: { | 
|  | if (I.isExact()) { | 
|  | auto *Check = | 
|  | B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS), | 
|  | ConstantInt::get(LHS->getType(), 0)); | 
|  | Checks.push_back(Check); | 
|  | } | 
|  | break; | 
|  | } | 
|  | case Instruction::AShr: | 
|  | case Instruction::LShr: | 
|  | case Instruction::Shl: { | 
|  | Value *ShiftCheck = | 
|  | B.CreateICmp(ICmpInst::ICMP_UGE, RHS, | 
|  | ConstantInt::get(RHS->getType(), | 
|  | LHS->getType()->getScalarSizeInBits())); | 
|  | Checks.push_back(ShiftCheck); | 
|  | break; | 
|  | } | 
|  | }; | 
|  | } | 
|  |  | 
|  | static Value* generatePoisonChecks(Instruction &I) { | 
|  | IRBuilder<> B(&I); | 
|  | SmallVector<Value*, 2> Checks; | 
|  | if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy()) | 
|  | generatePoisonChecksForBinOp(I, Checks); | 
|  |  | 
|  | // Handle non-binops seperately | 
|  | switch (I.getOpcode()) { | 
|  | default: | 
|  | break; | 
|  | case Instruction::ExtractElement: { | 
|  | Value *Vec = I.getOperand(0); | 
|  | if (Vec->getType()->getVectorIsScalable()) | 
|  | break; | 
|  | Value *Idx = I.getOperand(1); | 
|  | unsigned NumElts = Vec->getType()->getVectorNumElements(); | 
|  | Value *Check = | 
|  | B.CreateICmp(ICmpInst::ICMP_UGE, Idx, | 
|  | ConstantInt::get(Idx->getType(), NumElts)); | 
|  | Checks.push_back(Check); | 
|  | break; | 
|  | } | 
|  | case Instruction::InsertElement: { | 
|  | Value *Vec = I.getOperand(0); | 
|  | if (Vec->getType()->getVectorIsScalable()) | 
|  | break; | 
|  | Value *Idx = I.getOperand(2); | 
|  | unsigned NumElts = Vec->getType()->getVectorNumElements(); | 
|  | Value *Check = | 
|  | B.CreateICmp(ICmpInst::ICMP_UGE, Idx, | 
|  | ConstantInt::get(Idx->getType(), NumElts)); | 
|  | Checks.push_back(Check); | 
|  | break; | 
|  | } | 
|  | }; | 
|  | return buildOrChain(B, Checks); | 
|  | } | 
|  |  | 
|  | static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { | 
|  | auto Itr = ValToPoison.find(V); | 
|  | if (Itr != ValToPoison.end()) | 
|  | return Itr->second; | 
|  | if (isa<Constant>(V)) { | 
|  | return ConstantInt::getFalse(V->getContext()); | 
|  | } | 
|  | // Return false for unknwon values - this implements a non-strict mode where | 
|  | // unhandled IR constructs are simply considered to never produce poison.  At | 
|  | // some point in the future, we probably want a "strict mode" for testing if | 
|  | // nothing else. | 
|  | return ConstantInt::getFalse(V->getContext()); | 
|  | } | 
|  |  | 
|  | static void CreateAssert(IRBuilder<> &B, Value *Cond) { | 
|  | assert(Cond->getType()->isIntegerTy(1)); | 
|  | if (auto *CI = dyn_cast<ConstantInt>(Cond)) | 
|  | if (CI->isAllOnesValue()) | 
|  | return; | 
|  |  | 
|  | Module *M = B.GetInsertBlock()->getModule(); | 
|  | M->getOrInsertFunction("__poison_checker_assert", | 
|  | Type::getVoidTy(M->getContext()), | 
|  | Type::getInt1Ty(M->getContext())); | 
|  | Function *TrapFunc = M->getFunction("__poison_checker_assert"); | 
|  | B.CreateCall(TrapFunc, Cond); | 
|  | } | 
|  |  | 
|  | static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { | 
|  | assert(Cond->getType()->isIntegerTy(1)); | 
|  | CreateAssert(B, B.CreateNot(Cond)); | 
|  | } | 
|  |  | 
|  | static bool rewrite(Function &F) { | 
|  | auto * const Int1Ty = Type::getInt1Ty(F.getContext()); | 
|  |  | 
|  | DenseMap<Value *, Value *> ValToPoison; | 
|  |  | 
|  | for (BasicBlock &BB : F) | 
|  | for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { | 
|  | auto *OldPHI = cast<PHINode>(&*I); | 
|  | auto *NewPHI = PHINode::Create(Int1Ty, | 
|  | OldPHI->getNumIncomingValues()); | 
|  | for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) | 
|  | NewPHI->addIncoming(UndefValue::get(Int1Ty), | 
|  | OldPHI->getIncomingBlock(i)); | 
|  | NewPHI->insertBefore(OldPHI); | 
|  | ValToPoison[OldPHI] = NewPHI; | 
|  | } | 
|  |  | 
|  | for (BasicBlock &BB : F) | 
|  | for (Instruction &I : BB) { | 
|  | if (isa<PHINode>(I)) continue; | 
|  |  | 
|  | IRBuilder<> B(cast<Instruction>(&I)); | 
|  |  | 
|  | // Note: There are many more sources of documented UB, but this pass only | 
|  | // attempts to find UB triggered by propagation of poison. | 
|  | if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I))) | 
|  | CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); | 
|  |  | 
|  | if (LocalCheck) | 
|  | if (auto *RI = dyn_cast<ReturnInst>(&I)) | 
|  | if (RI->getNumOperands() != 0) { | 
|  | Value *Op = RI->getOperand(0); | 
|  | CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); | 
|  | } | 
|  |  | 
|  | SmallVector<Value*, 4> Checks; | 
|  | if (propagatesFullPoison(&I)) | 
|  | for (Value *V : I.operands()) | 
|  | Checks.push_back(getPoisonFor(ValToPoison, V)); | 
|  |  | 
|  | if (auto *Check = generatePoisonChecks(I)) | 
|  | Checks.push_back(Check); | 
|  | ValToPoison[&I] = buildOrChain(B, Checks); | 
|  | } | 
|  |  | 
|  | for (BasicBlock &BB : F) | 
|  | for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { | 
|  | auto *OldPHI = cast<PHINode>(&*I); | 
|  | if (!ValToPoison.count(OldPHI)) | 
|  | continue; // skip the newly inserted phis | 
|  | auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]); | 
|  | for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { | 
|  | auto *OldVal = OldPHI->getIncomingValue(i); | 
|  | NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal)); | 
|  | } | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  |  | 
|  | PreservedAnalyses PoisonCheckingPass::run(Module &M, | 
|  | ModuleAnalysisManager &AM) { | 
|  | bool Changed = false; | 
|  | for (auto &F : M) | 
|  | Changed |= rewrite(F); | 
|  |  | 
|  | return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); | 
|  | } | 
|  |  | 
|  | PreservedAnalyses PoisonCheckingPass::run(Function &F, | 
|  | FunctionAnalysisManager &AM) { | 
|  | return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); | 
|  | } | 
|  |  | 
|  |  | 
|  | /* Major TODO Items: | 
|  | - Control dependent poison UB | 
|  | - Strict mode - (i.e. must analyze every operand) | 
|  | - Poison through memory | 
|  | - Function ABIs | 
|  | - Full coverage of intrinsics, etc.. (ouch) | 
|  |  | 
|  | Instructions w/Unclear Semantics: | 
|  | - shufflevector - It would seem reasonable for an out of bounds mask element | 
|  | to produce poison, but the LangRef does not state. | 
|  | - and/or - It would seem reasonable for poison to propagate from both | 
|  | arguments, but LangRef doesn't state and propagatesFullPoison doesn't | 
|  | include these two. | 
|  | - all binary ops w/vector operands - The likely interpretation would be that | 
|  | any element overflowing should produce poison for the entire result, but | 
|  | the LangRef does not state. | 
|  | - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems | 
|  | strange that only certian flags should be documented as producing poison. | 
|  |  | 
|  | Cases of clear poison semantics not yet implemented: | 
|  | - Exact flags on ashr/lshr produce poison | 
|  | - NSW/NUW flags on shl produce poison | 
|  | - Inbounds flag on getelementptr produce poison | 
|  | - fptosi/fptoui (out of bounds input) produce poison | 
|  | - Scalable vector types for insertelement/extractelement | 
|  | - Floating point binary ops w/fmf nnan/noinfs flags produce poison | 
|  | */ |