Blame - llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp - toolchain/llvm-project

blob: 81d92e724c7d2dde4a3c496c2801f9a650fcbe0b [file] [log] [blame]

Philip Reames	f47a313	2019-07-09 18:49:29 +0000	[diff] [blame]	1	//===- PoisonChecking.cpp - -----------------------------------------------===//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
				8	//
				9	// Implements a transform pass which instruments IR such that poison semantics
				10	// are made explicit. That is, it provides a (possibly partial) executable
				11	// semantics for every instruction w.r.t. poison as specified in the LLVM
				12	// LangRef. There are obvious parallels to the sanitizer tools, but this pass
				13	// is focused purely on the semantics of LLVM IR, not any particular source
				14	// language. If you're looking for something to see if your C/C++ contains
				15	// UB, this is not it.
				16	//
				17	// The rewritten semantics of each instruction will include the following
				18	// components:
				19	//
				20	// 1) The original instruction, unmodified.
				21	// 2) A propagation rule which translates dynamic information about the poison
				22	// state of each input to whether the dynamic output of the instruction
				23	// produces poison.
				24	// 3) A flag validation rule which validates any poison producing flags on the
				25	// instruction itself (e.g. checks for overflow on nsw).
				26	// 4) A check rule which traps (to a handler function) if this instruction must
				27	// execute undefined behavior given the poison state of its inputs.
				28	//
				29	// At the moment, the UB detection is done in a best effort manner; that is,
				30	// the resulting code may produce a false negative result (not report UB when
				31	// it actually exists according to the LangRef spec), but should never produce
				32	// a false positive (report UB where it doesn't exist). The intention is to
				33	// eventually support a "strict" mode which never dynamically reports a false
				34	// negative at the cost of rejecting some valid inputs to translation.
				35	//
				36	// Use cases for this pass include:
				37	// - Understanding (and testing!) the implications of the definition of poison
				38	// from the LangRef.
				39	// - Validating the output of an IR fuzzer to ensure that all programs produced
				40	// are well defined on the specific input used.
				41	// - Finding/confirming poison specific miscompiles by checking the poison
				42	// status of an input/IR pair is the same before and after an optimization
				43	// transform.
				44	// - Checking that a bugpoint reduction does not introduce UB which didn't
				45	// exist in the original program being reduced.
				46	//
				47	// The major sources of inaccuracy are currently:
				48	// - Most validation rules not yet implemented for instructions with poison
				49	// relevant flags. So far: nsw/nuw on add/sub/mul, exact on udiv/sdiv.
				50	// - UB which is control dependent on a branch on poison is not yet
				51	// reported. Currently, only data flow dependence is modeled.
				52	// - Poison which is propagated through memory is not modeled. As such,
				53	// storing poison to memory and then reloading it will cause a false negative
				54	// as we consider the reloaded value to not be poisoned.
				55	// - Poison propagation across function boundaries is not modeled. At the
				56	// moment, all arguments and return values are assumed not to be poison.
				57	// - Undef is not modeled. In particular, the optimizer's freedom to pick
				58	// concrete values for undef bits so as to maximize potential for producing
				59	// poison is not modeled.
				60	//
				61	//===----------------------------------------------------------------------===//
				62
				63	#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
				64	#include "llvm/ADT/DenseMap.h"
				65	#include "llvm/ADT/Statistic.h"
				66	#include "llvm/Analysis/MemoryBuiltins.h"
				67	#include "llvm/Analysis/ValueTracking.h"
				68	#include "llvm/IR/InstVisitor.h"
				69	#include "llvm/IR/IntrinsicInst.h"
				70	#include "llvm/IR/IRBuilder.h"
				71	#include "llvm/IR/PatternMatch.h"
				72	#include "llvm/Support/Debug.h"
				73
				74	using namespace llvm;
				75
				76	#define DEBUG_TYPE "poison-checking"
				77
				78	static cl::opt<bool>
				79	LocalCheck("poison-checking-function-local",
				80	cl::init(false),
				81	cl::desc("Check that returns are non-poison (for testing)"));
				82
				83
				84	static bool isConstantFalse(Value* V) {
				85	assert(V->getType()->isIntegerTy(1));
				86	if (auto *CI = dyn_cast<ConstantInt>(V))
				87	return CI->isZero();
				88	return false;
				89	}
				90
				91	static Value buildOrChain(IRBuilder<> &B, ArrayRef<Value> Ops) {
				92	if (Ops.size() == 0)
				93	return B.getFalse();
				94	unsigned i = 0;
				95	for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
				96	if (i == Ops.size())
				97	return B.getFalse();
				98	Value *Accum = Ops[i++];
				99	for (; i < Ops.size(); i++)
				100	if (!isConstantFalse(Ops[i]))
				101	Accum = B.CreateOr(Accum, Ops[i]);
				102	return Accum;
				103	}
				104
				105	static void generatePoisonChecksForBinOp(Instruction &I,
				106	SmallVector<Value*, 2> &Checks) {
				107	assert(isa<BinaryOperator>(I));
				108
				109	IRBuilder<> B(&I);
				110	Value *LHS = I.getOperand(0);
				111	Value *RHS = I.getOperand(1);
				112	switch (I.getOpcode()) {
				113	default:
				114	return;
				115	case Instruction::Add: {
				116	if (I.hasNoSignedWrap()) {
				117	auto *OverflowOp =
				118	B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
				119	Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
				120	}
				121	if (I.hasNoUnsignedWrap()) {
				122	auto *OverflowOp =
				123	B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
				124	Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
				125	}
				126	break;
				127	}
				128	case Instruction::Sub: {
				129	if (I.hasNoSignedWrap()) {
				130	auto *OverflowOp =
				131	B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
				132	Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
				133	}
				134	if (I.hasNoUnsignedWrap()) {
				135	auto *OverflowOp =
				136	B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
				137	Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
				138	}
				139	break;
				140	}
				141	case Instruction::Mul: {
				142	if (I.hasNoSignedWrap()) {
				143	auto *OverflowOp =
				144	B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
				145	Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
				146	}
				147	if (I.hasNoUnsignedWrap()) {
				148	auto *OverflowOp =
				149	B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
				150	Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
				151	}
				152	break;
				153	}
Philip Reames	3b38b92	2019-07-09 18:56:41 +0000	[diff] [blame]	154	case Instruction::UDiv: {
				155	if (I.isExact()) {
				156	auto *Check =
				157	B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
				158	ConstantInt::get(LHS->getType(), 0));
				159	Checks.push_back(Check);
				160	}
				161	break;
				162	}
				163	case Instruction::SDiv: {
				164	if (I.isExact()) {
				165	auto *Check =
				166	B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
				167	ConstantInt::get(LHS->getType(), 0));
				168	Checks.push_back(Check);
				169	}
				170	break;
				171	}
Philip Reames	3dbd7e9	2019-07-09 19:26:12 +0000	[diff] [blame]	172	case Instruction::AShr:
				173	case Instruction::LShr:
				174	case Instruction::Shl: {
				175	Value *ShiftCheck =
				176	B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
				177	ConstantInt::get(RHS->getType(),
				178	LHS->getType()->getScalarSizeInBits()));
				179	Checks.push_back(ShiftCheck);
				180	break;
				181	}
Philip Reames	f47a313	2019-07-09 18:49:29 +0000	[diff] [blame]	182	};
				183	}
				184
				185	static Value* generatePoisonChecks(Instruction &I) {
				186	IRBuilder<> B(&I);
				187	SmallVector<Value*, 2> Checks;
Philip Reames	3dbd7e9	2019-07-09 19:26:12 +0000	[diff] [blame]	188	if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
Philip Reames	f47a313	2019-07-09 18:49:29 +0000	[diff] [blame]	189	generatePoisonChecksForBinOp(I, Checks);
Philip Reames	3dbd7e9	2019-07-09 19:26:12 +0000	[diff] [blame]	190
				191	// Handle non-binops seperately
				192	switch (I.getOpcode()) {
				193	default:
				194	break;
				195	case Instruction::ExtractElement: {
				196	Value *Vec = I.getOperand(0);
				197	if (Vec->getType()->getVectorIsScalable())
				198	break;
				199	Value *Idx = I.getOperand(1);
				200	unsigned NumElts = Vec->getType()->getVectorNumElements();
				201	Value *Check =
				202	B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
				203	ConstantInt::get(Idx->getType(), NumElts));
				204	Checks.push_back(Check);
				205	break;
				206	}
				207	case Instruction::InsertElement: {
				208	Value *Vec = I.getOperand(0);
				209	if (Vec->getType()->getVectorIsScalable())
				210	break;
				211	Value *Idx = I.getOperand(2);
				212	unsigned NumElts = Vec->getType()->getVectorNumElements();
				213	Value *Check =
				214	B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
				215	ConstantInt::get(Idx->getType(), NumElts));
				216	Checks.push_back(Check);
				217	break;
				218	}
				219	};
Philip Reames	f47a313	2019-07-09 18:49:29 +0000	[diff] [blame]	220	return buildOrChain(B, Checks);
				221	}
				222
				223	static Value getPoisonFor(DenseMap<Value , Value > &ValToPoison, Value V) {
				224	auto Itr = ValToPoison.find(V);
				225	if (Itr != ValToPoison.end())
				226	return Itr->second;
				227	if (isa<Constant>(V)) {
				228	return ConstantInt::getFalse(V->getContext());
				229	}
				230	// Return false for unknwon values - this implements a non-strict mode where
				231	// unhandled IR constructs are simply considered to never produce poison. At
				232	// some point in the future, we probably want a "strict mode" for testing if
				233	// nothing else.
				234	return ConstantInt::getFalse(V->getContext());
				235	}
				236
				237	static void CreateAssert(IRBuilder<> &B, Value *Cond) {
				238	assert(Cond->getType()->isIntegerTy(1));
				239	if (auto *CI = dyn_cast<ConstantInt>(Cond))
				240	if (CI->isAllOnesValue())
				241	return;
				242
				243	Module *M = B.GetInsertBlock()->getModule();
				244	M->getOrInsertFunction("__poison_checker_assert",
				245	Type::getVoidTy(M->getContext()),
				246	Type::getInt1Ty(M->getContext()));
				247	Function *TrapFunc = M->getFunction("__poison_checker_assert");
				248	B.CreateCall(TrapFunc, Cond);
				249	}
				250
				251	static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
				252	assert(Cond->getType()->isIntegerTy(1));
				253	CreateAssert(B, B.CreateNot(Cond));
				254	}
				255
				256	static bool rewrite(Function &F) {
				257	auto * const Int1Ty = Type::getInt1Ty(F.getContext());
				258
				259	DenseMap<Value , Value > ValToPoison;
				260
				261	for (BasicBlock &BB : F)
				262	for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
				263	auto OldPHI = cast<PHINode>(&I);
				264	auto *NewPHI = PHINode::Create(Int1Ty,
				265	OldPHI->getNumIncomingValues());
				266	for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
				267	NewPHI->addIncoming(UndefValue::get(Int1Ty),
				268	OldPHI->getIncomingBlock(i));
				269	NewPHI->insertBefore(OldPHI);
				270	ValToPoison[OldPHI] = NewPHI;
				271	}
				272
				273	for (BasicBlock &BB : F)
				274	for (Instruction &I : BB) {
				275	if (isa<PHINode>(I)) continue;
				276
				277	IRBuilder<> B(cast<Instruction>(&I));
Philip Reames	a6548d0	2019-07-09 19:59:39 +0000	[diff] [blame]	278
				279	// Note: There are many more sources of documented UB, but this pass only
				280	// attempts to find UB triggered by propagation of poison.
Philip Reames	f47a313	2019-07-09 18:49:29 +0000	[diff] [blame]	281	if (Value Op = const_cast<Value>(getGuaranteedNonFullPoisonOp(&I)))
				282	CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
				283
				284	if (LocalCheck)
				285	if (auto *RI = dyn_cast<ReturnInst>(&I))
				286	if (RI->getNumOperands() != 0) {
				287	Value *Op = RI->getOperand(0);
				288	CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
				289	}
				290
				291	SmallVector<Value*, 4> Checks;
				292	if (propagatesFullPoison(&I))
				293	for (Value *V : I.operands())
				294	Checks.push_back(getPoisonFor(ValToPoison, V));
				295
				296	if (auto *Check = generatePoisonChecks(I))
				297	Checks.push_back(Check);
				298	ValToPoison[&I] = buildOrChain(B, Checks);
				299	}
				300
				301	for (BasicBlock &BB : F)
				302	for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
				303	auto OldPHI = cast<PHINode>(&I);
				304	if (!ValToPoison.count(OldPHI))
				305	continue; // skip the newly inserted phis
				306	auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
				307	for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
				308	auto *OldVal = OldPHI->getIncomingValue(i);
				309	NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
				310	}
				311	}
				312	return true;
				313	}
				314
				315
				316	PreservedAnalyses PoisonCheckingPass::run(Module &M,
				317	ModuleAnalysisManager &AM) {
				318	bool Changed = false;
				319	for (auto &F : M)
				320	Changed \|= rewrite(F);
				321
				322	return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
				323	}
				324
				325	PreservedAnalyses PoisonCheckingPass::run(Function &F,
				326	FunctionAnalysisManager &AM) {
				327	return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
				328	}
				329
				330
				331	/* Major TODO Items:
				332	- Control dependent poison UB
				333	- Strict mode - (i.e. must analyze every operand)
				334	- Poison through memory
				335	- Function ABIs
Philip Reames	a6548d0	2019-07-09 19:59:39 +0000	[diff] [blame]	336	- Full coverage of intrinsics, etc.. (ouch)
				337
				338	Instructions w/Unclear Semantics:
				339	- shufflevector - It would seem reasonable for an out of bounds mask element
				340	to produce poison, but the LangRef does not state.
				341	- and/or - It would seem reasonable for poison to propagate from both
				342	arguments, but LangRef doesn't state and propagatesFullPoison doesn't
				343	include these two.
				344	- all binary ops w/vector operands - The likely interpretation would be that
				345	any element overflowing should produce poison for the entire result, but
				346	the LangRef does not state.
				347	- Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
				348	strange that only certain flags should be documented as producing poison.
				349
				350	Cases of clear poison semantics not yet implemented:
				351	- Exact flags on ashr/lshr produce poison
				352	- NSW/NUW flags on shl produce poison
				353	- Inbounds flag on getelementptr produce poison
				354	- fptosi/fptoui (out of bounds input) produce poison
				355	- Scalable vector types for insertelement/extractelement
				356	- Floating point binary ops w/fmf nnan/noinfs flags produce poison
Philip Reames	f47a313	2019-07-09 18:49:29 +0000	[diff] [blame]	357	*/