//===- FunctionComparator.cpp - Function Comparator ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the FunctionComparator and GlobalNumberState classes
// which are used by the MergeFunctions pass for comparing functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/FunctionComparator.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "functioncomparator"

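// Note: every cmp* helper below returns -1, 0 or 1, memcmp-style, so that the
// individual comparisons define a total order and can be chained with early
// returns.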
int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
  if (L < R) return -1;
  if (L > R) return 1;
  return 0;
}

int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
  if ((int)L < (int)R) return -1;
  if ((int)L > (int)R) return 1;
  return 0;
}

int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
  if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
    return Res;
  if (L.ugt(R)) return 1;
  if (R.ugt(L)) return -1;
  return 0;
}

int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
  // Floats are ordered first by semantics (i.e. float, double, half, etc.),
  // then by value interpreted as a bitstring (aka APInt).
  const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
  if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
                           APFloat::semanticsPrecision(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
                           APFloat::semanticsMaxExponent(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
                           APFloat::semanticsMinExponent(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
                           APFloat::semanticsSizeInBits(SR)))
    return Res;
  return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
}

int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
  // Prevent heavy comparison, compare sizes first.
  if (int Res = cmpNumbers(L.size(), R.size()))
    return Res;

  // Compare strings lexicographically only when it is necessary: only when
  // strings are equal in size.
  return L.compare(R);
}

int FunctionComparator::cmpAttrs(const AttributeList L,
                                 const AttributeList R) const {
  if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
    return Res;

  for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
    AttributeSet LAS = L.getAttributes(i);
    AttributeSet RAS = R.getAttributes(i);
    AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
    AttributeSet::iterator RI = RAS.begin(), RE = RAS.end();
    for (; LI != LE && RI != RE; ++LI, ++RI) {
      Attribute LA = *LI;
      Attribute RA = *RI;
      if (LA < RA)
        return -1;
      if (RA < LA)
        return 1;
    }
    if (LI != LE)
      return 1;
    if (RI != RE)
      return -1;
  }
  return 0;
}

int FunctionComparator::cmpRangeMetadata(const MDNode *L,
                                         const MDNode *R) const {
  if (L == R)
    return 0;
  if (!L)
    return -1;
  if (!R)
    return 1;
  // Range metadata is a sequence of numbers. Make sure they are the same
  // sequence.
  // TODO: Note that as this is metadata, it is possible to drop and/or merge
  // this data when considering functions to merge. Thus this comparison would
  // return 0 (i.e. equivalent), but merging would become more complicated
  // because the ranges would need to be unioned. It is not likely that
  // functions differ ONLY in this metadata if they are actually the same
  // function semantically.
  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;
  for (size_t I = 0; I < L->getNumOperands(); ++I) {
    ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
    ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
      return Res;
  }
  return 0;
}

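// Compare the operand-bundle schemas of two call sites: the number of bundles,
// each bundle's tag name, and each bundle's number of inputs. The bundle
// operands themselves are compared later as ordinary instruction operands.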
int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
                                                const Instruction *R) const {
  ImmutableCallSite LCS(L);
  ImmutableCallSite RCS(R);

  assert(LCS && RCS && "Must be calls or invokes!");
  assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");

  if (int Res =
          cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
    return Res;

  for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
    auto OBL = LCS.getOperandBundleAt(i);
    auto OBR = RCS.getOperandBundleAt(i);

    if (int Res = OBL.getTagName().compare(OBR.getTagName()))
      return Res;

    if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
      return Res;
  }

  return 0;
}

/// Constants comparison:
/// 1. Check whether type of L constant could be losslessly bitcasted to R
///    type.
/// 2. Compare constant contents.
/// For more details see declaration comments.
int FunctionComparator::cmpConstants(const Constant *L,
                                     const Constant *R) const {
  Type *TyL = L->getType();
  Type *TyR = R->getType();

  // Check whether types are bitcastable. This part is just re-factored
  // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
  // we also pack into result which type is "less" for us.
  int TypesRes = cmpTypes(TyL, TyR);
  if (TypesRes != 0) {
    // Types are different, but check whether we can bitcast them.
    if (!TyL->isFirstClassType()) {
      if (TyR->isFirstClassType())
        return -1;
      // Neither TyL nor TyR are values of first class type. Return the result
      // of comparing the types.
      return TypesRes;
    }
    if (!TyR->isFirstClassType()) {
      if (TyL->isFirstClassType())
        return 1;
      return TypesRes;
    }

    // Vector -> Vector conversions are always lossless if the two vector types
    // have the same size, otherwise not.
    unsigned TyLWidth = 0;
    unsigned TyRWidth = 0;

    if (auto *VecTyL = dyn_cast<VectorType>(TyL))
      TyLWidth = VecTyL->getBitWidth();
    if (auto *VecTyR = dyn_cast<VectorType>(TyR))
      TyRWidth = VecTyR->getBitWidth();

    if (TyLWidth != TyRWidth)
      return cmpNumbers(TyLWidth, TyRWidth);

    // Zero bit-width means neither TyL nor TyR are vectors.
    if (!TyLWidth) {
      PointerType *PTyL = dyn_cast<PointerType>(TyL);
      PointerType *PTyR = dyn_cast<PointerType>(TyR);
      if (PTyL && PTyR) {
        unsigned AddrSpaceL = PTyL->getAddressSpace();
        unsigned AddrSpaceR = PTyR->getAddressSpace();
        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
          return Res;
      }
      if (PTyL)
        return 1;
      if (PTyR)
        return -1;

      // TyL and TyR aren't vectors, nor pointers. We don't know how to
      // bitcast them.
      return TypesRes;
    }
  }

  // OK, types are bitcastable, now check constant contents.

  if (L->isNullValue() && R->isNullValue())
    return TypesRes;
  if (L->isNullValue() && !R->isNullValue())
    return 1;
  if (!L->isNullValue() && R->isNullValue())
    return -1;

  auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
  auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
  if (GlobalValueL && GlobalValueR) {
    return cmpGlobalValues(GlobalValueL, GlobalValueR);
  }

  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
    return Res;

  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
    const auto *SeqR = cast<ConstantDataSequential>(R);
    // This handles ConstantDataArray and ConstantDataVector. Note that we
    // compare the two raw data arrays, which might differ depending on the
    // host endianness. This isn't a problem though, because the endianness of
    // a module will affect the order of the constants, but this order is the
    // same for a given input module and host platform.
    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
  }

  switch (L->getValueID()) {
  case Value::UndefValueVal:
  case Value::ConstantTokenNoneVal:
    return TypesRes;
  case Value::ConstantIntVal: {
    const APInt &LInt = cast<ConstantInt>(L)->getValue();
    const APInt &RInt = cast<ConstantInt>(R)->getValue();
    return cmpAPInts(LInt, RInt);
  }
  case Value::ConstantFPVal: {
    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
    return cmpAPFloats(LAPF, RAPF);
  }
  case Value::ConstantArrayVal: {
    const ConstantArray *LA = cast<ConstantArray>(L);
    const ConstantArray *RA = cast<ConstantArray>(R);
    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
                                 cast<Constant>(RA->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantStructVal: {
    const ConstantStruct *LS = cast<ConstantStruct>(L);
    const ConstantStruct *RS = cast<ConstantStruct>(R);
    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (unsigned i = 0; i != NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
                                 cast<Constant>(RS->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantVectorVal: {
    const ConstantVector *LV = cast<ConstantVector>(L);
    const ConstantVector *RV = cast<ConstantVector>(R);
    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
                                 cast<Constant>(RV->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantExprVal: {
    const ConstantExpr *LE = cast<ConstantExpr>(L);
    const ConstantExpr *RE = cast<ConstantExpr>(R);
    unsigned NumOperandsL = LE->getNumOperands();
    unsigned NumOperandsR = RE->getNumOperands();
    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
      return Res;
    for (unsigned i = 0; i < NumOperandsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
                                 cast<Constant>(RE->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::BlockAddressVal: {
    const BlockAddress *LBA = cast<BlockAddress>(L);
    const BlockAddress *RBA = cast<BlockAddress>(R);
    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
      return Res;
    if (LBA->getFunction() == RBA->getFunction()) {
      // They are BBs in the same function. Order by which comes first in the
      // BB order of the function. This order is deterministic.
      Function *F = LBA->getFunction();
      BasicBlock *LBB = LBA->getBasicBlock();
      BasicBlock *RBB = RBA->getBasicBlock();
      if (LBB == RBB)
        return 0;
      for (BasicBlock &BB : F->getBasicBlockList()) {
        if (&BB == LBB) {
          assert(&BB != RBB);
          return -1;
        }
        if (&BB == RBB)
          return 1;
      }
      llvm_unreachable("Basic Block Address does not point to a basic block in "
                       "its function.");
      return -1;
    } else {
      // cmpValues said the functions are the same. So because they aren't
      // literally the same pointer, they must respectively be the left and
      // right functions.
      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
      // cmpValues will tell us if these are equivalent BasicBlocks, in the
      // context of their respective functions.
      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
    }
  }
  default: // Unknown constant, abort.
    LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
    llvm_unreachable("Constant ValueID not recognized.");
    return -1;
  }
}

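// Compare two global values by the serial numbers assigned to them by the
// shared GlobalNumberState, so the result does not depend on pointer values.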
int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
  uint64_t LNumber = GlobalNumbers->getNumber(L);
  uint64_t RNumber = GlobalNumbers->getNumber(R);
  return cmpNumbers(LNumber, RNumber);
}

/// cmpTypes - compares two types, defining a total ordering over the set of
/// types.
/// See method declaration comments for more details.
int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
  PointerType *PTyL = dyn_cast<PointerType>(TyL);
  PointerType *PTyR = dyn_cast<PointerType>(TyR);

  const DataLayout &DL = FnL->getParent()->getDataLayout();
  if (PTyL && PTyL->getAddressSpace() == 0)
    TyL = DL.getIntPtrType(TyL);
  if (PTyR && PTyR->getAddressSpace() == 0)
    TyR = DL.getIntPtrType(TyR);

  if (TyL == TyR)
    return 0;

  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
    return Res;

  switch (TyL->getTypeID()) {
  default:
    llvm_unreachable("Unknown type!");
  case Type::IntegerTyID:
    return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
                      cast<IntegerType>(TyR)->getBitWidth());
  // TyL == TyR would have returned true earlier, because types are uniqued.
  case Type::VoidTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
  case Type::MetadataTyID:
  case Type::TokenTyID:
    return 0;

  case Type::PointerTyID:
    assert(PTyL && PTyR && "Both types must be pointers here.");
    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());

  case Type::StructTyID: {
    StructType *STyL = cast<StructType>(TyL);
    StructType *STyR = cast<StructType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());

    if (STyL->isPacked() != STyR->isPacked())
      return cmpNumbers(STyL->isPacked(), STyR->isPacked());

    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
      if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
        return Res;
    }
    return 0;
  }

  case Type::FunctionTyID: {
    FunctionType *FTyL = cast<FunctionType>(TyL);
    FunctionType *FTyR = cast<FunctionType>(TyR);
    if (FTyL->getNumParams() != FTyR->getNumParams())
      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());

    if (FTyL->isVarArg() != FTyR->isVarArg())
      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());

    if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
      return Res;

    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
      if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
        return Res;
    }
    return 0;
  }

  case Type::ArrayTyID:
  case Type::VectorTyID: {
    auto *STyL = cast<SequentialType>(TyL);
    auto *STyR = cast<SequentialType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
    return cmpTypes(STyL->getElementType(), STyR->getElementType());
  }
  }
}

// Determine whether the two operations are the same except that pointer-to-A
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
// Read method declaration comments for more details.
int FunctionComparator::cmpOperations(const Instruction *L,
                                      const Instruction *R,
                                      bool &needToCmpOperands) const {
  needToCmpOperands = true;
  if (int Res = cmpValues(L, R))
    return Res;

  // Differences from Instruction::isSameOperationAs:
  // * replace type comparison with calls to cmpTypes.
  // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
  // * because of the above, we don't test for the tail bit on calls later on.
  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
    return Res;

  if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) {
    needToCmpOperands = false;
    const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R);
    if (int Res =
            cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
      return Res;
    return cmpGEPs(GEPL, GEPR);
  }

  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;

  if (int Res = cmpTypes(L->getType(), R->getType()))
    return Res;

  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
                           R->getRawSubclassOptionalData()))
    return Res;

  // We have two instructions of identical opcode and #operands. Check to see
  // if all operands are the same type.
  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
    if (int Res =
            cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
      return Res;
  }

  // Check special state that is a part of some instructions.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
    if (int Res = cmpTypes(AI->getAllocatedType(),
                           cast<AllocaInst>(R)->getAllocatedType()))
      return Res;
    return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
  }
  if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
    if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
      return Res;
    if (int Res = cmpNumbers(LI->getSyncScopeID(),
                             cast<LoadInst>(R)->getSyncScopeID()))
      return Res;
    return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
        cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
  }
  if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
    if (int Res =
            cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(SI->getSyncScopeID(),
                      cast<StoreInst>(R)->getSyncScopeID());
  }
  if (const CmpInst *CI = dyn_cast<CmpInst>(L))
    return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
  if (auto CSL = CallSite(const_cast<Instruction *>(L))) {
    auto CSR = CallSite(const_cast<Instruction *>(R));
    if (int Res = cmpNumbers(CSL.getCallingConv(), CSR.getCallingConv()))
      return Res;
    if (int Res = cmpAttrs(CSL.getAttributes(), CSR.getAttributes()))
      return Res;
    if (int Res = cmpOperandBundlesSchema(L, R))
      return Res;
    if (const CallInst *CI = dyn_cast<CallInst>(L))
      if (int Res = cmpNumbers(CI->getTailCallKind(),
                               cast<CallInst>(R)->getTailCallKind()))
        return Res;
    return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range),
                            R->getMetadata(LLVMContext::MD_range));
  }
  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
    ArrayRef<unsigned> LIndices = IVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
    return 0;
  }
  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
    ArrayRef<unsigned> LIndices = EVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
  }
  if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
    if (int Res =
            cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(FI->getSyncScopeID(),
                      cast<FenceInst>(R)->getSyncScopeID());
  }
  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
    if (int Res = cmpNumbers(CXI->isVolatile(),
                             cast<AtomicCmpXchgInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpNumbers(CXI->isWeak(),
                             cast<AtomicCmpXchgInst>(R)->isWeak()))
      return Res;
    if (int Res =
            cmpOrderings(CXI->getSuccessOrdering(),
                         cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
      return Res;
    if (int Res =
            cmpOrderings(CXI->getFailureOrdering(),
                         cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
      return Res;
    return cmpNumbers(CXI->getSyncScopeID(),
                      cast<AtomicCmpXchgInst>(R)->getSyncScopeID());
  }
  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
    if (int Res = cmpNumbers(RMWI->getOperation(),
                             cast<AtomicRMWInst>(R)->getOperation()))
      return Res;
    if (int Res = cmpNumbers(RMWI->isVolatile(),
                             cast<AtomicRMWInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpOrderings(RMWI->getOrdering(),
                               cast<AtomicRMWInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(RMWI->getSyncScopeID(),
                      cast<AtomicRMWInst>(R)->getSyncScopeID());
  }
  if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
    const PHINode *PNR = cast<PHINode>(R);
    // Ensure that in addition to the incoming values being identical
    // (checked by the caller of this function), the incoming blocks
    // are also identical.
    for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
      if (int Res =
              cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
        return Res;
    }
  }
  return 0;
}

// Determine whether two GEP operations perform the same underlying arithmetic.
// Read method declaration comments for more details.
int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
                                const GEPOperator *GEPR) const {
  unsigned int ASL = GEPL->getPointerAddressSpace();
  unsigned int ASR = GEPR->getPointerAddressSpace();

  if (int Res = cmpNumbers(ASL, ASR))
    return Res;

  // When we have target data, we can reduce the GEP down to the value in bytes
  // added to the address.
  const DataLayout &DL = FnL->getParent()->getDataLayout();
  unsigned BitWidth = DL.getPointerSizeInBits(ASL);
  APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
  if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
      GEPR->accumulateConstantOffset(DL, OffsetR))
    return cmpAPInts(OffsetL, OffsetR);
  if (int Res = cmpTypes(GEPL->getSourceElementType(),
                         GEPR->getSourceElementType()))
    return Res;

  if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
    return Res;

  for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
      return Res;
  }

  return 0;
}

int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
                                     const InlineAsm *R) const {
  // InlineAsm's are uniqued. If they are the same pointer, obviously they are
  // the same, otherwise compare the fields.
  if (L == R)
    return 0;
  if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
    return Res;
  if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
    return Res;
  if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
    return Res;
  if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
    return Res;
  if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
    return Res;
  if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
    return Res;
  assert(L->getFunctionType() != R->getFunctionType());
  return 0;
}

/// Compare two values used by the two functions under pair-wise comparison. If
/// this is the first time the values are seen, they're added to the mapping so
/// that we will detect mismatches on next use.
/// See comments in declaration for more details.
int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
  // Catch self-reference case.
  if (L == FnL) {
    if (R == FnR)
      return 0;
    return -1;
  }
  if (R == FnR) {
    if (L == FnL)
      return 0;
    return 1;
  }

  const Constant *ConstL = dyn_cast<Constant>(L);
  const Constant *ConstR = dyn_cast<Constant>(R);
  if (ConstL && ConstR) {
    if (L == R)
      return 0;
    return cmpConstants(ConstL, ConstR);
  }

  if (ConstL)
    return 1;
  if (ConstR)
    return -1;

  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);

  if (InlineAsmL && InlineAsmR)
    return cmpInlineAsm(InlineAsmL, InlineAsmR);
  if (InlineAsmL)
    return 1;
  if (InlineAsmR)
    return -1;

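  // Neither side is a constant or inline asm, so fall back to the order in
  // which the values were first seen: each side assigns serial numbers from
  // its own map, and corresponding values in equivalent functions end up with
  // equal numbers.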
  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));

  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}

// Test whether two basic blocks have equivalent behaviour.
int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
                                       const BasicBlock *BBR) const {
  BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
  BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();

  do {
    bool needToCmpOperands = true;
    if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands))
      return Res;
    if (needToCmpOperands) {
      assert(InstL->getNumOperands() == InstR->getNumOperands());

      for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
        Value *OpL = InstL->getOperand(i);
        Value *OpR = InstR->getOperand(i);
        if (int Res = cmpValues(OpL, OpR))
          return Res;
        // cmpValues should ensure this is true.
        assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
      }
    }

    ++InstL;
    ++InstR;
  } while (InstL != InstLE && InstR != InstRE);

  if (InstL != InstLE && InstR == InstRE)
    return 1;
  if (InstL == InstLE && InstR != InstRE)
    return -1;
  return 0;
}

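// Compare everything that makes up the functions' "signatures": attributes,
// GC, section, variadicity, calling convention and function type. Arguments
// are also enumerated here so that later cmpValues() calls on their uses line
// up correctly.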
int FunctionComparator::compareSignature() const {
  if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
    return Res;

  if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
    return Res;

  if (FnL->hasGC()) {
    if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
      return Res;
  }

  if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
    return Res;

  if (FnL->hasSection()) {
    if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
      return Res;
  }

  if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
    return Res;

  // TODO: if it's internal and only used in direct calls, we could handle this
  // case too.
  if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
    return Res;

  if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
    return Res;

  assert(FnL->arg_size() == FnR->arg_size() &&
         "Identically typed functions have different numbers of args!");

  // Visit the arguments so that they get enumerated in the order they're
  // passed in.
  for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
                                    ArgRI = FnR->arg_begin(),
                                    ArgLE = FnL->arg_end();
       ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
    if (cmpValues(&*ArgLI, &*ArgRI) != 0)
      llvm_unreachable("Arguments repeat!");
  }
  return 0;
}

// Test whether the two functions have equivalent behaviour.
int FunctionComparator::compare() {
  beginCompare();

  if (int Res = compareSignature())
    return Res;

  // We do a CFG-ordered walk since the actual ordering of the blocks in the
  // linked list is immaterial. Our walk starts at the entry block for both
  // functions, then takes each block from each terminator in order. As an
  // artifact, this also means that unreachable blocks are ignored.
  SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
  SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.

  FnLBBs.push_back(&FnL->getEntryBlock());
  FnRBBs.push_back(&FnR->getEntryBlock());

  VisitedBBs.insert(FnLBBs[0]);
  while (!FnLBBs.empty()) {
    const BasicBlock *BBL = FnLBBs.pop_back_val();
    const BasicBlock *BBR = FnRBBs.pop_back_val();

    if (int Res = cmpValues(BBL, BBR))
      return Res;

    if (int Res = cmpBasicBlocks(BBL, BBR))
      return Res;

    const Instruction *TermL = BBL->getTerminator();
    const Instruction *TermR = BBR->getTerminator();

    assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
    for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
        continue;

      FnLBBs.push_back(TermL->getSuccessor(i));
      FnRBBs.push_back(TermR->getSuccessor(i));
    }
  }
  return 0;
}

namespace {

// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
// hash of a sequence of 64-bit ints, but the entire input does not need to be
// available at once. This interface is necessary for functionHash because it
// needs to accumulate the hash as the structure of the function is traversed
// without saving these values to an intermediate buffer. This form of hashing
// is not often needed, as usually the object to hash is just read from a
// buffer.
class HashAccumulator64 {
  uint64_t Hash;

public:
  // Initialize to random constant, so the state isn't zero.
  HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }

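  // Mix another 64-bit value into the running hash, reusing the 16-byte
  // mixing routine from llvm/ADT/Hashing.h.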
  void add(uint64_t V) {
    Hash = hashing::detail::hash_16_bytes(Hash, V);
  }

  // No finishing is required, because the entire hash value is used.
  uint64_t getHash() { return Hash; }
};

} // end anonymous namespace

// A function hash is calculated by considering only the number of arguments
// and whether a function is varargs, the order of basic blocks (given by the
// successors of each basic block in depth first order), and the order of
// opcodes of each instruction within each of these basic blocks. This mirrors
// the strategy compare() uses to compare functions by walking the BBs in depth
// first order and comparing each instruction in sequence. Because this hash
// does not look at the operands, it is insensitive to things such as the
// target of calls and the constants used in the function, which makes it
// useful when possibly merging functions which are the same modulo constants
// and call targets.
FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
  HashAccumulator64 H;
  H.add(F.isVarArg());
  H.add(F.arg_size());

  SmallVector<const BasicBlock *, 8> BBs;
  SmallPtrSet<const BasicBlock *, 16> VisitedBBs;

  // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
  // accumulating the hash of the function "structure." (BB and opcode
  // sequence)
  BBs.push_back(&F.getEntryBlock());
  VisitedBBs.insert(BBs[0]);
  while (!BBs.empty()) {
    const BasicBlock *BB = BBs.pop_back_val();
    // This random value acts as a block header, as otherwise the partition of
    // opcodes into BBs wouldn't affect the hash, only the order of the
    // opcodes.
    H.add(45798);
    for (auto &Inst : *BB) {
      H.add(Inst.getOpcode());
    }
    const Instruction *Term = BB->getTerminator();
    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
        continue;
      BBs.push_back(Term->getSuccessor(i));
    }
  }
  return H.getHash();
}