Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 1 | //=======- PaddingChecker.cpp ------------------------------------*- C++ -*-==// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines a checker that checks for padding that could be |
| 10 | // removed by re-ordering members. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
Kristof Umann | 76a2150 | 2018-12-15 16:23:51 +0000 | [diff] [blame] | 14 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 15 | #include "clang/AST/CharUnits.h" |
| 16 | #include "clang/AST/DeclTemplate.h" |
| 17 | #include "clang/AST/RecordLayout.h" |
| 18 | #include "clang/AST/RecursiveASTVisitor.h" |
Kristof Umann | 748c139 | 2019-03-08 16:00:42 +0000 | [diff] [blame] | 19 | #include "clang/Driver/DriverDiagnostic.h" |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 20 | #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" |
| 21 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
| 22 | #include "clang/StaticAnalyzer/Core/Checker.h" |
| 23 | #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" |
| 24 | #include "llvm/ADT/SmallString.h" |
| 25 | #include "llvm/Support/MathExtras.h" |
| 26 | #include "llvm/Support/raw_ostream.h" |
| 27 | #include <numeric> |
| 28 | |
| 29 | using namespace clang; |
| 30 | using namespace ento; |
| 31 | |
| 32 | namespace { |
| 33 | class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> { |
| 34 | private: |
| 35 | mutable std::unique_ptr<BugType> PaddingBug; |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 36 | mutable BugReporter *BR; |
| 37 | |
| 38 | public: |
Kristof Umann | 088b1c9 | 2019-03-04 00:28:16 +0000 | [diff] [blame] | 39 | int64_t AllowedPad; |
| 40 | |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 41 | void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR, |
| 42 | BugReporter &BRArg) const { |
| 43 | BR = &BRArg; |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 44 | |
| 45 | // The calls to checkAST* from AnalysisConsumer don't |
| 46 | // visit template instantiations or lambda classes. We |
| 47 | // want to visit those, so we make our own RecursiveASTVisitor. |
| 48 | struct LocalVisitor : public RecursiveASTVisitor<LocalVisitor> { |
| 49 | const PaddingChecker *Checker; |
| 50 | bool shouldVisitTemplateInstantiations() const { return true; } |
| 51 | bool shouldVisitImplicitCode() const { return true; } |
| 52 | explicit LocalVisitor(const PaddingChecker *Checker) : Checker(Checker) {} |
| 53 | bool VisitRecordDecl(const RecordDecl *RD) { |
| 54 | Checker->visitRecord(RD); |
| 55 | return true; |
| 56 | } |
| 57 | bool VisitVarDecl(const VarDecl *VD) { |
| 58 | Checker->visitVariable(VD); |
| 59 | return true; |
| 60 | } |
| 61 | // TODO: Visit array new and mallocs for arrays. |
| 62 | }; |
| 63 | |
| 64 | LocalVisitor visitor(this); |
| 65 | visitor.TraverseDecl(const_cast<TranslationUnitDecl *>(TUD)); |
| 66 | } |
| 67 | |
Adrian Prantl | 9fc8faf | 2018-05-09 01:00:01 +0000 | [diff] [blame] | 68 | /// Look for records of overly padded types. If padding * |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 69 | /// PadMultiplier exceeds AllowedPad, then generate a report. |
| 70 | /// PadMultiplier is used to share code with the array padding |
| 71 | /// checker. |
| 72 | void visitRecord(const RecordDecl *RD, uint64_t PadMultiplier = 1) const { |
| 73 | if (shouldSkipDecl(RD)) |
| 74 | return; |
| 75 | |
Alexander Shaposhnikov | e2f0734 | 2018-10-30 01:20:37 +0000 | [diff] [blame] | 76 | // TODO: Figure out why we are going through declarations and not only |
| 77 | // definitions. |
| 78 | if (!(RD = RD->getDefinition())) |
| 79 | return; |
| 80 | |
| 81 | // This is the simplest correct case: a class with no fields and one base |
| 82 | // class. Other cases are more complicated because of how the base classes |
| 83 | // & fields might interact, so we don't bother dealing with them. |
| 84 | // TODO: Support other combinations of base classes and fields. |
| 85 | if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) |
| 86 | if (CXXRD->field_empty() && CXXRD->getNumBases() == 1) |
| 87 | return visitRecord(CXXRD->bases().begin()->getType()->getAsRecordDecl(), |
| 88 | PadMultiplier); |
| 89 | |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 90 | auto &ASTContext = RD->getASTContext(); |
| 91 | const ASTRecordLayout &RL = ASTContext.getASTRecordLayout(RD); |
| 92 | assert(llvm::isPowerOf2_64(RL.getAlignment().getQuantity())); |
| 93 | |
| 94 | CharUnits BaselinePad = calculateBaselinePad(RD, ASTContext, RL); |
| 95 | if (BaselinePad.isZero()) |
| 96 | return; |
| 97 | |
| 98 | CharUnits OptimalPad; |
| 99 | SmallVector<const FieldDecl *, 20> OptimalFieldsOrder; |
| 100 | std::tie(OptimalPad, OptimalFieldsOrder) = |
| 101 | calculateOptimalPad(RD, ASTContext, RL); |
| 102 | |
| 103 | CharUnits DiffPad = PadMultiplier * (BaselinePad - OptimalPad); |
| 104 | if (DiffPad.getQuantity() <= AllowedPad) { |
| 105 | assert(!DiffPad.isNegative() && "DiffPad should not be negative"); |
| 106 | // There is not enough excess padding to trigger a warning. |
| 107 | return; |
| 108 | } |
| 109 | reportRecord(RD, BaselinePad, OptimalPad, OptimalFieldsOrder); |
| 110 | } |
| 111 | |
Adrian Prantl | 9fc8faf | 2018-05-09 01:00:01 +0000 | [diff] [blame] | 112 | /// Look for arrays of overly padded types. If the padding of the |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 113 | /// array type exceeds AllowedPad, then generate a report. |
| 114 | void visitVariable(const VarDecl *VD) const { |
| 115 | const ArrayType *ArrTy = VD->getType()->getAsArrayTypeUnsafe(); |
| 116 | if (ArrTy == nullptr) |
| 117 | return; |
| 118 | uint64_t Elts = 0; |
| 119 | if (const ConstantArrayType *CArrTy = dyn_cast<ConstantArrayType>(ArrTy)) |
| 120 | Elts = CArrTy->getSize().getZExtValue(); |
| 121 | if (Elts == 0) |
| 122 | return; |
| 123 | const RecordType *RT = ArrTy->getElementType()->getAs<RecordType>(); |
| 124 | if (RT == nullptr) |
| 125 | return; |
| 126 | |
Alexander Shaposhnikov | e2f0734 | 2018-10-30 01:20:37 +0000 | [diff] [blame] | 127 | // TODO: Recurse into the fields to see if they have excess padding. |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 128 | visitRecord(RT->getDecl(), Elts); |
| 129 | } |
| 130 | |
| 131 | bool shouldSkipDecl(const RecordDecl *RD) const { |
Alexander Shaposhnikov | e2f0734 | 2018-10-30 01:20:37 +0000 | [diff] [blame] | 132 | // TODO: Figure out why we are going through declarations and not only |
| 133 | // definitions. |
| 134 | if (!(RD = RD->getDefinition())) |
| 135 | return true; |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 136 | auto Location = RD->getLocation(); |
| 137 | // If the construct doesn't have a source file, then it's not something |
| 138 | // we want to diagnose. |
| 139 | if (!Location.isValid()) |
| 140 | return true; |
| 141 | SrcMgr::CharacteristicKind Kind = |
| 142 | BR->getSourceManager().getFileCharacteristic(Location); |
| 143 | // Throw out all records that come from system headers. |
| 144 | if (Kind != SrcMgr::C_User) |
| 145 | return true; |
| 146 | |
| 147 | // Not going to attempt to optimize unions. |
| 148 | if (RD->isUnion()) |
| 149 | return true; |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 150 | if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { |
| 151 | // Tail padding with base classes ends up being very complicated. |
Alexander Shaposhnikov | e2f0734 | 2018-10-30 01:20:37 +0000 | [diff] [blame] | 152 | // We will skip objects with base classes for now, unless they do not |
| 153 | // have fields. |
| 154 | // TODO: Handle more base class scenarios. |
| 155 | if (!CXXRD->field_empty() && CXXRD->getNumBases() != 0) |
| 156 | return true; |
| 157 | if (CXXRD->field_empty() && CXXRD->getNumBases() != 1) |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 158 | return true; |
| 159 | // Virtual bases are complicated, skipping those for now. |
| 160 | if (CXXRD->getNumVBases() != 0) |
| 161 | return true; |
| 162 | // Can't layout a template, so skip it. We do still layout the |
| 163 | // instantiations though. |
| 164 | if (CXXRD->getTypeForDecl()->isDependentType()) |
| 165 | return true; |
| 166 | if (CXXRD->getTypeForDecl()->isInstantiationDependentType()) |
| 167 | return true; |
| 168 | } |
Alexander Shaposhnikov | e2f0734 | 2018-10-30 01:20:37 +0000 | [diff] [blame] | 169 | // How do you reorder fields if you haven't got any? |
| 170 | else if (RD->field_empty()) |
| 171 | return true; |
| 172 | |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 173 | auto IsTrickyField = [](const FieldDecl *FD) -> bool { |
| 174 | // Bitfield layout is hard. |
| 175 | if (FD->isBitField()) |
| 176 | return true; |
| 177 | |
| 178 | // Variable length arrays are tricky too. |
| 179 | QualType Ty = FD->getType(); |
| 180 | if (Ty->isIncompleteArrayType()) |
| 181 | return true; |
| 182 | return false; |
| 183 | }; |
| 184 | |
| 185 | if (std::any_of(RD->field_begin(), RD->field_end(), IsTrickyField)) |
| 186 | return true; |
| 187 | return false; |
| 188 | } |
| 189 | |
| 190 | static CharUnits calculateBaselinePad(const RecordDecl *RD, |
| 191 | const ASTContext &ASTContext, |
| 192 | const ASTRecordLayout &RL) { |
| 193 | CharUnits PaddingSum; |
| 194 | CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0)); |
| 195 | for (const FieldDecl *FD : RD->fields()) { |
| 196 | // This checker only cares about the padded size of the |
| 197 | // field, and not the data size. If the field is a record |
| 198 | // with tail padding, then we won't put that number in our |
| 199 | // total because reordering fields won't fix that problem. |
| 200 | CharUnits FieldSize = ASTContext.getTypeSizeInChars(FD->getType()); |
| 201 | auto FieldOffsetBits = RL.getFieldOffset(FD->getFieldIndex()); |
| 202 | CharUnits FieldOffset = ASTContext.toCharUnitsFromBits(FieldOffsetBits); |
| 203 | PaddingSum += (FieldOffset - Offset); |
| 204 | Offset = FieldOffset + FieldSize; |
| 205 | } |
| 206 | PaddingSum += RL.getSize() - Offset; |
| 207 | return PaddingSum; |
| 208 | } |
| 209 | |
| 210 | /// Optimal padding overview: |
| 211 | /// 1. Find a close approximation to where we can place our first field. |
| 212 | /// This will usually be at offset 0. |
| 213 | /// 2. Try to find the best field that can legally be placed at the current |
| 214 | /// offset. |
| 215 | /// a. "Best" is the largest alignment that is legal, but smallest size. |
| 216 | /// This is to account for overly aligned types. |
| 217 | /// 3. If no fields can fit, pad by rounding the current offset up to the |
| 218 | /// smallest alignment requirement of our fields. Measure and track the |
| 219 | // amount of padding added. Go back to 2. |
| 220 | /// 4. Increment the current offset by the size of the chosen field. |
| 221 | /// 5. Remove the chosen field from the set of future possibilities. |
| 222 | /// 6. Go back to 2 if there are still unplaced fields. |
| 223 | /// 7. Add tail padding by rounding the current offset up to the structure |
| 224 | /// alignment. Track the amount of padding added. |
| 225 | |
| 226 | static std::pair<CharUnits, SmallVector<const FieldDecl *, 20>> |
| 227 | calculateOptimalPad(const RecordDecl *RD, const ASTContext &ASTContext, |
| 228 | const ASTRecordLayout &RL) { |
| 229 | struct FieldInfo { |
| 230 | CharUnits Align; |
| 231 | CharUnits Size; |
| 232 | const FieldDecl *Field; |
| 233 | bool operator<(const FieldInfo &RHS) const { |
| 234 | // Order from small alignments to large alignments, |
| 235 | // then large sizes to small sizes. |
| 236 | // then large field indices to small field indices |
| 237 | return std::make_tuple(Align, -Size, |
| 238 | Field ? -static_cast<int>(Field->getFieldIndex()) |
| 239 | : 0) < |
| 240 | std::make_tuple( |
| 241 | RHS.Align, -RHS.Size, |
| 242 | RHS.Field ? -static_cast<int>(RHS.Field->getFieldIndex()) |
| 243 | : 0); |
| 244 | } |
| 245 | }; |
| 246 | SmallVector<FieldInfo, 20> Fields; |
| 247 | auto GatherSizesAndAlignments = [](const FieldDecl *FD) { |
| 248 | FieldInfo RetVal; |
| 249 | RetVal.Field = FD; |
| 250 | auto &Ctx = FD->getASTContext(); |
| 251 | std::tie(RetVal.Size, RetVal.Align) = |
| 252 | Ctx.getTypeInfoInChars(FD->getType()); |
| 253 | assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity())); |
| 254 | if (auto Max = FD->getMaxAlignment()) |
| 255 | RetVal.Align = std::max(Ctx.toCharUnitsFromBits(Max), RetVal.Align); |
| 256 | return RetVal; |
| 257 | }; |
| 258 | std::transform(RD->field_begin(), RD->field_end(), |
| 259 | std::back_inserter(Fields), GatherSizesAndAlignments); |
Fangrui Song | 55fab26 | 2018-09-26 22:16:28 +0000 | [diff] [blame] | 260 | llvm::sort(Fields); |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 261 | // This lets us skip over vptrs and non-virtual bases, |
| 262 | // so that we can just worry about the fields in our object. |
| 263 | // Note that this does cause us to miss some cases where we |
| 264 | // could pack more bytes in to a base class's tail padding. |
| 265 | CharUnits NewOffset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0)); |
| 266 | CharUnits NewPad; |
| 267 | SmallVector<const FieldDecl *, 20> OptimalFieldsOrder; |
| 268 | while (!Fields.empty()) { |
| 269 | unsigned TrailingZeros = |
| 270 | llvm::countTrailingZeros((unsigned long long)NewOffset.getQuantity()); |
| 271 | // If NewOffset is zero, then countTrailingZeros will be 64. Shifting |
| 272 | // 64 will overflow our unsigned long long. Shifting 63 will turn |
| 273 | // our long long (and CharUnits internal type) negative. So shift 62. |
| 274 | long long CurAlignmentBits = 1ull << (std::min)(TrailingZeros, 62u); |
| 275 | CharUnits CurAlignment = CharUnits::fromQuantity(CurAlignmentBits); |
| 276 | FieldInfo InsertPoint = {CurAlignment, CharUnits::Zero(), nullptr}; |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 277 | |
| 278 | // In the typical case, this will find the last element |
| 279 | // of the vector. We won't find a middle element unless |
| 280 | // we started on a poorly aligned address or have an overly |
| 281 | // aligned field. |
Fangrui Song | 7264a47 | 2019-07-03 08:13:17 +0000 | [diff] [blame] | 282 | auto Iter = llvm::upper_bound(Fields, InsertPoint); |
| 283 | if (Iter != Fields.begin()) { |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 284 | // We found a field that we can layout with the current alignment. |
| 285 | --Iter; |
| 286 | NewOffset += Iter->Size; |
| 287 | OptimalFieldsOrder.push_back(Iter->Field); |
| 288 | Fields.erase(Iter); |
| 289 | } else { |
| 290 | // We are poorly aligned, and we need to pad in order to layout another |
| 291 | // field. Round up to at least the smallest field alignment that we |
| 292 | // currently have. |
| 293 | CharUnits NextOffset = NewOffset.alignTo(Fields[0].Align); |
| 294 | NewPad += NextOffset - NewOffset; |
| 295 | NewOffset = NextOffset; |
| 296 | } |
| 297 | } |
| 298 | // Calculate tail padding. |
| 299 | CharUnits NewSize = NewOffset.alignTo(RL.getAlignment()); |
| 300 | NewPad += NewSize - NewOffset; |
| 301 | return {NewPad, std::move(OptimalFieldsOrder)}; |
| 302 | } |
| 303 | |
| 304 | void reportRecord( |
| 305 | const RecordDecl *RD, CharUnits BaselinePad, CharUnits OptimalPad, |
| 306 | const SmallVector<const FieldDecl *, 20> &OptimalFieldsOrder) const { |
| 307 | if (!PaddingBug) |
| 308 | PaddingBug = |
Jonas Devlieghere | 2b3d49b | 2019-08-14 23:04:18 +0000 | [diff] [blame] | 309 | std::make_unique<BugType>(this, "Excessive Padding", "Performance"); |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 310 | |
| 311 | SmallString<100> Buf; |
| 312 | llvm::raw_svector_ostream Os(Buf); |
| 313 | Os << "Excessive padding in '"; |
| 314 | Os << QualType::getAsString(RD->getTypeForDecl(), Qualifiers(), |
| 315 | LangOptions()) |
| 316 | << "'"; |
| 317 | |
| 318 | if (auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) { |
| 319 | // TODO: make this show up better in the console output and in |
| 320 | // the HTML. Maybe just make it show up in HTML like the path |
| 321 | // diagnostics show. |
| 322 | SourceLocation ILoc = TSD->getPointOfInstantiation(); |
| 323 | if (ILoc.isValid()) |
| 324 | Os << " instantiated here: " |
| 325 | << ILoc.printToString(BR->getSourceManager()); |
| 326 | } |
| 327 | |
| 328 | Os << " (" << BaselinePad.getQuantity() << " padding bytes, where " |
| 329 | << OptimalPad.getQuantity() << " is optimal). \n" |
| 330 | << "Optimal fields order: \n"; |
| 331 | for (const auto *FD : OptimalFieldsOrder) |
| 332 | Os << FD->getName() << ", \n"; |
| 333 | Os << "consider reordering the fields or adding explicit padding " |
| 334 | "members."; |
| 335 | |
| 336 | PathDiagnosticLocation CELoc = |
| 337 | PathDiagnosticLocation::create(RD, BR->getSourceManager()); |
Artem Dergachev | 2f169e7 | 2019-09-09 20:34:40 +0000 | [diff] [blame] | 338 | auto Report = |
| 339 | std::make_unique<BasicBugReport>(*PaddingBug, Os.str(), CELoc); |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 340 | Report->setDeclWithIssue(RD); |
| 341 | Report->addRange(RD->getSourceRange()); |
| 342 | BR->emitReport(std::move(Report)); |
| 343 | } |
| 344 | }; |
Alexander Shaposhnikov | e2f0734 | 2018-10-30 01:20:37 +0000 | [diff] [blame] | 345 | } // namespace |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 346 | |
| 347 | void ento::registerPaddingChecker(CheckerManager &Mgr) { |
Kristof Umann | 088b1c9 | 2019-03-04 00:28:16 +0000 | [diff] [blame] | 348 | auto *Checker = Mgr.registerChecker<PaddingChecker>(); |
| 349 | Checker->AllowedPad = Mgr.getAnalyzerOptions() |
Kristof Umann | 83cc1b3 | 2019-05-17 15:52:13 +0000 | [diff] [blame] | 350 | .getCheckerIntegerOption(Checker, "AllowedPad"); |
Kristof Umann | 748c139 | 2019-03-08 16:00:42 +0000 | [diff] [blame] | 351 | if (Checker->AllowedPad < 0) |
| 352 | Mgr.reportInvalidCheckerOptionValue( |
| 353 | Checker, "AllowedPad", "a non-negative value"); |
Malcolm Parsons | fab3680 | 2018-04-16 08:31:08 +0000 | [diff] [blame] | 354 | } |
Kristof Umann | 058a7a4 | 2019-01-26 14:23:08 +0000 | [diff] [blame] | 355 | |
| 356 | bool ento::shouldRegisterPaddingChecker(const LangOptions &LO) { |
| 357 | return true; |
| 358 | } |