blob: df5b46ebd2e53825842bce8166659698903afdd1 [file] [log] [blame]
Malcolm Parsonsfab36802018-04-16 08:31:08 +00001//=======- PaddingChecker.cpp ------------------------------------*- C++ -*-==//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Malcolm Parsonsfab36802018-04-16 08:31:08 +00006//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a checker that checks for padding that could be
10// removed by re-ordering members.
11//
12//===----------------------------------------------------------------------===//
13
Kristof Umann76a21502018-12-15 16:23:51 +000014#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
Malcolm Parsonsfab36802018-04-16 08:31:08 +000015#include "clang/AST/CharUnits.h"
16#include "clang/AST/DeclTemplate.h"
17#include "clang/AST/RecordLayout.h"
18#include "clang/AST/RecursiveASTVisitor.h"
19#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
20#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21#include "clang/StaticAnalyzer/Core/Checker.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
23#include "llvm/ADT/SmallString.h"
24#include "llvm/Support/MathExtras.h"
25#include "llvm/Support/raw_ostream.h"
26#include <numeric>
27
28using namespace clang;
29using namespace ento;
30
31namespace {
32class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
33private:
34 mutable std::unique_ptr<BugType> PaddingBug;
Malcolm Parsonsfab36802018-04-16 08:31:08 +000035 mutable BugReporter *BR;
36
37public:
Kristof Umann088b1c92019-03-04 00:28:16 +000038 int64_t AllowedPad;
39
Malcolm Parsonsfab36802018-04-16 08:31:08 +000040 void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
41 BugReporter &BRArg) const {
42 BR = &BRArg;
Malcolm Parsonsfab36802018-04-16 08:31:08 +000043
44 // The calls to checkAST* from AnalysisConsumer don't
45 // visit template instantiations or lambda classes. We
46 // want to visit those, so we make our own RecursiveASTVisitor.
47 struct LocalVisitor : public RecursiveASTVisitor<LocalVisitor> {
48 const PaddingChecker *Checker;
49 bool shouldVisitTemplateInstantiations() const { return true; }
50 bool shouldVisitImplicitCode() const { return true; }
51 explicit LocalVisitor(const PaddingChecker *Checker) : Checker(Checker) {}
52 bool VisitRecordDecl(const RecordDecl *RD) {
53 Checker->visitRecord(RD);
54 return true;
55 }
56 bool VisitVarDecl(const VarDecl *VD) {
57 Checker->visitVariable(VD);
58 return true;
59 }
60 // TODO: Visit array new and mallocs for arrays.
61 };
62
63 LocalVisitor visitor(this);
64 visitor.TraverseDecl(const_cast<TranslationUnitDecl *>(TUD));
65 }
66
Adrian Prantl9fc8faf2018-05-09 01:00:01 +000067 /// Look for records of overly padded types. If padding *
Malcolm Parsonsfab36802018-04-16 08:31:08 +000068 /// PadMultiplier exceeds AllowedPad, then generate a report.
69 /// PadMultiplier is used to share code with the array padding
70 /// checker.
71 void visitRecord(const RecordDecl *RD, uint64_t PadMultiplier = 1) const {
72 if (shouldSkipDecl(RD))
73 return;
74
Alexander Shaposhnikove2f07342018-10-30 01:20:37 +000075 // TODO: Figure out why we are going through declarations and not only
76 // definitions.
77 if (!(RD = RD->getDefinition()))
78 return;
79
80 // This is the simplest correct case: a class with no fields and one base
81 // class. Other cases are more complicated because of how the base classes
82 // & fields might interact, so we don't bother dealing with them.
83 // TODO: Support other combinations of base classes and fields.
84 if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
85 if (CXXRD->field_empty() && CXXRD->getNumBases() == 1)
86 return visitRecord(CXXRD->bases().begin()->getType()->getAsRecordDecl(),
87 PadMultiplier);
88
Malcolm Parsonsfab36802018-04-16 08:31:08 +000089 auto &ASTContext = RD->getASTContext();
90 const ASTRecordLayout &RL = ASTContext.getASTRecordLayout(RD);
91 assert(llvm::isPowerOf2_64(RL.getAlignment().getQuantity()));
92
93 CharUnits BaselinePad = calculateBaselinePad(RD, ASTContext, RL);
94 if (BaselinePad.isZero())
95 return;
96
97 CharUnits OptimalPad;
98 SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
99 std::tie(OptimalPad, OptimalFieldsOrder) =
100 calculateOptimalPad(RD, ASTContext, RL);
101
102 CharUnits DiffPad = PadMultiplier * (BaselinePad - OptimalPad);
103 if (DiffPad.getQuantity() <= AllowedPad) {
104 assert(!DiffPad.isNegative() && "DiffPad should not be negative");
105 // There is not enough excess padding to trigger a warning.
106 return;
107 }
108 reportRecord(RD, BaselinePad, OptimalPad, OptimalFieldsOrder);
109 }
110
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000111 /// Look for arrays of overly padded types. If the padding of the
Malcolm Parsonsfab36802018-04-16 08:31:08 +0000112 /// array type exceeds AllowedPad, then generate a report.
113 void visitVariable(const VarDecl *VD) const {
114 const ArrayType *ArrTy = VD->getType()->getAsArrayTypeUnsafe();
115 if (ArrTy == nullptr)
116 return;
117 uint64_t Elts = 0;
118 if (const ConstantArrayType *CArrTy = dyn_cast<ConstantArrayType>(ArrTy))
119 Elts = CArrTy->getSize().getZExtValue();
120 if (Elts == 0)
121 return;
122 const RecordType *RT = ArrTy->getElementType()->getAs<RecordType>();
123 if (RT == nullptr)
124 return;
125
Alexander Shaposhnikove2f07342018-10-30 01:20:37 +0000126 // TODO: Recurse into the fields to see if they have excess padding.
Malcolm Parsonsfab36802018-04-16 08:31:08 +0000127 visitRecord(RT->getDecl(), Elts);
128 }
129
130 bool shouldSkipDecl(const RecordDecl *RD) const {
Alexander Shaposhnikove2f07342018-10-30 01:20:37 +0000131 // TODO: Figure out why we are going through declarations and not only
132 // definitions.
133 if (!(RD = RD->getDefinition()))
134 return true;
Malcolm Parsonsfab36802018-04-16 08:31:08 +0000135 auto Location = RD->getLocation();
136 // If the construct doesn't have a source file, then it's not something
137 // we want to diagnose.
138 if (!Location.isValid())
139 return true;
140 SrcMgr::CharacteristicKind Kind =
141 BR->getSourceManager().getFileCharacteristic(Location);
142 // Throw out all records that come from system headers.
143 if (Kind != SrcMgr::C_User)
144 return true;
145
146 // Not going to attempt to optimize unions.
147 if (RD->isUnion())
148 return true;
Malcolm Parsonsfab36802018-04-16 08:31:08 +0000149 if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
150 // Tail padding with base classes ends up being very complicated.
Alexander Shaposhnikove2f07342018-10-30 01:20:37 +0000151 // We will skip objects with base classes for now, unless they do not
152 // have fields.
153 // TODO: Handle more base class scenarios.
154 if (!CXXRD->field_empty() && CXXRD->getNumBases() != 0)
155 return true;
156 if (CXXRD->field_empty() && CXXRD->getNumBases() != 1)
Malcolm Parsonsfab36802018-04-16 08:31:08 +0000157 return true;
158 // Virtual bases are complicated, skipping those for now.
159 if (CXXRD->getNumVBases() != 0)
160 return true;
161 // Can't layout a template, so skip it. We do still layout the
162 // instantiations though.
163 if (CXXRD->getTypeForDecl()->isDependentType())
164 return true;
165 if (CXXRD->getTypeForDecl()->isInstantiationDependentType())
166 return true;
167 }
Alexander Shaposhnikove2f07342018-10-30 01:20:37 +0000168 // How do you reorder fields if you haven't got any?
169 else if (RD->field_empty())
170 return true;
171
Malcolm Parsonsfab36802018-04-16 08:31:08 +0000172 auto IsTrickyField = [](const FieldDecl *FD) -> bool {
173 // Bitfield layout is hard.
174 if (FD->isBitField())
175 return true;
176
177 // Variable length arrays are tricky too.
178 QualType Ty = FD->getType();
179 if (Ty->isIncompleteArrayType())
180 return true;
181 return false;
182 };
183
184 if (std::any_of(RD->field_begin(), RD->field_end(), IsTrickyField))
185 return true;
186 return false;
187 }
188
189 static CharUnits calculateBaselinePad(const RecordDecl *RD,
190 const ASTContext &ASTContext,
191 const ASTRecordLayout &RL) {
192 CharUnits PaddingSum;
193 CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
194 for (const FieldDecl *FD : RD->fields()) {
195 // This checker only cares about the padded size of the
196 // field, and not the data size. If the field is a record
197 // with tail padding, then we won't put that number in our
198 // total because reordering fields won't fix that problem.
199 CharUnits FieldSize = ASTContext.getTypeSizeInChars(FD->getType());
200 auto FieldOffsetBits = RL.getFieldOffset(FD->getFieldIndex());
201 CharUnits FieldOffset = ASTContext.toCharUnitsFromBits(FieldOffsetBits);
202 PaddingSum += (FieldOffset - Offset);
203 Offset = FieldOffset + FieldSize;
204 }
205 PaddingSum += RL.getSize() - Offset;
206 return PaddingSum;
207 }
208
209 /// Optimal padding overview:
210 /// 1. Find a close approximation to where we can place our first field.
211 /// This will usually be at offset 0.
212 /// 2. Try to find the best field that can legally be placed at the current
213 /// offset.
214 /// a. "Best" is the largest alignment that is legal, but smallest size.
215 /// This is to account for overly aligned types.
216 /// 3. If no fields can fit, pad by rounding the current offset up to the
217 /// smallest alignment requirement of our fields. Measure and track the
218 // amount of padding added. Go back to 2.
219 /// 4. Increment the current offset by the size of the chosen field.
220 /// 5. Remove the chosen field from the set of future possibilities.
221 /// 6. Go back to 2 if there are still unplaced fields.
222 /// 7. Add tail padding by rounding the current offset up to the structure
223 /// alignment. Track the amount of padding added.
224
225 static std::pair<CharUnits, SmallVector<const FieldDecl *, 20>>
226 calculateOptimalPad(const RecordDecl *RD, const ASTContext &ASTContext,
227 const ASTRecordLayout &RL) {
228 struct FieldInfo {
229 CharUnits Align;
230 CharUnits Size;
231 const FieldDecl *Field;
232 bool operator<(const FieldInfo &RHS) const {
233 // Order from small alignments to large alignments,
234 // then large sizes to small sizes.
235 // then large field indices to small field indices
236 return std::make_tuple(Align, -Size,
237 Field ? -static_cast<int>(Field->getFieldIndex())
238 : 0) <
239 std::make_tuple(
240 RHS.Align, -RHS.Size,
241 RHS.Field ? -static_cast<int>(RHS.Field->getFieldIndex())
242 : 0);
243 }
244 };
245 SmallVector<FieldInfo, 20> Fields;
246 auto GatherSizesAndAlignments = [](const FieldDecl *FD) {
247 FieldInfo RetVal;
248 RetVal.Field = FD;
249 auto &Ctx = FD->getASTContext();
250 std::tie(RetVal.Size, RetVal.Align) =
251 Ctx.getTypeInfoInChars(FD->getType());
252 assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity()));
253 if (auto Max = FD->getMaxAlignment())
254 RetVal.Align = std::max(Ctx.toCharUnitsFromBits(Max), RetVal.Align);
255 return RetVal;
256 };
257 std::transform(RD->field_begin(), RD->field_end(),
258 std::back_inserter(Fields), GatherSizesAndAlignments);
Fangrui Song55fab262018-09-26 22:16:28 +0000259 llvm::sort(Fields);
Malcolm Parsonsfab36802018-04-16 08:31:08 +0000260 // This lets us skip over vptrs and non-virtual bases,
261 // so that we can just worry about the fields in our object.
262 // Note that this does cause us to miss some cases where we
263 // could pack more bytes in to a base class's tail padding.
264 CharUnits NewOffset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
265 CharUnits NewPad;
266 SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
267 while (!Fields.empty()) {
268 unsigned TrailingZeros =
269 llvm::countTrailingZeros((unsigned long long)NewOffset.getQuantity());
270 // If NewOffset is zero, then countTrailingZeros will be 64. Shifting
271 // 64 will overflow our unsigned long long. Shifting 63 will turn
272 // our long long (and CharUnits internal type) negative. So shift 62.
273 long long CurAlignmentBits = 1ull << (std::min)(TrailingZeros, 62u);
274 CharUnits CurAlignment = CharUnits::fromQuantity(CurAlignmentBits);
275 FieldInfo InsertPoint = {CurAlignment, CharUnits::Zero(), nullptr};
276 auto CurBegin = Fields.begin();
277 auto CurEnd = Fields.end();
278
279 // In the typical case, this will find the last element
280 // of the vector. We won't find a middle element unless
281 // we started on a poorly aligned address or have an overly
282 // aligned field.
283 auto Iter = std::upper_bound(CurBegin, CurEnd, InsertPoint);
284 if (Iter != CurBegin) {
285 // We found a field that we can layout with the current alignment.
286 --Iter;
287 NewOffset += Iter->Size;
288 OptimalFieldsOrder.push_back(Iter->Field);
289 Fields.erase(Iter);
290 } else {
291 // We are poorly aligned, and we need to pad in order to layout another
292 // field. Round up to at least the smallest field alignment that we
293 // currently have.
294 CharUnits NextOffset = NewOffset.alignTo(Fields[0].Align);
295 NewPad += NextOffset - NewOffset;
296 NewOffset = NextOffset;
297 }
298 }
299 // Calculate tail padding.
300 CharUnits NewSize = NewOffset.alignTo(RL.getAlignment());
301 NewPad += NewSize - NewOffset;
302 return {NewPad, std::move(OptimalFieldsOrder)};
303 }
304
305 void reportRecord(
306 const RecordDecl *RD, CharUnits BaselinePad, CharUnits OptimalPad,
307 const SmallVector<const FieldDecl *, 20> &OptimalFieldsOrder) const {
308 if (!PaddingBug)
309 PaddingBug =
310 llvm::make_unique<BugType>(this, "Excessive Padding", "Performance");
311
312 SmallString<100> Buf;
313 llvm::raw_svector_ostream Os(Buf);
314 Os << "Excessive padding in '";
315 Os << QualType::getAsString(RD->getTypeForDecl(), Qualifiers(),
316 LangOptions())
317 << "'";
318
319 if (auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
320 // TODO: make this show up better in the console output and in
321 // the HTML. Maybe just make it show up in HTML like the path
322 // diagnostics show.
323 SourceLocation ILoc = TSD->getPointOfInstantiation();
324 if (ILoc.isValid())
325 Os << " instantiated here: "
326 << ILoc.printToString(BR->getSourceManager());
327 }
328
329 Os << " (" << BaselinePad.getQuantity() << " padding bytes, where "
330 << OptimalPad.getQuantity() << " is optimal). \n"
331 << "Optimal fields order: \n";
332 for (const auto *FD : OptimalFieldsOrder)
333 Os << FD->getName() << ", \n";
334 Os << "consider reordering the fields or adding explicit padding "
335 "members.";
336
337 PathDiagnosticLocation CELoc =
338 PathDiagnosticLocation::create(RD, BR->getSourceManager());
339 auto Report = llvm::make_unique<BugReport>(*PaddingBug, Os.str(), CELoc);
340 Report->setDeclWithIssue(RD);
341 Report->addRange(RD->getSourceRange());
342 BR->emitReport(std::move(Report));
343 }
344};
Alexander Shaposhnikove2f07342018-10-30 01:20:37 +0000345} // namespace
Malcolm Parsonsfab36802018-04-16 08:31:08 +0000346
347void ento::registerPaddingChecker(CheckerManager &Mgr) {
Kristof Umann088b1c92019-03-04 00:28:16 +0000348 auto *Checker = Mgr.registerChecker<PaddingChecker>();
349 Checker->AllowedPad = Mgr.getAnalyzerOptions()
350 .getCheckerIntegerOption(Checker, "AllowedPad", 24);
351 assert(Checker->AllowedPad >= 0 &&
352 "AllowedPad option should be non-negative");
Malcolm Parsonsfab36802018-04-16 08:31:08 +0000353}
Kristof Umann058a7a42019-01-26 14:23:08 +0000354
355bool ento::shouldRegisterPaddingChecker(const LangOptions &LO) {
356 return true;
357}