[CodeGen] Add initial support for union members in TBAA
The basic idea behind this patch is that, since in strict aliasing
mode all accesses to union members require their outermost
enclosing union objects to be specified explicitly, for any pair
of accesses to union members of the form
p->a.b.c...
q->x.y.z...
it is known that they can only alias if both p and q point to the
same union type and the offset ranges of members a.b.c... and x.y.z...
overlap. Note that the actual types of the members do not matter.
Specifically, in this patch we do the following:
* Make unions valid TBAA base access types. This enables
generation of TBAA type descriptors for unions.
* Encode union types as structures with a single member of a
special "union member" type. Currently we do not encode
information about sizes of types, but conceptually such union
members are considered to be of the size of the whole union.
* Encode accesses to direct and indirect union members, including
member arrays, as accesses to these special members. All
accesses to members of a union thus get the same offset, which
is the offset of the union they are part of. This means the
existing LLVM TBAA machinery is able to handle such accesses
with no changes.
While this is already an improvement compared to the current
situation, in which all union accesses are represented as may-alias
ones, there are further changes planned to complete the support
for unions. One of them is storing information about access sizes
so we can distinguish accesses to non-overlapping union members,
including accesses to different elements of member arrays.
Another change is encoding type sizes in order to make it
possible to compute offsets within constant-indexed array
elements. These enhancements will be addressed with separate
patches.
Differential Revision: https://reviews.llvm.org/D39455
llvm-svn: 319413
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 88116f7..50d116e 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3723,9 +3723,6 @@
if (base.getTBAAInfo().isMayAlias() ||
rec->hasAttr<MayAliasAttr>() || FieldType->isVectorType()) {
FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
- } else if (rec->isUnion()) {
- // TODO: Support TBAA for unions.
- FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
} else {
// If no base type been assigned for the base access, then try to generate
// one for this base lvalue.
@@ -3736,16 +3733,26 @@
"Nonzero offset for an access with no base type!");
}
- // Adjust offset to be relative to the base type.
- const ASTRecordLayout &Layout =
- getContext().getASTRecordLayout(field->getParent());
- unsigned CharWidth = getContext().getCharWidth();
- if (FieldTBAAInfo.BaseType)
- FieldTBAAInfo.Offset +=
- Layout.getFieldOffset(field->getFieldIndex()) / CharWidth;
+ // All union members are encoded to be of the same special type.
+ if (FieldTBAAInfo.BaseType && rec->isUnion())
+ FieldTBAAInfo = TBAAAccessInfo::getUnionMemberInfo(FieldTBAAInfo.BaseType,
+ FieldTBAAInfo.Offset,
+ FieldTBAAInfo.Size);
- // Update the final access type.
- FieldTBAAInfo.AccessType = CGM.getTBAATypeInfo(FieldType);
+ // For now we describe accesses to direct and indirect union members as if
+ // they were at the offset of their outermost enclosing union.
+ if (!FieldTBAAInfo.isUnionMember()) {
+ // Adjust offset to be relative to the base type.
+ const ASTRecordLayout &Layout =
+ getContext().getASTRecordLayout(field->getParent());
+ unsigned CharWidth = getContext().getCharWidth();
+ if (FieldTBAAInfo.BaseType)
+ FieldTBAAInfo.Offset +=
+ Layout.getFieldOffset(field->getFieldIndex()) / CharWidth;
+
+ // Update the final access type.
+ FieldTBAAInfo.AccessType = CGM.getTBAATypeInfo(FieldType);
+ }
}
Address addr = base.getAddress();
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 8b14293..ff9866a 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -688,8 +688,9 @@
/// getTBAAInfoForSubobject - Get TBAA information for an access with a given
/// base lvalue.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType) {
- if (Base.getTBAAInfo().isMayAlias())
- return TBAAAccessInfo::getMayAliasInfo();
+ TBAAAccessInfo TBAAInfo = Base.getTBAAInfo();
+ if (TBAAInfo.isMayAlias() || TBAAInfo.isUnionMember())
+ return TBAAInfo;
return getTBAAAccessInfo(AccessType);
}
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 2bc4b8a..80ad9ae 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -74,6 +74,10 @@
return Char;
}
+llvm::MDNode *CodeGenTBAA::getUnionMemberType(uint64_t Size) {
+ return createScalarTypeNode("union member", getChar(), Size);
+}
+
static bool TypeHasMayAlias(QualType QTy) {
// Tagged types have declarations, and therefore may have attributes.
if (const TagType *TTy = dyn_cast<TagType>(QTy))
@@ -101,9 +105,8 @@
return false;
if (RD->hasFlexibleArrayMember())
return false;
- // RD can be struct, union, class, interface or enum.
- // For now, we only handle struct and class.
- if (RD->isStruct() || RD->isClass())
+ // For now, we do not allow interface classes to be base access types.
+ if (RD->isStruct() || RD->isClass() || RD->isUnion())
return true;
}
return false;
@@ -277,18 +280,27 @@
const RecordDecl *RD = TTy->getDecl()->getDefinition();
const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields;
- for (FieldDecl *Field : RD->fields()) {
- QualType FieldQTy = Field->getType();
- llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ?
- getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy);
- if (!TypeNode)
- return BaseTypeMetadataCache[Ty] = nullptr;
-
- uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex());
- uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity();
- uint64_t Size = Context.getTypeSizeInChars(FieldQTy).getQuantity();
- Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size,
+ if (RD->isUnion()) {
+ // Unions are represented as structures with a single member that has a
+ // special type and occupies the whole object.
+ uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity();
+ llvm::MDNode *TypeNode = getUnionMemberType(Size);
+ Fields.push_back(llvm::MDBuilder::TBAAStructField(/* Offset= */ 0, Size,
TypeNode));
+ } else {
+ for (FieldDecl *Field : RD->fields()) {
+ QualType FieldQTy = Field->getType();
+ llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ?
+ getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy);
+ if (!TypeNode)
+ return nullptr;
+
+ uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex());
+ uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity();
+ uint64_t Size = Context.getTypeSizeInChars(FieldQTy).getQuantity();
+ Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size,
+ TypeNode));
+ }
}
SmallString<256> OutName;
@@ -333,6 +345,8 @@
if (Info.isMayAlias())
Info = TBAAAccessInfo(getChar(), Info.Size);
+ else if (Info.isUnionMember())
+ Info.AccessType = getUnionMemberType(Info.Size);
if (!Info.AccessType)
return nullptr;
diff --git a/clang/lib/CodeGen/CodeGenTBAA.h b/clang/lib/CodeGen/CodeGenTBAA.h
index a5b1f66..7b3473f 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.h
+++ b/clang/lib/CodeGen/CodeGenTBAA.h
@@ -34,9 +34,10 @@
// TBAAAccessKind - A kind of TBAA memory access descriptor.
enum class TBAAAccessKind : unsigned {
- Ordinary,
- MayAlias,
- Incomplete,
+ Ordinary, // An ordinary memory access.
+ MayAlias, // An access that may alias with any other accesses.
+ Incomplete, // Used to designate pointee values of incomplete types.
+ UnionMember, // An access to a direct or indirect union member.
};
// TBAAAccessInfo - Describes a memory access in terms of TBAA.
@@ -77,6 +78,14 @@
bool isIncomplete() const { return Kind == TBAAAccessKind::Incomplete; }
+ static TBAAAccessInfo getUnionMemberInfo(llvm::MDNode *BaseType,
+ uint64_t Offset, uint64_t Size) {
+ return TBAAAccessInfo(TBAAAccessKind::UnionMember, BaseType,
+ /* AccessType= */ nullptr, Offset, Size);
+ }
+
+ bool isUnionMember() const { return Kind == TBAAAccessKind::UnionMember; }
+
bool operator==(const TBAAAccessInfo &Other) const {
return Kind == Other.Kind &&
BaseType == Other.BaseType &&
@@ -148,6 +157,10 @@
/// considered to be equivalent to it.
llvm::MDNode *getChar();
+ /// getUnionMemberType - Get metadata that represents the type of union
+ /// members.
+ llvm::MDNode *getUnionMemberType(uint64_t Size);
+
/// CollectFields - Collect information about the fields of a type for
/// !tbaa.struct metadata formation. Return false for an unsupported type.
bool CollectFields(uint64_t BaseOffset,