hwasan: add tag_offset DWARF attribute to optimized debug info

Summary:
Support alloca-referencing dbg.value in hwasan instrumentation.
Update AsmPrinter to emit DW_AT_LLVM_tag_offset when location is in
loclist format.

Reviewers: pcc

Subscribers: srhines, aprantl, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70753
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
index f483d53..8c61098 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
@@ -42,4 +42,6 @@
     return;
   V.initializeDbgValue(&MI);
   V.setDebugLocListIndex(ListIndex);
+  if (TagOffset)
+    V.setDebugLocListTagOffset(*TagOffset);
 }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 0db86b0..10019a4 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -159,11 +159,17 @@
   DbgVariable &V;
   const MachineInstr &MI;
   size_t ListIndex;
+  Optional<uint8_t> TagOffset;
 
 public:
   ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm,
               DbgVariable &V, const MachineInstr &MI)
-      : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)) {}
+      : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)),
+        TagOffset(None) {}
+
+  void setTagOffset(uint8_t TO) {
+    TagOffset = TO;
+  }
 
   /// Finalize the list.
   ///
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 4e90c10..0f3d8c6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -615,6 +615,10 @@
   unsigned Offset = DV.getDebugLocListIndex();
   if (Offset != ~0U) {
     addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
+    auto TagOffset = DV.getDebugLocListTagOffset();
+    if (TagOffset)
+      addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+              *TagOffset);
     return VariableDie;
   }
 
@@ -632,6 +636,10 @@
         DwarfExpr.addUnsignedConstant(DVal->getInt());
         DwarfExpr.addExpression(Expr);
         addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+        if (DwarfExpr.TagOffset)
+          addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset,
+                  dwarf::DW_FORM_data1, *DwarfExpr.TagOffset);
+
       } else
         addConstantValue(*VariableDie, DVal->getInt(), DV.getType());
     } else if (DVal->isConstantFP()) {
@@ -1198,6 +1206,10 @@
 
   // Now attach the location information to the DIE.
   addBlock(Die, Attribute, DwarfExpr.finalize());
+
+  if (DwarfExpr.TagOffset)
+    addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+            *DwarfExpr.TagOffset);
 }
 
 /// Start with the address based on the location provided, and generate the
@@ -1228,6 +1240,10 @@
 
   // Now attach the location information to the DIE.
   addBlock(Die, Attribute, DwarfExpr.finalize());
+
+  if (DwarfExpr.TagOffset)
+    addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+            *DwarfExpr.TagOffset);
 }
 
 /// Add a Dwarf loclistptr attribute data and value.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 0a20de5..098b5a9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2275,6 +2275,8 @@
     DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr);
   }
   DwarfExpr.finalize();
+  if (DwarfExpr.TagOffset)
+    List.setTagOffset(*DwarfExpr.TagOffset);
 }
 
 void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 03949db..bdd1b25 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -118,6 +118,9 @@
 class DbgVariable : public DbgEntity {
   /// Offset in DebugLocs.
   unsigned DebugLocListIndex = ~0u;
+  /// DW_OP_LLVM_tag_offset value from DebugLocs.
+  Optional<uint8_t> DebugLocListTagOffset;
+
   /// Single value location description.
   std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
 
@@ -174,6 +177,8 @@
 
   void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
   unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
+  void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; }
+  Optional<uint8_t> getDebugLocListTagOffset() const { return DebugLocListTagOffset; }
   StringRef getName() const { return getVariable()->getName(); }
   const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
   /// Get the FI entries, sorted by fragment offset.
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index be5abd2..7e8f8e2 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -222,7 +222,7 @@
   Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
   bool instrumentStack(
       SmallVectorImpl<AllocaInst *> &Allocas,
-      DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
+      DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
       SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
   Value *readRegister(IRBuilder<> &IRB, StringRef Name);
   bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
@@ -1016,7 +1016,7 @@
 
 bool HWAddressSanitizer::instrumentStack(
     SmallVectorImpl<AllocaInst *> &Allocas,
-    DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
+    DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
     SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
   // Ideally, we want to calculate tagged stack base pointer, and rewrite all
   // alloca addresses using that. Unfortunately, offsets are not known yet
@@ -1038,11 +1038,15 @@
     AI->replaceUsesWithIf(Replacement,
                           [AILong](Use &U) { return U.getUser() != AILong; });
 
-    for (auto *DDI : AllocaDeclareMap.lookup(AI)) {
-      DIExpression *OldExpr = DDI->getExpression();
-      DIExpression *NewExpr = DIExpression::append(
-          OldExpr, {dwarf::DW_OP_LLVM_tag_offset, RetagMask(N)});
-      DDI->setArgOperand(2, MetadataAsValue::get(*C, NewExpr));
+    for (auto *DDI : AllocaDbgMap.lookup(AI)) {
+      // Prepend "tag_offset, N" to the dwarf expression.
+      // Tag offset logically applies to the alloca pointer, and it makes sense
+      // to put it at the beginning of the expression.
+      SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
+                                         RetagMask(N)};
+      DDI->setArgOperand(
+          2, MetadataAsValue::get(*C, DIExpression::prependOpcodes(
+                                          DDI->getExpression(), NewOps)));
     }
 
     size_t Size = getAllocaSizeInBytes(*AI);
@@ -1089,7 +1093,7 @@
   SmallVector<AllocaInst*, 8> AllocasToInstrument;
   SmallVector<Instruction*, 8> RetVec;
   SmallVector<Instruction*, 8> LandingPadVec;
-  DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> AllocaDeclareMap;
+  DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
   for (auto &BB : F) {
     for (auto &Inst : BB) {
       if (ClInstrumentStack)
@@ -1103,9 +1107,10 @@
           isa<CleanupReturnInst>(Inst))
         RetVec.push_back(&Inst);
 
-      if (auto *DDI = dyn_cast<DbgDeclareInst>(&Inst))
-        if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress()))
-          AllocaDeclareMap[Alloca].push_back(DDI);
+      if (auto *DDI = dyn_cast<DbgVariableIntrinsic>(&Inst))
+        if (auto *Alloca =
+                dyn_cast_or_null<AllocaInst>(DDI->getVariableLocation()))
+          AllocaDbgMap[Alloca].push_back(DDI);
 
       if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
         LandingPadVec.push_back(&Inst);
@@ -1148,7 +1153,7 @@
   if (!AllocasToInstrument.empty()) {
     Value *StackTag =
         ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
-    Changed |= instrumentStack(AllocasToInstrument, AllocaDeclareMap, RetVec,
+    Changed |= instrumentStack(AllocasToInstrument, AllocaDbgMap, RetVec,
                                StackTag);
   }
 
diff --git a/llvm/test/CodeGen/AArch64/dbg-value-tag-offset.ll b/llvm/test/CodeGen/AArch64/dbg-value-tag-offset.ll
new file mode 100644
index 0000000..ab511fa
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dbg-value-tag-offset.ll
@@ -0,0 +1,68 @@
+; RUN: llc -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-android24"
+
+; CHECK: .Linfo_string5:
+; CHECK-NEXT: .asciz "y"
+; CHECK: .Linfo_string7:
+; CHECK-NEXT: .asciz "x"
+
+; CHECK:      .byte   128                     // DW_AT_LLVM_tag_offset
+; CHECK-NEXT: .word   .Linfo_string5          // DW_AT_name
+; CHECK:      .byte   0                       // DW_AT_LLVM_tag_offset
+; CHECK-NEXT: .word   .Linfo_string7          // DW_AT_name
+
+define dso_local void @f() !dbg !14 {
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = bitcast i32* %1 to i8*, !dbg !21
+  %4 = bitcast i32* %2 to i8*, !dbg !21
+  call void @llvm.dbg.value(metadata i32 1, metadata !20, metadata !DIExpression()), !dbg !22
+  store i32 1, i32* %2, align 4, !dbg !23, !tbaa !24
+  call void @llvm.dbg.value(metadata i32* %1, metadata !18, metadata !DIExpression(DW_OP_LLVM_tag_offset, 0, DW_OP_deref)), !dbg !22
+  call void @use(i8* nonnull %3), !dbg !28
+  call void @llvm.dbg.value(metadata i32* %2, metadata !20, metadata !DIExpression(DW_OP_LLVM_tag_offset, 128, DW_OP_deref)), !dbg !22
+  call void @use(i8* nonnull %4), !dbg !29
+  ret void, !dbg !30
+}
+
+declare !dbg !5 void @use(i8*)
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9, !10, !11, !12}
+!llvm.ident = !{!13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 10.0.0 (git@github.com:llvm/llvm-project.git 5560dd08b99a0e8b0c55116376624e4f967caec5)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None)
+!1 = !DIFile(filename: "dbg.cc", directory: "/tmp")
+!2 = !{}
+!3 = !{!4, !5}
+!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+!5 = !DISubprogram(name: "use", scope: !1, file: !1, line: 2, type: !6, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null, !4}
+!8 = !{i32 7, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{i32 1, !"wchar_size", i32 4}
+!11 = !{i32 7, !"PIC Level", i32 2}
+!12 = !{i32 7, !"PIE Level", i32 2}
+!13 = !{!"clang version 10.0.0 (git@github.com:llvm/llvm-project.git 5560dd08b99a0e8b0c55116376624e4f967caec5)"}
+!14 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !17)
+!15 = !DISubroutineType(types: !16)
+!16 = !{null}
+!17 = !{!18, !20}
+!18 = !DILocalVariable(name: "x", scope: !14, file: !1, line: 5, type: !19)
+!19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!20 = !DILocalVariable(name: "y", scope: !14, file: !1, line: 5, type: !19)
+!21 = !DILocation(line: 5, column: 3, scope: !14)
+!22 = !DILocation(line: 0, scope: !14)
+!23 = !DILocation(line: 5, column: 10, scope: !14)
+!24 = !{!25, !25, i64 0}
+!25 = !{!"int", !26, i64 0}
+!26 = !{!"omnipotent char", !27, i64 0}
+!27 = !{!"Simple C++ TBAA"}
+!28 = !DILocation(line: 6, column: 3, scope: !14)
+!29 = !DILocation(line: 7, column: 3, scope: !14)
+!30 = !DILocation(line: 8, column: 1, scope: !14)
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset.ll b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset.ll
new file mode 100644
index 0000000..49e970b
--- /dev/null
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset.ll
@@ -0,0 +1,61 @@
+; RUN: opt -hwasan -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-android24"
+
+define dso_local void @f() sanitize_hwaddress !dbg !14 {
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = bitcast i32* %1 to i8*, !dbg !21
+  %4 = bitcast i32* %2 to i8*, !dbg !21
+; CHECK: call void @llvm.dbg.value(metadata i32 1, {{.*}}, metadata !DIExpression())
+  call void @llvm.dbg.value(metadata i32 1, metadata !20, metadata !DIExpression()), !dbg !22
+  store i32 1, i32* %2, align 4, !dbg !23, !tbaa !24
+; CHECK: call void @llvm.dbg.value(metadata i32* {{.*}}, metadata !DIExpression(DW_OP_LLVM_tag_offset, 0, DW_OP_deref))
+  call void @llvm.dbg.value(metadata i32* %1, metadata !18, metadata !DIExpression(DW_OP_deref)), !dbg !22
+  call void @use(i8* nonnull %3), !dbg !28
+; CHECK: call void @llvm.dbg.value(metadata i32* {{.*}}, metadata !DIExpression(DW_OP_LLVM_tag_offset, 128, DW_OP_deref))
+  call void @llvm.dbg.value(metadata i32* %2, metadata !20, metadata !DIExpression(DW_OP_deref)), !dbg !22
+  call void @use(i8* nonnull %4), !dbg !29
+  ret void, !dbg !30
+}
+
+declare !dbg !5 void @use(i8*)
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9, !10, !11, !12}
+!llvm.ident = !{!13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 10.0.0 (git@github.com:llvm/llvm-project.git 5560dd08b99a0e8b0c55116376624e4f967caec5)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None)
+!1 = !DIFile(filename: "dbg.cc", directory: "/tmp")
+!2 = !{}
+!3 = !{!4, !5}
+!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+!5 = !DISubprogram(name: "use", scope: !1, file: !1, line: 2, type: !6, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null, !4}
+!8 = !{i32 7, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{i32 1, !"wchar_size", i32 4}
+!11 = !{i32 7, !"PIC Level", i32 2}
+!12 = !{i32 7, !"PIE Level", i32 2}
+!13 = !{!"clang version 10.0.0 (git@github.com:llvm/llvm-project.git 5560dd08b99a0e8b0c55116376624e4f967caec5)"}
+!14 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !17)
+!15 = !DISubroutineType(types: !16)
+!16 = !{null}
+!17 = !{!18, !20}
+!18 = !DILocalVariable(name: "x", scope: !14, file: !1, line: 5, type: !19)
+!19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!20 = !DILocalVariable(name: "y", scope: !14, file: !1, line: 5, type: !19)
+!21 = !DILocation(line: 5, column: 3, scope: !14)
+!22 = !DILocation(line: 0, scope: !14)
+!23 = !DILocation(line: 5, column: 10, scope: !14)
+!24 = !{!25, !25, i64 0}
+!25 = !{!"int", !26, i64 0}
+!26 = !{!"omnipotent char", !27, i64 0}
+!27 = !{!"Simple C++ TBAA"}
+!28 = !DILocation(line: 6, column: 3, scope: !14)
+!29 = !DILocation(line: 7, column: 3, scope: !14)
+!30 = !DILocation(line: 8, column: 1, scope: !14)