[BPF] do compile-once run-everywhere relocation for bitfields
A bpf specific clang intrinsic is introduced:
   u32 __builtin_preserve_field_info(member_access, info_kind)
Depending on info_kind, different information will
be returned to the program. A relocation is also
recorded for this builtin so that bpf loader can
patch the instruction on the target host.
This clang intrinsic is used to get certain information
to facilitate struct/union member relocations.
The offset relocation is extended by 4 bytes to
include relocation kind.
Currently supported relocation kinds are
 enum {
    FIELD_BYTE_OFFSET = 0,
    FIELD_BYTE_SIZE,
    FIELD_EXISTENCE,
    FIELD_SIGNEDNESS,
    FIELD_LSHIFT_U64,
    FIELD_RSHIFT_U64,
 };
for __builtin_preserve_field_info. The old
access offset relocation is covered by
    FIELD_BYTE_OFFSET = 0.
An example:
struct s {
    int a;
    int b1:9;
    int b2:4;
};
enum {
    FIELD_BYTE_OFFSET = 0,
    FIELD_BYTE_SIZE,
    FIELD_EXISTENCE,
    FIELD_SIGNEDNESS,
    FIELD_LSHIFT_U64,
    FIELD_RSHIFT_U64,
};
void bpf_probe_read(void *, unsigned, const void *);
int field_read(struct s *arg) {
  unsigned long long ull = 0;
  unsigned offset = __builtin_preserve_field_info(arg->b2, FIELD_BYTE_OFFSET);
  unsigned size = __builtin_preserve_field_info(arg->b2, FIELD_BYTE_SIZE);
 #ifdef USE_PROBE_READ
  bpf_probe_read(&ull, size, (const void *)arg + offset);
  unsigned lshift = __builtin_preserve_field_info(arg->b2, FIELD_LSHIFT_U64);
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  lshift = lshift + (size << 3) - 64;
 #endif
 #else
  switch(size) {
  case 1:
    ull = *(unsigned char *)((void *)arg + offset); break;
  case 2:
    ull = *(unsigned short *)((void *)arg + offset); break;
  case 4:
    ull = *(unsigned int *)((void *)arg + offset); break;
  case 8:
    ull = *(unsigned long long *)((void *)arg + offset); break;
  }
  unsigned lshift = __builtin_preserve_field_info(arg->b2, FIELD_LSHIFT_U64);
 #endif
  ull <<= lshift;
  if (__builtin_preserve_field_info(arg->b2, FIELD_SIGNEDNESS))
    return (long long)ull >> __builtin_preserve_field_info(arg->b2, FIELD_RSHIFT_U64);
  return ull >> __builtin_preserve_field_info(arg->b2, FIELD_RSHIFT_U64);
}
There is a minor overhead for bpf_probe_read() on big endian.
The code and relocation generated for field_read where bpf_probe_read() is
used to access argument data in little endian mode:
        r3 = r1
        r1 = 0
        r1 = 4  <=== relocation (FIELD_BYTE_OFFSET)
        r3 += r1
        r1 = r10
        r1 += -8
        r2 = 4  <=== relocation (FIELD_BYTE_SIZE)
        call bpf_probe_read
        r2 = 51 <=== relocation (FIELD_LSHIFT_U64)
        r1 = *(u64 *)(r10 - 8)
        r1 <<= r2
        r2 = 60 <=== relocation (FIELD_RSHIFT_U64)
        r0 = r1
        r0 >>= r2
        r3 = 1  <=== relocation (FIELD_SIGNEDNESS)
        if r3 == 0 goto LBB0_2
        r1 s>>= r2
        r0 = r1
LBB0_2:
        exit
Compared to the above code between relocations FIELD_LSHIFT_U64 and
FIELD_RSHIFT_U64, the code in big endian mode has four more
instructions.
        r1 = 41   <=== relocation (FIELD_LSHIFT_U64)
        r6 += r1
        r6 += -64
        r6 <<= 32
        r6 >>= 32
        r1 = *(u64 *)(r10 - 8)
        r1 <<= r6
        r2 = 60   <=== relocation (FIELD_RSHIFT_U64)
The code and relocation generated when using direct load.
        r2 = 0
        r3 = 4
        r4 = 4
        if r4 s> 3 goto LBB0_3
        if r4 == 1 goto LBB0_5
        if r4 == 2 goto LBB0_6
        goto LBB0_9
LBB0_6:                                 # %sw.bb1
        r1 += r3
        r2 = *(u16 *)(r1 + 0)
        goto LBB0_9
LBB0_3:                                 # %entry
        if r4 == 4 goto LBB0_7
        if r4 == 8 goto LBB0_8
        goto LBB0_9
LBB0_8:                                 # %sw.bb9
        r1 += r3
        r2 = *(u64 *)(r1 + 0)
        goto LBB0_9
LBB0_5:                                 # %sw.bb
        r1 += r3
        r2 = *(u8 *)(r1 + 0)
        goto LBB0_9
LBB0_7:                                 # %sw.bb5
        r1 += r3
        r2 = *(u32 *)(r1 + 0)
LBB0_9:                                 # %sw.epilog
        r1 = 51
        r2 <<= r1
        r1 = 60
        r0 = r2
        r0 >>= r1
        r3 = 1
        if r3 == 0 goto LBB0_11
        r2 s>>= r1
        r0 = r2
LBB0_11:                                # %sw.epilog
        exit
Considering that the verifier is able to do limited constant
propagation following branches, the following is the
code actually traversed.
        r2 = 0
        r3 = 4   <=== relocation
        r4 = 4   <=== relocation
        if r4 s> 3 goto LBB0_3
LBB0_3:                                 # %entry
        if r4 == 4 goto LBB0_7
LBB0_7:                                 # %sw.bb5
        r1 += r3
        r2 = *(u32 *)(r1 + 0)
LBB0_9:                                 # %sw.epilog
        r1 = 51   <=== relocation
        r2 <<= r1
        r1 = 60   <=== relocation
        r0 = r2
        r0 >>= r1
        r3 = 1
        if r3 == 0 goto LBB0_11
        r2 s>>= r1
        r0 = r2
LBB0_11:                                # %sw.epilog
        exit
For the native load case, the load size is calculated to be the
same as the load width LLVM would otherwise use to load
the value, which is then used to extract the bitfield value.
Differential Revision: https://reviews.llvm.org/D67980
llvm-svn: 374099
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index d311fc1..ba21503 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -15,7 +15,7 @@
 namespace llvm {
 class BPFTargetMachine;
 
-ModulePass *createBPFAbstractMemberAccess();
+ModulePass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
 
 FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
 FunctionPass *createBPFMISimplifyPatchablePass();
diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 5a9a34e..1b44281 100644
--- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -50,6 +50,28 @@
 //   addr = preserve_struct_access_index(base, gep_index, di_index)
 //          !llvm.preserve.access.index <struct_ditype>
 //
+// Bitfield member access needs special attention. User cannot take the
+// address of a bitfield access. To facilitate kernel verifier
+// for easy bitfield code optimization, a new clang intrinsic is introduced:
+//   uint32_t __builtin_preserve_field_info(member_access, info_kind)
+// In IR, a chain with two (or more) intrinsic calls will be generated:
+//   ...
+//   addr = preserve_struct_access_index(base, 1, 1) !struct s
+//   uint32_t result = bpf_preserve_field_info(addr, info_kind)
+//
+// Suppose the info_kind is FIELD_SIGNEDNESS,
+// The above two IR intrinsics will be replaced with
+// a relocatable insn:
+//   signness = /* signness of member_access */
+// and signness can be changed by bpf loader based on the
+// types on the host.
+//
+// User can also test whether a field exists or not with
+//   uint32_t result = bpf_preserve_field_info(member_access, FIELD_EXISTENCE)
+// The field will be always available (result = 1) during initial
+// compilation, but bpf loader can patch with the correct value
+// on the target host where the member_access may or may not be available
+//
 //===----------------------------------------------------------------------===//
 
 #include "BPF.h"
@@ -88,7 +110,11 @@
 
 public:
   static char ID;
-  BPFAbstractMemberAccess() : ModulePass(ID) {}
+  TargetMachine *TM;
+  // Add optional BPFTargetMachine parameter so that BPF backend can add the phase
+  // with target machine to find out the endianness. The default constructor (without
+  // parameters) is used by the pass manager for managing purposes.
+  BPFAbstractMemberAccess(BPFTargetMachine *TM = nullptr) : ModulePass(ID), TM(TM) {}
 
   struct CallInfo {
     uint32_t Kind;
@@ -96,19 +122,21 @@
     MDNode *Metadata;
     Value *Base;
   };
+  typedef std::stack<std::pair<CallInst *, CallInfo>> CallInfoStack;
 
 private:
   enum : uint32_t {
     BPFPreserveArrayAI = 1,
     BPFPreserveUnionAI = 2,
     BPFPreserveStructAI = 3,
+    BPFPreserveFieldInfoAI = 4,
   };
 
   std::map<std::string, GlobalVariable *> GEPGlobals;
   // A map to link preserve_*_access_index instrinsic calls.
   std::map<CallInst *, std::pair<CallInst *, CallInfo>> AIChain;
   // A map to hold all the base preserve_*_access_index instrinsic calls.
-  // The base call is not an input of any other preserve_*_access_index
+  // The base call is not an input of any other preserve_*
   // intrinsics.
   std::map<CallInst *, CallInfo> BaseAICalls;
 
@@ -127,6 +155,12 @@
   bool removePreserveAccessIndexIntrinsic(Module &M);
   void replaceWithGEP(std::vector<CallInst *> &CallList,
                       uint32_t NumOfZerosIndex, uint32_t DIIndex);
+  bool HasPreserveFieldInfoCall(CallInfoStack &CallStack);
+  void GetStorageBitRange(DICompositeType *CTy, DIDerivedType *MemberTy,
+                          uint32_t AccessIndex, uint32_t &StartBitOffset,
+                          uint32_t &EndBitOffset);
+  uint32_t GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy,
+                        uint32_t AccessIndex, uint32_t PatchImm);
 
   Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo,
                                  std::string &AccessKey, MDNode *&BaseMeta);
@@ -139,8 +173,8 @@
 INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE,
                 "abstracting struct/union member accessees", false, false)
 
-ModulePass *llvm::createBPFAbstractMemberAccess() {
-  return new BPFAbstractMemberAccess();
+ModulePass *llvm::createBPFAbstractMemberAccess(BPFTargetMachine *TM) {
+  return new BPFAbstractMemberAccess(TM);
 }
 
 bool BPFAbstractMemberAccess::runOnModule(Module &M) {
@@ -231,6 +265,16 @@
     CInfo.Base = Call->getArgOperand(0);
     return true;
   }
+  if (GV->getName().startswith("llvm.bpf.preserve.field.info")) {
+    CInfo.Kind = BPFPreserveFieldInfoAI;
+    CInfo.Metadata = nullptr;
+    // Check validity of info_kind as clang did not check this.
+    uint64_t InfoKind = getConstant(Call->getArgOperand(1));
+    if (InfoKind >= BPFCoreSharedInfo::MAX_FIELD_RELOC_KIND)
+      report_fatal_error("Incorrect info_kind for llvm.bpf.preserve.field.info intrinsic");
+    CInfo.AccessIndex = InfoKind;
+    return true;
+  }
 
   return false;
 }
@@ -306,6 +350,9 @@
 bool BPFAbstractMemberAccess::IsValidAIChain(const MDNode *ParentType,
                                              uint32_t ParentAI,
                                              const MDNode *ChildType) {
+  if (!ChildType)
+    return true; // preserve_field_info, no type comparison needed.
+
   const DIType *PType = stripQualifiers(cast<DIType>(ParentType));
   const DIType *CType = stripQualifiers(cast<DIType>(ChildType));
 
@@ -463,7 +510,187 @@
   return CV->getValue().getZExtValue();
 }
 
-/// Compute the base of the whole preserve_*_access_index chains, i.e., the base
+/// Get the start and the end of storage offset for \p MemberTy.
+/// The storage bits are corresponding to the LLVM internal types,
+/// and the storage bits for the member determines what load width
+/// to use in order to extract the bitfield value.
+void BPFAbstractMemberAccess::GetStorageBitRange(DICompositeType *CTy,
+                                                 DIDerivedType *MemberTy,
+                                                 uint32_t AccessIndex,
+                                                 uint32_t &StartBitOffset,
+                                                 uint32_t &EndBitOffset) {
+  auto SOff = dyn_cast<ConstantInt>(MemberTy->getStorageOffsetInBits());
+  assert(SOff);
+  StartBitOffset = SOff->getZExtValue();
+
+  EndBitOffset = CTy->getSizeInBits();
+  uint32_t Index = AccessIndex + 1;
+  for (; Index < CTy->getElements().size(); ++Index) {
+    auto Member = cast<DIDerivedType>(CTy->getElements()[Index]);
+    if (!Member->getStorageOffsetInBits()) {
+      EndBitOffset = Member->getOffsetInBits();
+      break;
+    }
+    SOff = dyn_cast<ConstantInt>(Member->getStorageOffsetInBits());
+    assert(SOff);
+    unsigned BitOffset = SOff->getZExtValue();
+    if (BitOffset != StartBitOffset) {
+      EndBitOffset = BitOffset;
+      break;
+    }
+  }
+}
+
+uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
+                                               DICompositeType *CTy,
+                                               uint32_t AccessIndex,
+                                               uint32_t PatchImm) {
+  if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE)
+      return 1;
+
+  uint32_t Tag = CTy->getTag();
+  if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_OFFSET) {
+    if (Tag == dwarf::DW_TAG_array_type) {
+      auto *EltTy = stripQualifiers(CTy->getBaseType());
+      PatchImm += AccessIndex * calcArraySize(CTy, 1) *
+                  (EltTy->getSizeInBits() >> 3);
+    } else if (Tag == dwarf::DW_TAG_structure_type) {
+      auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
+      if (!MemberTy->isBitField()) {
+        PatchImm += MemberTy->getOffsetInBits() >> 3;
+      } else {
+        auto SOffset = dyn_cast<ConstantInt>(MemberTy->getStorageOffsetInBits());
+        assert(SOffset);
+        PatchImm += SOffset->getZExtValue() >> 3;
+      }
+    }
+    return PatchImm;
+  }
+
+  if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_SIZE) {
+    if (Tag == dwarf::DW_TAG_array_type) {
+      auto *EltTy = stripQualifiers(CTy->getBaseType());
+      return calcArraySize(CTy, 1) * (EltTy->getSizeInBits() >> 3);
+    } else {
+      auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
+      uint32_t SizeInBits = MemberTy->getSizeInBits();
+      if (!MemberTy->isBitField())
+        return SizeInBits >> 3;
+
+      unsigned SBitOffset, NextSBitOffset;
+      GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset);
+      SizeInBits = NextSBitOffset - SBitOffset;
+      if (SizeInBits & (SizeInBits - 1))
+        report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info");
+      return SizeInBits >> 3;
+    }
+  }
+
+  if (InfoKind == BPFCoreSharedInfo::FIELD_SIGNEDNESS) {
+    const DIType *BaseTy;
+    if (Tag == dwarf::DW_TAG_array_type) {
+      // Signedness only checked when final array elements are accessed.
+      if (CTy->getElements().size() != 1)
+        report_fatal_error("Invalid array expression for llvm.bpf.preserve.field.info");
+      BaseTy = stripQualifiers(CTy->getBaseType());
+    } else {
+      auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
+      BaseTy = stripQualifiers(MemberTy->getBaseType());
+    }
+
+    // Only basic types and enum types have signedness.
+    const auto *BTy = dyn_cast<DIBasicType>(BaseTy);
+    while (!BTy) {
+      const auto *CompTy = dyn_cast<DICompositeType>(BaseTy);
+      // Report an error if the field expression does not have signedness.
+      if (!CompTy || CompTy->getTag() != dwarf::DW_TAG_enumeration_type)
+        report_fatal_error("Invalid field expression for llvm.bpf.preserve.field.info");
+      BaseTy = stripQualifiers(CompTy->getBaseType());
+      BTy = dyn_cast<DIBasicType>(BaseTy);
+    }
+    uint32_t Encoding = BTy->getEncoding();
+    return (Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char);
+  }
+
+  if (InfoKind == BPFCoreSharedInfo::FIELD_LSHIFT_U64) {
+    // The value is loaded into a value with FIELD_BYTE_SIZE size,
+    // and then zero or sign extended to U64.
+    // FIELD_LSHIFT_U64 and FIELD_RSHIFT_U64 are operations
+    // to extract the original value.
+    const Triple &Triple = TM->getTargetTriple();
+    DIDerivedType *MemberTy = nullptr;
+    bool IsBitField = false;
+    uint32_t SizeInBits;
+
+    if (Tag == dwarf::DW_TAG_array_type) {
+      auto *EltTy = stripQualifiers(CTy->getBaseType());
+      SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits();
+    } else {
+      MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
+      SizeInBits = MemberTy->getSizeInBits();
+      IsBitField = MemberTy->isBitField();
+    }
+
+    if (!IsBitField) {
+      if (SizeInBits > 64)
+        report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
+      return 64 - SizeInBits;
+    }
+
+    unsigned SBitOffset, NextSBitOffset;
+    GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset);
+    if (NextSBitOffset - SBitOffset > 64)
+      report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
+
+    unsigned OffsetInBits = MemberTy->getOffsetInBits();
+    if (Triple.getArch() == Triple::bpfel)
+      return SBitOffset + 64 - OffsetInBits - SizeInBits;
+    else
+      return OffsetInBits + 64 - NextSBitOffset;
+  }
+
+  if (InfoKind == BPFCoreSharedInfo::FIELD_RSHIFT_U64) {
+    DIDerivedType *MemberTy = nullptr;
+    bool IsBitField = false;
+    uint32_t SizeInBits;
+    if (Tag == dwarf::DW_TAG_array_type) {
+      auto *EltTy = stripQualifiers(CTy->getBaseType());
+      SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits();
+    } else {
+      MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
+      SizeInBits = MemberTy->getSizeInBits();
+      IsBitField = MemberTy->isBitField();
+    }
+
+    if (!IsBitField) {
+      if (SizeInBits > 64)
+        report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
+      return 64 - SizeInBits;
+    }
+
+    unsigned SBitOffset, NextSBitOffset;
+    GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset);
+    if (NextSBitOffset - SBitOffset > 64)
+      report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
+
+    return 64 - SizeInBits;
+  }
+
+  llvm_unreachable("Unknown llvm.bpf.preserve.field.info info kind");
+}
+
+bool BPFAbstractMemberAccess::HasPreserveFieldInfoCall(CallInfoStack &CallStack) {
+  // This is called in error return path, no need to maintain CallStack.
+  while (CallStack.size()) {
+    auto StackElem = CallStack.top();
+    if (StackElem.second.Kind == BPFPreserveFieldInfoAI)
+      return true;
+    CallStack.pop();
+  }
+  return false;
+}
+
+/// Compute the base of the whole preserve_* intrinsics chains, i.e., the base
 /// pointer of the first preserve_*_access_index call, and construct the access
 /// string, which will be the name of a global variable.
 Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
@@ -472,7 +699,7 @@
                                                         MDNode *&TypeMeta) {
   Value *Base = nullptr;
   std::string TypeName;
-  std::stack<std::pair<CallInst *, CallInfo>> CallStack;
+  CallInfoStack CallStack;
 
   // Put the access chain into a stack with the top as the head of the chain.
   while (Call) {
@@ -492,7 +719,8 @@
   //    int a[10][20]; ... __builtin_preserve_access_index(&a[2][3]) ...
   // we will skip them.
   uint32_t FirstIndex = 0;
-  uint32_t AccessOffset = 0;
+  uint32_t PatchImm = 0; // AccessOffset or the requested field info
+  uint32_t InfoKind = BPFCoreSharedInfo::FIELD_BYTE_OFFSET;
   while (CallStack.size()) {
     auto StackElem = CallStack.top();
     Call = StackElem.first;
@@ -507,10 +735,12 @@
       // struct or union type
       TypeName = Ty->getName();
       TypeMeta = Ty;
-      AccessOffset += FirstIndex * Ty->getSizeInBits() >> 3;
+      PatchImm += FirstIndex * (Ty->getSizeInBits() >> 3);
       break;
     }
 
+    assert(CInfo.Kind == BPFPreserveArrayAI);
+
     // Array entries will always be consumed for accumulative initial index.
     CallStack.pop();
 
@@ -546,16 +776,22 @@
 
     if (CheckElemType) {
       auto *CTy = dyn_cast<DICompositeType>(BaseTy);
-      if (!CTy)
+      if (!CTy) {
+        if (HasPreserveFieldInfoCall(CallStack))
+          report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic");
         return nullptr;
+      }
 
       unsigned CTag = CTy->getTag();
-      if (CTag != dwarf::DW_TAG_structure_type && CTag != dwarf::DW_TAG_union_type)
-        return nullptr;
-      else
+      if (CTag == dwarf::DW_TAG_structure_type || CTag == dwarf::DW_TAG_union_type) {
         TypeName = CTy->getName();
+      } else {
+        if (HasPreserveFieldInfoCall(CallStack))
+          report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic");
+        return nullptr;
+      }
       TypeMeta = CTy;
-      AccessOffset += FirstIndex * CTy->getSizeInBits() >> 3;
+      PatchImm += FirstIndex * (CTy->getSizeInBits() >> 3);
       break;
     }
   }
@@ -569,6 +805,20 @@
     CInfo = StackElem.second;
     CallStack.pop();
 
+    if (CInfo.Kind == BPFPreserveFieldInfoAI)
+      break;
+
+    // If the next Call (the top of the stack) is a BPFPreserveFieldInfoAI,
+    // the action will be extracting field info.
+    if (CallStack.size()) {
+      auto StackElem2 = CallStack.top();
+      CallInfo CInfo2 = StackElem2.second;
+      if (CInfo2.Kind == BPFPreserveFieldInfoAI) {
+        InfoKind = CInfo2.AccessIndex;
+        assert(CallStack.size() == 1);
+      }
+    }
+
     // Access Index
     uint64_t AccessIndex = CInfo.AccessIndex;
     AccessKey += ":" + std::to_string(AccessIndex);
@@ -576,20 +826,13 @@
     MDNode *MDN = CInfo.Metadata;
     // At this stage, it cannot be pointer type.
     auto *CTy = cast<DICompositeType>(stripQualifiers(cast<DIType>(MDN)));
-    uint32_t Tag = CTy->getTag();
-    if (Tag == dwarf::DW_TAG_structure_type) {
-      auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
-      AccessOffset += MemberTy->getOffsetInBits() >> 3;
-    } else if (Tag == dwarf::DW_TAG_array_type) {
-      auto *EltTy = stripQualifiers(CTy->getBaseType());
-      AccessOffset += AccessIndex * calcArraySize(CTy, 1) *
-                      EltTy->getSizeInBits() >> 3;
-    }
+    PatchImm = GetFieldInfo(InfoKind, CTy, AccessIndex, PatchImm);
   }
 
-  // Access key is the type name + access string, uniquely identifying
-  // one kernel memory access.
-  AccessKey = TypeName + ":" + std::to_string(AccessOffset) + "$" + AccessKey;
+  // Access key is the type name + reloc type + patched imm + access string,
+  // uniquely identifying one relocation.
+  AccessKey = TypeName + ":" + std::to_string(InfoKind) + ":" +
+              std::to_string(PatchImm) + "$" + AccessKey;
 
   return Base;
 }
@@ -605,7 +848,34 @@
   if (!Base)
     return false;
 
-  // Do the transformation
+  BasicBlock *BB = Call->getParent();
+  GlobalVariable *GV;
+
+  if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
+    IntegerType *VarType;
+    if (CInfo.Kind == BPFPreserveFieldInfoAI)
+      VarType = Type::getInt32Ty(BB->getContext()); // 32bit return value
+    else
+      VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr arith
+
+    GV = new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage,
+                            NULL, AccessKey);
+    GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
+    GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta);
+    GEPGlobals[AccessKey] = GV;
+  } else {
+    GV = GEPGlobals[AccessKey];
+  }
+
+  if (CInfo.Kind == BPFPreserveFieldInfoAI) {
+    // Load the global variable which represents the returned field info.
+    auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV);
+    BB->getInstList().insert(Call->getIterator(), LDInst);
+    Call->replaceAllUsesWith(LDInst);
+    Call->eraseFromParent();
+    return true;
+  }
+
   // For any original GEP Call and Base %2 like
   //   %4 = bitcast %struct.net_device** %dev1 to i64*
   // it is transformed to:
@@ -615,18 +885,6 @@
   //   %9 = bitcast i8* %8 to i64*
   //   using %9 instead of %4
   // The original Call inst is removed.
-  BasicBlock *BB = Call->getParent();
-  GlobalVariable *GV;
-
-  if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
-    GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false,
-                            GlobalVariable::ExternalLinkage, NULL, AccessKey);
-    GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
-    GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta);
-    GEPGlobals[AccessKey] = GV;
-  } else {
-    GV = GEPGlobals[AccessKey];
-  }
 
   // Load the global variable.
   auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV);
diff --git a/llvm/lib/Target/BPF/BPFCORE.h b/llvm/lib/Target/BPF/BPFCORE.h
index e0950d9..a6cb3cf 100644
--- a/llvm/lib/Target/BPF/BPFCORE.h
+++ b/llvm/lib/Target/BPF/BPFCORE.h
@@ -13,6 +13,16 @@
 
 class BPFCoreSharedInfo {
 public:
+  enum OffsetRelocKind : uint32_t {
+    FIELD_BYTE_OFFSET = 0,
+    FIELD_BYTE_SIZE,
+    FIELD_EXISTENCE,
+    FIELD_SIGNEDNESS,
+    FIELD_LSHIFT_U64,
+    FIELD_RSHIFT_U64,
+
+    MAX_FIELD_RELOC_KIND,
+  };
   /// The attribute attached to globals representing a member offset
   static const std::string AmaAttr;
   /// The section name to identify a patchable external global
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index a69a806..d940ac9 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -94,7 +94,7 @@
 
 void BPFPassConfig::addIRPasses() {
 
-  addPass(createBPFAbstractMemberAccess());
+  addPass(createBPFAbstractMemberAccess(&getBPFTargetMachine()));
 
   TargetPassConfig::addIRPasses();
 }
diff --git a/llvm/lib/Target/BPF/BTF.h b/llvm/lib/Target/BPF/BTF.h
index ad56716..ef408da 100644
--- a/llvm/lib/Target/BPF/BTF.h
+++ b/llvm/lib/Target/BPF/BTF.h
@@ -17,7 +17,7 @@
 ///
 /// The binary layout for .BTF.ext section:
 ///   struct ExtHeader
-///   FuncInfo, LineInfo, OffsetReloc and ExternReloc subsections
+///   FuncInfo, LineInfo, FieldReloc and ExternReloc subsections
 /// The FuncInfo subsection is defined as below:
 ///   BTFFuncInfo Size
 ///   struct SecFuncInfo for ELF section #1
@@ -32,12 +32,12 @@
 ///   struct SecLineInfo for ELF section #2
 ///   A number of struct BPFLineInfo for ELF section #2
 ///   ...
-/// The OffsetReloc subsection is defined as below:
-///   BPFOffsetReloc Size
-///   struct SecOffsetReloc for ELF section #1
-///   A number of struct BPFOffsetReloc for ELF section #1
-///   struct SecOffsetReloc for ELF section #2
-///   A number of struct BPFOffsetReloc for ELF section #2
+/// The FieldReloc subsection is defined as below:
+///   BPFFieldReloc Size
+///   struct SecFieldReloc for ELF section #1
+///   A number of struct BPFFieldReloc for ELF section #1
+///   struct SecFieldReloc for ELF section #2
+///   A number of struct BPFFieldReloc for ELF section #2
 ///   ...
 /// The ExternReloc subsection is defined as below:
 ///   BPFExternReloc Size
@@ -72,11 +72,11 @@
   BTFDataSecVarSize = 12,
   SecFuncInfoSize = 8,
   SecLineInfoSize = 8,
-  SecOffsetRelocSize = 8,
+  SecFieldRelocSize = 8,
   SecExternRelocSize = 8,
   BPFFuncInfoSize = 8,
   BPFLineInfoSize = 16,
-  BPFOffsetRelocSize = 12,
+  BPFFieldRelocSize = 16,
   BPFExternRelocSize = 8,
 };
 
@@ -213,8 +213,8 @@
   uint32_t FuncInfoLen;    ///< Length of func info section
   uint32_t LineInfoOff;    ///< Offset of line info section
   uint32_t LineInfoLen;    ///< Length of line info section
-  uint32_t OffsetRelocOff; ///< Offset of offset reloc section
-  uint32_t OffsetRelocLen; ///< Length of offset reloc section
+  uint32_t FieldRelocOff; ///< Offset of offset reloc section
+  uint32_t FieldRelocLen; ///< Length of offset reloc section
   uint32_t ExternRelocOff; ///< Offset of extern reloc section
   uint32_t ExternRelocLen; ///< Length of extern reloc section
 };
@@ -247,16 +247,17 @@
 };
 
 /// Specifying one offset relocation.
-struct BPFOffsetReloc {
+struct BPFFieldReloc {
   uint32_t InsnOffset;    ///< Byte offset in this section
   uint32_t TypeID;        ///< TypeID for the relocation
   uint32_t OffsetNameOff; ///< The string to traverse types
+  uint32_t RelocKind;     ///< What to patch the instruction
 };
 
 /// Specifying offset relocation's in one section.
-struct SecOffsetReloc {
+struct SecFieldReloc {
   uint32_t SecNameOff;     ///< Section name index in the .BTF string table
-  uint32_t NumOffsetReloc; ///< Number of offset reloc's in this section
+  uint32_t NumFieldReloc; ///< Number of offset reloc's in this section
 };
 
 /// Specifying one offset relocation.
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index 3ad3234..a8f8573 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -754,7 +754,7 @@
 void BTFDebug::emitBTFExtSection() {
   // Do not emit section if empty FuncInfoTable and LineInfoTable.
   if (!FuncInfoTable.size() && !LineInfoTable.size() &&
-      !OffsetRelocTable.size() && !ExternRelocTable.size())
+      !FieldRelocTable.size() && !ExternRelocTable.size())
     return;
 
   MCContext &Ctx = OS.getContext();
@@ -766,8 +766,8 @@
 
   // Account for FuncInfo/LineInfo record size as well.
   uint32_t FuncLen = 4, LineLen = 4;
-  // Do not account for optional OffsetReloc/ExternReloc.
-  uint32_t OffsetRelocLen = 0, ExternRelocLen = 0;
+  // Do not account for optional FieldReloc/ExternReloc.
+  uint32_t FieldRelocLen = 0, ExternRelocLen = 0;
   for (const auto &FuncSec : FuncInfoTable) {
     FuncLen += BTF::SecFuncInfoSize;
     FuncLen += FuncSec.second.size() * BTF::BPFFuncInfoSize;
@@ -776,17 +776,17 @@
     LineLen += BTF::SecLineInfoSize;
     LineLen += LineSec.second.size() * BTF::BPFLineInfoSize;
   }
-  for (const auto &OffsetRelocSec : OffsetRelocTable) {
-    OffsetRelocLen += BTF::SecOffsetRelocSize;
-    OffsetRelocLen += OffsetRelocSec.second.size() * BTF::BPFOffsetRelocSize;
+  for (const auto &FieldRelocSec : FieldRelocTable) {
+    FieldRelocLen += BTF::SecFieldRelocSize;
+    FieldRelocLen += FieldRelocSec.second.size() * BTF::BPFFieldRelocSize;
   }
   for (const auto &ExternRelocSec : ExternRelocTable) {
     ExternRelocLen += BTF::SecExternRelocSize;
     ExternRelocLen += ExternRelocSec.second.size() * BTF::BPFExternRelocSize;
   }
 
-  if (OffsetRelocLen)
-    OffsetRelocLen += 4;
+  if (FieldRelocLen)
+    FieldRelocLen += 4;
   if (ExternRelocLen)
     ExternRelocLen += 4;
 
@@ -795,8 +795,8 @@
   OS.EmitIntValue(FuncLen, 4);
   OS.EmitIntValue(LineLen, 4);
   OS.EmitIntValue(FuncLen + LineLen, 4);
-  OS.EmitIntValue(OffsetRelocLen, 4);
-  OS.EmitIntValue(FuncLen + LineLen + OffsetRelocLen, 4);
+  OS.EmitIntValue(FieldRelocLen, 4);
+  OS.EmitIntValue(FuncLen + LineLen + FieldRelocLen, 4);
   OS.EmitIntValue(ExternRelocLen, 4);
 
   // Emit func_info table.
@@ -831,19 +831,20 @@
     }
   }
 
-  // Emit offset reloc table.
-  if (OffsetRelocLen) {
-    OS.AddComment("OffsetReloc");
-    OS.EmitIntValue(BTF::BPFOffsetRelocSize, 4);
-    for (const auto &OffsetRelocSec : OffsetRelocTable) {
-      OS.AddComment("Offset reloc section string offset=" +
-                    std::to_string(OffsetRelocSec.first));
-      OS.EmitIntValue(OffsetRelocSec.first, 4);
-      OS.EmitIntValue(OffsetRelocSec.second.size(), 4);
-      for (const auto &OffsetRelocInfo : OffsetRelocSec.second) {
-        Asm->EmitLabelReference(OffsetRelocInfo.Label, 4);
-        OS.EmitIntValue(OffsetRelocInfo.TypeID, 4);
-        OS.EmitIntValue(OffsetRelocInfo.OffsetNameOff, 4);
+  // Emit field reloc table.
+  if (FieldRelocLen) {
+    OS.AddComment("FieldReloc");
+    OS.EmitIntValue(BTF::BPFFieldRelocSize, 4);
+    for (const auto &FieldRelocSec : FieldRelocTable) {
+      OS.AddComment("Field reloc section string offset=" +
+                    std::to_string(FieldRelocSec.first));
+      OS.EmitIntValue(FieldRelocSec.first, 4);
+      OS.EmitIntValue(FieldRelocSec.second.size(), 4);
+      for (const auto &FieldRelocInfo : FieldRelocSec.second) {
+        Asm->EmitLabelReference(FieldRelocInfo.Label, 4);
+        OS.EmitIntValue(FieldRelocInfo.TypeID, 4);
+        OS.EmitIntValue(FieldRelocInfo.OffsetNameOff, 4);
+        OS.EmitIntValue(FieldRelocInfo.RelocKind, 4);
       }
     }
   }
@@ -958,23 +959,27 @@
   return Id;
 }
 
-/// Generate a struct member offset relocation.
-void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
+/// Generate a field relocation; AccessPattern = "<pfx>:<kind>:<imm>$<idx>".
+void BTFDebug::generateFieldReloc(const MachineInstr *MI,
                                    const MCSymbol *ORSym, DIType *RootTy,
                                    StringRef AccessPattern) {
   unsigned RootId = populateStructType(RootTy);
   size_t FirstDollar = AccessPattern.find_first_of('$');
   size_t FirstColon = AccessPattern.find_first_of(':');
+  size_t SecondColon = AccessPattern.find_first_of(':', FirstColon + 1);
   StringRef IndexPattern = AccessPattern.substr(FirstDollar + 1);
-  StringRef OffsetStr = AccessPattern.substr(FirstColon + 1,
-      FirstDollar - FirstColon);
+  StringRef RelocKindStr = AccessPattern.substr(FirstColon + 1,
+      SecondColon - FirstColon - 1);
+  StringRef PatchImmStr = AccessPattern.substr(SecondColon + 1,
+      FirstDollar - SecondColon - 1);
 
-  BTFOffsetReloc OffsetReloc;
-  OffsetReloc.Label = ORSym;
-  OffsetReloc.OffsetNameOff = addString(IndexPattern);
-  OffsetReloc.TypeID = RootId;
-  AccessOffsets[AccessPattern.str()] = std::stoi(OffsetStr);
-  OffsetRelocTable[SecNameOff].push_back(OffsetReloc);
+  BTFFieldReloc FieldReloc;
+  FieldReloc.Label = ORSym;
+  FieldReloc.OffsetNameOff = addString(IndexPattern);
+  FieldReloc.TypeID = RootId;
+  FieldReloc.RelocKind = std::stoull(RelocKindStr);
+  PatchImms[AccessPattern.str()] = std::stoul(PatchImmStr);
+  FieldRelocTable[SecNameOff].push_back(FieldReloc);
 }
 
 void BTFDebug::processLDimm64(const MachineInstr *MI) {
@@ -982,7 +987,7 @@
   // will generate an .BTF.ext record.
   //
   // If the insn is "r2 = LD_imm64 @__BTF_...",
-  // add this insn into the .BTF.ext OffsetReloc subsection.
+  // add this insn into the .BTF.ext FieldReloc subsection.
   // Relocation looks like:
   //  . SecName:
   //    . InstOffset
@@ -1013,7 +1018,7 @@
 
       MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
       DIType *Ty = dyn_cast<DIType>(MDN);
-      generateOffsetReloc(MI, ORSym, Ty, GVar->getName());
+      generateFieldReloc(MI, ORSym, Ty, GVar->getName());
     } else if (GVar && !GVar->hasInitializer() && GVar->hasExternalLinkage() &&
                GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) {
       MCSymbol *ORSym = OS.getContext().createTempSymbol();
@@ -1154,8 +1159,8 @@
       const GlobalValue *GVal = MO.getGlobal();
       auto *GVar = dyn_cast<GlobalVariable>(GVal);
       if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
-        // Emit "mov ri, <imm>" for abstract member accesses.
-        int64_t Imm = AccessOffsets[GVar->getName().str()];
+        // Emit "mov ri, <imm>" for patched immediate.
+        uint32_t Imm = PatchImms[GVar->getName().str()];
         OutMI.setOpcode(BPF::MOV_ri);
         OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
         OutMI.addOperand(MCOperand::createImm(Imm));
diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h
index a79527d..eec8614 100644
--- a/llvm/lib/Target/BPF/BTFDebug.h
+++ b/llvm/lib/Target/BPF/BTFDebug.h
@@ -195,7 +195,7 @@
   /// A mapping from string table offset to the index
   /// of the Table. It is used to avoid putting
   /// duplicated strings in the table.
-  std::unordered_map<uint32_t, uint32_t> OffsetToIdMap;
+  std::map<uint32_t, uint32_t> OffsetToIdMap;
   /// A vector of strings to represent the string table.
   std::vector<std::string> Table;
 
@@ -224,10 +224,11 @@
 };
 
-/// Represent one offset relocation.
+/// Represent one field relocation.
-struct BTFOffsetReloc {
+struct BTFFieldReloc {
   const MCSymbol *Label;  ///< MCSymbol identifying insn for the reloc
   uint32_t TypeID;        ///< Type ID
   uint32_t OffsetNameOff; ///< The string to traverse types
+  uint32_t RelocKind;     ///< Kind of field info to patch into the insn
 };
 
 /// Represent one extern relocation.
@@ -249,12 +250,12 @@
   std::unordered_map<const DIType *, uint32_t> DIToIdMap;
   std::map<uint32_t, std::vector<BTFFuncInfo>> FuncInfoTable;
   std::map<uint32_t, std::vector<BTFLineInfo>> LineInfoTable;
-  std::map<uint32_t, std::vector<BTFOffsetReloc>> OffsetRelocTable;
+  std::map<uint32_t, std::vector<BTFFieldReloc>> FieldRelocTable;
   std::map<uint32_t, std::vector<BTFExternReloc>> ExternRelocTable;
   StringMap<std::vector<std::string>> FileContent;
   std::map<std::string, std::unique_ptr<BTFKindDataSec>> DataSecEntries;
   std::vector<BTFTypeStruct *> StructTypes;
-  std::map<std::string, int64_t> AccessOffsets;
+  std::map<std::string, uint32_t> PatchImms;
   std::map<StringRef, std::pair<bool, std::vector<BTFTypeDerived *>>>
       FixupDerivedTypes;
 
@@ -300,7 +301,7 @@
   void processGlobals(bool ProcessingMapDef);
 
-  /// Generate one offset relocation record.
+  /// Generate one field relocation record.
-  void generateOffsetReloc(const MachineInstr *MI, const MCSymbol *ORSym,
+  void generateFieldReloc(const MachineInstr *MI, const MCSymbol *ORSym,
                            DIType *RootTy, StringRef AccessPattern);
 
   /// Populating unprocessed struct type.