Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 1 | //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "AArch64ExternalSymbolizer.h" |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 10 | #include "MCTargetDesc/AArch64AddressingModes.h" |
| 11 | #include "Utils/AArch64BaseInfo.h" |
| 12 | #include "llvm/MC/MCContext.h" |
| 13 | #include "llvm/MC/MCExpr.h" |
| 14 | #include "llvm/MC/MCInst.h" |
Benjamin Kramer | 27c769d | 2018-09-10 12:53:46 +0000 | [diff] [blame] | 15 | #include "llvm/MC/MCRegisterInfo.h" |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 16 | #include "llvm/Support/Format.h" |
| 17 | #include "llvm/Support/raw_ostream.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | |
| 21 | #define DEBUG_TYPE "aarch64-disassembler" |
| 22 | |
| 23 | static MCSymbolRefExpr::VariantKind |
| 24 | getVariant(uint64_t LLVMDisassembler_VariantKind) { |
| 25 | switch (LLVMDisassembler_VariantKind) { |
| 26 | case LLVMDisassembler_VariantKind_None: |
| 27 | return MCSymbolRefExpr::VK_None; |
| 28 | case LLVMDisassembler_VariantKind_ARM64_PAGE: |
| 29 | return MCSymbolRefExpr::VK_PAGE; |
| 30 | case LLVMDisassembler_VariantKind_ARM64_PAGEOFF: |
| 31 | return MCSymbolRefExpr::VK_PAGEOFF; |
| 32 | case LLVMDisassembler_VariantKind_ARM64_GOTPAGE: |
| 33 | return MCSymbolRefExpr::VK_GOTPAGE; |
| 34 | case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF: |
| 35 | return MCSymbolRefExpr::VK_GOTPAGEOFF; |
| 36 | case LLVMDisassembler_VariantKind_ARM64_TLVP: |
| 37 | case LLVMDisassembler_VariantKind_ARM64_TLVOFF: |
| 38 | default: |
Craig Topper | 35b2f75 | 2014-06-19 06:10:58 +0000 | [diff] [blame] | 39 | llvm_unreachable("bad LLVMDisassembler_VariantKind"); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 40 | } |
| 41 | } |
| 42 | |
| 43 | /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic |
| 44 | /// operand in place of the immediate Value in the MCInst. The immediate |
| 45 | /// Value has not had any PC adjustment made by the caller. If the instruction |
| 46 | /// is a branch that adds the PC to the immediate Value then isBranch is |
| 47 | /// Success, else Fail. If GetOpInfo is non-null, then it is called to get any |
| 48 | /// symbolic information at the Address for this instrution. If that returns |
| 49 | /// non-zero then the symbolic information it returns is used to create an |
| 50 | /// MCExpr and that is added as an operand to the MCInst. If GetOpInfo() |
| 51 | /// returns zero and isBranch is Success then a symbol look up for |
| 52 | /// Address + Value is done and if a symbol is found an MCExpr is created with |
| 53 | /// that, else an MCExpr with Address + Value is created. If GetOpInfo() |
Eric Christopher | 572e03a | 2015-06-19 01:53:21 +0000 | [diff] [blame] | 54 | /// returns zero and isBranch is Fail then the Opcode of the MCInst is |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 55 | /// tested and for ADRP an other instructions that help to load of pointers |
| 56 | /// a symbol look up is done to see it is returns a specific reference type |
| 57 | /// to add to the comment stream. This function returns Success if it adds |
| 58 | /// an operand to the MCInst and Fail otherwise. |
| 59 | bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( |
| 60 | MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, |
| 61 | bool IsBranch, uint64_t Offset, uint64_t InstSize) { |
Daniel Sanders | 6a943fb | 2018-08-21 15:47:25 +0000 | [diff] [blame] | 62 | if (!SymbolLookUp) |
| 63 | return false; |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 64 | // FIXME: This method shares a lot of code with |
| 65 | // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible |
| 66 | // refactor the MCExternalSymbolizer interface to allow more of this |
| 67 | // implementation to be shared. |
| 68 | // |
| 69 | struct LLVMOpInfo1 SymbolicOp; |
| 70 | memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); |
| 71 | SymbolicOp.Value = Value; |
| 72 | uint64_t ReferenceType; |
| 73 | const char *ReferenceName; |
| 74 | if (!GetOpInfo || |
| 75 | !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { |
| 76 | if (IsBranch) { |
| 77 | ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; |
| 78 | const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, |
| 79 | Address, &ReferenceName); |
| 80 | if (Name) { |
| 81 | SymbolicOp.AddSymbol.Name = Name; |
| 82 | SymbolicOp.AddSymbol.Present = true; |
| 83 | SymbolicOp.Value = 0; |
| 84 | } else { |
| 85 | SymbolicOp.Value = Address + Value; |
| 86 | } |
| 87 | if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) |
| 88 | CommentStream << "symbol stub for: " << ReferenceName; |
| 89 | else if (ReferenceType == |
| 90 | LLVMDisassembler_ReferenceType_Out_Objc_Message) |
| 91 | CommentStream << "Objc message: " << ReferenceName; |
| 92 | } else if (MI.getOpcode() == AArch64::ADRP) { |
| 93 | ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; |
| 94 | // otool expects the fully encoded ADRP instruction to be passed in as |
| 95 | // the value here, so reconstruct it: |
| 96 | const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); |
| 97 | uint32_t EncodedInst = 0x90000000; |
| 98 | EncodedInst |= (Value & 0x3) << 29; // immlo |
| 99 | EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi |
| 100 | EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg |
| 101 | SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, |
| 102 | &ReferenceName); |
Tim Northover | d8949f5 | 2018-05-30 09:54:59 +0000 | [diff] [blame] | 103 | CommentStream << format("0x%llx", (0xfffffffffffff000LL & Address) + |
| 104 | Value * 0x1000); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 105 | } else if (MI.getOpcode() == AArch64::ADDXri || |
| 106 | MI.getOpcode() == AArch64::LDRXui || |
| 107 | MI.getOpcode() == AArch64::LDRXl || |
| 108 | MI.getOpcode() == AArch64::ADR) { |
| 109 | if (MI.getOpcode() == AArch64::ADDXri) |
| 110 | ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; |
| 111 | else if (MI.getOpcode() == AArch64::LDRXui) |
| 112 | ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; |
| 113 | if (MI.getOpcode() == AArch64::LDRXl) { |
| 114 | ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; |
| 115 | SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, |
| 116 | &ReferenceName); |
| 117 | } else if (MI.getOpcode() == AArch64::ADR) { |
| 118 | ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; |
| 119 | SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, |
| 120 | &ReferenceName); |
| 121 | } else { |
| 122 | const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); |
| 123 | // otool expects the fully encoded ADD/LDR instruction to be passed in |
| 124 | // as the value here, so reconstruct it: |
| 125 | unsigned EncodedInst = |
| 126 | MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000; |
| 127 | EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD] |
| 128 | EncodedInst |= |
| 129 | MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn |
| 130 | EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd |
| 131 | |
| 132 | SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, |
| 133 | &ReferenceName); |
| 134 | } |
| 135 | if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) |
| 136 | CommentStream << "literal pool symbol address: " << ReferenceName; |
| 137 | else if (ReferenceType == |
Kevin Enderby | d2d2ce9 | 2016-06-13 21:08:57 +0000 | [diff] [blame] | 138 | LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { |
| 139 | CommentStream << "literal pool for: \""; |
NAKAMURA Takumi | fe1202c | 2016-06-20 00:37:41 +0000 | [diff] [blame] | 140 | CommentStream.write_escaped(ReferenceName); |
Kevin Enderby | d2d2ce9 | 2016-06-13 21:08:57 +0000 | [diff] [blame] | 141 | CommentStream << "\""; |
| 142 | } else if (ReferenceType == |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 143 | LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) |
| 144 | CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; |
| 145 | else if (ReferenceType == |
| 146 | LLVMDisassembler_ReferenceType_Out_Objc_Message) |
| 147 | CommentStream << "Objc message: " << ReferenceName; |
| 148 | else if (ReferenceType == |
| 149 | LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) |
| 150 | CommentStream << "Objc message ref: " << ReferenceName; |
| 151 | else if (ReferenceType == |
| 152 | LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) |
| 153 | CommentStream << "Objc selector ref: " << ReferenceName; |
| 154 | else if (ReferenceType == |
| 155 | LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) |
| 156 | CommentStream << "Objc class ref: " << ReferenceName; |
| 157 | // For these instructions, the SymbolLookUp() above is just to get the |
| 158 | // ReferenceType and ReferenceName. We want to make sure not to |
| 159 | // fall through so we don't build an MCExpr to leave the disassembly |
| 160 | // of the immediate values of these instructions to the InstPrinter. |
| 161 | return false; |
| 162 | } else { |
| 163 | return false; |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | const MCExpr *Add = nullptr; |
| 168 | if (SymbolicOp.AddSymbol.Present) { |
| 169 | if (SymbolicOp.AddSymbol.Name) { |
| 170 | StringRef Name(SymbolicOp.AddSymbol.Name); |
Jim Grosbach | 6f48200 | 2015-05-18 18:43:14 +0000 | [diff] [blame] | 171 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 172 | MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); |
| 173 | if (Variant != MCSymbolRefExpr::VK_None) |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 174 | Add = MCSymbolRefExpr::create(Sym, Variant, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 175 | else |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 176 | Add = MCSymbolRefExpr::create(Sym, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 177 | } else { |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 178 | Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 179 | } |
| 180 | } |
| 181 | |
| 182 | const MCExpr *Sub = nullptr; |
| 183 | if (SymbolicOp.SubtractSymbol.Present) { |
| 184 | if (SymbolicOp.SubtractSymbol.Name) { |
| 185 | StringRef Name(SymbolicOp.SubtractSymbol.Name); |
Jim Grosbach | 6f48200 | 2015-05-18 18:43:14 +0000 | [diff] [blame] | 186 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 187 | Sub = MCSymbolRefExpr::create(Sym, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 188 | } else { |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 189 | Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 190 | } |
| 191 | } |
| 192 | |
| 193 | const MCExpr *Off = nullptr; |
| 194 | if (SymbolicOp.Value != 0) |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 195 | Off = MCConstantExpr::create(SymbolicOp.Value, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 196 | |
| 197 | const MCExpr *Expr; |
| 198 | if (Sub) { |
| 199 | const MCExpr *LHS; |
| 200 | if (Add) |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 201 | LHS = MCBinaryExpr::createSub(Add, Sub, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 202 | else |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 203 | LHS = MCUnaryExpr::createMinus(Sub, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 204 | if (Off) |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 205 | Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 206 | else |
| 207 | Expr = LHS; |
| 208 | } else if (Add) { |
| 209 | if (Off) |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 210 | Expr = MCBinaryExpr::createAdd(Add, Off, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 211 | else |
| 212 | Expr = Add; |
| 213 | } else { |
| 214 | if (Off) |
| 215 | Expr = Off; |
| 216 | else |
Jim Grosbach | 13760bd | 2015-05-30 01:25:56 +0000 | [diff] [blame] | 217 | Expr = MCConstantExpr::create(0, Ctx); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 218 | } |
| 219 | |
Jim Grosbach | e9119e4 | 2015-05-13 18:37:00 +0000 | [diff] [blame] | 220 | MI.addOperand(MCOperand::createExpr(Expr)); |
Tim Northover | 3b0846e | 2014-05-24 12:50:23 +0000 | [diff] [blame] | 221 | |
| 222 | return true; |
| 223 | } |