Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 1 | //===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file is part of the ARM Disassembler. |
| 11 | // It contains code to implement the public interfaces of ARMDisassembler and |
| 12 | // ThumbDisassembler, both of which are instances of MCDisassembler. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #define DEBUG_TYPE "arm-disassembler" |
| 17 | |
| 18 | #include "ARMDisassembler.h" |
| 19 | #include "ARMDisassemblerCore.h" |
| 20 | |
Benjamin Kramer | 83ccbff | 2011-03-24 21:14:28 +0000 | [diff] [blame] | 21 | #include "llvm/ADT/OwningPtr.h" |
Sean Callanan | 9899f70 | 2010-04-13 21:21:57 +0000 | [diff] [blame] | 22 | #include "llvm/MC/EDInstInfo.h" |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 23 | #include "llvm/MC/MCInst.h" |
| 24 | #include "llvm/Target/TargetRegistry.h" |
| 25 | #include "llvm/Support/Debug.h" |
| 26 | #include "llvm/Support/MemoryObject.h" |
| 27 | #include "llvm/Support/ErrorHandling.h" |
| 28 | #include "llvm/Support/raw_ostream.h" |
| 29 | |
Johnny Chen | 270159f | 2010-08-12 01:40:54 +0000 | [diff] [blame] | 30 | //#define DEBUG(X) do { X; } while (0) |
| 31 | |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 32 | /// ARMGenDecoderTables.inc - This file is tblgen'ed from the |
| 33 | /// ARMDecoderEmitter.cpp TableGen backend. It contains: |
| 34 | /// |
| 35 | /// o Mappings from opcode to ARM/Thumb instruction format |
| 36 | /// |
| 37 | /// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function |
| 38 | /// for an ARM instruction. |
| 39 | /// |
| 40 | /// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding |
| 41 | /// function for a Thumb instruction. |
| 42 | /// |
Oscar Fuentes | 38e1390 | 2010-09-28 11:48:19 +0000 | [diff] [blame] | 43 | #include "ARMGenDecoderTables.inc" |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 44 | |
Oscar Fuentes | 38e1390 | 2010-09-28 11:48:19 +0000 | [diff] [blame] | 45 | #include "ARMGenEDInfo.inc" |
Sean Callanan | 9899f70 | 2010-04-13 21:21:57 +0000 | [diff] [blame] | 46 | |
| 47 | using namespace llvm; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 48 | |
| 49 | /// showBitVector - Use the raw_ostream to log a diagnostic message describing |
| 50 | /// the inidividual bits of the instruction. |
| 51 | /// |
| 52 | static inline void showBitVector(raw_ostream &os, const uint32_t &insn) { |
| 53 | // Split the bit position markers into more than one lines to fit 80 columns. |
| 54 | os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11" |
| 55 | << " 10 9 8 7 6 5 4 3 2 1 0 \n"; |
| 56 | os << "---------------------------------------------------------------" |
| 57 | << "----------------------------------\n"; |
| 58 | os << '|'; |
| 59 | for (unsigned i = 32; i != 0; --i) { |
| 60 | if (insn >> (i - 1) & 0x01) |
| 61 | os << " 1"; |
| 62 | else |
| 63 | os << " 0"; |
| 64 | os << (i%4 == 1 ? '|' : ':'); |
| 65 | } |
| 66 | os << '\n'; |
| 67 | // Split the bit position markers into more than one lines to fit 80 columns. |
| 68 | os << "---------------------------------------------------------------" |
| 69 | << "----------------------------------\n"; |
| 70 | os << '\n'; |
| 71 | } |
| 72 | |
| 73 | /// decodeARMInstruction is a decorator function which tries special cases of |
| 74 | /// instruction matching before calling the auto-generated decoder function. |
| 75 | static unsigned decodeARMInstruction(uint32_t &insn) { |
| 76 | if (slice(insn, 31, 28) == 15) |
| 77 | goto AutoGenedDecoder; |
| 78 | |
| 79 | // Special case processing, if any, goes here.... |
| 80 | |
| 81 | // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB. |
| 82 | // The insufficient encoding information of the combined instruction confuses |
| 83 | // the decoder wrt BFC/BFI. Therefore, we try to recover here. |
| 84 | // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111. |
| 85 | // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111. |
| 86 | if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) { |
| 87 | if (slice(insn, 3, 0) == 15) |
| 88 | return ARM::BFC; |
| 89 | else |
| 90 | return ARM::BFI; |
| 91 | } |
| 92 | |
Jim Grosbach | 55561d1 | 2010-10-13 23:47:11 +0000 | [diff] [blame] | 93 | // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings |
| 94 | // A1 & A2. |
Johnny Chen | 270159f | 2010-08-12 01:40:54 +0000 | [diff] [blame] | 95 | // As a result, the decoder fails to deocode USAT properly. |
| 96 | if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1) |
| 97 | return ARM::USAT; |
Johnny Chen | 18b475f | 2011-03-09 20:01:14 +0000 | [diff] [blame] | 98 | // As a result, the decoder fails to deocode UQADD16 properly. |
| 99 | if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1) |
| 100 | return ARM::UQADD16; |
Johnny Chen | 270159f | 2010-08-12 01:40:54 +0000 | [diff] [blame] | 101 | |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 102 | // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8. |
| 103 | // As a result, the decoder fails to decode UMULL properly. |
| 104 | if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) { |
| 105 | return ARM::UMULL; |
| 106 | } |
| 107 | |
| 108 | // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195. |
| 109 | // As a result, the decoder fails to decode SBFX properly. |
| 110 | if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5) |
| 111 | return ARM::SBFX; |
| 112 | |
| 113 | // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198. |
| 114 | // As a result, the decoder fails to decode UBFX properly. |
| 115 | if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5) |
| 116 | return ARM::UBFX; |
| 117 | |
| 118 | // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2. |
| 119 | // As a result, the decoder fails to deocode SSAT properly. |
| 120 | if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1) |
Bob Wilson | eaf1c98 | 2010-08-11 23:10:46 +0000 | [diff] [blame] | 121 | return ARM::SSAT; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 122 | |
| 123 | // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147. |
| 124 | // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT. |
| 125 | if (slice(insn, 27, 24) == 0) { |
| 126 | switch (slice(insn, 21, 20)) { |
| 127 | case 2: |
| 128 | switch (slice(insn, 7, 4)) { |
| 129 | case 11: |
| 130 | return ARM::STRHT; |
| 131 | default: |
| 132 | break; // fallthrough |
| 133 | } |
| 134 | break; |
| 135 | case 3: |
| 136 | switch (slice(insn, 7, 4)) { |
| 137 | case 11: |
| 138 | return ARM::LDRHT; |
| 139 | case 13: |
| 140 | return ARM::LDRSBT; |
| 141 | case 15: |
| 142 | return ARM::LDRSHT; |
| 143 | default: |
| 144 | break; // fallthrough |
| 145 | } |
| 146 | break; |
| 147 | default: |
| 148 | break; // fallthrough |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153. |
| 153 | // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST |
| 154 | // properly. |
| 155 | if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) { |
| 156 | unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21); |
| 157 | switch (slice(insn, 7, 4)) { |
| 158 | case 11: |
| 159 | switch (PW) { |
| 160 | case 2: // Offset |
| 161 | return ARM::STRH; |
| 162 | case 3: // Pre-indexed |
| 163 | return ARM::STRH_PRE; |
| 164 | case 0: // Post-indexed |
| 165 | return ARM::STRH_POST; |
| 166 | default: |
| 167 | break; // fallthrough |
| 168 | } |
| 169 | break; |
| 170 | case 13: |
| 171 | switch (PW) { |
| 172 | case 2: // Offset |
| 173 | return ARM::LDRD; |
| 174 | case 3: // Pre-indexed |
| 175 | return ARM::LDRD_PRE; |
| 176 | case 0: // Post-indexed |
| 177 | return ARM::LDRD_POST; |
| 178 | default: |
| 179 | break; // fallthrough |
| 180 | } |
| 181 | break; |
| 182 | case 15: |
| 183 | switch (PW) { |
| 184 | case 2: // Offset |
| 185 | return ARM::STRD; |
| 186 | case 3: // Pre-indexed |
| 187 | return ARM::STRD_PRE; |
| 188 | case 0: // Post-indexed |
| 189 | return ARM::STRD_POST; |
| 190 | default: |
| 191 | break; // fallthrough |
| 192 | } |
| 193 | break; |
| 194 | default: |
| 195 | break; // fallthrough |
| 196 | } |
| 197 | } |
| 198 | |
| 199 | // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153. |
| 200 | // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST |
| 201 | // properly. |
| 202 | if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) { |
| 203 | unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21); |
| 204 | switch (slice(insn, 7, 4)) { |
| 205 | case 11: |
| 206 | switch (PW) { |
| 207 | case 2: // Offset |
| 208 | return ARM::LDRH; |
| 209 | case 3: // Pre-indexed |
| 210 | return ARM::LDRH_PRE; |
| 211 | case 0: // Post-indexed |
| 212 | return ARM::LDRH_POST; |
| 213 | default: |
| 214 | break; // fallthrough |
| 215 | } |
| 216 | break; |
| 217 | case 13: |
| 218 | switch (PW) { |
| 219 | case 2: // Offset |
| 220 | return ARM::LDRSB; |
| 221 | case 3: // Pre-indexed |
| 222 | return ARM::LDRSB_PRE; |
| 223 | case 0: // Post-indexed |
| 224 | return ARM::LDRSB_POST; |
| 225 | default: |
| 226 | break; // fallthrough |
| 227 | } |
| 228 | break; |
| 229 | case 15: |
| 230 | switch (PW) { |
| 231 | case 2: // Offset |
| 232 | return ARM::LDRSH; |
| 233 | case 3: // Pre-indexed |
| 234 | return ARM::LDRSH_PRE; |
| 235 | case 0: // Post-indexed |
| 236 | return ARM::LDRSH_POST; |
| 237 | default: |
| 238 | break; // fallthrough |
| 239 | } |
| 240 | break; |
| 241 | default: |
| 242 | break; // fallthrough |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | AutoGenedDecoder: |
| 247 | // Calling the auto-generated decoder function. |
| 248 | return decodeInstruction(insn); |
| 249 | } |
| 250 | |
| 251 | // Helper function for special case handling of LDR (literal) and friends. |
| 252 | // See, for example, A6.3.7 Load word: Table A6-18 Load word. |
| 253 | // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode |
| 254 | // before returning it. |
| 255 | static unsigned T2Morph2LoadLiteral(unsigned Opcode) { |
| 256 | switch (Opcode) { |
| 257 | default: |
| 258 | return Opcode; // Return unmorphed opcode. |
| 259 | |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 260 | case ARM::t2LDR_POST: case ARM::t2LDR_PRE: |
| 261 | case ARM::t2LDRi12: case ARM::t2LDRi8: |
Johnny Chen | ef37e3a | 2010-04-20 17:28:50 +0000 | [diff] [blame] | 262 | case ARM::t2LDRs: case ARM::t2LDRT: |
Owen Anderson | 971b83b | 2011-02-08 22:39:40 +0000 | [diff] [blame] | 263 | return ARM::t2LDRpci; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 264 | |
| 265 | case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE: |
| 266 | case ARM::t2LDRBi12: case ARM::t2LDRBi8: |
Johnny Chen | ef37e3a | 2010-04-20 17:28:50 +0000 | [diff] [blame] | 267 | case ARM::t2LDRBs: case ARM::t2LDRBT: |
Owen Anderson | 971b83b | 2011-02-08 22:39:40 +0000 | [diff] [blame] | 268 | return ARM::t2LDRBpci; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 269 | |
| 270 | case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE: |
| 271 | case ARM::t2LDRHi12: case ARM::t2LDRHi8: |
Johnny Chen | ef37e3a | 2010-04-20 17:28:50 +0000 | [diff] [blame] | 272 | case ARM::t2LDRHs: case ARM::t2LDRHT: |
Owen Anderson | 971b83b | 2011-02-08 22:39:40 +0000 | [diff] [blame] | 273 | return ARM::t2LDRHpci; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 274 | |
| 275 | case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE: |
| 276 | case ARM::t2LDRSBi12: case ARM::t2LDRSBi8: |
Johnny Chen | ef37e3a | 2010-04-20 17:28:50 +0000 | [diff] [blame] | 277 | case ARM::t2LDRSBs: case ARM::t2LDRSBT: |
Owen Anderson | 971b83b | 2011-02-08 22:39:40 +0000 | [diff] [blame] | 278 | return ARM::t2LDRSBpci; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 279 | |
| 280 | case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE: |
| 281 | case ARM::t2LDRSHi12: case ARM::t2LDRSHi8: |
Johnny Chen | ef37e3a | 2010-04-20 17:28:50 +0000 | [diff] [blame] | 282 | case ARM::t2LDRSHs: case ARM::t2LDRSHT: |
Owen Anderson | 971b83b | 2011-02-08 22:39:40 +0000 | [diff] [blame] | 283 | return ARM::t2LDRSHpci; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 284 | } |
| 285 | } |
| 286 | |
| 287 | /// decodeThumbSideEffect is a decorator function which can potentially twiddle |
| 288 | /// the instruction or morph the returned opcode under Thumb2. |
| 289 | /// |
| 290 | /// First it checks whether the insn is a NEON or VFP instr; if true, bit |
| 291 | /// twiddling could be performed on insn to turn it into an ARM NEON/VFP |
| 292 | /// equivalent instruction and decodeInstruction is called with the transformed |
| 293 | /// insn. |
| 294 | /// |
| 295 | /// Next, there is special handling for Load byte/halfword/word instruction by |
| 296 | /// checking whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded |
| 297 | /// Thumb2 instruction. See comments below for further details. |
| 298 | /// |
| 299 | /// Finally, one last check is made to see whether the insn is a NEON/VFP and |
| 300 | /// decodeInstruction(insn) is invoked on the original insn. |
| 301 | /// |
| 302 | /// Otherwise, decodeThumbInstruction is called with the original insn. |
NAKAMURA Takumi | 186acea | 2010-09-08 04:48:17 +0000 | [diff] [blame] | 303 | static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) { |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 304 | if (IsThumb2) { |
| 305 | uint16_t op1 = slice(insn, 28, 27); |
| 306 | uint16_t op2 = slice(insn, 26, 20); |
| 307 | |
| 308 | // A6.3 32-bit Thumb instruction encoding |
| 309 | // Table A6-9 32-bit Thumb instruction encoding |
| 310 | |
| 311 | // The coprocessor instructions of interest are transformed to their ARM |
| 312 | // equivalents. |
| 313 | |
| 314 | // --------- Transform Begin Marker --------- |
| 315 | if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) { |
| 316 | // A7.4 Advanced SIMD data-processing instructions |
| 317 | // U bit of Thumb corresponds to Inst{24} of ARM. |
| 318 | uint16_t U = slice(op1, 1, 1); |
| 319 | |
| 320 | // Inst{28-24} of ARM = {1,0,0,1,U}; |
| 321 | uint16_t bits28_24 = 9 << 1 | U; |
| 322 | DEBUG(showBitVector(errs(), insn)); |
| 323 | setSlice(insn, 28, 24, bits28_24); |
| 324 | return decodeInstruction(insn); |
| 325 | } |
| 326 | |
| 327 | if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) { |
| 328 | // A7.7 Advanced SIMD element or structure load/store instructions |
| 329 | // Inst{27-24} of Thumb = 0b1001 |
| 330 | // Inst{27-24} of ARM = 0b0100 |
| 331 | DEBUG(showBitVector(errs(), insn)); |
| 332 | setSlice(insn, 27, 24, 4); |
| 333 | return decodeInstruction(insn); |
| 334 | } |
| 335 | // --------- Transform End Marker --------- |
| 336 | |
| 337 | // See, for example, A6.3.7 Load word: Table A6-18 Load word. |
| 338 | // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode |
| 339 | // before returning it to our caller. |
| 340 | if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1 |
| 341 | && slice(insn, 19, 16) == 15) |
| 342 | return T2Morph2LoadLiteral(decodeThumbInstruction(insn)); |
| 343 | |
| 344 | // One last check for NEON/VFP instructions. |
| 345 | if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1) |
| 346 | return decodeInstruction(insn); |
| 347 | |
| 348 | // Fall through. |
| 349 | } |
| 350 | |
| 351 | return decodeThumbInstruction(insn); |
| 352 | } |
| 353 | |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 354 | // |
| 355 | // Public interface for the disassembler |
| 356 | // |
| 357 | |
| 358 | bool ARMDisassembler::getInstruction(MCInst &MI, |
| 359 | uint64_t &Size, |
| 360 | const MemoryObject &Region, |
| 361 | uint64_t Address, |
| 362 | raw_ostream &os) const { |
| 363 | // The machine instruction. |
| 364 | uint32_t insn; |
Johnny Chen | 9d563b6 | 2010-04-05 04:46:17 +0000 | [diff] [blame] | 365 | uint8_t bytes[4]; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 366 | |
| 367 | // We want to read exactly 4 bytes of data. |
Johnny Chen | 9d563b6 | 2010-04-05 04:46:17 +0000 | [diff] [blame] | 368 | if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 369 | return false; |
| 370 | |
Johnny Chen | 9d563b6 | 2010-04-05 04:46:17 +0000 | [diff] [blame] | 371 | // Encoded as a small-endian 32-bit word in the stream. |
| 372 | insn = (bytes[3] << 24) | |
| 373 | (bytes[2] << 16) | |
| 374 | (bytes[1] << 8) | |
| 375 | (bytes[0] << 0); |
Johnny Chen | 7fb053d | 2010-04-05 04:51:50 +0000 | [diff] [blame] | 376 | |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 377 | unsigned Opcode = decodeARMInstruction(insn); |
| 378 | ARMFormat Format = ARMFormats[Opcode]; |
| 379 | Size = 4; |
| 380 | |
| 381 | DEBUG({ |
Johnny Chen | c59c87c | 2011-03-22 23:49:46 +0000 | [diff] [blame] | 382 | errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode) |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 383 | << " Format=" << stringForARMFormat(Format) << '(' << (int)Format |
| 384 | << ")\n"; |
| 385 | showBitVector(errs(), insn); |
| 386 | }); |
| 387 | |
Benjamin Kramer | 83ccbff | 2011-03-24 21:14:28 +0000 | [diff] [blame] | 388 | OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format)); |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 389 | if (!Builder) |
| 390 | return false; |
| 391 | |
| 392 | if (!Builder->Build(MI, insn)) |
| 393 | return false; |
| 394 | |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 395 | return true; |
| 396 | } |
| 397 | |
| 398 | bool ThumbDisassembler::getInstruction(MCInst &MI, |
| 399 | uint64_t &Size, |
| 400 | const MemoryObject &Region, |
| 401 | uint64_t Address, |
| 402 | raw_ostream &os) const { |
Johnny Chen | 9d563b6 | 2010-04-05 04:46:17 +0000 | [diff] [blame] | 403 | // The Thumb instruction stream is a sequence of halhwords. |
| 404 | |
| 405 | // This represents the first halfword as well as the machine instruction |
| 406 | // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top |
| 407 | // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to |
| 408 | // the top half followed by the second halfword. |
NAKAMURA Takumi | 186acea | 2010-09-08 04:48:17 +0000 | [diff] [blame] | 409 | unsigned insn = 0; |
Johnny Chen | 9d563b6 | 2010-04-05 04:46:17 +0000 | [diff] [blame] | 410 | // Possible second halfword. |
| 411 | uint16_t insn1 = 0; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 412 | |
| 413 | // A6.1 Thumb instruction set encoding |
| 414 | // |
| 415 | // If bits [15:11] of the halfword being decoded take any of the following |
| 416 | // values, the halfword is the first halfword of a 32-bit instruction: |
| 417 | // o 0b11101 |
| 418 | // o 0b11110 |
| 419 | // o 0b11111. |
| 420 | // |
| 421 | // Otherwise, the halfword is a 16-bit instruction. |
| 422 | |
| 423 | // Read 2 bytes of data first. |
Johnny Chen | 9d563b6 | 2010-04-05 04:46:17 +0000 | [diff] [blame] | 424 | uint8_t bytes[2]; |
| 425 | if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 426 | return false; |
| 427 | |
Johnny Chen | 9d563b6 | 2010-04-05 04:46:17 +0000 | [diff] [blame] | 428 | // Encoded as a small-endian 16-bit halfword in the stream. |
| 429 | insn = (bytes[1] << 8) | bytes[0]; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 430 | unsigned bits15_11 = slice(insn, 15, 11); |
| 431 | bool IsThumb2 = false; |
| 432 | |
| 433 | // 32-bit instructions if the bits [15:11] of the halfword matches |
| 434 | // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, ob11111 /* 0x1F */ }. |
| 435 | if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) { |
| 436 | IsThumb2 = true; |
Johnny Chen | 9d563b6 | 2010-04-05 04:46:17 +0000 | [diff] [blame] | 437 | if (Region.readBytes(Address + 2, 2, (uint8_t*)bytes, NULL) == -1) |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 438 | return false; |
Johnny Chen | 9d563b6 | 2010-04-05 04:46:17 +0000 | [diff] [blame] | 439 | // Encoded as a small-endian 16-bit halfword in the stream. |
| 440 | insn1 = (bytes[1] << 8) | bytes[0]; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 441 | insn = (insn << 16 | insn1); |
| 442 | } |
| 443 | |
| 444 | // The insn could potentially be bit-twiddled in order to be decoded as an ARM |
| 445 | // NEON/VFP opcode. In such case, the modified insn is later disassembled as |
| 446 | // an ARM NEON/VFP instruction. |
| 447 | // |
| 448 | // This is a short term solution for lack of encoding bits specified for the |
| 449 | // Thumb2 NEON/VFP instructions. The long term solution could be adding some |
| 450 | // infrastructure to have each instruction support more than one encodings. |
| 451 | // Which encoding is used would be based on which subtarget the compiler/ |
| 452 | // disassembler is working with at the time. This would allow the sharing of |
| 453 | // the NEON patterns between ARM and Thumb2, as well as potential greater |
| 454 | // sharing between the regular ARM instructions and the 32-bit wide Thumb2 |
| 455 | // instructions as well. |
| 456 | unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn); |
| 457 | |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 458 | ARMFormat Format = ARMFormats[Opcode]; |
| 459 | Size = IsThumb2 ? 4 : 2; |
| 460 | |
| 461 | DEBUG({ |
| 462 | errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode) |
| 463 | << " Format=" << stringForARMFormat(Format) << '(' << (int)Format |
| 464 | << ")\n"; |
| 465 | showBitVector(errs(), insn); |
| 466 | }); |
| 467 | |
Johnny Chen | 8cb9886 | 2011-03-24 21:42:55 +0000 | [diff] [blame^] | 468 | OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format)); |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 469 | if (!Builder) |
| 470 | return false; |
| 471 | |
Johnny Chen | af5b0e8 | 2010-04-16 23:02:25 +0000 | [diff] [blame] | 472 | Builder->SetSession(const_cast<Session *>(&SO)); |
| 473 | |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 474 | if (!Builder->Build(MI, insn)) |
| 475 | return false; |
| 476 | |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 477 | return true; |
| 478 | } |
| 479 | |
| 480 | // A8.6.50 |
Johnny Chen | d0f3c46 | 2010-04-19 23:02:58 +0000 | [diff] [blame] | 481 | // Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition. |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 482 | static unsigned short CountITSize(unsigned ITMask) { |
| 483 | // First count the trailing zeros of the IT mask. |
| 484 | unsigned TZ = CountTrailingZeros_32(ITMask); |
Johnny Chen | d0f3c46 | 2010-04-19 23:02:58 +0000 | [diff] [blame] | 485 | if (TZ > 3) { |
Johnny Chen | 6bcf52f | 2010-04-20 00:15:41 +0000 | [diff] [blame] | 486 | DEBUG(errs() << "Encoding error: IT Mask '0000'"); |
Johnny Chen | d0f3c46 | 2010-04-19 23:02:58 +0000 | [diff] [blame] | 487 | return 0; |
| 488 | } |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 489 | return (4 - TZ); |
| 490 | } |
| 491 | |
Johnny Chen | d0f3c46 | 2010-04-19 23:02:58 +0000 | [diff] [blame] | 492 | /// Init ITState. Note that at least one bit is always 1 in mask. |
| 493 | bool Session::InitIT(unsigned short bits7_0) { |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 494 | ITCounter = CountITSize(slice(bits7_0, 3, 0)); |
Johnny Chen | 6bcf52f | 2010-04-20 00:15:41 +0000 | [diff] [blame] | 495 | if (ITCounter == 0) |
| 496 | return false; |
| 497 | |
| 498 | // A8.6.50 IT |
| 499 | unsigned short FirstCond = slice(bits7_0, 7, 4); |
| 500 | if (FirstCond == 0xF) { |
| 501 | DEBUG(errs() << "Encoding error: IT FirstCond '1111'"); |
| 502 | return false; |
| 503 | } |
| 504 | if (FirstCond == 0xE && ITCounter != 1) { |
| 505 | DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'"); |
| 506 | return false; |
| 507 | } |
| 508 | |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 509 | ITState = bits7_0; |
Johnny Chen | 6bcf52f | 2010-04-20 00:15:41 +0000 | [diff] [blame] | 510 | |
| 511 | return true; |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 512 | } |
| 513 | |
| 514 | /// Update ITState if necessary. |
| 515 | void Session::UpdateIT() { |
| 516 | assert(ITCounter); |
| 517 | --ITCounter; |
| 518 | if (ITCounter == 0) |
| 519 | ITState = 0; |
| 520 | else { |
| 521 | unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1; |
| 522 | setSlice(ITState, 4, 0, NewITState4_0); |
| 523 | } |
| 524 | } |
| 525 | |
| 526 | static MCDisassembler *createARMDisassembler(const Target &T) { |
| 527 | return new ARMDisassembler; |
| 528 | } |
| 529 | |
| 530 | static MCDisassembler *createThumbDisassembler(const Target &T) { |
| 531 | return new ThumbDisassembler; |
| 532 | } |
| 533 | |
Owen Anderson | 971b83b | 2011-02-08 22:39:40 +0000 | [diff] [blame] | 534 | extern "C" void LLVMInitializeARMDisassembler() { |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 535 | // Register the disassembler. |
Owen Anderson | 971b83b | 2011-02-08 22:39:40 +0000 | [diff] [blame] | 536 | TargetRegistry::RegisterMCDisassembler(TheARMTarget, |
Johnny Chen | b68a3ee | 2010-04-02 22:27:38 +0000 | [diff] [blame] | 537 | createARMDisassembler); |
| 538 | TargetRegistry::RegisterMCDisassembler(TheThumbTarget, |
| 539 | createThumbDisassembler); |
| 540 | } |
| 541 | |
Sean Callanan | 9899f70 | 2010-04-13 21:21:57 +0000 | [diff] [blame] | 542 | EDInstInfo *ARMDisassembler::getEDInfo() const { |
| 543 | return instInfoARM; |
| 544 | } |
| 545 | |
| 546 | EDInstInfo *ThumbDisassembler::getEDInfo() const { |
| 547 | return instInfoARM; |
| 548 | } |