Blame - lib/Target/ARM/Disassembler/ARMDisassembler.cpp - platform/external/llvm

blob: 52010cd057f22d2b4aa52ca468a075057b405724 [file] [log] [blame]

Johnny Chen	d30a98e	2010-03-16 16:36:54 +0000	[diff] [blame^]	1	//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA ----- C++ --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file is part of the ARM Disassembler.
				11	// It contains code to translate the data produced by the decoder into MCInsts.
				12	// Documentation for the disassembler can be found in ARMDisassembler.h.
				13	//
				14	//===----------------------------------------------------------------------===//
				15
				16	#define DEBUG_TYPE "arm-disassembler"
				17
				18	#include "ARMDisassembler.h"
				19	#include "ARMDisassemblerCore.h"
				20
				21	#include "llvm/MC/MCInst.h"
				22	#include "llvm/Target/TargetRegistry.h"
				23	#include "llvm/Support/Debug.h"
				24	#include "llvm/Support/MemoryObject.h"
				25	#include "llvm/Support/ErrorHandling.h"
				26	#include "llvm/Support/raw_ostream.h"
				27
				28	/// ARMDisassemblerTables.inc - ARMDisassemblerTables.inc is tblgen'ed from
				29	/// RISCDisassemblerEmitter.cpp TableGen backend. It contains:
				30	///
				31	/// o Mappings from opcode to ARM/Thumb instruction format
				32	///
				33	/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function
				34	/// for an ARM instruction.
				35	///
				36	/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
				37	/// function for a Thumb instruction.
				38	///
				39	#include "../ARMGenDisassemblerTables.inc"
				40
				41	namespace llvm {
				42
				43	namespace ARMDisassembler {
				44
				45	/// showBitVector - Use the raw_ostream to log a diagnostic message describing
				46	/// the inidividual bits of the instruction. This is a sample output:
				47	///
				48	/// 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
				49	/// -------------------------------------------------------------------------------------------------
				50	/// \| 1: 0: 1: 0\| 1: 0: 1: 0\| 1: 0: 1: 0\| 1: 0: 1: 0\| 1: 0: 1: 0\| 1: 0: 1: 0\| 1: 0: 1: 0\| 1: 0: 1: 0\|
				51	/// -------------------------------------------------------------------------------------------------
				52	///
				53	static inline void showBitVector(raw_ostream &os, const uint32_t &insn) {
				54	os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 \n";
				55	os << "-------------------------------------------------------------------------------------------------\n";
				56	os << '\|';
				57	for (unsigned i = 32; i != 0; --i) {
				58	if (insn >> (i - 1) & 0x01)
				59	os << " 1";
				60	else
				61	os << " 0";
				62	os << (i%4 == 1 ? '\|' : ':');
				63	}
				64	os << '\n';
				65	os << "-------------------------------------------------------------------------------------------------\n";
				66	os << '\n';
				67	}
				68
				69	/// decodeARMInstruction is a decorator function which tries special cases of
				70	/// instruction matching before calling the auto-generated decoder function.
				71	static unsigned decodeARMInstruction(uint32_t &insn) {
				72	if (slice(insn, 31, 28) == 15)
				73	goto AutoGenedDecoder;
				74
				75	// Special case processing, if any, goes here....
				76
				77	// LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB.
				78	// The insufficient encoding information of the combined instruction confuses
				79	// the decoder wrt BFC/BFI. Therefore, we try to recover here.
				80	// For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111.
				81	// For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111.
				82	if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) {
				83	if (slice(insn, 3, 0) == 15)
				84	return ARM::BFC;
				85	else
				86	return ARM::BFI;
				87	}
				88
				89	// Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
				90	// As a result, the decoder fails to decode UMULL properly.
				91	if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
				92	return ARM::UMULL;
				93	}
				94
				95	// Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195.
				96	// As a result, the decoder fails to decode SBFX properly.
				97	if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5)
				98	return ARM::SBFX;
				99
				100	// And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198.
				101	// As a result, the decoder fails to decode UBFX properly.
				102	if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5)
				103	return ARM::UBFX;
				104
				105	// Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
				106	// As a result, the decoder fails to deocode SSAT properly.
				107	if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
				108	return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr;
				109
				110	// Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
				111	// As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
				112	if (slice(insn, 27, 24) == 0) {
				113	switch (slice(insn, 21, 20)) {
				114	case 2:
				115	switch (slice(insn, 7, 4)) {
				116	case 11:
				117	return ARM::STRHT;
				118	default:
				119	break; // fallthrough
				120	}
				121	break;
				122	case 3:
				123	switch (slice(insn, 7, 4)) {
				124	case 11:
				125	return ARM::LDRHT;
				126	case 13:
				127	return ARM::LDRSBT;
				128	case 15:
				129	return ARM::LDRSHT;
				130	default:
				131	break; // fallthrough
				132	}
				133	break;
				134	default:
				135	break; // fallthrough
				136	}
				137	}
				138
				139	// Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153.
				140	// As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST
				141	// properly.
				142	if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) {
				143	unsigned PW = slice(insn, 24, 24) << 1 \| slice(insn, 21, 21);
				144	switch (slice(insn, 7, 4)) {
				145	case 11:
				146	switch (PW) {
				147	case 2: // Offset
				148	return ARM::STRH;
				149	case 3: // Pre-indexed
				150	return ARM::STRH_PRE;
				151	case 0: // Post-indexed
				152	return ARM::STRH_POST;
				153	default:
				154	break; // fallthrough
				155	}
				156	break;
				157	case 13:
				158	switch (PW) {
				159	case 2: // Offset
				160	return ARM::LDRD;
				161	case 3: // Pre-indexed
				162	return ARM::LDRD_PRE;
				163	case 0: // Post-indexed
				164	return ARM::LDRD_POST;
				165	default:
				166	break; // fallthrough
				167	}
				168	break;
				169	case 15:
				170	switch (PW) {
				171	case 2: // Offset
				172	return ARM::STRD;
				173	case 3: // Pre-indexed
				174	return ARM::STRD_PRE;
				175	case 0: // Post-indexed
				176	return ARM::STRD_POST;
				177	default:
				178	break; // fallthrough
				179	}
				180	break;
				181	default:
				182	break; // fallthrough
				183	}
				184	}
				185
				186	// Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153.
				187	// As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST
				188	// properly.
				189	if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) {
				190	unsigned PW = slice(insn, 24, 24) << 1 \| slice(insn, 21, 21);
				191	switch (slice(insn, 7, 4)) {
				192	case 11:
				193	switch (PW) {
				194	case 2: // Offset
				195	return ARM::LDRH;
				196	case 3: // Pre-indexed
				197	return ARM::LDRH_PRE;
				198	case 0: // Post-indexed
				199	return ARM::LDRH_POST;
				200	default:
				201	break; // fallthrough
				202	}
				203	break;
				204	case 13:
				205	switch (PW) {
				206	case 2: // Offset
				207	return ARM::LDRSB;
				208	case 3: // Pre-indexed
				209	return ARM::LDRSB_PRE;
				210	case 0: // Post-indexed
				211	return ARM::LDRSB_POST;
				212	default:
				213	break; // fallthrough
				214	}
				215	break;
				216	case 15:
				217	switch (PW) {
				218	case 2: // Offset
				219	return ARM::LDRSH;
				220	case 3: // Pre-indexed
				221	return ARM::LDRSH_PRE;
				222	case 0: // Post-indexed
				223	return ARM::LDRSH_POST;
				224	default:
				225	break; // fallthrough
				226	}
				227	break;
				228	default:
				229	break; // fallthrough
				230	}
				231	}
				232
				233	AutoGenedDecoder:
				234	// Calling the auto-generated decoder function.
				235	return decodeInstruction(insn);
				236	}
				237
				238	// Helper function for special case handling of LDR (literal) and friends.
				239	// See, for example, A6.3.7 Load word: Table A6-18 Load word.
				240	// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
				241	// before passing it on.
				242	static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
				243	switch (Opcode) {
				244	default:
				245	return Opcode; // Return unmorphed opcode.
				246
				247	case ARM::t2LDRDi8:
				248	return ARM::t2LDRDpci;
				249
				250	case ARM::t2LDR_POST: case ARM::t2LDR_PRE:
				251	case ARM::t2LDRi12: case ARM::t2LDRi8:
				252	case ARM::t2LDRs:
				253	return ARM::t2LDRpci;
				254
				255	case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE:
				256	case ARM::t2LDRBi12: case ARM::t2LDRBi8:
				257	case ARM::t2LDRBs:
				258	return ARM::t2LDRBpci;
				259
				260	case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE:
				261	case ARM::t2LDRHi12: case ARM::t2LDRHi8:
				262	case ARM::t2LDRHs:
				263	return ARM::t2LDRHpci;
				264
				265	case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE:
				266	case ARM::t2LDRSBi12: case ARM::t2LDRSBi8:
				267	case ARM::t2LDRSBs:
				268	return ARM::t2LDRSBpci;
				269
				270	case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE:
				271	case ARM::t2LDRSHi12: case ARM::t2LDRSHi8:
				272	case ARM::t2LDRSHs:
				273	return ARM::t2LDRSHpci;
				274	}
				275	}
				276
				277	/// decodeThumbSideEffect is a decorator function which can potentially twiddle
				278	/// the instruction or morph the returned opcode under Thumb2.
				279	///
				280	/// First it checks whether the insn is a NEON or VFP instr; if true, bit
				281	/// twiddling could be performed on insn to turn it into an ARM NEON/VFP
				282	/// equivalent instruction and decodeInstruction is called with the transformed
				283	/// insn.
				284	///
				285	/// Next, there is special handling for Load byte/halfword/word instruction by
				286	/// checking whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded
				287	/// Thumb2 instruction. See comments below for further details.
				288	///
				289	/// Finally, one last check is made to see whether the insn is a NEON/VFP and
				290	/// decodeInstruction(insn) is invoked on the original insn.
				291	///
				292	/// Otherwise, decodeThumbInstruction is called with the original insn.
				293	static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) {
				294	if (IsThumb2) {
				295	uint16_t op1 = slice(insn, 28, 27);
				296	uint16_t op2 = slice(insn, 26, 20);
				297
				298	// A6.3 32-bit Thumb instruction encoding
				299	// Table A6-9 32-bit Thumb instruction encoding
				300
				301	// The coprocessor instructions of interest are transformed to their ARM
				302	// equivalents.
				303
				304	// --------- Transform Begin Marker ---------
				305	if ((op1 == 1 \|\| op1 == 3) && slice(op2, 6, 4) == 7) {
				306	// A7.4 Advanced SIMD data-processing instructions
				307	// U bit of Thumb corresponds to Inst{24} of ARM.
				308	uint16_t U = slice(op1, 1, 1);
				309
				310	// Inst{28-24} of ARM = {1,0,0,1,U};
				311	uint16_t bits28_24 = 9 << 1 \| U;
				312	DEBUG(showBitVector(errs(), insn));
				313	setSlice(insn, 28, 24, bits28_24);
				314	return decodeInstruction(insn);
				315	}
				316
				317	if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) {
				318	// A7.7 Advanced SIMD element or structure load/store instructions
				319	// Inst{27-24} of Thumb = 0b1001
				320	// Inst{27-24} of ARM = 0b0100
				321	DEBUG(showBitVector(errs(), insn));
				322	setSlice(insn, 27, 24, 4);
				323	return decodeInstruction(insn);
				324	}
				325	// --------- Transform End Marker ---------
				326
				327	// See, for example, A6.3.7 Load word: Table A6-18 Load word.
				328	// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
				329	// before passing it on to our delegate.
				330	if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1
				331	&& slice(insn, 19, 16) == 15)
				332	return T2Morph2LoadLiteral(decodeThumbInstruction(insn));
				333
				334	// One last check for NEON/VFP instructions.
				335	if ((op1 == 1 \|\| op1 == 3) && slice(op2, 6, 6) == 1)
				336	return decodeInstruction(insn);
				337
				338	// Fall through.
				339	}
				340
				341	return decodeThumbInstruction(insn);
				342	}
				343
				344	static inline bool Thumb2PreloadOpcodeNoPCI(unsigned Opcode) {
				345	switch (Opcode) {
				346	default:
				347	return false;
				348	case ARM::t2PLDi12: case ARM::t2PLDi8:
				349	case ARM::t2PLDr: case ARM::t2PLDs:
				350	case ARM::t2PLDWi12: case ARM::t2PLDWi8:
				351	case ARM::t2PLDWr: case ARM::t2PLDWs:
				352	case ARM::t2PLIi12: case ARM::t2PLIi8:
				353	case ARM::t2PLIr: case ARM::t2PLIs:
				354	return true;
				355	}
				356	}
				357
				358	static inline unsigned T2Morph2Preload2PCI(unsigned Opcode) {
				359	switch (Opcode) {
				360	default:
				361	return 0;
				362	case ARM::t2PLDi12: case ARM::t2PLDi8:
				363	case ARM::t2PLDr: case ARM::t2PLDs:
				364	return ARM::t2PLDpci;
				365	case ARM::t2PLDWi12: case ARM::t2PLDWi8:
				366	case ARM::t2PLDWr: case ARM::t2PLDWs:
				367	return ARM::t2PLDWpci;
				368	case ARM::t2PLIi12: case ARM::t2PLIi8:
				369	case ARM::t2PLIr: case ARM::t2PLIs:
				370	return ARM::t2PLIpci;
				371	}
				372	}
				373
				374	//
				375	// Public interface for the disassembler
				376	//
				377
				378	bool ARMDisassembler::getInstruction(MCInst &MI,
				379	uint64_t &Size,
				380	const MemoryObject &Region,
				381	uint64_t Address,
				382	raw_ostream &os) const {
				383	// The machine instruction.
				384	uint32_t insn;
				385
				386	// We want to read exactly 4 bytes of data.
				387	if (Region.readBytes(Address, 4, (uint8_t*)&insn, NULL) == -1)
				388	return false;
				389
				390	unsigned Opcode = decodeARMInstruction(insn);
				391	ARMFormat Format = ARMFormats[Opcode];
				392	NSFormat NSF = NSFormats[Opcode];
				393	Size = 4;
				394
				395	DEBUG({
				396	errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
				397	<< " Format=" << stringForARMFormat(Format) << " NSFormat="
				398	<< stringForNSFormat(NSF) << '\n';
				399	showBitVector(errs(), insn);
				400	});
				401
				402	AbstractARMMCBuilder *Builder =
				403	ARMMCBuilderFactory::CreateMCBuilder(Opcode, Format, NSF);
				404
				405	if (!Builder)
				406	return false;
				407
				408	if (!Builder->Build(MI, insn))
				409	return false;
				410
				411	delete Builder;
				412
				413	return true;
				414	}
				415
				416	bool ThumbDisassembler::getInstruction(MCInst &MI,
				417	uint64_t &Size,
				418	const MemoryObject &Region,
				419	uint64_t Address,
				420	raw_ostream &os) const {
				421	// The machine instruction.
				422	uint32_t insn = 0;
				423	uint32_t insn1 = 0;
				424
				425	// A6.1 Thumb instruction set encoding
				426	//
				427	// If bits [15:11] of the halfword being decoded take any of the following
				428	// values, the halfword is the first halfword of a 32-bit instruction:
				429	// o 0b11101
				430	// o 0b11110
				431	// o 0b11111.
				432	//
				433	// Otherwise, the halfword is a 16-bit instruction.
				434
				435	// Read 2 bytes of data first.
				436	if (Region.readBytes(Address, 2, (uint8_t*)&insn, NULL) == -1)
				437	return false;
				438
				439	unsigned bits15_11 = slice(insn, 15, 11);
				440	bool IsThumb2 = false;
				441
				442	// 32-bit instructions if the bits [15:11] of the halfword matches
				443	// { 0b11101 /* 0x1D /, 0b11110 / 0x1E /, ob11111 / 0x1F */ }.
				444	if (bits15_11 == 0x1D \|\| bits15_11 == 0x1E \|\| bits15_11 == 0x1F) {
				445	IsThumb2 = true;
				446	if (Region.readBytes(Address + 2, 2, (uint8_t*)&insn1, NULL) == -1)
				447	return false;
				448	insn = (insn << 16 \| insn1);
				449	}
				450
				451	// The insn could potentially be bit-twiddled in order to be decoded as an ARM
				452	// NEON/VFP opcode. In such case, the modified insn is later disassembled as
				453	// an ARM NEON/VFP instruction.
				454	//
				455	// This is a short term solution for lack of encoding bits specified for the
				456	// Thumb2 NEON/VFP instructions. The long term solution could be adding some
				457	// infrastructure to have each instruction support more than one encodings.
				458	// Which encoding is used would be based on which subtarget the compiler/
				459	// disassembler is working with at the time. This would allow the sharing of
				460	// the NEON patterns between ARM and Thumb2, as well as potential greater
				461	// sharing between the regular ARM instructions and the 32-bit wide Thumb2
				462	// instructions as well.
				463	unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);
				464
				465	// A8.6.117/119/120/121.
				466	// PLD/PLDW/PLI instructions with Rn==15 is transformed to the pci variant.
				467	if (Thumb2PreloadOpcodeNoPCI(Opcode) && slice(insn, 19, 16) == 15)
				468	Opcode = T2Morph2Preload2PCI(Opcode);
				469
				470	ARMFormat Format = ARMFormats[Opcode];
				471	NSFormat NSF = NSFormats[Opcode];
				472	Size = IsThumb2 ? 4 : 2;
				473
				474	DEBUG({
				475	errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
				476	<< " Format=" << stringForARMFormat(Format) << " NSFormat="
				477	<< stringForNSFormat(NSF) << '\n';
				478	showBitVector(errs(), insn);
				479	});
				480
				481	AbstractARMMCBuilder *Builder =
				482	ARMMCBuilderFactory::CreateMCBuilder(Opcode, Format, NSF);
				483
				484	if (!Builder)
				485	return false;
				486
				487	if (!Builder->Build(MI, insn))
				488	return false;
				489
				490	delete Builder;
				491
				492	return true;
				493	}
				494
				495	} // namespace ARMDisassembler
				496
				497	static const MCDisassembler *createARMDisassembler(const Target &T) {
				498	return new ARMDisassembler::ARMDisassembler;
				499	}
				500
				501	static const MCDisassembler *createThumbDisassembler(const Target &T) {
				502	return new ARMDisassembler::ThumbDisassembler;
				503	}
				504
				505	extern "C" void LLVMInitializeARMDisassembler() {
				506	// Register the disassembler.
				507	TargetRegistry::RegisterMCDisassembler(TheARMTarget,
				508	createARMDisassembler);
				509	TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
				510	createThumbDisassembler);
				511	}
				512
				513	} // namespace llvm