Blame - lld/ELF/Arch/X86_64.cpp - toolchain/llvm-project

blob: b790868c71254ac767c259f3bc24d11c1a44eff5 [file] [log] [blame]

Rui Ueyama	21c0a9c	2017-06-16 17:32:43 +0000	[diff] [blame^]	1	//===- X86_64.cpp ---------------------------------------------------------===//
				2	//
				3	// The LLVM Linker
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "Error.h"
				11	#include "InputFiles.h"
				12	#include "Memory.h"
				13	#include "Symbols.h"
				14	#include "SyntheticSections.h"
				15	#include "Target.h"
				16	#include "llvm/Object/ELF.h"
				17	#include "llvm/Support/Endian.h"
				18
				19	using namespace llvm;
				20	using namespace llvm::object;
				21	using namespace llvm::support::endian;
				22	using namespace llvm::ELF;
				23	using namespace lld;
				24	using namespace lld::elf;
				25
				26	namespace {
				27	template <class ELFT> class X86_64 final : public TargetInfo {
				28	public:
				29	X86_64();
				30	RelExpr getRelExpr(uint32_t Type, const SymbolBody &S,
				31	const uint8_t *Loc) const override;
				32	bool isPicRel(uint32_t Type) const override;
				33	void writeGotPltHeader(uint8_t *Buf) const override;
				34	void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override;
				35	void writePltHeader(uint8_t *Buf) const override;
				36	void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
				37	int32_t Index, unsigned RelOff) const override;
				38	void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
				39
				40	RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data,
				41	RelExpr Expr) const override;
				42	void relaxGot(uint8_t *Loc, uint64_t Val) const override;
				43	void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
				44	void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
				45	void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
				46	void relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
				47
				48	private:
				49	void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
				50	uint8_t ModRm) const;
				51	};
				52	} // namespace
				53
				54	template <class ELFT> X86_64<ELFT>::X86_64() {
				55	CopyRel = R_X86_64_COPY;
				56	GotRel = R_X86_64_GLOB_DAT;
				57	PltRel = R_X86_64_JUMP_SLOT;
				58	RelativeRel = R_X86_64_RELATIVE;
				59	IRelativeRel = R_X86_64_IRELATIVE;
				60	TlsGotRel = R_X86_64_TPOFF64;
				61	TlsModuleIndexRel = R_X86_64_DTPMOD64;
				62	TlsOffsetRel = R_X86_64_DTPOFF64;
				63	GotEntrySize = 8;
				64	GotPltEntrySize = 8;
				65	PltEntrySize = 16;
				66	PltHeaderSize = 16;
				67	TlsGdRelaxSkip = 2;
				68
				69	// Align to the large page size (known as a superpage or huge page).
				70	// FreeBSD automatically promotes large, superpage-aligned allocations.
				71	DefaultImageBase = 0x200000;
				72
				73	// 0xCC is the "int3" (call debug exception handler) instruction.
				74	TrapInstr = 0xcccccccc;
				75	}
				76
				77	template <class ELFT>
				78	RelExpr X86_64<ELFT>::getRelExpr(uint32_t Type, const SymbolBody &S,
				79	const uint8_t *Loc) const {
				80	switch (Type) {
				81	case R_X86_64_8:
				82	case R_X86_64_16:
				83	case R_X86_64_32:
				84	case R_X86_64_32S:
				85	case R_X86_64_64:
				86	case R_X86_64_DTPOFF32:
				87	case R_X86_64_DTPOFF64:
				88	return R_ABS;
				89	case R_X86_64_TPOFF32:
				90	return R_TLS;
				91	case R_X86_64_TLSLD:
				92	return R_TLSLD_PC;
				93	case R_X86_64_TLSGD:
				94	return R_TLSGD_PC;
				95	case R_X86_64_SIZE32:
				96	case R_X86_64_SIZE64:
				97	return R_SIZE;
				98	case R_X86_64_PLT32:
				99	return R_PLT_PC;
				100	case R_X86_64_PC32:
				101	case R_X86_64_PC64:
				102	return R_PC;
				103	case R_X86_64_GOT32:
				104	case R_X86_64_GOT64:
				105	return R_GOT_FROM_END;
				106	case R_X86_64_GOTPCREL:
				107	case R_X86_64_GOTPCRELX:
				108	case R_X86_64_REX_GOTPCRELX:
				109	case R_X86_64_GOTTPOFF:
				110	return R_GOT_PC;
				111	case R_X86_64_NONE:
				112	return R_NONE;
				113	default:
				114	error(toString(S.File) + ": unknown relocation type: " + toString(Type));
				115	return R_HINT;
				116	}
				117	}
				118
				119	template <class ELFT> void X86_64<ELFT>::writeGotPltHeader(uint8_t *Buf) const {
				120	// The first entry holds the value of _DYNAMIC. It is not clear why that is
				121	// required, but it is documented in the psabi and the glibc dynamic linker
				122	// seems to use it (note that this is relevant for linking ld.so, not any
				123	// other program).
				124	write64le(Buf, InX::Dynamic->getVA());
				125	}
				126
				127	template <class ELFT>
				128	void X86_64<ELFT>::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {
				129	// See comments in X86TargetInfo::writeGotPlt.
				130	write32le(Buf, S.getPltVA() + 6);
				131	}
				132
				133	template <class ELFT> void X86_64<ELFT>::writePltHeader(uint8_t *Buf) const {
				134	const uint8_t PltData[] = {
				135	0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushq GOTPLT+8(%rip)
				136	0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *GOTPLT+16(%rip)
				137	0x0f, 0x1f, 0x40, 0x00 // nop
				138	};
				139	memcpy(Buf, PltData, sizeof(PltData));
				140	uint64_t GotPlt = InX::GotPlt->getVA();
				141	uint64_t Plt = InX::Plt->getVA();
				142	write32le(Buf + 2, GotPlt - Plt + 2); // GOTPLT+8
				143	write32le(Buf + 8, GotPlt - Plt + 4); // GOTPLT+16
				144	}
				145
				146	template <class ELFT>
				147	void X86_64<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
				148	uint64_t PltEntryAddr, int32_t Index,
				149	unsigned RelOff) const {
				150	const uint8_t Inst[] = {
				151	0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmpq *got(%rip)
				152	0x68, 0x00, 0x00, 0x00, 0x00, // pushq <relocation index>
				153	0xe9, 0x00, 0x00, 0x00, 0x00 // jmpq plt[0]
				154	};
				155	memcpy(Buf, Inst, sizeof(Inst));
				156
				157	write32le(Buf + 2, GotPltEntryAddr - PltEntryAddr - 6);
				158	write32le(Buf + 7, Index);
				159	write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16);
				160	}
				161
				162	template <class ELFT> bool X86_64<ELFT>::isPicRel(uint32_t Type) const {
				163	return Type != R_X86_64_PC32 && Type != R_X86_64_32 &&
				164	Type != R_X86_64_TPOFF32;
				165	}
				166
				167	template <class ELFT>
				168	void X86_64<ELFT>::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type,
				169	uint64_t Val) const {
				170	// Convert
				171	// .byte 0x66
				172	// leaq x@tlsgd(%rip), %rdi
				173	// .word 0x6666
				174	// rex64
				175	// call __tls_get_addr@plt
				176	// to
				177	// mov %fs:0x0,%rax
				178	// lea x@tpoff,%rax
				179	const uint8_t Inst[] = {
				180	0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
				181	0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff,%rax
				182	};
				183	memcpy(Loc - 4, Inst, sizeof(Inst));
				184
				185	// The original code used a pc relative relocation and so we have to
				186	// compensate for the -4 in had in the addend.
				187	write32le(Loc + 8, Val + 4);
				188	}
				189
				190	template <class ELFT>
				191	void X86_64<ELFT>::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type,
				192	uint64_t Val) const {
				193	// Convert
				194	// .byte 0x66
				195	// leaq x@tlsgd(%rip), %rdi
				196	// .word 0x6666
				197	// rex64
				198	// call __tls_get_addr@plt
				199	// to
				200	// mov %fs:0x0,%rax
				201	// addq x@tpoff,%rax
				202	const uint8_t Inst[] = {
				203	0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
				204	0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // addq x@tpoff,%rax
				205	};
				206	memcpy(Loc - 4, Inst, sizeof(Inst));
				207
				208	// Both code sequences are PC relatives, but since we are moving the constant
				209	// forward by 8 bytes we have to subtract the value by 8.
				210	write32le(Loc + 8, Val - 8);
				211	}
				212
				213	// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
				214	// R_X86_64_TPOFF32 so that it does not use GOT.
				215	template <class ELFT>
				216	void X86_64<ELFT>::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type,
				217	uint64_t Val) const {
				218	uint8_t *Inst = Loc - 3;
				219	uint8_t Reg = Loc[-1] >> 3;
				220	uint8_t *RegSlot = Loc - 1;
				221
				222	// Note that ADD with RSP or R12 is converted to ADD instead of LEA
				223	// because LEA with these registers needs 4 bytes to encode and thus
				224	// wouldn't fit the space.
				225
				226	if (memcmp(Inst, "\x48\x03\x25", 3) == 0) {
				227	// "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
				228	memcpy(Inst, "\x48\x81\xc4", 3);
				229	} else if (memcmp(Inst, "\x4c\x03\x25", 3) == 0) {
				230	// "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
				231	memcpy(Inst, "\x49\x81\xc4", 3);
				232	} else if (memcmp(Inst, "\x4c\x03", 2) == 0) {
				233	// "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
				234	memcpy(Inst, "\x4d\x8d", 2);
				235	*RegSlot = 0x80 \| (Reg << 3) \| Reg;
				236	} else if (memcmp(Inst, "\x48\x03", 2) == 0) {
				237	// "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
				238	memcpy(Inst, "\x48\x8d", 2);
				239	*RegSlot = 0x80 \| (Reg << 3) \| Reg;
				240	} else if (memcmp(Inst, "\x4c\x8b", 2) == 0) {
				241	// "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
				242	memcpy(Inst, "\x49\xc7", 2);
				243	*RegSlot = 0xc0 \| Reg;
				244	} else if (memcmp(Inst, "\x48\x8b", 2) == 0) {
				245	// "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
				246	memcpy(Inst, "\x48\xc7", 2);
				247	*RegSlot = 0xc0 \| Reg;
				248	} else {
				249	error(getErrorLocation(Loc - 3) +
				250	"R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");
				251	}
				252
				253	// The original code used a PC relative relocation.
				254	// Need to compensate for the -4 it had in the addend.
				255	write32le(Loc, Val + 4);
				256	}
				257
				258	template <class ELFT>
				259	void X86_64<ELFT>::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type,
				260	uint64_t Val) const {
				261	// Convert
				262	// leaq bar@tlsld(%rip), %rdi
				263	// callq __tls_get_addr@PLT
				264	// leaq bar@dtpoff(%rax), %rcx
				265	// to
				266	// .word 0x6666
				267	// .byte 0x66
				268	// mov %fs:0,%rax
				269	// leaq bar@tpoff(%rax), %rcx
				270	if (Type == R_X86_64_DTPOFF64) {
				271	write64le(Loc, Val);
				272	return;
				273	}
				274	if (Type == R_X86_64_DTPOFF32) {
				275	write32le(Loc, Val);
				276	return;
				277	}
				278
				279	const uint8_t Inst[] = {
				280	0x66, 0x66, // .word 0x6666
				281	0x66, // .byte 0x66
				282	0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax
				283	};
				284	memcpy(Loc - 3, Inst, sizeof(Inst));
				285	}
				286
				287	template <class ELFT>
				288	void X86_64<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type,
				289	uint64_t Val) const {
				290	switch (Type) {
				291	case R_X86_64_8:
				292	checkUInt<8>(Loc, Val, Type);
				293	*Loc = Val;
				294	break;
				295	case R_X86_64_16:
				296	checkUInt<16>(Loc, Val, Type);
				297	write16le(Loc, Val);
				298	break;
				299	case R_X86_64_32:
				300	checkUInt<32>(Loc, Val, Type);
				301	write32le(Loc, Val);
				302	break;
				303	case R_X86_64_32S:
				304	case R_X86_64_TPOFF32:
				305	case R_X86_64_GOT32:
				306	case R_X86_64_GOTPCREL:
				307	case R_X86_64_GOTPCRELX:
				308	case R_X86_64_REX_GOTPCRELX:
				309	case R_X86_64_PC32:
				310	case R_X86_64_GOTTPOFF:
				311	case R_X86_64_PLT32:
				312	case R_X86_64_TLSGD:
				313	case R_X86_64_TLSLD:
				314	case R_X86_64_DTPOFF32:
				315	case R_X86_64_SIZE32:
				316	checkInt<32>(Loc, Val, Type);
				317	write32le(Loc, Val);
				318	break;
				319	case R_X86_64_64:
				320	case R_X86_64_DTPOFF64:
				321	case R_X86_64_GLOB_DAT:
				322	case R_X86_64_PC64:
				323	case R_X86_64_SIZE64:
				324	case R_X86_64_GOT64:
				325	write64le(Loc, Val);
				326	break;
				327	default:
				328	llvm_unreachable("unexpected relocation");
				329	}
				330	}
				331
				332	template <class ELFT>
				333	RelExpr X86_64<ELFT>::adjustRelaxExpr(uint32_t Type, const uint8_t *Data,
				334	RelExpr RelExpr) const {
				335	if (Type != R_X86_64_GOTPCRELX && Type != R_X86_64_REX_GOTPCRELX)
				336	return RelExpr;
				337	const uint8_t Op = Data[-2];
				338	const uint8_t ModRm = Data[-1];
				339
				340	// FIXME: When PIC is disabled and foo is defined locally in the
				341	// lower 32 bit address space, memory operand in mov can be converted into
				342	// immediate operand. Otherwise, mov must be changed to lea. We support only
				343	// latter relaxation at this moment.
				344	if (Op == 0x8b)
				345	return R_RELAX_GOT_PC;
				346
				347	// Relax call and jmp.
				348	if (Op == 0xff && (ModRm == 0x15 \|\| ModRm == 0x25))
				349	return R_RELAX_GOT_PC;
				350
				351	// Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.
				352	// If PIC then no relaxation is available.
				353	// We also don't relax test/binop instructions without REX byte,
				354	// they are 32bit operations and not common to have.
				355	assert(Type == R_X86_64_REX_GOTPCRELX);
				356	return Config->Pic ? RelExpr : R_RELAX_GOT_PC_NOPIC;
				357	}
				358
				359	// A subset of relaxations can only be applied for no-PIC. This method
				360	// handles such relaxations. Instructions encoding information was taken from:
				361	// "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
				362	// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/
				363	// 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
				364	template <class ELFT>
				365	void X86_64<ELFT>::relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
				366	uint8_t ModRm) const {
				367	const uint8_t Rex = Loc[-3];
				368	// Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".
				369	if (Op == 0x85) {
				370	// See "TEST-Logical Compare" (4-428 Vol. 2B),
				371	// TEST r/m64, r64 uses "full" ModR / M byte (no opcode extension).
				372
				373	// ModR/M byte has form XX YYY ZZZ, where
				374	// YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1).
				375	// XX has different meanings:
				376	// 00: The operand's memory address is in reg1.
				377	// 01: The operand's memory address is reg1 + a byte-sized displacement.
				378	// 10: The operand's memory address is reg1 + a word-sized displacement.
				379	// 11: The operand is reg1 itself.
				380	// If an instruction requires only one operand, the unused reg2 field
				381	// holds extra opcode bits rather than a register code
				382	// 0xC0 == 11 000 000 binary.
				383	// 0x38 == 00 111 000 binary.
				384	// We transfer reg2 to reg1 here as operand.
				385	// See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).
				386	Loc[-1] = 0xc0 \| (ModRm & 0x38) >> 3; // ModR/M byte.
				387
				388	// Change opcode from TEST r/m64, r64 to TEST r/m64, imm32
				389	// See "TEST-Logical Compare" (4-428 Vol. 2B).
				390	Loc[-2] = 0xf7;
				391
				392	// Move R bit to the B bit in REX byte.
				393	// REX byte is encoded as 0100WRXB, where
				394	// 0100 is 4bit fixed pattern.
				395	// REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the
				396	// default operand size is used (which is 32-bit for most but not all
				397	// instructions).
				398	// REX.R This 1-bit value is an extension to the MODRM.reg field.
				399	// REX.X This 1-bit value is an extension to the SIB.index field.
				400	// REX.B This 1-bit value is an extension to the MODRM.rm field or the
				401	// SIB.base field.
				402	// See "2.2.1.2 More on REX Prefix Fields " (2-8 Vol. 2A).
				403	Loc[-3] = (Rex & ~0x4) \| (Rex & 0x4) >> 2;
				404	write32le(Loc, Val);
				405	return;
				406	}
				407
				408	// If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub
				409	// or xor operations.
				410
				411	// Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".
				412	// Logic is close to one for test instruction above, but we also
				413	// write opcode extension here, see below for details.
				414	Loc[-1] = 0xc0 \| (ModRm & 0x38) >> 3 \| (Op & 0x3c); // ModR/M byte.
				415
				416	// Primary opcode is 0x81, opcode extension is one of:
				417	// 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,
				418	// 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.
				419	// This value was wrote to MODRM.reg in a line above.
				420	// See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15),
				421	// "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for
				422	// descriptions about each operation.
				423	Loc[-2] = 0x81;
				424	Loc[-3] = (Rex & ~0x4) \| (Rex & 0x4) >> 2;
				425	write32le(Loc, Val);
				426	}
				427
				428	template <class ELFT>
				429	void X86_64<ELFT>::relaxGot(uint8_t *Loc, uint64_t Val) const {
				430	const uint8_t Op = Loc[-2];
				431	const uint8_t ModRm = Loc[-1];
				432
				433	// Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
				434	if (Op == 0x8b) {
				435	Loc[-2] = 0x8d;
				436	write32le(Loc, Val);
				437	return;
				438	}
				439
				440	if (Op != 0xff) {
				441	// We are relaxing a rip relative to an absolute, so compensate
				442	// for the old -4 addend.
				443	assert(!Config->Pic);
				444	relaxGotNoPic(Loc, Val + 4, Op, ModRm);
				445	return;
				446	}
				447
				448	// Convert call/jmp instructions.
				449	if (ModRm == 0x15) {
				450	// ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call foo".
				451	// Instead we convert to "addr32 call foo" where addr32 is an instruction
				452	// prefix. That makes result expression to be a single instruction.
				453	Loc[-2] = 0x67; // addr32 prefix
				454	Loc[-1] = 0xe8; // call
				455	write32le(Loc, Val);
				456	return;
				457	}
				458
				459	// Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop".
				460	// jmp doesn't return, so it is fine to use nop here, it is just a stub.
				461	assert(ModRm == 0x25);
				462	Loc[-2] = 0xe9; // jmp
				463	Loc[3] = 0x90; // nop
				464	write32le(Loc - 1, Val + 1);
				465	}
				466
				467	TargetInfo *elf::createX32TargetInfo() { return make<X86_64<ELF32LE>>(); }
				468	TargetInfo *elf::createX86_64TargetInfo() { return make<X86_64<ELF64LE>>(); }