This patch implements a basic variant of lazy loading for the x86_64 and x86 targets.

What was done:
1) A .got.plt section is created for functions that require a PLT entry. .got.plt now has 3 predefined empty entries that are required by the dynamic linker.
The other newly created entries are also configured to have the correct jump to PLT[N].
2) The PLT section now has a PLT[0] entry, and the other entries are configured to support PLT->GOT (.got.plt) calls.
3) Implemented .rel[a].plt sections (based on patch http://reviews.llvm.org/D13569).
4) Fixed PLT relocation types (based on patch http://reviews.llvm.org/D13589).

NOTES:
The zeroth .got.plt entry is still empty for now. According to the ELF specification, it should hold the address of the dynamic structure, referenced by the symbol
_DYNAMIC. The _DYNAMIC symbol points to the .dynamic section, which contains information used by the ELF interpreter to set up the binary.

Differential Revision: http://reviews.llvm.org/D13651

llvm-svn: 250169
diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp
index 7009b7f..7a98f44 100644
--- a/lld/ELF/Target.cpp
+++ b/lld/ELF/Target.cpp
@@ -55,16 +55,40 @@
   PCRelReloc = R_386_PC32;
   GotReloc = R_386_GLOB_DAT;
   GotRefReloc = R_386_GOT32;
+  PltReloc = R_386_JUMP_SLOT;
+  PltEntrySize = 16;
   VAStart = 0x10000;
 }
 
+void X86TargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const {
+  // Skip 6 bytes of "jmpq *got(%rip)"
+  write32le(Buf, Plt + 6);
+}
+
+void X86TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
+                                      uint64_t PltEntryAddr) const {
+  const uint8_t PltData[] = {
+      0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushq GOT+8(%rip)
+      0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *GOT+16(%rip)
+      0x00, 0x00, 0x00, 0x00
+  };
+  memcpy(Buf, PltData, sizeof(PltData));
+  write32le(Buf + 2, GotEntryAddr - PltEntryAddr + 2); // GOT+8
+  write32le(Buf + 8, GotEntryAddr - PltEntryAddr + 4); // GOT+16
+};
+
 void X86TargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotEntryAddr,
-                                  uint64_t PltEntryAddr) const {
-  // jmpl *val; nop; nop
-  const uint8_t Inst[] = {0xff, 0x25, 0, 0, 0, 0, 0x90, 0x90};
+                                  uint64_t PltEntryAddr, int32_t Index) const {
+  const uint8_t Inst[] = {
+      0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmpq *got(%rip)
+      0x68, 0x00, 0x00, 0x00, 0x00,       // pushq <relocation index>
+      0xe9, 0x00, 0x00, 0x00, 0x00        // jmpq plt[0]
+  };
   memcpy(Buf, Inst, sizeof(Inst));
-  assert(isUInt<32>(GotEntryAddr));
-  write32le(Buf + 2, GotEntryAddr);
+
+  write32le(Buf + 2, GotEntryAddr - PltEntryAddr - 6);
+  write32le(Buf + 7, Index);
+  write32le(Buf + 12, -Index * PltEntrySize - PltZeroEntrySize - 16);
 }
 
 bool X86TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
@@ -109,7 +133,9 @@
   PCRelReloc = R_X86_64_PC32;
   GotReloc = R_X86_64_GLOB_DAT;
   GotRefReloc = R_X86_64_PC32;
+  PltReloc = R_X86_64_JUMP_SLOT;
   RelativeReloc = R_X86_64_RELATIVE;
+  PltEntrySize = 16;
 
   // On freebsd x86_64 the first page cannot be mmaped.
   // On linux that is controled by vm.mmap_min_addr. At least on some x86_64
@@ -120,16 +146,35 @@
   VAStart = 0x10000;
 }
 
+void X86_64TargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const {
+  // Skip 6 bytes of "jmpq *got(%rip)"
+  write32le(Buf, Plt + 6);
+}
+
+void X86_64TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
+                                      uint64_t PltEntryAddr) const {
+  const uint8_t PltData[] = {
+      0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushq GOT+8(%rip)
+      0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *GOT+16(%rip)
+      0x0f, 0x1f, 0x40, 0x00              // nopl 0x0(rax)
+  };
+  memcpy(Buf, PltData, sizeof(PltData));
+  write32le(Buf + 2, GotEntryAddr - PltEntryAddr + 2); // GOT+8
+  write32le(Buf + 8, GotEntryAddr - PltEntryAddr + 4); // GOT+16
+}
+
 void X86_64TargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotEntryAddr,
-                                     uint64_t PltEntryAddr) const {
-  // jmpq *val(%rip); nop; nop
-  const uint8_t Inst[] = {0xff, 0x25, 0, 0, 0, 0, 0x90, 0x90};
+                                     uint64_t PltEntryAddr, int32_t Index) const {
+  const uint8_t Inst[] = {
+      0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmpq *got(%rip)
+      0x68, 0x00, 0x00, 0x00, 0x00,       // pushq <relocation index>
+      0xe9, 0x00, 0x00, 0x00, 0x00        // jmpq plt[0]
+  };
   memcpy(Buf, Inst, sizeof(Inst));
 
-  uint64_t NextPC = PltEntryAddr + 6;
-  int64_t Delta = GotEntryAddr - NextPC;
-  assert(isInt<32>(Delta));
-  write32le(Buf + 2, Delta);
+  write32le(Buf + 2, GotEntryAddr - PltEntryAddr - 6);
+  write32le(Buf + 7, Index);
+  write32le(Buf + 12, -Index * PltEntrySize - PltZeroEntrySize - 16);
 }
 
 bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
@@ -241,7 +286,9 @@
   GotReloc = R_PPC64_GLOB_DAT;
   GotRefReloc = R_PPC64_REL64;
   RelativeReloc = R_PPC64_RELATIVE;
+  // PltReloc = FIXME
   PltEntrySize = 32;
+  PltZeroEntrySize = 0; //FIXME
 
   // We need 64K pages (at least under glibc/Linux, the loader won't
   // set different permissions on a finer granularity than that).
@@ -267,8 +314,12 @@
   return TocVA + 0x8000;
 }
 
+
+void PPC64TargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const {}
+void PPC64TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
+                                       uint64_t PltEntryAddr) const {};
 void PPC64TargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotEntryAddr,
-                                    uint64_t PltEntryAddr) const {
+                                    uint64_t PltEntryAddr, int32_t Index) const {
   uint64_t Off = GotEntryAddr - getPPC64TocBase();
 
   // FIXME: What we should do, in theory, is get the offset of the function
@@ -457,11 +508,16 @@
 PPCTargetInfo::PPCTargetInfo() {
   // PCRelReloc = FIXME
   // GotReloc = FIXME
+  // PltReloc = FIXME
   PageSize = 65536;
   VAStart = 0x10000000;
 }
+
+void PPCTargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const {}
+void PPCTargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
+                                      uint64_t PltEntryAddr) const {};
 void PPCTargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotEntryAddr,
-                                  uint64_t PltEntryAddr) const {}
+                                  uint64_t PltEntryAddr, int32_t Index) const {}
 bool PPCTargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
   return false;
 }
@@ -475,10 +531,16 @@
 AArch64TargetInfo::AArch64TargetInfo() {
   // PCRelReloc = FIXME
   // GotReloc = FIXME
+  // PltReloc = FIXME
   VAStart = 0x400000;
 }
+
+void AArch64TargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const {}
+void AArch64TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
+                                          uint64_t PltEntryAddr) const {};
 void AArch64TargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotEntryAddr,
-                                      uint64_t PltEntryAddr) const {}
+                                      uint64_t PltEntryAddr,
+                                      int32_t Index) const {}
 bool AArch64TargetInfo::relocNeedsGot(uint32_t Type,
                                       const SymbolBody &S) const {
   return false;
@@ -553,12 +615,19 @@
 MipsTargetInfo::MipsTargetInfo() {
   // PCRelReloc = FIXME
   // GotReloc = FIXME
+  // PltReloc = FIXME
   PageSize = 65536;
   VAStart = 0x400000;
 }
 
+void MipsTargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const {}
+
+void MipsTargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
+                                       uint64_t PltEntryAddr) const {}
+
 void MipsTargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotEntryAddr,
-                                   uint64_t PltEntryAddr) const {}
+                                   uint64_t PltEntryAddr, int32_t Index) const {
+}
 
 bool MipsTargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
   return false;