Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 1 | //===- InputFiles.cpp -----------------------------------------------------===// |
| 2 | // |
| 3 | // The LLVM Linker |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "InputFiles.h" |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 11 | #include "Config.h" |
Sam Clegg | 5fa274b | 2018-01-10 01:13:34 +0000 | [diff] [blame] | 12 | #include "InputChunks.h" |
Sam Clegg | 9310297 | 2018-02-23 05:08:53 +0000 | [diff] [blame] | 13 | #include "InputGlobal.h" |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 14 | #include "SymbolTable.h" |
| 15 | #include "lld/Common/ErrorHandler.h" |
Rui Ueyama | 2017d52 | 2017-11-28 20:39:17 +0000 | [diff] [blame] | 16 | #include "lld/Common/Memory.h" |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 17 | #include "llvm/Object/Binary.h" |
| 18 | #include "llvm/Object/Wasm.h" |
| 19 | #include "llvm/Support/raw_ostream.h" |
| 20 | |
| 21 | #define DEBUG_TYPE "lld" |
| 22 | |
| 23 | using namespace lld; |
| 24 | using namespace lld::wasm; |
| 25 | |
| 26 | using namespace llvm; |
| 27 | using namespace llvm::object; |
| 28 | using namespace llvm::wasm; |
| 29 | |
| 30 | Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { |
| 31 | log("Loading: " + Path); |
| 32 | |
| 33 | auto MBOrErr = MemoryBuffer::getFile(Path); |
| 34 | if (auto EC = MBOrErr.getError()) { |
| 35 | error("cannot open " + Path + ": " + EC.message()); |
| 36 | return None; |
| 37 | } |
| 38 | std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; |
| 39 | MemoryBufferRef MBRef = MB->getMemBufferRef(); |
| 40 | make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership |
| 41 | |
| 42 | return MBRef; |
| 43 | } |
| 44 | |
| 45 | void ObjFile::dumpInfo() const { |
Rui Ueyama | 0a9583c | 2018-02-28 02:57:37 +0000 | [diff] [blame] | 46 | log("info for: " + getName() + |
| 47 | "\n Symbols : " + Twine(Symbols.size()) + |
| 48 | "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) + |
| 49 | "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals())); |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 50 | } |
| 51 | |
Sam Clegg | d96d935 | 2018-01-10 19:22:42 +0000 | [diff] [blame] | 52 | // Relocations contain an index into the function, global or table index |
| 53 | // space of the input file. This function takes a relocation and returns the |
| 54 | // relocated index (i.e. translates from the input index space to the output |
| 55 | // index space). |
| 56 | uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { |
Rui Ueyama | d1063bb | 2018-02-28 00:26:26 +0000 | [diff] [blame] | 57 | if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) { |
| 58 | assert(TypeIsUsed[Reloc.Index]); |
| 59 | return TypeMap[Reloc.Index]; |
| 60 | } |
| 61 | return Symbols[Reloc.Index]->getOutputSymbolIndex(); |
Sam Clegg | d96d935 | 2018-01-10 19:22:42 +0000 | [diff] [blame] | 62 | } |
| 63 | |
Sam Clegg | ab604a9 | 2018-01-23 01:25:56 +0000 | [diff] [blame] | 64 | // Translate from the relocation's index into the final linked output value. |
| 65 | uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { |
| 66 | switch (Reloc.Type) { |
| 67 | case R_WEBASSEMBLY_TABLE_INDEX_I32: |
Rui Ueyama | d1063bb | 2018-02-28 00:26:26 +0000 | [diff] [blame] | 68 | case R_WEBASSEMBLY_TABLE_INDEX_SLEB: { |
| 69 | // The null case is possible, if you take the address of a weak function |
| 70 | // that's simply not supplied. |
| 71 | FunctionSymbol *Sym = getFunctionSymbol(Reloc.Index); |
| 72 | if (Sym->hasTableIndex()) |
| 73 | return Sym->getTableIndex(); |
| 74 | return 0; |
| 75 | } |
Sam Clegg | ab604a9 | 2018-01-23 01:25:56 +0000 | [diff] [blame] | 76 | case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: |
| 77 | case R_WEBASSEMBLY_MEMORY_ADDR_I32: |
| 78 | case R_WEBASSEMBLY_MEMORY_ADDR_LEB: |
Rui Ueyama | d1063bb | 2018-02-28 00:26:26 +0000 | [diff] [blame] | 79 | if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index))) |
| 80 | return Sym->getVirtualAddress() + Reloc.Addend; |
| 81 | return Reloc.Addend; |
Sam Clegg | ab604a9 | 2018-01-23 01:25:56 +0000 | [diff] [blame] | 82 | case R_WEBASSEMBLY_TYPE_INDEX_LEB: |
Rui Ueyama | d1063bb | 2018-02-28 00:26:26 +0000 | [diff] [blame] | 83 | return TypeMap[Reloc.Index]; |
Sam Clegg | ab604a9 | 2018-01-23 01:25:56 +0000 | [diff] [blame] | 84 | case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: |
Rui Ueyama | d1063bb | 2018-02-28 00:26:26 +0000 | [diff] [blame] | 85 | return getFunctionSymbol(Reloc.Index)->getOutputIndex(); |
Sam Clegg | ab604a9 | 2018-01-23 01:25:56 +0000 | [diff] [blame] | 86 | case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: |
Rui Ueyama | d1063bb | 2018-02-28 00:26:26 +0000 | [diff] [blame] | 87 | return getGlobalSymbol(Reloc.Index)->getOutputIndex(); |
Sam Clegg | ab604a9 | 2018-01-23 01:25:56 +0000 | [diff] [blame] | 88 | default: |
| 89 | llvm_unreachable("unknown relocation type"); |
| 90 | } |
| 91 | } |
| 92 | |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 93 | void ObjFile::parse() { |
| 94 | // Parse a memory buffer as a wasm file. |
| 95 | DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); |
Rui Ueyama | bdc5150 | 2017-12-06 22:08:17 +0000 | [diff] [blame] | 96 | std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 97 | |
| 98 | auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); |
| 99 | if (!Obj) |
| 100 | fatal(toString(this) + ": not a wasm file"); |
| 101 | if (!Obj->isRelocatableObject()) |
| 102 | fatal(toString(this) + ": not a relocatable wasm file"); |
| 103 | |
| 104 | Bin.release(); |
| 105 | WasmObj.reset(Obj); |
| 106 | |
| 107 | // Find the code and data sections. Wasm objects can have at most one code |
| 108 | // and one data section. |
| 109 | for (const SectionRef &Sec : WasmObj->sections()) { |
| 110 | const WasmSection &Section = WasmObj->getWasmSection(Sec); |
| 111 | if (Section.Type == WASM_SEC_CODE) |
| 112 | CodeSection = &Section; |
| 113 | else if (Section.Type == WASM_SEC_DATA) |
| 114 | DataSection = &Section; |
| 115 | } |
| 116 | |
Sam Clegg | 8f6d2de | 2018-01-31 23:48:14 +0000 | [diff] [blame] | 117 | TypeMap.resize(getWasmObj()->types().size()); |
| 118 | TypeIsUsed.resize(getWasmObj()->types().size(), false); |
| 119 | |
Rui Ueyama | 0a9583c | 2018-02-28 02:57:37 +0000 | [diff] [blame] | 120 | // Populate `Segments`. |
| 121 | for (const WasmSegment &S : WasmObj->dataSegments()) { |
| 122 | InputSegment *Seg = make<InputSegment>(S, this); |
| 123 | Seg->copyRelocations(*DataSection); |
| 124 | Segments.emplace_back(Seg); |
| 125 | } |
Sam Clegg | 8d146bb | 2018-01-09 23:56:44 +0000 | [diff] [blame] | 126 | |
Rui Ueyama | 0a9583c | 2018-02-28 02:57:37 +0000 | [diff] [blame] | 127 | // Populate `Functions`. |
| 128 | ArrayRef<WasmFunction> Funcs = WasmObj->functions(); |
| 129 | ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); |
| 130 | ArrayRef<WasmSignature> Types = WasmObj->types(); |
| 131 | Functions.reserve(Funcs.size()); |
Sam Clegg | 8d146bb | 2018-01-09 23:56:44 +0000 | [diff] [blame] | 132 | |
Rui Ueyama | 0a9583c | 2018-02-28 02:57:37 +0000 | [diff] [blame] | 133 | for (size_t I = 0, E = Funcs.size(); I != E; ++I) { |
| 134 | InputFunction *F = |
| 135 | make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this); |
| 136 | F->copyRelocations(*CodeSection); |
| 137 | Functions.emplace_back(F); |
| 138 | } |
| 139 | |
| 140 | // Populate `Globals`. |
| 141 | for (const WasmGlobal &G : WasmObj->globals()) |
| 142 | Globals.emplace_back(make<InputGlobal>(G)); |
| 143 | |
| 144 | // Populate `Symbols` based on the WasmSymbols in the object. |
| 145 | Symbols.reserve(WasmObj->getNumberOfSymbols()); |
| 146 | for (const SymbolRef &Sym : WasmObj->symbols()) { |
| 147 | const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); |
| 148 | if (Symbol *Sym = createDefined(WasmSym)) |
| 149 | Symbols.push_back(Sym); |
| 150 | else |
| 151 | Symbols.push_back(createUndefined(WasmSym)); |
| 152 | } |
Sam Clegg | 9310297 | 2018-02-23 05:08:53 +0000 | [diff] [blame] | 153 | } |
| 154 | |
Sam Clegg | e0f6fcd | 2018-01-12 22:25:17 +0000 | [diff] [blame] | 155 | bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { |
Rui Ueyama | dcf6234 | 2018-03-01 23:29:05 +0000 | [diff] [blame] | 156 | StringRef S = Chunk->getComdat(); |
| 157 | if (S.empty()) |
| 158 | return false; |
| 159 | return !Symtab->addComdat(S, this); |
Sam Clegg | e0f6fcd | 2018-01-12 22:25:17 +0000 | [diff] [blame] | 160 | } |
| 161 | |
Sam Clegg | 9310297 | 2018-02-23 05:08:53 +0000 | [diff] [blame] | 162 | FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const { |
| 163 | return cast<FunctionSymbol>(Symbols[Index]); |
| 164 | } |
| 165 | |
| 166 | GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const { |
| 167 | return cast<GlobalSymbol>(Symbols[Index]); |
| 168 | } |
| 169 | |
| 170 | DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { |
| 171 | return cast<DataSymbol>(Symbols[Index]); |
| 172 | } |
| 173 | |
Rui Ueyama | 4b56adc | 2018-02-28 00:50:54 +0000 | [diff] [blame] | 174 | Symbol *ObjFile::createDefined(const WasmSymbol &Sym) { |
| 175 | if (!Sym.isDefined()) |
| 176 | return nullptr; |
| 177 | |
Rui Ueyama | e89b0ef | 2018-03-02 21:19:55 +0000 | [diff] [blame] | 178 | StringRef Name = Sym.Info.Name; |
| 179 | uint32_t Flags = Sym.Info.Flags; |
| 180 | |
Rui Ueyama | 4b56adc | 2018-02-28 00:50:54 +0000 | [diff] [blame] | 181 | switch (Sym.Info.Kind) { |
| 182 | case WASM_SYMBOL_TYPE_FUNCTION: { |
Rui Ueyama | 0a9583c | 2018-02-28 02:57:37 +0000 | [diff] [blame] | 183 | InputFunction *Func = |
| 184 | Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; |
Rui Ueyama | 4b56adc | 2018-02-28 00:50:54 +0000 | [diff] [blame] | 185 | if (isExcludedByComdat(Func)) { |
| 186 | Func->Live = false; |
| 187 | return nullptr; |
| 188 | } |
| 189 | |
| 190 | if (Sym.isBindingLocal()) |
Rui Ueyama | e89b0ef | 2018-03-02 21:19:55 +0000 | [diff] [blame] | 191 | return make<DefinedFunction>(Name, Flags, this, Func); |
| 192 | return Symtab->addDefinedFunction(Name, Flags, this, Func); |
Rui Ueyama | 4b56adc | 2018-02-28 00:50:54 +0000 | [diff] [blame] | 193 | } |
| 194 | case WASM_SYMBOL_TYPE_DATA: { |
Rui Ueyama | 0a9583c | 2018-02-28 02:57:37 +0000 | [diff] [blame] | 195 | InputSegment *Seg = Segments[Sym.Info.DataRef.Segment]; |
Rui Ueyama | 4b56adc | 2018-02-28 00:50:54 +0000 | [diff] [blame] | 196 | if (isExcludedByComdat(Seg)) { |
| 197 | Seg->Live = false; |
| 198 | return nullptr; |
| 199 | } |
| 200 | |
| 201 | uint32_t Offset = Sym.Info.DataRef.Offset; |
| 202 | uint32_t Size = Sym.Info.DataRef.Size; |
| 203 | |
| 204 | if (Sym.isBindingLocal()) |
Rui Ueyama | e89b0ef | 2018-03-02 21:19:55 +0000 | [diff] [blame] | 205 | return make<DefinedData>(Name, Flags, this, Seg, Offset, Size); |
| 206 | return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); |
Rui Ueyama | 4b56adc | 2018-02-28 00:50:54 +0000 | [diff] [blame] | 207 | } |
| 208 | case WASM_SYMBOL_TYPE_GLOBAL: |
Rui Ueyama | 0a9583c | 2018-02-28 02:57:37 +0000 | [diff] [blame] | 209 | InputGlobal *Global = |
| 210 | Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; |
Rui Ueyama | 4b56adc | 2018-02-28 00:50:54 +0000 | [diff] [blame] | 211 | if (Sym.isBindingLocal()) |
Rui Ueyama | e89b0ef | 2018-03-02 21:19:55 +0000 | [diff] [blame] | 212 | return make<DefinedGlobal>(Name, Flags, this, Global); |
| 213 | return Symtab->addDefinedGlobal(Name, Flags, this, Global); |
Rui Ueyama | 4b56adc | 2018-02-28 00:50:54 +0000 | [diff] [blame] | 214 | } |
| 215 | llvm_unreachable("unkown symbol kind"); |
| 216 | } |
| 217 | |
Sam Clegg | 9310297 | 2018-02-23 05:08:53 +0000 | [diff] [blame] | 218 | Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { |
Rui Ueyama | e3498ec | 2018-02-28 00:09:22 +0000 | [diff] [blame] | 219 | StringRef Name = Sym.Info.Name; |
| 220 | uint32_t Flags = Sym.Info.Flags; |
| 221 | |
| 222 | switch (Sym.Info.Kind) { |
| 223 | case WASM_SYMBOL_TYPE_FUNCTION: |
| 224 | return Symtab->addUndefinedFunction(Name, Flags, this, Sym.FunctionType); |
| 225 | case WASM_SYMBOL_TYPE_DATA: |
| 226 | return Symtab->addUndefinedData(Name, Flags, this); |
| 227 | case WASM_SYMBOL_TYPE_GLOBAL: |
| 228 | return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType); |
| 229 | } |
| 230 | llvm_unreachable("unkown symbol kind"); |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 231 | } |
| 232 | |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 233 | void ArchiveFile::parse() { |
| 234 | // Parse a MemoryBufferRef as an archive file. |
| 235 | DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); |
Rui Ueyama | bdc5150 | 2017-12-06 22:08:17 +0000 | [diff] [blame] | 236 | File = CHECK(Archive::create(MB), toString(this)); |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 237 | |
| 238 | // Read the symbol table to construct Lazy symbols. |
| 239 | int Count = 0; |
| 240 | for (const Archive::Symbol &Sym : File->symbols()) { |
| 241 | Symtab->addLazy(this, &Sym); |
| 242 | ++Count; |
| 243 | } |
| 244 | DEBUG(dbgs() << "Read " << Count << " symbols\n"); |
| 245 | } |
| 246 | |
| 247 | void ArchiveFile::addMember(const Archive::Symbol *Sym) { |
| 248 | const Archive::Child &C = |
Rui Ueyama | bdc5150 | 2017-12-06 22:08:17 +0000 | [diff] [blame] | 249 | CHECK(Sym->getMember(), |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 250 | "could not get the member for symbol " + Sym->getName()); |
| 251 | |
| 252 | // Don't try to load the same member twice (this can happen when members |
| 253 | // mutually reference each other). |
| 254 | if (!Seen.insert(C.getChildOffset()).second) |
| 255 | return; |
| 256 | |
Sam Clegg | a681a11 | 2017-12-06 03:10:39 +0000 | [diff] [blame] | 257 | DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 258 | DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); |
| 259 | |
| 260 | MemoryBufferRef MB = |
Rui Ueyama | bdc5150 | 2017-12-06 22:08:17 +0000 | [diff] [blame] | 261 | CHECK(C.getMemoryBufferRef(), |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 262 | "could not get the buffer for the member defining symbol " + |
| 263 | Sym->getName()); |
| 264 | |
| 265 | if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) { |
| 266 | error("unknown file type: " + MB.getBufferIdentifier()); |
| 267 | return; |
| 268 | } |
| 269 | |
| 270 | InputFile *Obj = make<ObjFile>(MB); |
| 271 | Obj->ParentName = ParentName; |
| 272 | Symtab->addFile(Obj); |
| 273 | } |
| 274 | |
| 275 | // Returns a string in the format of "foo.o" or "foo.a(bar.o)". |
Sam Clegg | 7e75663 | 2017-12-05 16:50:46 +0000 | [diff] [blame] | 276 | std::string lld::toString(const wasm::InputFile *File) { |
Sam Clegg | c94d393 | 2017-11-17 18:14:09 +0000 | [diff] [blame] | 277 | if (!File) |
| 278 | return "<internal>"; |
| 279 | |
| 280 | if (File->ParentName.empty()) |
| 281 | return File->getName(); |
| 282 | |
| 283 | return (File->ParentName + "(" + File->getName() + ")").str(); |
| 284 | } |