blob: 2caf71ff330ef1e66d830c824c3d7e466c4b8c14 [file] [log] [blame]
Mehdi Amini7c4a1a82016-03-09 01:37:22 +00001//===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the Thin Link Time Optimization library. This library is
11// intended to be used by linker to optimize code at link time.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/LTO/ThinLTOCodeGenerator.h"
16
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/TargetLibraryInfo.h"
20#include "llvm/Analysis/TargetTransformInfo.h"
21#include "llvm/Bitcode/ReaderWriter.h"
22#include "llvm/Bitcode/BitcodeWriterPass.h"
23#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
24#include "llvm/IR/LLVMContext.h"
25#include "llvm/IR/DiagnosticPrinter.h"
26#include "llvm/IR/LegacyPassManager.h"
27#include "llvm/IR/Mangler.h"
28#include "llvm/IRReader/IRReader.h"
29#include "llvm/Linker/Linker.h"
30#include "llvm/MC/SubtargetFeature.h"
31#include "llvm/Object/FunctionIndexObjectFile.h"
32#include "llvm/Support/SourceMgr.h"
33#include "llvm/Support/TargetRegistry.h"
34#include "llvm/Support/ThreadPool.h"
35#include "llvm/Target/TargetMachine.h"
36#include "llvm/Transforms/IPO.h"
37#include "llvm/Transforms/IPO/FunctionImport.h"
38#include "llvm/Transforms/IPO/PassManagerBuilder.h"
39#include "llvm/Transforms/ObjCARC.h"
40#include "llvm/Transforms/Utils/FunctionImportUtils.h"
41
42using namespace llvm;
43
Mehdi Amini09b4a8d2016-03-10 01:28:54 +000044namespace llvm {
45// Flags -discard-value-names, defined in LTOCodeGenerator.cpp
46extern cl::opt<bool> LTODiscardValueNames;
47}
48
Mehdi Amini7c4a1a82016-03-09 01:37:22 +000049namespace {
50
51static cl::opt<int> ThreadCount("threads",
52 cl::init(std::thread::hardware_concurrency()));
53
54static void diagnosticHandler(const DiagnosticInfo &DI) {
55 DiagnosticPrinterRawOStream DP(errs());
56 DI.print(DP);
57 errs() << '\n';
58}
59
60// Simple helper to load a module from bitcode
61static std::unique_ptr<Module>
62loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
63 bool Lazy) {
64 SMDiagnostic Err;
65 ErrorOr<std::unique_ptr<Module>> ModuleOrErr(nullptr);
66 if (Lazy) {
67 ModuleOrErr =
68 getLazyBitcodeModule(MemoryBuffer::getMemBuffer(Buffer, false), Context,
69 /* ShouldLazyLoadMetadata */ Lazy);
70 } else {
71 ModuleOrErr = parseBitcodeFile(Buffer, Context);
72 }
73 if (std::error_code EC = ModuleOrErr.getError()) {
74 Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error,
75 EC.message());
76 Err.print("ThinLTO", errs());
77 report_fatal_error("Can't load module, abort.");
78 }
79 return std::move(ModuleOrErr.get());
80}
81
82// Simple helper to save temporary files for debug.
83static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
84 unsigned count, StringRef Suffix) {
85 if (TempDir.empty())
86 return;
87 // User asked to save temps, let dump the bitcode file after import.
88 auto SaveTempPath = TempDir + llvm::utostr(count) + Suffix;
89 std::error_code EC;
90 raw_fd_ostream OS(SaveTempPath.str(), EC, sys::fs::F_None);
91 if (EC)
92 report_fatal_error(Twine("Failed to open ") + SaveTempPath +
93 " to save optimized bitcode\n");
94 WriteBitcodeToFile(&TheModule, OS, true, false);
95}
96
97static StringMap<MemoryBufferRef>
98generateModuleMap(const std::vector<MemoryBufferRef> &Modules) {
99 StringMap<MemoryBufferRef> ModuleMap;
100 for (auto &ModuleBuffer : Modules) {
101 assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
102 ModuleMap.end() &&
103 "Expect unique Buffer Identifier");
104 ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer;
105 }
106 return ModuleMap;
107}
108
109/// Provide a "loader" for the FunctionImporter to access function from other
110/// modules.
111class ModuleLoader {
112 /// The context that will be used for importing.
113 LLVMContext &Context;
114
115 /// Map from Module identifier to MemoryBuffer. Used by clients like the
116 /// FunctionImported to request loading a Module.
117 StringMap<MemoryBufferRef> &ModuleMap;
118
119public:
120 ModuleLoader(LLVMContext &Context, StringMap<MemoryBufferRef> &ModuleMap)
121 : Context(Context), ModuleMap(ModuleMap) {}
122
123 /// Load a module on demand.
124 std::unique_ptr<Module> operator()(StringRef Identifier) {
125 return loadModuleFromBuffer(ModuleMap[Identifier], Context, /*Lazy*/ true);
126 }
127};
128
129static void promoteModule(Module &TheModule, const FunctionInfoIndex &Index) {
130 if (renameModuleForThinLTO(TheModule, Index))
131 report_fatal_error("renameModuleForThinLTO failed");
132}
133
134static void crossImportIntoModule(Module &TheModule,
135 const FunctionInfoIndex &Index,
136 StringMap<MemoryBufferRef> &ModuleMap) {
137 ModuleLoader Loader(TheModule.getContext(), ModuleMap);
138 FunctionImporter Importer(Index, Loader);
139 Importer.importFunctions(TheModule);
140}
141
142static void optimizeModule(Module &TheModule, TargetMachine &TM) {
143 // Populate the PassManager
144 PassManagerBuilder PMB;
145 PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
146 PMB.Inliner = createFunctionInliningPass();
147 // FIXME: should get it from the bitcode?
148 PMB.OptLevel = 3;
149 PMB.LoopVectorize = true;
150 PMB.SLPVectorize = true;
151 PMB.VerifyInput = true;
152 PMB.VerifyOutput = false;
153
154 legacy::PassManager PM;
155
156 // Add the TTI (required to inform the vectorizer about register size for
157 // instance)
158 PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
159
160 // Add optimizations
161 PMB.populateThinLTOPassManager(PM);
162 PM.add(createObjCARCContractPass());
163
164 PM.run(TheModule);
165}
166
167std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
168 TargetMachine &TM) {
169 SmallVector<char, 128> OutputBuffer;
170
171 // CodeGen
172 {
173 raw_svector_ostream OS(OutputBuffer);
174 legacy::PassManager PM;
175 if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile,
176 /* DisableVerify */ true))
177 report_fatal_error("Failed to setup codegen");
178
179 // Run codegen now. resulting binary is in OutputBuffer.
180 PM.run(TheModule);
181 }
182 return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
183}
184
185static std::unique_ptr<MemoryBuffer>
186ProcessThinLTOModule(Module &TheModule, const FunctionInfoIndex &Index,
187 StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
188 ThinLTOCodeGenerator::CachingOptions CacheOptions,
189 StringRef SaveTempsDir, unsigned count) {
190
191 // Save temps: after IPO.
192 saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc");
193
194 // "Benchmark"-like optimization: single-source case
195 bool SingleModule = (ModuleMap.size() == 1);
196
197 if (!SingleModule) {
198 promoteModule(TheModule, Index);
199
200 // Save temps: after promotion.
201 saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc");
202
203 crossImportIntoModule(TheModule, Index, ModuleMap);
204
205 // Save temps: after cross-module import.
206 saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
207 }
208
209 optimizeModule(TheModule, TM);
210
211 saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc");
212
213 return codegenModule(TheModule, TM);
214}
215
216// Initialize the TargetMachine builder for a given Triple
217static void initTMBuilder(TargetMachineBuilder &TMBuilder,
218 const Triple &TheTriple) {
219 // Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
220 // FIXME this looks pretty terrible...
221 if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) {
222 if (TheTriple.getArch() == llvm::Triple::x86_64)
223 TMBuilder.MCpu = "core2";
224 else if (TheTriple.getArch() == llvm::Triple::x86)
225 TMBuilder.MCpu = "yonah";
226 else if (TheTriple.getArch() == llvm::Triple::aarch64)
227 TMBuilder.MCpu = "cyclone";
228 }
229 TMBuilder.TheTriple = std::move(TheTriple);
230}
231
232} // end anonymous namespace
233
234void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
235 MemoryBufferRef Buffer(Data, Identifier);
236 if (Modules.empty()) {
237 // First module added, so initialize the triple and some options
238 LLVMContext Context;
239 Triple TheTriple(getBitcodeTargetTriple(Buffer, Context));
240 initTMBuilder(TMBuilder, Triple(TheTriple));
241 }
242#ifndef NDEBUG
243 else {
244 LLVMContext Context;
245 assert(TMBuilder.TheTriple.str() ==
246 getBitcodeTargetTriple(Buffer, Context) &&
247 "ThinLTO modules with different triple not supported");
248 }
249#endif
250 Modules.push_back(Buffer);
251}
252
253void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
254 PreservedSymbols.insert(Name);
255}
256
257void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
258 CrossReferencedSymbols.insert(Name);
259}
260
261// TargetMachine factory
262std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
263 std::string ErrMsg;
264 const Target *TheTarget =
265 TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
266 if (!TheTarget) {
267 report_fatal_error("Can't load target for this Triple: " + ErrMsg);
268 }
269
270 // Use MAttr as the default set of features.
271 SubtargetFeatures Features(MAttr);
272 Features.getDefaultSubtargetFeatures(TheTriple);
273 std::string FeatureStr = Features.getString();
274 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
275 TheTriple.str(), MCpu, FeatureStr, Options, RelocModel,
276 CodeModel::Default, CGOptLevel));
277}
278
279/**
280 * Produce the combined function index from all the bitcode files:
281 * "thin-link".
282 */
283std::unique_ptr<FunctionInfoIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
284 std::unique_ptr<FunctionInfoIndex> CombinedIndex;
285 uint64_t NextModuleId = 0;
286 for (auto &ModuleBuffer : Modules) {
287 ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
288 object::FunctionIndexObjectFile::create(ModuleBuffer, diagnosticHandler,
289 false);
290 if (std::error_code EC = ObjOrErr.getError()) {
291 // FIXME diagnose
292 errs() << "error: can't create FunctionIndexObjectFile for buffer: "
293 << EC.message() << "\n";
294 return nullptr;
295 }
296 auto Index = (*ObjOrErr)->takeIndex();
297 if (CombinedIndex) {
298 CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId);
299 } else {
300 CombinedIndex = std::move(Index);
301 }
302 }
303 return CombinedIndex;
304}
305
306/**
307 * Perform promotion and renaming of exported internal functions.
308 */
309void ThinLTOCodeGenerator::promote(Module &TheModule,
310 FunctionInfoIndex &Index) {
311 promoteModule(TheModule, Index);
312}
313
314/**
315 * Perform cross-module importing for the module identified by ModuleIdentifier.
316 */
317void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
318 FunctionInfoIndex &Index) {
319 auto ModuleMap = generateModuleMap(Modules);
320 crossImportIntoModule(TheModule, Index, ModuleMap);
321}
322
323/**
324 * Perform post-importing ThinLTO optimizations.
325 */
326void ThinLTOCodeGenerator::optimize(Module &TheModule) {
327 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
328 optimizeModule(TheModule, *TMBuilder.create());
329}
330
331/**
332 * Perform ThinLTO CodeGen.
333 */
334std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
335 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
336 return codegenModule(TheModule, *TMBuilder.create());
337}
338
339// Main entry point for the ThinLTO processing
340void ThinLTOCodeGenerator::run() {
341 // Sequential linking phase
342 auto Index = linkCombinedIndex();
343
344 // Save temps: index.
345 if (!SaveTempsDir.empty()) {
346 auto SaveTempPath = SaveTempsDir + "index.bc";
347 std::error_code EC;
348 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
349 if (EC)
350 report_fatal_error(Twine("Failed to open ") + SaveTempPath +
351 " to save optimized bitcode\n");
Teresa Johnson76a1c1d2016-03-11 18:52:24 +0000352 WriteIndexToFile(*Index, OS);
Mehdi Amini7c4a1a82016-03-09 01:37:22 +0000353 }
354
355 // Prepare the resulting object vector
356 assert(ProducedBinaries.empty() && "The generator should not be reused");
357 ProducedBinaries.resize(Modules.size());
358
359 // Prepare the module map.
360 auto ModuleMap = generateModuleMap(Modules);
361
362 // Parallel optimizer + codegen
363 {
364 ThreadPool Pool(ThreadCount);
365 int count = 0;
366 for (auto &ModuleBuffer : Modules) {
367 Pool.async([&](int count) {
368 LLVMContext Context;
Mehdi Amini09b4a8d2016-03-10 01:28:54 +0000369 Context.setDiscardValueNames(LTODiscardValueNames);
Mehdi Amini7c4a1a82016-03-09 01:37:22 +0000370
371 // Parse module now
372 auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
373
374 // Save temps: original file.
375 if (!SaveTempsDir.empty()) {
376 saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
377 }
378
379 ProducedBinaries[count] = ProcessThinLTOModule(
380 *TheModule, *Index, ModuleMap, *TMBuilder.create(), CacheOptions,
381 SaveTempsDir, count);
382 }, count);
383 count++;
384 }
385 }
386
387 // If statistics were requested, print them out now.
388 if (llvm::AreStatisticsEnabled())
389 llvm::PrintStatistics();
390}