blob: 513ddc308e68a097d9bd105ed2e0c0099483eef9 [file] [log] [blame]
Fangrui Songef598752019-02-21 07:42:31 +00001//===-- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer -------------===//
Brian Cain50aa37b2017-02-27 06:22:17 +00002//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Brian Cain50aa37b2017-02-27 06:22:17 +00006//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10
Brian Cain50aa37b2017-02-27 06:22:17 +000011#include "llvm-c/Target.h"
Brian Cain50aa37b2017-02-27 06:22:17 +000012#include "llvm/MC/MCAsmBackend.h"
13#include "llvm/MC/MCAsmInfo.h"
Mitch Phillips5ebf7a82017-11-06 18:56:36 +000014#include "llvm/MC/MCCodeEmitter.h"
Brian Cain50aa37b2017-02-27 06:22:17 +000015#include "llvm/MC/MCContext.h"
16#include "llvm/MC/MCInstPrinter.h"
17#include "llvm/MC/MCInstrInfo.h"
18#include "llvm/MC/MCObjectFileInfo.h"
Brian Cainf72611b2018-08-17 04:38:41 +000019#include "llvm/MC/MCObjectWriter.h"
Brian Cain50aa37b2017-02-27 06:22:17 +000020#include "llvm/MC/MCParser/AsmLexer.h"
21#include "llvm/MC/MCParser/MCTargetAsmParser.h"
22#include "llvm/MC/MCRegisterInfo.h"
23#include "llvm/MC/MCSectionMachO.h"
24#include "llvm/MC/MCStreamer.h"
25#include "llvm/MC/MCSubtargetInfo.h"
serge-sans-pailleac1d23e2020-03-04 00:47:43 +010026#include "llvm/MC/MCTargetOptionsCommandFlags.h"
27#include "llvm/MC/SubtargetFeature.h"
Brian Cain50aa37b2017-02-27 06:22:17 +000028#include "llvm/Support/CommandLine.h"
29#include "llvm/Support/FileUtilities.h"
Fedor Sergeeva39faac2020-05-18 02:17:56 +070030#include "llvm/Support/Host.h"
serge-sans-pailleac1d23e2020-03-04 00:47:43 +010031#include "llvm/Support/MemoryBuffer.h"
Brian Cain50aa37b2017-02-27 06:22:17 +000032#include "llvm/Support/SourceMgr.h"
Brian Cain50aa37b2017-02-27 06:22:17 +000033#include "llvm/Support/TargetRegistry.h"
serge-sans-pailleac1d23e2020-03-04 00:47:43 +010034#include "llvm/Support/TargetSelect.h"
Brian Cain50aa37b2017-02-27 06:22:17 +000035#include "llvm/Support/ToolOutputFile.h"
serge-sans-pailleac1d23e2020-03-04 00:47:43 +010036#include "llvm/Support/raw_ostream.h"
Brian Cain50aa37b2017-02-27 06:22:17 +000037
38using namespace llvm;
39
40static cl::opt<std::string>
41 TripleName("triple", cl::desc("Target triple to assemble for, "
42 "see -version for available targets"));
43
44static cl::opt<std::string>
45 MCPU("mcpu",
46 cl::desc("Target a specific cpu type (-mcpu=help for details)"),
47 cl::value_desc("cpu-name"), cl::init(""));
48
49// This is useful for variable-length instruction sets.
50static cl::opt<unsigned> InsnLimit(
51 "insn-limit",
52 cl::desc("Limit the number of instructions to process (0 for no limit)"),
53 cl::value_desc("count"), cl::init(0));
54
55static cl::list<std::string>
56 MAttrs("mattr", cl::CommaSeparated,
57 cl::desc("Target specific attributes (-mattr=help for details)"),
58 cl::value_desc("a1,+a2,-a3,..."));
59// The feature string derived from -mattr's values.
60std::string FeaturesStr;
61
62static cl::list<std::string>
63 FuzzerArgs("fuzzer-args", cl::Positional,
64 cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
65 cl::PositionalEatsArgs);
66static std::vector<char *> ModifiedArgv;
67
68enum OutputFileType {
69 OFT_Null,
70 OFT_AssemblyFile,
71 OFT_ObjectFile
72};
73static cl::opt<OutputFileType>
74FileType("filetype", cl::init(OFT_AssemblyFile),
75 cl::desc("Choose an output file type:"),
76 cl::values(
77 clEnumValN(OFT_AssemblyFile, "asm",
78 "Emit an assembly ('.s') file"),
79 clEnumValN(OFT_Null, "null",
80 "Don't emit anything (for timing purposes)"),
81 clEnumValN(OFT_ObjectFile, "obj",
82 "Emit a native object ('.o') file")));
83
84
85class LLVMFuzzerInputBuffer : public MemoryBuffer
86{
87 public:
88 LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_)
Mitch Phillips5ebf7a82017-11-06 18:56:36 +000089 : Data(reinterpret_cast<const char *>(data_)),
Brian Cain50aa37b2017-02-27 06:22:17 +000090 Size(size_) {
91 init(Data, Data+Size, false);
92 }
93
94
95 virtual BufferKind getBufferKind() const {
96 return MemoryBuffer_Malloc; // it's not disk-backed so I think that's
97 // the intent ... though AFAIK it
98 // probably came from an mmap or sbrk
99 }
100
101 private:
102 const char *Data;
103 size_t Size;
104};
105
106static int AssembleInput(const char *ProgName, const Target *TheTarget,
107 SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
108 MCAsmInfo &MAI, MCSubtargetInfo &STI,
109 MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
110 static const bool NoInitialTextSection = false;
111
112 std::unique_ptr<MCAsmParser> Parser(
113 createMCAsmParser(SrcMgr, Ctx, Str, MAI));
114
115 std::unique_ptr<MCTargetAsmParser> TAP(
116 TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions));
117
118 if (!TAP) {
119 errs() << ProgName
120 << ": error: this target '" << TripleName
121 << "', does not support assembly parsing.\n";
122 abort();
123 }
124
125 Parser->setTargetParser(*TAP);
126
127 return Parser->Run(NoInitialTextSection);
128}
129
130
131int AssembleOneInput(const uint8_t *Data, size_t Size) {
132 const bool ShowInst = false;
133 const bool AsmVerbose = false;
134 const bool UseDwarfDirectory = true;
135
136 Triple TheTriple(Triple::normalize(TripleName));
137
138 SourceMgr SrcMgr;
139
140 std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size));
141
142 // Tell SrcMgr about this buffer, which is what the parser will pick up.
143 SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());
144
145 static const std::vector<std::string> NoIncludeDirs;
146 SrcMgr.setIncludeDirs(NoIncludeDirs);
147
148 static std::string ArchName;
149 std::string Error;
150 const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
151 Error);
152 if (!TheTarget) {
153 errs() << "error: this target '" << TheTriple.normalize()
154 << "/" << ArchName << "', was not found: '" << Error << "'\n";
155
156 abort();
157 }
158
159 std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
160 if (!MRI) {
161 errs() << "Unable to create target register info!";
162 abort();
163 }
164
serge-sans-pailleac1d23e2020-03-04 00:47:43 +0100165 MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
Mirko Brkusanin4b63ca12019-10-23 12:24:35 +0200166 std::unique_ptr<MCAsmInfo> MAI(
167 TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
Brian Cain50aa37b2017-02-27 06:22:17 +0000168 if (!MAI) {
169 errs() << "Unable to create target asm info!";
170 abort();
171 }
172
173
174 MCObjectFileInfo MOFI;
175 MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
176
177 static const bool UsePIC = false;
Justin Bogner29c5d022017-08-29 17:08:44 +0000178 MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, Ctx);
Brian Cain50aa37b2017-02-27 06:22:17 +0000179
180 const unsigned OutputAsmVariant = 0;
181 std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
182 MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant,
183 *MAI, *MCII, *MRI);
184 if (!IP) {
185 errs()
186 << "error: unable to create instruction printer for target triple '"
187 << TheTriple.normalize() << "' with assembly variant "
188 << OutputAsmVariant << ".\n";
189
190 abort();
191 }
192
193 const char *ProgName = "llvm-mc-fuzzer";
194 std::unique_ptr<MCSubtargetInfo> STI(
195 TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
Davide Italiano8d5b0072018-05-04 23:41:25 +0000196 std::unique_ptr<MCCodeEmitter> CE = nullptr;
197 std::unique_ptr<MCAsmBackend> MAB = nullptr;
Brian Cain50aa37b2017-02-27 06:22:17 +0000198
Brian Cain50aa37b2017-02-27 06:22:17 +0000199 std::string OutputString;
200 raw_string_ostream Out(OutputString);
Jonas Devlieghere0eaee542019-08-15 15:54:37 +0000201 auto FOut = std::make_unique<formatted_raw_ostream>(Out);
Brian Cain50aa37b2017-02-27 06:22:17 +0000202
203 std::unique_ptr<MCStreamer> Str;
204
205 if (FileType == OFT_AssemblyFile) {
Davide Italiano8d5b0072018-05-04 23:41:25 +0000206 Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), AsmVerbose,
207 UseDwarfDirectory, IP, std::move(CE),
208 std::move(MAB), ShowInst));
Brian Cain50aa37b2017-02-27 06:22:17 +0000209 } else {
210 assert(FileType == OFT_ObjectFile && "Invalid file type!");
211
212 std::error_code EC;
213 const std::string OutputFilename = "-";
Reid Kleckner3fc649c2017-09-23 01:03:17 +0000214 auto Out =
Jonas Devlieghere0eaee542019-08-15 15:54:37 +0000215 std::make_unique<ToolOutputFile>(OutputFilename, EC, sys::fs::OF_None);
Brian Cain50aa37b2017-02-27 06:22:17 +0000216 if (EC) {
217 errs() << EC.message() << '\n';
218 abort();
219 }
220
221 // Don't waste memory on names of temp labels.
222 Ctx.setUseNamesOnTempLabels(false);
223
224 std::unique_ptr<buffer_ostream> BOS;
225 raw_pwrite_stream *OS = &Out->os();
226 if (!Out->os().supportsSeeking()) {
Jonas Devlieghere0eaee542019-08-15 15:54:37 +0000227 BOS = std::make_unique<buffer_ostream>(Out->os());
Brian Cain50aa37b2017-02-27 06:22:17 +0000228 OS = BOS.get();
229 }
230
231 MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
Davide Italiano8d5b0072018-05-04 23:41:25 +0000232 MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
Brian Cain50aa37b2017-02-27 06:22:17 +0000233 Str.reset(TheTarget->createMCObjectStreamer(
Brian Cainf72611b2018-08-17 04:38:41 +0000234 TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB),
235 MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE), *STI,
236 MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
Brian Cain50aa37b2017-02-27 06:22:17 +0000237 /*DWARFMustBeAtTheEnd*/ false));
238 }
239 const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI,
240 *MCII, MCOptions);
241
242 (void) Res;
243
244 return 0;
245}
246
Justin Bogner29c5d022017-08-29 17:08:44 +0000247extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
Brian Cain50aa37b2017-02-27 06:22:17 +0000248 return AssembleOneInput(Data, Size);
249}
250
Justin Bogner29c5d022017-08-29 17:08:44 +0000251extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
252 char ***argv) {
Brian Cain50aa37b2017-02-27 06:22:17 +0000253 // The command line is unusual compared to other fuzzers due to the need to
254 // specify the target. Options like -triple, -mcpu, and -mattr work like
255 // their counterparts in llvm-mc, while -fuzzer-args collects options for the
256 // fuzzer itself.
257 //
258 // Examples:
259 //
260 // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
261 // 4-bytes each and use the contents of ./corpus as the test corpus:
262 // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
263 // -fuzzer-args -max_len=4 -runs=100000 ./corpus
264 //
265 // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
266 // feature enabled using up to 64-byte inputs:
267 // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
268 // -disassemble -fuzzer-args ./corpus
269 //
270 // If your aim is to find instructions that are not tested, then it is
271 // advisable to constrain the maximum input size to a single instruction
272 // using -max_len as in the first example. This results in a test corpus of
273 // individual instructions that test unique paths. Without this constraint,
274 // there will be considerable redundancy in the corpus.
275
276 char **OriginalArgv = *argv;
277
278 LLVMInitializeAllTargetInfos();
279 LLVMInitializeAllTargetMCs();
280 LLVMInitializeAllAsmParsers();
281
282 cl::ParseCommandLineOptions(*argc, OriginalArgv);
283
284 // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
285 // the driver can parse its arguments.
286 //
287 // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
288 // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
289 // non-const buffer to avoid the need to clean up when the fuzzer terminates.
290 ModifiedArgv.push_back(OriginalArgv[0]);
291 for (const auto &FuzzerArg : FuzzerArgs) {
292 for (int i = 1; i < *argc; ++i) {
293 if (FuzzerArg == OriginalArgv[i])
294 ModifiedArgv.push_back(OriginalArgv[i]);
295 }
296 }
297 *argc = ModifiedArgv.size();
298 *argv = ModifiedArgv.data();
299
300 // Package up features to be passed to target/subtarget
301 // We have to pass it via a global since the callback doesn't
302 // permit any user data.
303 if (MAttrs.size()) {
304 SubtargetFeatures Features;
305 for (unsigned i = 0; i != MAttrs.size(); ++i)
306 Features.AddFeature(MAttrs[i]);
307 FeaturesStr = Features.getString();
308 }
309
310 if (TripleName.empty())
311 TripleName = sys::getDefaultTargetTriple();
312
313 return 0;
314}