blob: 888761f10f0a3e0c2188e56735d533c7e8d044c1 [file] [log] [blame]
//===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Example simple parser implementation for the MC assembly markup language.
//
//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/OwningPtr.h"
15#include "llvm/Support/CommandLine.h"
16#include "llvm/Support/Format.h"
17#include "llvm/Support/ManagedStatic.h"
18#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/Support/PrettyStackTrace.h"
20#include "llvm/Support/Signals.h"
21#include "llvm/Support/SourceMgr.h"
22#include "llvm/Support/raw_ostream.h"
23#include "llvm/Support/system_error.h"
24using namespace llvm;
25
26static cl::list<std::string>
27 InputFilenames(cl::Positional, cl::desc("<input files>"),
28 cl::ZeroOrMore);
29static cl::opt<bool>
30DumpTags("dump-tags", cl::desc("List all tags encountered in input"));
31
32static StringRef ToolName;
33
34/// Trivial lexer for the markup parser. Input is always handled a character
35/// at a time. The lexer just encapsulates EOF and lookahead handling.
36class MarkupLexer {
37 StringRef::const_iterator Start;
38 StringRef::const_iterator CurPtr;
39 StringRef::const_iterator End;
40public:
41 MarkupLexer(StringRef Source)
42 : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {}
43 // When processing non-markup, input is consumed a character at a time.
44 bool isEOF() { return CurPtr == End; }
45 int getNextChar() {
46 if (CurPtr == End) return EOF;
47 return *CurPtr++;
48 }
49 int peekNextChar() {
50 if (CurPtr == End) return EOF;
51 return *CurPtr;
52 }
53 StringRef::const_iterator getPosition() const { return CurPtr; }
54};
55
56/// A markup tag is a name and a (usually empty) list of modifiers.
57class MarkupTag {
58 StringRef Name;
59 StringRef Modifiers;
60 SMLoc StartLoc;
61public:
62 MarkupTag(StringRef n, StringRef m, SMLoc Loc)
63 : Name(n), Modifiers(m), StartLoc(Loc) {}
64 StringRef getName() const { return Name; }
65 StringRef getModifiers() const { return Modifiers; }
66 SMLoc getLoc() const { return StartLoc; }
67};
68
69/// A simple parser implementation for creating MarkupTags from input text.
70class MarkupParser {
71 MarkupLexer &Lex;
72 SourceMgr &SM;
73public:
74 MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {}
75 /// Create a MarkupTag from the current position in the MarkupLexer.
76 /// The parseTag() method should be called when the lexer has processed
77 /// the opening '<' character. Input will be consumed up to and including
78 /// the ':' which terminates the tag open.
79 MarkupTag parseTag();
80 /// Issue a diagnostic and terminate program execution.
81 void FatalError(SMLoc Loc, StringRef Msg);
82};
83
84void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
85 SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg);
86 exit(1);
87}
88
89// Example handler for when a tag is recognized.
90static void processStartTag(MarkupTag &Tag) {
91 // If we're just printing the tags, do that, otherwise do some simple
92 // colorization.
93 if (DumpTags) {
94 outs() << Tag.getName();
95 if (Tag.getModifiers().size())
96 outs() << " " << Tag.getModifiers();
97 outs() << "\n";
98 return;
99 }
100
101 if (!outs().has_colors())
102 return;
103 // Color registers as red and immediates as cyan. Those don't have nested
104 // tags, so don't bother keeping a stack of colors to reset to.
105 if (Tag.getName() == "reg")
106 outs().changeColor(raw_ostream::RED);
107 else if (Tag.getName() == "imm")
108 outs().changeColor(raw_ostream::CYAN);
109}
110
111// Example handler for when the end of a tag is recognized.
112static void processEndTag(MarkupTag &Tag) {
113 // If we're printing the tags, there's nothing more to do here. Otherwise,
114 // set the color back the normal.
115 if (DumpTags)
116 return;
117 if (!outs().has_colors())
118 return;
119 // Just reset to basic white.
120 outs().changeColor(raw_ostream::WHITE, false);
121}
122
123MarkupTag MarkupParser::parseTag() {
124 // First off, extract the tag into it's own StringRef so we can look at it
125 // outside of the context of consuming input.
126 StringRef::const_iterator Start = Lex.getPosition();
127 SMLoc Loc = SMLoc::getFromPointer(Start - 1);
128 while(Lex.getNextChar() != ':') {
129 // EOF is an error.
130 if (Lex.isEOF())
131 FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag");
132 }
133 StringRef RawTag(Start, Lex.getPosition() - Start - 1);
134 std::pair<StringRef, StringRef> SplitTag = RawTag.split(' ');
135 return MarkupTag(SplitTag.first, SplitTag.second, Loc);
136}
137
138static void parseMCMarkup(StringRef Filename) {
139 OwningPtr<MemoryBuffer> BufferPtr;
140 if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, BufferPtr)) {
141 errs() << ToolName << ": " << ec.message() << '\n';
142 return;
143 }
144 MemoryBuffer *Buffer = BufferPtr.take();
145
146 SourceMgr SrcMgr;
147
148 // Tell SrcMgr about this buffer, which is what the parser will pick up.
149 SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
150
151 StringRef InputSource = Buffer->getBuffer();
152 MarkupLexer Lex(InputSource);
153 MarkupParser Parser(Lex, SrcMgr);
154
155 SmallVector<MarkupTag, 4> TagStack;
156
157 for (int CurChar = Lex.getNextChar();
158 CurChar != EOF;
159 CurChar = Lex.getNextChar()) {
160 switch (CurChar) {
161 case '<': {
162 // A "<<" is output as a literal '<' and does not start a markup tag.
163 if (Lex.peekNextChar() == '<') {
164 (void)Lex.getNextChar();
165 break;
166 }
167 // Parse the markup entry.
168 TagStack.push_back(Parser.parseTag());
169
170 // Do any special handling for the start of a tag.
171 processStartTag(TagStack.back());
172 continue;
173 }
174 case '>': {
175 SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
176 // A ">>" is output as a literal '>' and does not end a markup tag.
177 if (Lex.peekNextChar() == '>') {
178 (void)Lex.getNextChar();
179 break;
180 }
181 // Close out the innermost tag.
182 if (TagStack.empty())
183 Parser.FatalError(Loc, "'>' without matching '<'");
184
185 // Do any special handling for the end of a tag.
186 processEndTag(TagStack.back());
187
188 TagStack.pop_back();
189 continue;
190 }
191 default:
192 break;
193 }
194 // For anything else, just echo the character back out.
195 if (!DumpTags && CurChar != EOF)
196 outs() << (char)CurChar;
197 }
198
199 // If there are any unterminated markup tags, issue diagnostics for them.
200 while (!TagStack.empty()) {
201 MarkupTag &Tag = TagStack.back();
202 SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
203 "unterminated markup tag");
204 TagStack.pop_back();
205 }
206}
207
208int main(int argc, char **argv) {
209 // Print a stack trace if we signal out.
210 sys::PrintStackTraceOnErrorSignal();
211 PrettyStackTraceProgram X(argc, argv);
212
213 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
214 cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");
215
216 ToolName = argv[0];
217
218 // If no input files specified, read from stdin.
219 if (InputFilenames.size() == 0)
220 InputFilenames.push_back("-");
221
222 std::for_each(InputFilenames.begin(), InputFilenames.end(),
223 parseMCMarkup);
224 return 0;
225}