blob: 5dfcf297a7ea78bcf2595098cb6eeb7b65b9191d [file] [log] [blame]
Manuel Klimekc4850c92011-12-20 09:26:26 +00001//===--- JSONParser.cpp - Simple JSON parser ------------------------------===//
Manuel Klimek76f13012011-12-16 13:09:10 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a JSON parser.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/JSONParser.h"
15
16#include "llvm/ADT/Twine.h"
17#include "llvm/Support/Casting.h"
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000018#include "llvm/Support/MemoryBuffer.h"
Manuel Klimek76f13012011-12-16 13:09:10 +000019
Manuel Klimekc4850c92011-12-20 09:26:26 +000020using namespace llvm;
Manuel Klimek76f13012011-12-16 13:09:10 +000021
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000022JSONParser::JSONParser(StringRef Input, SourceMgr *SM)
23 : SM(SM), Failed(false) {
24 InputBuffer = MemoryBuffer::getMemBuffer(Input, "JSON");
25 SM->AddNewSourceBuffer(InputBuffer, SMLoc());
26 End = InputBuffer->getBuffer().end();
27 Position = InputBuffer->getBuffer().begin();
28}
Manuel Klimek76f13012011-12-16 13:09:10 +000029
30JSONValue *JSONParser::parseRoot() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000031 if (Position != InputBuffer->getBuffer().begin())
Sebastian Pop41a26002012-02-06 05:29:29 +000032 report_fatal_error("Cannot reuse JSONParser.");
Manuel Klimek76f13012011-12-16 13:09:10 +000033 if (isWhitespace())
34 nextNonWhitespace();
35 if (errorIfAtEndOfFile("'[' or '{' at start of JSON text"))
36 return 0;
37 switch (*Position) {
38 case '[':
39 return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
40 case '{':
41 return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
42 default:
43 setExpectedError("'[' or '{' at start of JSON text", *Position);
44 return 0;
45 }
46}
47
48bool JSONParser::validate() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000049 JSONValue *Root = parseRoot();
50 if (Root == NULL) {
51 return false;
52 }
53 return skip(*Root);
Manuel Klimek9ce69372011-12-20 10:42:52 +000054}
55
Manuel Klimek9ce69372011-12-20 10:42:52 +000056bool JSONParser::skip(const JSONAtom &Atom) {
57 switch(Atom.getKind()) {
Manuel Klimek44b920f2012-01-17 09:34:07 +000058 case JSONAtom::JK_Array:
59 case JSONAtom::JK_Object:
60 return skipContainer(*cast<JSONContainer>(&Atom));
61 case JSONAtom::JK_String:
62 return true;
Manuel Klimek9ce69372011-12-20 10:42:52 +000063 case JSONAtom::JK_KeyValuePair:
64 return skip(*cast<JSONKeyValuePair>(&Atom)->Value);
65 }
66 llvm_unreachable("Impossible enum value.");
Manuel Klimek76f13012011-12-16 13:09:10 +000067}
68
69// Sets the current error to:
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000070// "expected <Expected>, but found <Found>".
Manuel Klimek76f13012011-12-16 13:09:10 +000071void JSONParser::setExpectedError(StringRef Expected, StringRef Found) {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000072 SM->PrintMessage(SMLoc::getFromPointer(Position), SourceMgr::DK_Error,
73 "expected " + Expected + ", but found " + Found + ".", ArrayRef<SMRange>());
74 Failed = true;
Manuel Klimek76f13012011-12-16 13:09:10 +000075}
76
77// Sets the current error to:
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000078// "expected <Expected>, but found <Found>".
Manuel Klimek76f13012011-12-16 13:09:10 +000079void JSONParser::setExpectedError(StringRef Expected, char Found) {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000080 setExpectedError(Expected, ("'" + StringRef(&Found, 1) + "'").str());
Manuel Klimek76f13012011-12-16 13:09:10 +000081}
82
83// If there is no character available, returns true and sets the current error
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000084// to: "expected <Expected>, but found EOF.".
Manuel Klimek76f13012011-12-16 13:09:10 +000085bool JSONParser::errorIfAtEndOfFile(StringRef Expected) {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000086 if (Position == End) {
Manuel Klimek76f13012011-12-16 13:09:10 +000087 setExpectedError(Expected, "EOF");
88 return true;
89 }
90 return false;
91}
92
93// Sets the current error if the current character is not C to:
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000094// "expected 'C', but got <current character>".
Manuel Klimek76f13012011-12-16 13:09:10 +000095bool JSONParser::errorIfNotAt(char C, StringRef Message) {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000096 if (*Position != C) {
Manuel Klimek76f13012011-12-16 13:09:10 +000097 std::string Expected =
98 ("'" + StringRef(&C, 1) + "' " + Message).str();
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000099 if (Position == End)
Manuel Klimek76f13012011-12-16 13:09:10 +0000100 setExpectedError(Expected, "EOF");
101 else
102 setExpectedError(Expected, *Position);
103 return true;
104 }
105 return false;
106}
107
108// Forbidding inlining improves performance by roughly 20%.
109// FIXME: Remove once llvm optimizes this to the faster version without hints.
110LLVM_ATTRIBUTE_NOINLINE static bool
111wasEscaped(StringRef::iterator First, StringRef::iterator Position);
112
113// Returns whether a character at 'Position' was escaped with a leading '\'.
114// 'First' specifies the position of the first character in the string.
115static bool wasEscaped(StringRef::iterator First,
116 StringRef::iterator Position) {
117 assert(Position - 1 >= First);
118 StringRef::iterator I = Position - 1;
119 // We calulate the number of consecutive '\'s before the current position
120 // by iterating backwards through our string.
121 while (I >= First && *I == '\\') --I;
122 // (Position - 1 - I) now contains the number of '\'s before the current
123 // position. If it is odd, the character at 'Positon' was escaped.
124 return (Position - 1 - I) % 2 == 1;
125}
126
127// Parses a JSONString, assuming that the current position is on a quote.
128JSONString *JSONParser::parseString() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000129 assert(Position != End);
Manuel Klimek76f13012011-12-16 13:09:10 +0000130 assert(!isWhitespace());
131 if (errorIfNotAt('"', "at start of string"))
132 return 0;
133 StringRef::iterator First = Position + 1;
134
135 // Benchmarking shows that this loop is the hot path of the application with
136 // about 2/3rd of the runtime cycles. Since escaped quotes are not the common
137 // case, and multiple escaped backslashes before escaped quotes are very rare,
138 // we pessimize this case to achieve a smaller inner loop in the common case.
139 // We're doing that by having a quick inner loop that just scans for the next
140 // quote. Once we find the quote we check the last character to see whether
141 // the quote might have been escaped. If the last character is not a '\', we
142 // know the quote was not escaped and have thus found the end of the string.
143 // If the immediately preceding character was a '\', we have to scan backwards
144 // to see whether the previous character was actually an escaped backslash, or
145 // an escape character for the quote. If we find that the current quote was
146 // escaped, we continue parsing for the next quote and repeat.
147 // This optimization brings around 30% performance improvements.
148 do {
149 // Step over the current quote.
150 ++Position;
151 // Find the next quote.
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000152 while (Position != End && *Position != '"')
Manuel Klimek76f13012011-12-16 13:09:10 +0000153 ++Position;
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000154 if (errorIfAtEndOfFile("'\"' at end of string"))
Manuel Klimek76f13012011-12-16 13:09:10 +0000155 return 0;
156 // Repeat until the previous character was not a '\' or was an escaped
157 // backslash.
158 } while (*(Position - 1) == '\\' && wasEscaped(First, Position));
159
160 return new (ValueAllocator.Allocate<JSONString>())
161 JSONString(StringRef(First, Position - First));
162}
163
164
165// Advances the position to the next non-whitespace position.
166void JSONParser::nextNonWhitespace() {
167 do {
168 ++Position;
169 } while (isWhitespace());
170}
171
172// Checks if there is a whitespace character at the current position.
173bool JSONParser::isWhitespace() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000174 return *Position == ' ' || *Position == '\t' ||
175 *Position == '\n' || *Position == '\r';
Manuel Klimek76f13012011-12-16 13:09:10 +0000176}
177
178bool JSONParser::failed() const {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000179 return Failed;
Manuel Klimek76f13012011-12-16 13:09:10 +0000180}
181
Manuel Klimek76f13012011-12-16 13:09:10 +0000182// Parses a JSONValue, assuming that the current position is at the first
183// character of the value.
184JSONValue *JSONParser::parseValue() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000185 assert(Position != End);
Manuel Klimek76f13012011-12-16 13:09:10 +0000186 assert(!isWhitespace());
187 switch (*Position) {
188 case '[':
189 return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
190 case '{':
191 return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
192 case '"':
193 return parseString();
194 default:
195 setExpectedError("'[', '{' or '\"' at start of value", *Position);
196 return 0;
197 }
198}
199
200// Parses a JSONKeyValuePair, assuming that the current position is at the first
201// character of the key, value pair.
202JSONKeyValuePair *JSONParser::parseKeyValuePair() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000203 assert(Position != End);
Manuel Klimek76f13012011-12-16 13:09:10 +0000204 assert(!isWhitespace());
205
206 JSONString *Key = parseString();
207 if (Key == 0)
208 return 0;
209
210 nextNonWhitespace();
211 if (errorIfNotAt(':', "between key and value"))
212 return 0;
213
214 nextNonWhitespace();
215 const JSONValue *Value = parseValue();
216 if (Value == 0)
217 return 0;
218
219 return new (ValueAllocator.Allocate<JSONKeyValuePair>(1))
220 JSONKeyValuePair(Key, Value);
221}
222
Manuel Klimek44b920f2012-01-17 09:34:07 +0000223/// \brief Parses the first element of a JSON array or object, or closes the
224/// array.
225///
226/// The method assumes that the current position is before the first character
227/// of the element, with possible white space in between. When successful, it
228/// returns the new position after parsing the element. Otherwise, if there is
229/// no next value, it returns a default constructed StringRef::iterator.
230StringRef::iterator JSONParser::parseFirstElement(JSONAtom::Kind ContainerKind,
231 char StartChar, char EndChar,
232 const JSONAtom *&Element) {
233 assert(*Position == StartChar);
234 Element = 0;
235 nextNonWhitespace();
236 if (errorIfAtEndOfFile("value or end of container at start of container"))
237 return StringRef::iterator();
238
239 if (*Position == EndChar)
240 return StringRef::iterator();
241
242 Element = parseElement(ContainerKind);
243 if (Element == 0)
244 return StringRef::iterator();
245
246 return Position;
Manuel Klimek76f13012011-12-16 13:09:10 +0000247}
248
Manuel Klimek44b920f2012-01-17 09:34:07 +0000249/// \brief Parses the next element of a JSON array or object, or closes the
250/// array.
251///
252/// The method assumes that the current position is before the ',' which
253/// separates the next element from the current element. When successful, it
254/// returns the new position after parsing the element. Otherwise, if there is
255/// no next value, it returns a default constructed StringRef::iterator.
256StringRef::iterator JSONParser::parseNextElement(JSONAtom::Kind ContainerKind,
257 char EndChar,
258 const JSONAtom *&Element) {
259 Element = 0;
260 nextNonWhitespace();
261 if (errorIfAtEndOfFile("',' or end of container for next element"))
262 return 0;
263
264 if (*Position == ',') {
265 nextNonWhitespace();
266 if (errorIfAtEndOfFile("element in container"))
267 return StringRef::iterator();
268
269 Element = parseElement(ContainerKind);
270 if (Element == 0)
271 return StringRef::iterator();
272
273 return Position;
274 } else if (*Position == EndChar) {
275 return StringRef::iterator();
276 } else {
277 setExpectedError("',' or end of container for next element", *Position);
278 return StringRef::iterator();
279 }
280}
281
282const JSONAtom *JSONParser::parseElement(JSONAtom::Kind ContainerKind) {
283 switch (ContainerKind) {
284 case JSONAtom::JK_Array:
285 return parseValue();
286 case JSONAtom::JK_Object:
287 return parseKeyValuePair();
288 default:
289 llvm_unreachable("Impossible code path");
290 }
291}
292
293bool JSONParser::skipContainer(const JSONContainer &Container) {
294 for (JSONContainer::AtomIterator I = Container.atom_current(),
295 E = Container.atom_end();
296 I != E; ++I) {
297 assert(*I != 0);
298 if (!skip(**I))
299 return false;
300 }
301 return !failed();
Manuel Klimek76f13012011-12-16 13:09:10 +0000302}