//===--- JsonParser.cpp - Simple JSON parser ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements a JSON parser.
//
//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/JSONParser.h"
15
16#include "llvm/ADT/Twine.h"
17#include "llvm/Support/Casting.h"
18
19namespace llvm {
20
21JSONParser::JSONParser(StringRef Input)
22 : Input(Input), Position(Input.begin()) {}
23
24JSONValue *JSONParser::parseRoot() {
25 if (Position != Input.begin())
26 report_fatal_error("Cannot resuse JSONParser.");
27 if (isWhitespace())
28 nextNonWhitespace();
29 if (errorIfAtEndOfFile("'[' or '{' at start of JSON text"))
30 return 0;
31 switch (*Position) {
32 case '[':
33 return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
34 case '{':
35 return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
36 default:
37 setExpectedError("'[' or '{' at start of JSON text", *Position);
38 return 0;
39 }
40}
41
42bool JSONParser::validate() {
43 return parseRoot()->skip();
44}
45
46// Sets the current error to:
47// "Error while parsing JSON: expected <Expected>, but found <Found>".
48void JSONParser::setExpectedError(StringRef Expected, StringRef Found) {
49 ErrorMessage = ("Error while parsing JSON: expected " +
50 Expected + ", but found " + Found + ".").str();
51}
52
53// Sets the current error to:
54// "Error while parsing JSON: expected <Expected>, but found <Found>".
55void JSONParser::setExpectedError(StringRef Expected, char Found) {
56 setExpectedError(Expected, StringRef(&Found, 1));
57}
58
59// If there is no character available, returns true and sets the current error
60// to: "Error while parsing JSON: expected <Expected>, but found EOF.".
61bool JSONParser::errorIfAtEndOfFile(StringRef Expected) {
62 if (Position == Input.end()) {
63 setExpectedError(Expected, "EOF");
64 return true;
65 }
66 return false;
67}
68
69// Sets the current error if the current character is not C to:
70// "Error while parsing JSON: expected 'C', but got <current character>".
71bool JSONParser::errorIfNotAt(char C, StringRef Message) {
72 if (Position == Input.end() || *Position != C) {
73 std::string Expected =
74 ("'" + StringRef(&C, 1) + "' " + Message).str();
75 if (Position == Input.end())
76 setExpectedError(Expected, "EOF");
77 else
78 setExpectedError(Expected, *Position);
79 return true;
80 }
81 return false;
82}
83
84// Forbidding inlining improves performance by roughly 20%.
85// FIXME: Remove once llvm optimizes this to the faster version without hints.
86LLVM_ATTRIBUTE_NOINLINE static bool
87wasEscaped(StringRef::iterator First, StringRef::iterator Position);
88
89// Returns whether a character at 'Position' was escaped with a leading '\'.
90// 'First' specifies the position of the first character in the string.
91static bool wasEscaped(StringRef::iterator First,
92 StringRef::iterator Position) {
93 assert(Position - 1 >= First);
94 StringRef::iterator I = Position - 1;
95 // We calulate the number of consecutive '\'s before the current position
96 // by iterating backwards through our string.
97 while (I >= First && *I == '\\') --I;
98 // (Position - 1 - I) now contains the number of '\'s before the current
99 // position. If it is odd, the character at 'Positon' was escaped.
100 return (Position - 1 - I) % 2 == 1;
101}
102
103// Parses a JSONString, assuming that the current position is on a quote.
104JSONString *JSONParser::parseString() {
105 assert(Position != Input.end());
106 assert(!isWhitespace());
107 if (errorIfNotAt('"', "at start of string"))
108 return 0;
109 StringRef::iterator First = Position + 1;
110
111 // Benchmarking shows that this loop is the hot path of the application with
112 // about 2/3rd of the runtime cycles. Since escaped quotes are not the common
113 // case, and multiple escaped backslashes before escaped quotes are very rare,
114 // we pessimize this case to achieve a smaller inner loop in the common case.
115 // We're doing that by having a quick inner loop that just scans for the next
116 // quote. Once we find the quote we check the last character to see whether
117 // the quote might have been escaped. If the last character is not a '\', we
118 // know the quote was not escaped and have thus found the end of the string.
119 // If the immediately preceding character was a '\', we have to scan backwards
120 // to see whether the previous character was actually an escaped backslash, or
121 // an escape character for the quote. If we find that the current quote was
122 // escaped, we continue parsing for the next quote and repeat.
123 // This optimization brings around 30% performance improvements.
124 do {
125 // Step over the current quote.
126 ++Position;
127 // Find the next quote.
128 while (Position != Input.end() && *Position != '"')
129 ++Position;
130 if (errorIfAtEndOfFile("\" at end of string"))
131 return 0;
132 // Repeat until the previous character was not a '\' or was an escaped
133 // backslash.
134 } while (*(Position - 1) == '\\' && wasEscaped(First, Position));
135
136 return new (ValueAllocator.Allocate<JSONString>())
137 JSONString(StringRef(First, Position - First));
138}
139
140
141// Advances the position to the next non-whitespace position.
142void JSONParser::nextNonWhitespace() {
143 do {
144 ++Position;
145 } while (isWhitespace());
146}
147
148// Checks if there is a whitespace character at the current position.
149bool JSONParser::isWhitespace() {
150 return Position != Input.end() && (*Position == ' ' || *Position == '\t' ||
151 *Position == '\n' || *Position == '\r');
152}
153
154bool JSONParser::failed() const {
155 return !ErrorMessage.empty();
156}
157
158std::string JSONParser::getErrorMessage() const {
159 return ErrorMessage;
160}
161
162bool JSONAtom::skip() const {
163 switch (MyKind) {
164 case JK_Array: return cast<JSONArray>(this)->skip();
165 case JK_Object: return cast<JSONObject>(this)->skip();
166 case JK_String: return cast<JSONString>(this)->skip();
167 case JK_KeyValuePair: return cast<JSONKeyValuePair>(this)->skip();
168 }
169 llvm_unreachable("Impossible enum value.");
170}
171
172// Parses a JSONValue, assuming that the current position is at the first
173// character of the value.
174JSONValue *JSONParser::parseValue() {
175 assert(Position != Input.end());
176 assert(!isWhitespace());
177 switch (*Position) {
178 case '[':
179 return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
180 case '{':
181 return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
182 case '"':
183 return parseString();
184 default:
185 setExpectedError("'[', '{' or '\"' at start of value", *Position);
186 return 0;
187 }
188}
189
190// Parses a JSONKeyValuePair, assuming that the current position is at the first
191// character of the key, value pair.
192JSONKeyValuePair *JSONParser::parseKeyValuePair() {
193 assert(Position != Input.end());
194 assert(!isWhitespace());
195
196 JSONString *Key = parseString();
197 if (Key == 0)
198 return 0;
199
200 nextNonWhitespace();
201 if (errorIfNotAt(':', "between key and value"))
202 return 0;
203
204 nextNonWhitespace();
205 const JSONValue *Value = parseValue();
206 if (Value == 0)
207 return 0;
208
209 return new (ValueAllocator.Allocate<JSONKeyValuePair>(1))
210 JSONKeyValuePair(Key, Value);
211}
212
213template <> JSONValue *JSONParser::parseElement() {
214 return parseValue();
215}
216
217template <> JSONKeyValuePair *JSONParser::parseElement() {
218 return parseKeyValuePair();
219}
220
221} // end namespace llvm