blob: 205ac05c6f305e185a91a0d081ba8d6fb3721bae [file] [log] [blame]
//===--- JSONParser.cpp - Simple JSON parser ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a JSON parser.
//
//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/JSONParser.h"
15
16#include "llvm/ADT/Twine.h"
17#include "llvm/Support/Casting.h"
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000018#include "llvm/Support/MemoryBuffer.h"
Manuel Klimek76f13012011-12-16 13:09:10 +000019
Manuel Klimekc4850c92011-12-20 09:26:26 +000020using namespace llvm;
Manuel Klimek76f13012011-12-16 13:09:10 +000021
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000022JSONParser::JSONParser(StringRef Input, SourceMgr *SM)
23 : SM(SM), Failed(false) {
24 InputBuffer = MemoryBuffer::getMemBuffer(Input, "JSON");
25 SM->AddNewSourceBuffer(InputBuffer, SMLoc());
26 End = InputBuffer->getBuffer().end();
27 Position = InputBuffer->getBuffer().begin();
28}
Manuel Klimek76f13012011-12-16 13:09:10 +000029
30JSONValue *JSONParser::parseRoot() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000031 if (Position != InputBuffer->getBuffer().begin())
Manuel Klimek76f13012011-12-16 13:09:10 +000032 report_fatal_error("Cannot resuse JSONParser.");
33 if (isWhitespace())
34 nextNonWhitespace();
35 if (errorIfAtEndOfFile("'[' or '{' at start of JSON text"))
36 return 0;
37 switch (*Position) {
38 case '[':
39 return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
40 case '{':
41 return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
42 default:
43 setExpectedError("'[' or '{' at start of JSON text", *Position);
44 return 0;
45 }
46}
47
48bool JSONParser::validate() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000049 JSONValue *Root = parseRoot();
50 if (Root == NULL) {
51 return false;
52 }
53 return skip(*Root);
Manuel Klimek9ce69372011-12-20 10:42:52 +000054}
55
Manuel Klimek9ce69372011-12-20 10:42:52 +000056bool JSONParser::skip(const JSONAtom &Atom) {
57 switch(Atom.getKind()) {
58 case JSONAtom::JK_Array: return skipContainer(*cast<JSONArray>(&Atom));
59 case JSONAtom::JK_Object: return skipContainer(*cast<JSONObject>(&Atom));
60 case JSONAtom::JK_String: return true;
61 case JSONAtom::JK_KeyValuePair:
62 return skip(*cast<JSONKeyValuePair>(&Atom)->Value);
63 }
64 llvm_unreachable("Impossible enum value.");
Manuel Klimek76f13012011-12-16 13:09:10 +000065}
66
67// Sets the current error to:
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000068// "expected <Expected>, but found <Found>".
Manuel Klimek76f13012011-12-16 13:09:10 +000069void JSONParser::setExpectedError(StringRef Expected, StringRef Found) {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000070 SM->PrintMessage(SMLoc::getFromPointer(Position), SourceMgr::DK_Error,
71 "expected " + Expected + ", but found " + Found + ".", ArrayRef<SMRange>());
72 Failed = true;
Manuel Klimek76f13012011-12-16 13:09:10 +000073}
74
75// Sets the current error to:
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000076// "expected <Expected>, but found <Found>".
Manuel Klimek76f13012011-12-16 13:09:10 +000077void JSONParser::setExpectedError(StringRef Expected, char Found) {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000078 setExpectedError(Expected, ("'" + StringRef(&Found, 1) + "'").str());
Manuel Klimek76f13012011-12-16 13:09:10 +000079}
80
81// If there is no character available, returns true and sets the current error
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000082// to: "expected <Expected>, but found EOF.".
Manuel Klimek76f13012011-12-16 13:09:10 +000083bool JSONParser::errorIfAtEndOfFile(StringRef Expected) {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000084 if (Position == End) {
Manuel Klimek76f13012011-12-16 13:09:10 +000085 setExpectedError(Expected, "EOF");
86 return true;
87 }
88 return false;
89}
90
91// Sets the current error if the current character is not C to:
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000092// "expected 'C', but got <current character>".
Manuel Klimek76f13012011-12-16 13:09:10 +000093bool JSONParser::errorIfNotAt(char C, StringRef Message) {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000094 if (*Position != C) {
Manuel Klimek76f13012011-12-16 13:09:10 +000095 std::string Expected =
96 ("'" + StringRef(&C, 1) + "' " + Message).str();
Manuel Klimek84cbb6f2011-12-21 18:16:39 +000097 if (Position == End)
Manuel Klimek76f13012011-12-16 13:09:10 +000098 setExpectedError(Expected, "EOF");
99 else
100 setExpectedError(Expected, *Position);
101 return true;
102 }
103 return false;
104}
105
106// Forbidding inlining improves performance by roughly 20%.
107// FIXME: Remove once llvm optimizes this to the faster version without hints.
108LLVM_ATTRIBUTE_NOINLINE static bool
109wasEscaped(StringRef::iterator First, StringRef::iterator Position);
110
111// Returns whether a character at 'Position' was escaped with a leading '\'.
112// 'First' specifies the position of the first character in the string.
113static bool wasEscaped(StringRef::iterator First,
114 StringRef::iterator Position) {
115 assert(Position - 1 >= First);
116 StringRef::iterator I = Position - 1;
117 // We calulate the number of consecutive '\'s before the current position
118 // by iterating backwards through our string.
119 while (I >= First && *I == '\\') --I;
120 // (Position - 1 - I) now contains the number of '\'s before the current
121 // position. If it is odd, the character at 'Positon' was escaped.
122 return (Position - 1 - I) % 2 == 1;
123}
124
125// Parses a JSONString, assuming that the current position is on a quote.
126JSONString *JSONParser::parseString() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000127 assert(Position != End);
Manuel Klimek76f13012011-12-16 13:09:10 +0000128 assert(!isWhitespace());
129 if (errorIfNotAt('"', "at start of string"))
130 return 0;
131 StringRef::iterator First = Position + 1;
132
133 // Benchmarking shows that this loop is the hot path of the application with
134 // about 2/3rd of the runtime cycles. Since escaped quotes are not the common
135 // case, and multiple escaped backslashes before escaped quotes are very rare,
136 // we pessimize this case to achieve a smaller inner loop in the common case.
137 // We're doing that by having a quick inner loop that just scans for the next
138 // quote. Once we find the quote we check the last character to see whether
139 // the quote might have been escaped. If the last character is not a '\', we
140 // know the quote was not escaped and have thus found the end of the string.
141 // If the immediately preceding character was a '\', we have to scan backwards
142 // to see whether the previous character was actually an escaped backslash, or
143 // an escape character for the quote. If we find that the current quote was
144 // escaped, we continue parsing for the next quote and repeat.
145 // This optimization brings around 30% performance improvements.
146 do {
147 // Step over the current quote.
148 ++Position;
149 // Find the next quote.
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000150 while (Position != End && *Position != '"')
Manuel Klimek76f13012011-12-16 13:09:10 +0000151 ++Position;
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000152 if (errorIfAtEndOfFile("'\"' at end of string"))
Manuel Klimek76f13012011-12-16 13:09:10 +0000153 return 0;
154 // Repeat until the previous character was not a '\' or was an escaped
155 // backslash.
156 } while (*(Position - 1) == '\\' && wasEscaped(First, Position));
157
158 return new (ValueAllocator.Allocate<JSONString>())
159 JSONString(StringRef(First, Position - First));
160}
161
162
163// Advances the position to the next non-whitespace position.
164void JSONParser::nextNonWhitespace() {
165 do {
166 ++Position;
167 } while (isWhitespace());
168}
169
170// Checks if there is a whitespace character at the current position.
171bool JSONParser::isWhitespace() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000172 return *Position == ' ' || *Position == '\t' ||
173 *Position == '\n' || *Position == '\r';
Manuel Klimek76f13012011-12-16 13:09:10 +0000174}
175
176bool JSONParser::failed() const {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000177 return Failed;
Manuel Klimek76f13012011-12-16 13:09:10 +0000178}
179
Manuel Klimek76f13012011-12-16 13:09:10 +0000180// Parses a JSONValue, assuming that the current position is at the first
181// character of the value.
182JSONValue *JSONParser::parseValue() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000183 assert(Position != End);
Manuel Klimek76f13012011-12-16 13:09:10 +0000184 assert(!isWhitespace());
185 switch (*Position) {
186 case '[':
187 return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
188 case '{':
189 return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
190 case '"':
191 return parseString();
192 default:
193 setExpectedError("'[', '{' or '\"' at start of value", *Position);
194 return 0;
195 }
196}
197
198// Parses a JSONKeyValuePair, assuming that the current position is at the first
199// character of the key, value pair.
200JSONKeyValuePair *JSONParser::parseKeyValuePair() {
Manuel Klimek84cbb6f2011-12-21 18:16:39 +0000201 assert(Position != End);
Manuel Klimek76f13012011-12-16 13:09:10 +0000202 assert(!isWhitespace());
203
204 JSONString *Key = parseString();
205 if (Key == 0)
206 return 0;
207
208 nextNonWhitespace();
209 if (errorIfNotAt(':', "between key and value"))
210 return 0;
211
212 nextNonWhitespace();
213 const JSONValue *Value = parseValue();
214 if (Value == 0)
215 return 0;
216
217 return new (ValueAllocator.Allocate<JSONKeyValuePair>(1))
218 JSONKeyValuePair(Key, Value);
219}
220
221template <> JSONValue *JSONParser::parseElement() {
222 return parseValue();
223}
224
225template <> JSONKeyValuePair *JSONParser::parseElement() {
226 return parseKeyValuePair();
227}