Manuel Klimek | c4850c9 | 2011-12-20 09:26:26 +0000 | [diff] [blame] | 1 | //===--- JSONParser.cpp - Simple JSON parser ------------------------------===// |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file implements a JSON parser. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/Support/JSONParser.h" |
| 15 | |
| 16 | #include "llvm/ADT/Twine.h" |
| 17 | #include "llvm/Support/Casting.h" |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 18 | #include "llvm/Support/MemoryBuffer.h" |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 19 | |
Manuel Klimek | c4850c9 | 2011-12-20 09:26:26 +0000 | [diff] [blame] | 20 | using namespace llvm; |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 21 | |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 22 | JSONParser::JSONParser(StringRef Input, SourceMgr *SM) |
| 23 | : SM(SM), Failed(false) { |
| 24 | InputBuffer = MemoryBuffer::getMemBuffer(Input, "JSON"); |
| 25 | SM->AddNewSourceBuffer(InputBuffer, SMLoc()); |
| 26 | End = InputBuffer->getBuffer().end(); |
| 27 | Position = InputBuffer->getBuffer().begin(); |
| 28 | } |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 29 | |
| 30 | JSONValue *JSONParser::parseRoot() { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 31 | if (Position != InputBuffer->getBuffer().begin()) |
Sebastian Pop | 41a2600 | 2012-02-06 05:29:29 +0000 | [diff] [blame] | 32 | report_fatal_error("Cannot reuse JSONParser."); |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 33 | if (isWhitespace()) |
| 34 | nextNonWhitespace(); |
| 35 | if (errorIfAtEndOfFile("'[' or '{' at start of JSON text")) |
| 36 | return 0; |
| 37 | switch (*Position) { |
| 38 | case '[': |
| 39 | return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this); |
| 40 | case '{': |
| 41 | return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this); |
| 42 | default: |
| 43 | setExpectedError("'[' or '{' at start of JSON text", *Position); |
| 44 | return 0; |
| 45 | } |
| 46 | } |
| 47 | |
| 48 | bool JSONParser::validate() { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 49 | JSONValue *Root = parseRoot(); |
| 50 | if (Root == NULL) { |
| 51 | return false; |
| 52 | } |
| 53 | return skip(*Root); |
Manuel Klimek | 9ce6937 | 2011-12-20 10:42:52 +0000 | [diff] [blame] | 54 | } |
| 55 | |
Manuel Klimek | 9ce6937 | 2011-12-20 10:42:52 +0000 | [diff] [blame] | 56 | bool JSONParser::skip(const JSONAtom &Atom) { |
| 57 | switch(Atom.getKind()) { |
Manuel Klimek | 44b920f | 2012-01-17 09:34:07 +0000 | [diff] [blame] | 58 | case JSONAtom::JK_Array: |
| 59 | case JSONAtom::JK_Object: |
| 60 | return skipContainer(*cast<JSONContainer>(&Atom)); |
| 61 | case JSONAtom::JK_String: |
| 62 | return true; |
Manuel Klimek | 9ce6937 | 2011-12-20 10:42:52 +0000 | [diff] [blame] | 63 | case JSONAtom::JK_KeyValuePair: |
| 64 | return skip(*cast<JSONKeyValuePair>(&Atom)->Value); |
| 65 | } |
| 66 | llvm_unreachable("Impossible enum value."); |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 67 | } |
| 68 | |
| 69 | // Sets the current error to: |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 70 | // "expected <Expected>, but found <Found>". |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 71 | void JSONParser::setExpectedError(StringRef Expected, StringRef Found) { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 72 | SM->PrintMessage(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, |
| 73 | "expected " + Expected + ", but found " + Found + ".", ArrayRef<SMRange>()); |
| 74 | Failed = true; |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 75 | } |
| 76 | |
| 77 | // Sets the current error to: |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 78 | // "expected <Expected>, but found <Found>". |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 79 | void JSONParser::setExpectedError(StringRef Expected, char Found) { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 80 | setExpectedError(Expected, ("'" + StringRef(&Found, 1) + "'").str()); |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 81 | } |
| 82 | |
| 83 | // If there is no character available, returns true and sets the current error |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 84 | // to: "expected <Expected>, but found EOF.". |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 85 | bool JSONParser::errorIfAtEndOfFile(StringRef Expected) { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 86 | if (Position == End) { |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 87 | setExpectedError(Expected, "EOF"); |
| 88 | return true; |
| 89 | } |
| 90 | return false; |
| 91 | } |
| 92 | |
| 93 | // Sets the current error if the current character is not C to: |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 94 | // "expected 'C', but got <current character>". |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 95 | bool JSONParser::errorIfNotAt(char C, StringRef Message) { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 96 | if (*Position != C) { |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 97 | std::string Expected = |
| 98 | ("'" + StringRef(&C, 1) + "' " + Message).str(); |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 99 | if (Position == End) |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 100 | setExpectedError(Expected, "EOF"); |
| 101 | else |
| 102 | setExpectedError(Expected, *Position); |
| 103 | return true; |
| 104 | } |
| 105 | return false; |
| 106 | } |
| 107 | |
| 108 | // Forbidding inlining improves performance by roughly 20%. |
| 109 | // FIXME: Remove once llvm optimizes this to the faster version without hints. |
| 110 | LLVM_ATTRIBUTE_NOINLINE static bool |
| 111 | wasEscaped(StringRef::iterator First, StringRef::iterator Position); |
| 112 | |
| 113 | // Returns whether a character at 'Position' was escaped with a leading '\'. |
| 114 | // 'First' specifies the position of the first character in the string. |
| 115 | static bool wasEscaped(StringRef::iterator First, |
| 116 | StringRef::iterator Position) { |
| 117 | assert(Position - 1 >= First); |
| 118 | StringRef::iterator I = Position - 1; |
| 119 | // We calulate the number of consecutive '\'s before the current position |
| 120 | // by iterating backwards through our string. |
| 121 | while (I >= First && *I == '\\') --I; |
| 122 | // (Position - 1 - I) now contains the number of '\'s before the current |
| 123 | // position. If it is odd, the character at 'Positon' was escaped. |
| 124 | return (Position - 1 - I) % 2 == 1; |
| 125 | } |
| 126 | |
| 127 | // Parses a JSONString, assuming that the current position is on a quote. |
| 128 | JSONString *JSONParser::parseString() { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 129 | assert(Position != End); |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 130 | assert(!isWhitespace()); |
| 131 | if (errorIfNotAt('"', "at start of string")) |
| 132 | return 0; |
| 133 | StringRef::iterator First = Position + 1; |
| 134 | |
| 135 | // Benchmarking shows that this loop is the hot path of the application with |
| 136 | // about 2/3rd of the runtime cycles. Since escaped quotes are not the common |
| 137 | // case, and multiple escaped backslashes before escaped quotes are very rare, |
| 138 | // we pessimize this case to achieve a smaller inner loop in the common case. |
| 139 | // We're doing that by having a quick inner loop that just scans for the next |
| 140 | // quote. Once we find the quote we check the last character to see whether |
| 141 | // the quote might have been escaped. If the last character is not a '\', we |
| 142 | // know the quote was not escaped and have thus found the end of the string. |
| 143 | // If the immediately preceding character was a '\', we have to scan backwards |
| 144 | // to see whether the previous character was actually an escaped backslash, or |
| 145 | // an escape character for the quote. If we find that the current quote was |
| 146 | // escaped, we continue parsing for the next quote and repeat. |
| 147 | // This optimization brings around 30% performance improvements. |
| 148 | do { |
| 149 | // Step over the current quote. |
| 150 | ++Position; |
| 151 | // Find the next quote. |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 152 | while (Position != End && *Position != '"') |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 153 | ++Position; |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 154 | if (errorIfAtEndOfFile("'\"' at end of string")) |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 155 | return 0; |
| 156 | // Repeat until the previous character was not a '\' or was an escaped |
| 157 | // backslash. |
| 158 | } while (*(Position - 1) == '\\' && wasEscaped(First, Position)); |
| 159 | |
| 160 | return new (ValueAllocator.Allocate<JSONString>()) |
| 161 | JSONString(StringRef(First, Position - First)); |
| 162 | } |
| 163 | |
| 164 | |
| 165 | // Advances the position to the next non-whitespace position. |
| 166 | void JSONParser::nextNonWhitespace() { |
| 167 | do { |
| 168 | ++Position; |
| 169 | } while (isWhitespace()); |
| 170 | } |
| 171 | |
| 172 | // Checks if there is a whitespace character at the current position. |
| 173 | bool JSONParser::isWhitespace() { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 174 | return *Position == ' ' || *Position == '\t' || |
| 175 | *Position == '\n' || *Position == '\r'; |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 176 | } |
| 177 | |
| 178 | bool JSONParser::failed() const { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 179 | return Failed; |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 180 | } |
| 181 | |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 182 | // Parses a JSONValue, assuming that the current position is at the first |
| 183 | // character of the value. |
| 184 | JSONValue *JSONParser::parseValue() { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 185 | assert(Position != End); |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 186 | assert(!isWhitespace()); |
| 187 | switch (*Position) { |
| 188 | case '[': |
| 189 | return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this); |
| 190 | case '{': |
| 191 | return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this); |
| 192 | case '"': |
| 193 | return parseString(); |
| 194 | default: |
| 195 | setExpectedError("'[', '{' or '\"' at start of value", *Position); |
| 196 | return 0; |
| 197 | } |
| 198 | } |
| 199 | |
| 200 | // Parses a JSONKeyValuePair, assuming that the current position is at the first |
| 201 | // character of the key, value pair. |
| 202 | JSONKeyValuePair *JSONParser::parseKeyValuePair() { |
Manuel Klimek | 84cbb6f | 2011-12-21 18:16:39 +0000 | [diff] [blame] | 203 | assert(Position != End); |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 204 | assert(!isWhitespace()); |
| 205 | |
| 206 | JSONString *Key = parseString(); |
| 207 | if (Key == 0) |
| 208 | return 0; |
| 209 | |
| 210 | nextNonWhitespace(); |
| 211 | if (errorIfNotAt(':', "between key and value")) |
| 212 | return 0; |
| 213 | |
| 214 | nextNonWhitespace(); |
| 215 | const JSONValue *Value = parseValue(); |
| 216 | if (Value == 0) |
| 217 | return 0; |
| 218 | |
| 219 | return new (ValueAllocator.Allocate<JSONKeyValuePair>(1)) |
| 220 | JSONKeyValuePair(Key, Value); |
| 221 | } |
| 222 | |
Manuel Klimek | 44b920f | 2012-01-17 09:34:07 +0000 | [diff] [blame] | 223 | /// \brief Parses the first element of a JSON array or object, or closes the |
| 224 | /// array. |
| 225 | /// |
| 226 | /// The method assumes that the current position is before the first character |
| 227 | /// of the element, with possible white space in between. When successful, it |
| 228 | /// returns the new position after parsing the element. Otherwise, if there is |
| 229 | /// no next value, it returns a default constructed StringRef::iterator. |
| 230 | StringRef::iterator JSONParser::parseFirstElement(JSONAtom::Kind ContainerKind, |
| 231 | char StartChar, char EndChar, |
| 232 | const JSONAtom *&Element) { |
| 233 | assert(*Position == StartChar); |
| 234 | Element = 0; |
| 235 | nextNonWhitespace(); |
| 236 | if (errorIfAtEndOfFile("value or end of container at start of container")) |
| 237 | return StringRef::iterator(); |
| 238 | |
| 239 | if (*Position == EndChar) |
| 240 | return StringRef::iterator(); |
| 241 | |
| 242 | Element = parseElement(ContainerKind); |
| 243 | if (Element == 0) |
| 244 | return StringRef::iterator(); |
| 245 | |
| 246 | return Position; |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 247 | } |
| 248 | |
Manuel Klimek | 44b920f | 2012-01-17 09:34:07 +0000 | [diff] [blame] | 249 | /// \brief Parses the next element of a JSON array or object, or closes the |
| 250 | /// array. |
| 251 | /// |
| 252 | /// The method assumes that the current position is before the ',' which |
| 253 | /// separates the next element from the current element. When successful, it |
| 254 | /// returns the new position after parsing the element. Otherwise, if there is |
| 255 | /// no next value, it returns a default constructed StringRef::iterator. |
| 256 | StringRef::iterator JSONParser::parseNextElement(JSONAtom::Kind ContainerKind, |
| 257 | char EndChar, |
| 258 | const JSONAtom *&Element) { |
| 259 | Element = 0; |
| 260 | nextNonWhitespace(); |
| 261 | if (errorIfAtEndOfFile("',' or end of container for next element")) |
| 262 | return 0; |
| 263 | |
| 264 | if (*Position == ',') { |
| 265 | nextNonWhitespace(); |
| 266 | if (errorIfAtEndOfFile("element in container")) |
| 267 | return StringRef::iterator(); |
| 268 | |
| 269 | Element = parseElement(ContainerKind); |
| 270 | if (Element == 0) |
| 271 | return StringRef::iterator(); |
| 272 | |
| 273 | return Position; |
| 274 | } else if (*Position == EndChar) { |
| 275 | return StringRef::iterator(); |
| 276 | } else { |
| 277 | setExpectedError("',' or end of container for next element", *Position); |
| 278 | return StringRef::iterator(); |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | const JSONAtom *JSONParser::parseElement(JSONAtom::Kind ContainerKind) { |
| 283 | switch (ContainerKind) { |
| 284 | case JSONAtom::JK_Array: |
| 285 | return parseValue(); |
| 286 | case JSONAtom::JK_Object: |
| 287 | return parseKeyValuePair(); |
| 288 | default: |
| 289 | llvm_unreachable("Impossible code path"); |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | bool JSONParser::skipContainer(const JSONContainer &Container) { |
| 294 | for (JSONContainer::AtomIterator I = Container.atom_current(), |
| 295 | E = Container.atom_end(); |
| 296 | I != E; ++I) { |
| 297 | assert(*I != 0); |
| 298 | if (!skip(**I)) |
| 299 | return false; |
| 300 | } |
| 301 | return !failed(); |
Manuel Klimek | 76f1301 | 2011-12-16 13:09:10 +0000 | [diff] [blame] | 302 | } |