blob: 8d0b5dce0e76d3aba33d32296d39820dcc3aaa3a [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
23
24#include "bits.h"
25#include "debug.h"
26#include "printbuf.h"
27#include "arraylist.h"
28#include "json_object.h"
29#include "json_tokener.h"
30
Michael Clarka850f8e2007-03-13 08:26:26 +000031
Michael Clark837240f2007-03-13 08:26:25 +000032#if !HAVE_STRNCASECMP && defined(_MSC_VER)
33 /* MSC has the version as _strnicmp */
34# define strncasecmp _strnicmp
35#elif !HAVE_STRNCASECMP
36# error You do not have strncasecmp on your system.
37#endif /* HAVE_STRNCASECMP */
38
39
Michael Clarka850f8e2007-03-13 08:26:26 +000040static const char* json_null_str = "null";
41static const char* json_true_str = "true";
42static const char* json_false_str = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000043
/* Human-readable messages, indexed by the tokener's error code
 * (json_tokener_parse_ex() looks them up with tok->err).  The order must
 * stay in step with the error enumeration declared in json_tokener.h. */
const char* json_tokener_errors[] = {
  "success",
  "continue",
  "nesting too deep",
  "unexpected end of data",
  "unexpected character",
  "null expected",
  "boolean expected",
  "number expected",
  "array value separator ',' expected",
  "quoted object property name expected",
  "object property name separator ':' expected",
  "object value separator ',' expected",
  "invalid string sequence",
  "expected comment",
};
60
Brent Miller126ad952009-08-20 06:50:22 +000061/* Stuff for decoding unicode sequences */
62#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
63#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00)
64#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
65static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
66
Michael Clarka850f8e2007-03-13 08:26:26 +000067
Michael Clarke8de0782009-02-25 01:45:00 +000068struct json_tokener* json_tokener_new(void)
Michael Clarkf0d08882007-03-13 08:26:18 +000069{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000070 struct json_tokener *tok;
71
72 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000073 if (!tok) return NULL;
Michael Clarka850f8e2007-03-13 08:26:26 +000074 tok->pb = printbuf_new();
75 json_tokener_reset(tok);
76 return tok;
77}
78
79void json_tokener_free(struct json_tokener *tok)
80{
81 json_tokener_reset(tok);
82 if(tok) printbuf_free(tok->pb);
83 free(tok);
84}
85
86static void json_tokener_reset_level(struct json_tokener *tok, int depth)
87{
88 tok->stack[depth].state = json_tokener_state_eatws;
89 tok->stack[depth].saved_state = json_tokener_state_start;
90 json_object_put(tok->stack[depth].current);
91 tok->stack[depth].current = NULL;
92 free(tok->stack[depth].obj_field_name);
93 tok->stack[depth].obj_field_name = NULL;
94}
95
96void json_tokener_reset(struct json_tokener *tok)
97{
98 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +000099 if (!tok)
100 return;
101
Michael Clarka850f8e2007-03-13 08:26:26 +0000102 for(i = tok->depth; i >= 0; i--)
103 json_tokener_reset_level(tok, i);
104 tok->depth = 0;
105 tok->err = json_tokener_success;
106}
107
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000108struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000109{
110 struct json_tokener* tok;
Michael Clarkf0d08882007-03-13 08:26:18 +0000111 struct json_object* obj;
112
Michael Clarka850f8e2007-03-13 08:26:26 +0000113 tok = json_tokener_new();
114 obj = json_tokener_parse_ex(tok, str, -1);
115 if(tok->err != json_tokener_success)
Michael Clarkaaec1ef2009-02-25 02:31:32 +0000116 obj = (struct json_object*)error_ptr(-tok->err);
Michael Clarka850f8e2007-03-13 08:26:26 +0000117 json_tokener_free(tok);
Michael Clarkf0d08882007-03-13 08:26:18 +0000118 return obj;
119}
120
Michael Clarka850f8e2007-03-13 08:26:26 +0000121
Michael Clark4504df72007-03-13 08:26:20 +0000122#if !HAVE_STRNDUP
123/* CAW: compliant version of strndup() */
/* Fallback strndup(): duplicate at most n bytes of str into a freshly
 * malloc'ed, always NUL-terminated buffer.
 *
 * str: source bytes; need not be NUL-terminated as long as at least n
 *      bytes are readable.  NULL is tolerated (extension) and yields NULL.
 * n:   maximum number of bytes to copy
 *
 * Returns the copy (caller frees), or NULL on NULL input or when the
 * allocation fails.
 */
char* strndup(const char* str, size_t n)
{
  const char *nul;
  size_t len;
  char *copy;

  if(!str) return NULL;

  /* Look for the terminator within the first n bytes only; an unbounded
   * strlen() would read past the end of an unterminated buffer. */
  nul = (const char*)memchr(str, '\0', n);
  len = nul ? (size_t)(nul - str) : n;

  copy = (char*)malloc(len + 1);
  if(!copy) return NULL;

  memcpy(copy, str, len);
  copy[len] = '\0';
  return copy;
}
141#endif
142
Michael Clarka850f8e2007-03-13 08:26:26 +0000143
144#define state tok->stack[tok->depth].state
145#define saved_state tok->stack[tok->depth].saved_state
146#define current tok->stack[tok->depth].current
147#define obj_field_name tok->stack[tok->depth].obj_field_name
148
Michael Clark95f55a72009-04-27 08:16:58 +0000149/* Optimization:
150 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character-by-character.  A large performance boost is
152 * achieved by using tighter loops to locally handle units such as
153 * comments and strings. Loops that handle an entire token within
154 * their scope also gather entire strings and pass them to
155 * printbuf_memappend() in a single call, rather than calling
156 * printbuf_memappend() one char at a time.
157 *
158 * POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
159 * common to both the main loop and the tighter loops.
160 */
161
162/* POP_CHAR(dest, tok) macro:
163 * Not really a pop()...peeks at the current char and stores it in dest.
164 * Returns 1 on success, sets tok->err and returns 0 if no more chars.
165 * Implicit inputs: str, len vars
166 */
167#define POP_CHAR(dest, tok) \
168 (((tok)->char_offset == len) ? \
169 (((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
170 (((tok)->err = json_tokener_success), 0) \
171 : \
172 (((tok)->err = json_tokener_continue), 0) \
173 ) : \
174 (((dest) = *str), 1) \
175 )
176
177/* ADVANCE_CHAR() macro:
 * Increments str & tok->char_offset.
179 * For convenience of existing conditionals, returns the old value of c (0 on eof)
180 * Implicit inputs: c var
181 */
182#define ADVANCE_CHAR(str, tok) \
183 ( ++(str), ((tok)->char_offset)++, c)
184
Brent Miller126ad952009-08-20 06:50:22 +0000185
Michael Clark95f55a72009-04-27 08:16:58 +0000186/* End optimization macro defs */
187
188
Michael Clarka850f8e2007-03-13 08:26:26 +0000189struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000190 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000191{
Michael Clarka850f8e2007-03-13 08:26:26 +0000192 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000193 char c = '\1';
Michael Clarkf0d08882007-03-13 08:26:18 +0000194
Michael Clarka850f8e2007-03-13 08:26:26 +0000195 tok->char_offset = 0;
196 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000197
Michael Clark95f55a72009-04-27 08:16:58 +0000198 while (POP_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000199
Michael Clarka850f8e2007-03-13 08:26:26 +0000200 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000201 switch(state) {
202
203 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000204 /* Advance until we change state */
205 while (isspace(c)) {
206 if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
207 goto out;
208 }
209 if(c == '/') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000210 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000211 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000212 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000213 } else {
214 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000215 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000216 }
217 break;
218
219 case json_tokener_state_start:
220 switch(c) {
221 case '{':
222 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000223 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000224 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000225 break;
226 case '[':
227 state = json_tokener_state_eatws;
228 saved_state = json_tokener_state_array;
229 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000230 break;
231 case 'N':
232 case 'n':
233 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000234 printbuf_reset(tok->pb);
235 tok->st_pos = 0;
236 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000237 case '"':
238 case '\'':
Michael Clarkf0d08882007-03-13 08:26:18 +0000239 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000240 printbuf_reset(tok->pb);
241 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000242 break;
243 case 'T':
244 case 't':
245 case 'F':
246 case 'f':
247 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000248 printbuf_reset(tok->pb);
249 tok->st_pos = 0;
250 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000251#if defined(__GNUC__)
252 case '0' ... '9':
253#else
254 case '0':
255 case '1':
256 case '2':
257 case '3':
258 case '4':
259 case '5':
260 case '6':
261 case '7':
262 case '8':
263 case '9':
264#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000265 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000266 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000267 printbuf_reset(tok->pb);
268 tok->is_double = 0;
269 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000270 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000271 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000272 goto out;
273 }
274 break;
275
276 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000277 if(tok->depth == 0) goto out;
278 obj = json_object_get(current);
279 json_tokener_reset_level(tok, tok->depth);
280 tok->depth--;
281 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000282
283 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000284 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000285 if(strncasecmp(json_null_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000286 json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000287 if(tok->st_pos == strlen(json_null_str)) {
288 current = NULL;
289 saved_state = json_tokener_state_finish;
290 state = json_tokener_state_eatws;
291 goto redo_char;
292 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000293 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000294 tok->err = json_tokener_error_parse_null;
295 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000296 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000297 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000298 break;
299
300 case json_tokener_state_comment_start:
301 if(c == '*') {
302 state = json_tokener_state_comment;
303 } else if(c == '/') {
304 state = json_tokener_state_comment_eol;
305 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000306 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000307 goto out;
308 }
Michael Clark95f55a72009-04-27 08:16:58 +0000309 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000310 break;
311
312 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000313 {
314 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000315 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000316 while(c != '*') {
317 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
318 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
319 goto out;
320 }
321 }
322 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
323 state = json_tokener_state_comment_end;
324 }
325 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000326
327 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000328 {
329 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000330 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000331 while(c != '\n') {
332 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
333 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
334 goto out;
335 }
336 }
337 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000338 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000339 state = json_tokener_state_eatws;
340 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000341 break;
342
343 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000344 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000345 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000346 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000347 state = json_tokener_state_eatws;
348 } else {
349 state = json_tokener_state_comment;
350 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000351 break;
352
353 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000354 {
355 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000356 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000357 while(1) {
358 if(c == tok->quote_char) {
359 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
360 current = json_object_new_string(tok->pb->buf);
361 saved_state = json_tokener_state_finish;
362 state = json_tokener_state_eatws;
363 break;
364 } else if(c == '\\') {
365 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
366 saved_state = json_tokener_state_string;
367 state = json_tokener_state_string_escape;
368 break;
369 }
370 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
371 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
372 goto out;
373 }
374 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000375 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000376 break;
377
378 case json_tokener_state_string_escape:
379 switch(c) {
380 case '"':
381 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000382 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000383 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000384 state = saved_state;
385 break;
386 case 'b':
387 case 'n':
388 case 'r':
389 case 't':
Michael Clark95f55a72009-04-27 08:16:58 +0000390 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
391 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
392 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
393 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000394 state = saved_state;
395 break;
396 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000397 tok->ucs_char = 0;
398 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000399 state = json_tokener_state_escape_unicode;
400 break;
401 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000402 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000403 goto out;
404 }
405 break;
406
407 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000408 {
Brent Miller126ad952009-08-20 06:50:22 +0000409 unsigned int got_hi_surrogate = 0;
410
411 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000412 while(1) {
413 if(strchr(json_hex_chars, c)) {
414 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
415 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000416 unsigned char unescaped_utf[4];
417
418 if (got_hi_surrogate) {
419 if (IS_LOW_SURROGATE(tok->ucs_char)) {
420 /* Recalculate the ucs_char, then fall thru to process normally */
421 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
422 } else {
423 /* Hi surrogate was not followed by a low surrogate */
424 /* Replace the hi and process the rest normally */
425 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
426 }
427 got_hi_surrogate = 0;
428 }
429
Michael Clark95f55a72009-04-27 08:16:58 +0000430 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000431 unescaped_utf[0] = tok->ucs_char;
432 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000433 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000434 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
435 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
436 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
437 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
438 /* Got a high surrogate. Remember it and look for the
439 * the beginning of another sequence, which should be the
440 * low surrogate.
441 */
442 got_hi_surrogate = tok->ucs_char;
443 /* Not at end, and the next two chars should be "\u" */
444 if ((tok->char_offset+1 != len) &&
445 (tok->char_offset+2 != len) &&
446 (str[1] == '\\') &&
447 (str[2] == 'u'))
448 {
449 ADVANCE_CHAR(str, tok);
450 ADVANCE_CHAR(str, tok);
451
452 /* Advance to the first char of the next sequence and
453 * continue processing with the next sequence.
454 */
455 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
456 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
457 goto out;
458 }
459 tok->ucs_char = 0;
460 tok->st_pos = 0;
461 continue; /* other json_tokener_state_escape_unicode */
462 } else {
463 /* Got a high surrogate without another sequence following
464 * it. Put a replacement char in for the hi surrogate
465 * and pretend we finished.
466 */
467 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
468 }
469 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
470 /* Got a low surrogate not preceded by a high */
471 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
472 } else if (tok->ucs_char < 0x10000) {
473 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
474 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
475 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
476 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
477 } else if (tok->ucs_char < 0x110000) {
478 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
479 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
480 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
481 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
482 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000483 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000484 /* Don't know what we got--insert the replacement char */
485 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
486 }
Michael Clark95f55a72009-04-27 08:16:58 +0000487 state = saved_state;
488 break;
489 }
490 } else {
491 tok->err = json_tokener_error_parse_string;
492 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000493 }
494 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
495 if (got_hi_surrogate) /* Clean up any pending chars */
496 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000497 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000498 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000499 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000500 }
501 break;
502
503 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000504 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000505 if(strncasecmp(json_true_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000506 json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000507 if(tok->st_pos == strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000508 current = json_object_new_boolean(1);
509 saved_state = json_tokener_state_finish;
510 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000511 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000512 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000513 } else if(strncasecmp(json_false_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000514 json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000515 if(tok->st_pos == strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000516 current = json_object_new_boolean(0);
517 saved_state = json_tokener_state_finish;
518 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000519 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000520 }
521 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000522 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000523 goto out;
524 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000525 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000526 break;
527
528 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000529 {
530 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000531 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000532 int case_len=0;
533 while(c && strchr(json_number_chars, c)) {
534 ++case_len;
535 if(c == '.' || c == 'e') tok->is_double = 1;
536 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
537 printbuf_memappend_fast(tok->pb, case_start, case_len);
538 goto out;
539 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000540 }
Michael Clark95f55a72009-04-27 08:16:58 +0000541 if (case_len>0)
542 printbuf_memappend_fast(tok->pb, case_start, case_len);
543 }
544 {
545 int numi;
546 double numd;
547 if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
548 current = json_object_new_int(numi);
549 } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
550 current = json_object_new_double(numd);
551 } else {
552 tok->err = json_tokener_error_parse_number;
553 goto out;
554 }
555 saved_state = json_tokener_state_finish;
556 state = json_tokener_state_eatws;
557 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000558 }
559 break;
560
561 case json_tokener_state_array:
562 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000563 saved_state = json_tokener_state_finish;
564 state = json_tokener_state_eatws;
565 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000566 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
567 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000568 goto out;
569 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000570 state = json_tokener_state_array_add;
571 tok->depth++;
572 json_tokener_reset_level(tok, tok->depth);
573 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000574 }
575 break;
576
Michael Clarka850f8e2007-03-13 08:26:26 +0000577 case json_tokener_state_array_add:
578 json_object_array_add(current, obj);
579 saved_state = json_tokener_state_array_sep;
580 state = json_tokener_state_eatws;
581 goto redo_char;
582
Michael Clarkf0d08882007-03-13 08:26:18 +0000583 case json_tokener_state_array_sep:
584 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000585 saved_state = json_tokener_state_finish;
586 state = json_tokener_state_eatws;
587 } else if(c == ',') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000588 saved_state = json_tokener_state_array;
589 state = json_tokener_state_eatws;
590 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000591 tok->err = json_tokener_error_parse_array;
592 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000593 }
594 break;
595
Michael Clarkf0d08882007-03-13 08:26:18 +0000596 case json_tokener_state_object_field_start:
597 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000598 saved_state = json_tokener_state_finish;
599 state = json_tokener_state_eatws;
600 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000601 tok->quote_char = c;
602 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000603 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000604 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000605 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000606 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000607 }
608 break;
609
610 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000611 {
612 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000613 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000614 while(1) {
615 if(c == tok->quote_char) {
616 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
617 obj_field_name = strdup(tok->pb->buf);
618 saved_state = json_tokener_state_object_field_end;
619 state = json_tokener_state_eatws;
620 break;
621 } else if(c == '\\') {
622 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
623 saved_state = json_tokener_state_object_field;
624 state = json_tokener_state_string_escape;
625 break;
626 }
627 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
628 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
629 goto out;
630 }
631 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000632 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000633 break;
634
635 case json_tokener_state_object_field_end:
636 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000637 saved_state = json_tokener_state_object_value;
638 state = json_tokener_state_eatws;
639 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000640 tok->err = json_tokener_error_parse_object_key_sep;
641 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000642 }
643 break;
644
645 case json_tokener_state_object_value:
Michael Clarka850f8e2007-03-13 08:26:26 +0000646 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
647 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000648 goto out;
649 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000650 state = json_tokener_state_object_value_add;
651 tok->depth++;
652 json_tokener_reset_level(tok, tok->depth);
653 goto redo_char;
654
655 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000656 json_object_object_add(current, obj_field_name, obj);
657 free(obj_field_name);
658 obj_field_name = NULL;
659 saved_state = json_tokener_state_object_sep;
660 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000661 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000662
663 case json_tokener_state_object_sep:
664 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000665 saved_state = json_tokener_state_finish;
666 state = json_tokener_state_eatws;
667 } else if(c == ',') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000668 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000669 state = json_tokener_state_eatws;
670 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000671 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000672 goto out;
673 }
674 break;
675
676 }
Michael Clark95f55a72009-04-27 08:16:58 +0000677 if (!ADVANCE_CHAR(str, tok))
678 goto out;
679 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000680
681 out:
Michael Clark95f55a72009-04-27 08:16:58 +0000682 if (!c) { /* We hit an eof char (0) */
683 if(state != json_tokener_state_finish &&
684 saved_state != json_tokener_state_finish)
685 tok->err = json_tokener_error_parse_eof;
686 }
687
Michael Clarka850f8e2007-03-13 08:26:26 +0000688 if(tok->err == json_tokener_success) return json_object_get(current);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000689 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000690 json_tokener_errors[tok->err], tok->char_offset);
691 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000692}