blob: da414e74216f0cb911f7031e097f832e468a5f58 [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Michael Clark837240f2007-03-13 08:26:25 +000034#if !HAVE_STRNCASECMP && defined(_MSC_VER)
35 /* MSC has the version as _strnicmp */
36# define strncasecmp _strnicmp
37#elif !HAVE_STRNCASECMP
38# error You do not have strncasecmp on your system.
39#endif /* HAVE_STRNCASECMP */
40
41
/* Spellings of the JSON keyword literals; the parser compares input
 * against these with strncasecmp(), i.e. without regard to case. */
static const char *json_null_str  = "null";
static const char *json_true_str  = "true";
static const char *json_false_str = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000045
/*
 * Human-readable descriptions of the tokener error codes.
 *
 * json_tokener_parse_ex() indexes this table directly with tok->err, so
 * the entries must stay in the same order as the error codes they describe.
 */
const char* json_tokener_errors[] = {
  "success",
  "continue",
  "nesting too deep",
  "unexpected end of data",
  "unexpected character",
  "null expected",
  "boolean expected",
  "number expected",
  "array value separator ',' expected",
  "quoted object property name expected",
  "object property name separator ':' expected",
  "object value separator ',' expected",
  "invalid string sequence",
  "expected comment",
};
62
/*
 * Helpers for decoding \uXXXX escape sequences.
 *
 * A code unit whose top six bits are 110110 (0xD800..0xDBFF) is a high
 * surrogate, one whose top six bits are 110111 (0xDC00..0xDFFF) is a low
 * surrogate.  DECODE_SURROGATE_PAIR() combines the low ten bits of each
 * half and adds 0x10000 to obtain the code point the pair stands for.
 */
#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
#define IS_LOW_SURROGATE(uc)  (((uc) & 0xFC00) == 0xDC00)
#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)

/* UTF-8 encoding of U+FFFD, emitted in place of escapes that cannot be
 * decoded.  Read-only: it is only ever copied into the print buffer. */
static const unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
68
Michael Clarka850f8e2007-03-13 08:26:26 +000069
Michael Clarke8de0782009-02-25 01:45:00 +000070struct json_tokener* json_tokener_new(void)
Michael Clarkf0d08882007-03-13 08:26:18 +000071{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000072 struct json_tokener *tok;
73
74 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000075 if (!tok) return NULL;
Michael Clarka850f8e2007-03-13 08:26:26 +000076 tok->pb = printbuf_new();
77 json_tokener_reset(tok);
78 return tok;
79}
80
81void json_tokener_free(struct json_tokener *tok)
82{
83 json_tokener_reset(tok);
84 if(tok) printbuf_free(tok->pb);
85 free(tok);
86}
87
88static void json_tokener_reset_level(struct json_tokener *tok, int depth)
89{
90 tok->stack[depth].state = json_tokener_state_eatws;
91 tok->stack[depth].saved_state = json_tokener_state_start;
92 json_object_put(tok->stack[depth].current);
93 tok->stack[depth].current = NULL;
94 free(tok->stack[depth].obj_field_name);
95 tok->stack[depth].obj_field_name = NULL;
96}
97
98void json_tokener_reset(struct json_tokener *tok)
99{
100 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000101 if (!tok)
102 return;
103
Michael Clarka850f8e2007-03-13 08:26:26 +0000104 for(i = tok->depth; i >= 0; i--)
105 json_tokener_reset_level(tok, i);
106 tok->depth = 0;
107 tok->err = json_tokener_success;
108}
109
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000110struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000111{
112 struct json_tokener* tok;
Michael Clarkf0d08882007-03-13 08:26:18 +0000113 struct json_object* obj;
114
Michael Clarka850f8e2007-03-13 08:26:26 +0000115 tok = json_tokener_new();
116 obj = json_tokener_parse_ex(tok, str, -1);
117 if(tok->err != json_tokener_success)
Michael Clarkaaec1ef2009-02-25 02:31:32 +0000118 obj = (struct json_object*)error_ptr(-tok->err);
Michael Clarka850f8e2007-03-13 08:26:26 +0000119 json_tokener_free(tok);
Michael Clarkf0d08882007-03-13 08:26:18 +0000120 return obj;
121}
122
Michael Clarka850f8e2007-03-13 08:26:26 +0000123
#if !HAVE_STRNDUP
/*
 * CAW: compliant version of strndup()
 *
 * Returns a newly malloc()ed, NUL-terminated copy of at most n bytes of
 * str, or NULL if str is NULL or the allocation fails.  The caller owns
 * the result and must free() it.
 *
 * The source is scanned with memchr() bounded by n, so str does not have
 * to be NUL-terminated as long as at least n bytes are readable.
 */
char* strndup(const char* str, size_t n)
{
  const char *nul;
  size_t nn;
  char *s;

  if(!str) return NULL;

  /* Never look beyond the first n bytes of the source. */
  nul = (const char*)memchr(str, '\0', n);
  nn = nul ? (size_t)(nul - str) : n;

  s = (char*)malloc(nn + 1);
  if(s) {
    memcpy(s, str, nn);
    s[nn] = '\0';
  }

  return s;
}
#endif
144
Michael Clarka850f8e2007-03-13 08:26:26 +0000145
/*
 * Shorthand for the fields of the parse-stack entry at the current depth.
 * These expand textually and therefore require a variable named `tok`
 * to be in scope wherever they are used.
 */
#define state           tok->stack[tok->depth].state
#define saved_state     tok->stack[tok->depth].saved_state
#define current         tok->stack[tok->depth].current
#define obj_field_name  tok->stack[tok->depth].obj_field_name
150
/* Optimization:
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character by character.  A large performance boost is
 * achieved by using tighter loops to locally handle units such as
 * comments and strings.  Loops that handle an entire token within
 * their scope also gather entire strings and pass them to
 * printbuf_memappend() in a single call, rather than calling
 * printbuf_memappend() one char at a time.
 *
 * The POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
 * common to both the main loop and the tighter loops.
 */
163
/* POP_CHAR(dest, tok) macro:
 *   Despite the name nothing is consumed: the current character is read
 *   into dest and the macro evaluates to 1.
 *   When tok->char_offset has reached len there is no character to read;
 *   the macro then evaluates to 0 and sets tok->err to
 *   json_tokener_success if a complete top-level value has been parsed
 *   (depth 0, eating whitespace, finish pending), or to
 *   json_tokener_continue otherwise.
 *   Implicit inputs: str, len vars
 */
#define POP_CHAR(dest, tok)                                             \
  (((tok)->char_offset != len)                                          \
   ? (((dest) = *str), 1)                                               \
   : (((tok)->err = (((tok)->depth == 0 &&                              \
                      state == json_tokener_state_eatws &&              \
                      saved_state == json_tokener_state_finish)         \
                     ? json_tokener_success                             \
                     : json_tokener_continue)), 0))
178
/* ADVANCE_CHAR(str, tok) macro:
 *   Increments str and tok->char_offset.
 *   To suit the existing conditionals it evaluates to the character that
 *   was just stepped over, i.e. the value of c (0 at the end of a string).
 *   Implicit inputs: c var
 */
#define ADVANCE_CHAR(str, tok) \
  ((str)++, (tok)->char_offset++, c)
186
Brent Miller126ad952009-08-20 06:50:22 +0000187
Michael Clark95f55a72009-04-27 08:16:58 +0000188/* End optimization macro defs */
189
190
Michael Clarka850f8e2007-03-13 08:26:26 +0000191struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000192 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000193{
Michael Clarka850f8e2007-03-13 08:26:26 +0000194 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000195 char c = '\1';
Michael Clarkf0d08882007-03-13 08:26:18 +0000196
Michael Clarka850f8e2007-03-13 08:26:26 +0000197 tok->char_offset = 0;
198 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000199
Michael Clark95f55a72009-04-27 08:16:58 +0000200 while (POP_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000201
Michael Clarka850f8e2007-03-13 08:26:26 +0000202 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000203 switch(state) {
204
205 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000206 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000207 while (isspace((int)c)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000208 if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
209 goto out;
210 }
211 if(c == '/') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000212 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000213 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000214 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000215 } else {
216 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000217 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000218 }
219 break;
220
221 case json_tokener_state_start:
222 switch(c) {
223 case '{':
224 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000225 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000226 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000227 break;
228 case '[':
229 state = json_tokener_state_eatws;
230 saved_state = json_tokener_state_array;
231 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000232 break;
233 case 'N':
234 case 'n':
235 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000236 printbuf_reset(tok->pb);
237 tok->st_pos = 0;
238 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000239 case '"':
240 case '\'':
Michael Clarkf0d08882007-03-13 08:26:18 +0000241 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000242 printbuf_reset(tok->pb);
243 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000244 break;
245 case 'T':
246 case 't':
247 case 'F':
248 case 'f':
249 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000250 printbuf_reset(tok->pb);
251 tok->st_pos = 0;
252 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000253#if defined(__GNUC__)
254 case '0' ... '9':
255#else
256 case '0':
257 case '1':
258 case '2':
259 case '3':
260 case '4':
261 case '5':
262 case '6':
263 case '7':
264 case '8':
265 case '9':
266#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000267 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000268 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000269 printbuf_reset(tok->pb);
270 tok->is_double = 0;
271 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000272 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000273 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000274 goto out;
275 }
276 break;
277
278 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000279 if(tok->depth == 0) goto out;
280 obj = json_object_get(current);
281 json_tokener_reset_level(tok, tok->depth);
282 tok->depth--;
283 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000284
285 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000286 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000287 if(strncasecmp(json_null_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000288 json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000289 if(tok->st_pos == strlen(json_null_str)) {
290 current = NULL;
291 saved_state = json_tokener_state_finish;
292 state = json_tokener_state_eatws;
293 goto redo_char;
294 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000295 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000296 tok->err = json_tokener_error_parse_null;
297 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000298 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000299 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000300 break;
301
302 case json_tokener_state_comment_start:
303 if(c == '*') {
304 state = json_tokener_state_comment;
305 } else if(c == '/') {
306 state = json_tokener_state_comment_eol;
307 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000308 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000309 goto out;
310 }
Michael Clark95f55a72009-04-27 08:16:58 +0000311 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000312 break;
313
314 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000315 {
316 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000317 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000318 while(c != '*') {
319 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
320 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
321 goto out;
322 }
323 }
324 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
325 state = json_tokener_state_comment_end;
326 }
327 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000328
329 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000330 {
331 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000332 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000333 while(c != '\n') {
334 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
335 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
336 goto out;
337 }
338 }
339 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000340 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000341 state = json_tokener_state_eatws;
342 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000343 break;
344
345 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000346 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000347 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000348 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000349 state = json_tokener_state_eatws;
350 } else {
351 state = json_tokener_state_comment;
352 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000353 break;
354
355 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000356 {
357 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000358 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000359 while(1) {
360 if(c == tok->quote_char) {
361 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
362 current = json_object_new_string(tok->pb->buf);
363 saved_state = json_tokener_state_finish;
364 state = json_tokener_state_eatws;
365 break;
366 } else if(c == '\\') {
367 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
368 saved_state = json_tokener_state_string;
369 state = json_tokener_state_string_escape;
370 break;
371 }
372 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
373 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
374 goto out;
375 }
376 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000377 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000378 break;
379
380 case json_tokener_state_string_escape:
381 switch(c) {
382 case '"':
383 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000384 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000385 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000386 state = saved_state;
387 break;
388 case 'b':
389 case 'n':
390 case 'r':
391 case 't':
Michael Clark95f55a72009-04-27 08:16:58 +0000392 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
393 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
394 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
395 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000396 state = saved_state;
397 break;
398 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000399 tok->ucs_char = 0;
400 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000401 state = json_tokener_state_escape_unicode;
402 break;
403 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000404 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000405 goto out;
406 }
407 break;
408
409 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000410 {
Brent Miller126ad952009-08-20 06:50:22 +0000411 unsigned int got_hi_surrogate = 0;
412
413 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000414 while(1) {
415 if(strchr(json_hex_chars, c)) {
416 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
417 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000418 unsigned char unescaped_utf[4];
419
420 if (got_hi_surrogate) {
421 if (IS_LOW_SURROGATE(tok->ucs_char)) {
422 /* Recalculate the ucs_char, then fall thru to process normally */
423 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
424 } else {
425 /* Hi surrogate was not followed by a low surrogate */
426 /* Replace the hi and process the rest normally */
427 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
428 }
429 got_hi_surrogate = 0;
430 }
431
Michael Clark95f55a72009-04-27 08:16:58 +0000432 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000433 unescaped_utf[0] = tok->ucs_char;
434 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000435 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000436 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
437 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
438 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
439 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
440 /* Got a high surrogate. Remember it and look for the
441 * the beginning of another sequence, which should be the
442 * low surrogate.
443 */
444 got_hi_surrogate = tok->ucs_char;
445 /* Not at end, and the next two chars should be "\u" */
446 if ((tok->char_offset+1 != len) &&
447 (tok->char_offset+2 != len) &&
448 (str[1] == '\\') &&
449 (str[2] == 'u'))
450 {
451 ADVANCE_CHAR(str, tok);
452 ADVANCE_CHAR(str, tok);
453
454 /* Advance to the first char of the next sequence and
455 * continue processing with the next sequence.
456 */
457 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
458 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
459 goto out;
460 }
461 tok->ucs_char = 0;
462 tok->st_pos = 0;
463 continue; /* other json_tokener_state_escape_unicode */
464 } else {
465 /* Got a high surrogate without another sequence following
466 * it. Put a replacement char in for the hi surrogate
467 * and pretend we finished.
468 */
469 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
470 }
471 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
472 /* Got a low surrogate not preceded by a high */
473 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
474 } else if (tok->ucs_char < 0x10000) {
475 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
476 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
477 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
478 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
479 } else if (tok->ucs_char < 0x110000) {
480 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
481 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
482 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
483 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
484 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000485 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000486 /* Don't know what we got--insert the replacement char */
487 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
488 }
Michael Clark95f55a72009-04-27 08:16:58 +0000489 state = saved_state;
490 break;
491 }
492 } else {
493 tok->err = json_tokener_error_parse_string;
494 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000495 }
496 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
497 if (got_hi_surrogate) /* Clean up any pending chars */
498 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000499 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000500 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000501 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000502 }
503 break;
504
505 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000506 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000507 if(strncasecmp(json_true_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000508 json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000509 if(tok->st_pos == strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000510 current = json_object_new_boolean(1);
511 saved_state = json_tokener_state_finish;
512 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000513 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000514 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000515 } else if(strncasecmp(json_false_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000516 json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000517 if(tok->st_pos == strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000518 current = json_object_new_boolean(0);
519 saved_state = json_tokener_state_finish;
520 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000521 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000522 }
523 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000524 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000525 goto out;
526 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000527 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000528 break;
529
530 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000531 {
532 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000533 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000534 int case_len=0;
535 while(c && strchr(json_number_chars, c)) {
536 ++case_len;
537 if(c == '.' || c == 'e') tok->is_double = 1;
538 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
539 printbuf_memappend_fast(tok->pb, case_start, case_len);
540 goto out;
541 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000542 }
Michael Clark95f55a72009-04-27 08:16:58 +0000543 if (case_len>0)
544 printbuf_memappend_fast(tok->pb, case_start, case_len);
545 }
546 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000547 int64_t num64;
548 double numd;
549 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
ehaszla252669c2010-12-07 18:15:35 +0000550 current = json_object_new_int64(num64);
Michael Clarkc4dceae2010-10-06 16:39:20 +0000551 } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
Michael Clark95f55a72009-04-27 08:16:58 +0000552 current = json_object_new_double(numd);
553 } else {
554 tok->err = json_tokener_error_parse_number;
555 goto out;
556 }
557 saved_state = json_tokener_state_finish;
558 state = json_tokener_state_eatws;
559 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000560 }
561 break;
562
563 case json_tokener_state_array:
564 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000565 saved_state = json_tokener_state_finish;
566 state = json_tokener_state_eatws;
567 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000568 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
569 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000570 goto out;
571 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000572 state = json_tokener_state_array_add;
573 tok->depth++;
574 json_tokener_reset_level(tok, tok->depth);
575 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000576 }
577 break;
578
Michael Clarka850f8e2007-03-13 08:26:26 +0000579 case json_tokener_state_array_add:
580 json_object_array_add(current, obj);
581 saved_state = json_tokener_state_array_sep;
582 state = json_tokener_state_eatws;
583 goto redo_char;
584
Michael Clarkf0d08882007-03-13 08:26:18 +0000585 case json_tokener_state_array_sep:
586 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000587 saved_state = json_tokener_state_finish;
588 state = json_tokener_state_eatws;
589 } else if(c == ',') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000590 saved_state = json_tokener_state_array;
591 state = json_tokener_state_eatws;
592 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000593 tok->err = json_tokener_error_parse_array;
594 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000595 }
596 break;
597
Michael Clarkf0d08882007-03-13 08:26:18 +0000598 case json_tokener_state_object_field_start:
599 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000600 saved_state = json_tokener_state_finish;
601 state = json_tokener_state_eatws;
602 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000603 tok->quote_char = c;
604 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000605 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000606 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000607 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000608 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000609 }
610 break;
611
612 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000613 {
614 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000615 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000616 while(1) {
617 if(c == tok->quote_char) {
618 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
619 obj_field_name = strdup(tok->pb->buf);
620 saved_state = json_tokener_state_object_field_end;
621 state = json_tokener_state_eatws;
622 break;
623 } else if(c == '\\') {
624 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
625 saved_state = json_tokener_state_object_field;
626 state = json_tokener_state_string_escape;
627 break;
628 }
629 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
630 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
631 goto out;
632 }
633 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000634 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000635 break;
636
637 case json_tokener_state_object_field_end:
638 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000639 saved_state = json_tokener_state_object_value;
640 state = json_tokener_state_eatws;
641 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000642 tok->err = json_tokener_error_parse_object_key_sep;
643 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000644 }
645 break;
646
647 case json_tokener_state_object_value:
Michael Clarka850f8e2007-03-13 08:26:26 +0000648 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
649 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000650 goto out;
651 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000652 state = json_tokener_state_object_value_add;
653 tok->depth++;
654 json_tokener_reset_level(tok, tok->depth);
655 goto redo_char;
656
657 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000658 json_object_object_add(current, obj_field_name, obj);
659 free(obj_field_name);
660 obj_field_name = NULL;
661 saved_state = json_tokener_state_object_sep;
662 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000663 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000664
665 case json_tokener_state_object_sep:
666 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000667 saved_state = json_tokener_state_finish;
668 state = json_tokener_state_eatws;
669 } else if(c == ',') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000670 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000671 state = json_tokener_state_eatws;
672 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000673 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000674 goto out;
675 }
676 break;
677
678 }
Michael Clark95f55a72009-04-27 08:16:58 +0000679 if (!ADVANCE_CHAR(str, tok))
680 goto out;
681 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000682
683 out:
Michael Clark95f55a72009-04-27 08:16:58 +0000684 if (!c) { /* We hit an eof char (0) */
685 if(state != json_tokener_state_finish &&
686 saved_state != json_tokener_state_finish)
687 tok->err = json_tokener_error_parse_eof;
688 }
689
Michael Clarka850f8e2007-03-13 08:26:26 +0000690 if(tok->err == json_tokener_success) return json_object_get(current);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000691 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000692 json_tokener_errors[tok->err], tok->char_offset);
693 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000694}