blob: e0edca06ae7499db35cb437b23a273eef41e1bbb [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Mateusz Loskota6f39a32012-05-21 23:22:36 +010034#if !HAVE_STRDUP && defined(_MSC_VER)
35 /* MSC has the version as _strdup */
36# define strdup _strdup
37#elif !HAVE_STRDUP
38# error You do not have strdup on your system.
39#endif /* HAVE_STRDUP */
40
Michael Clark837240f2007-03-13 08:26:25 +000041#if !HAVE_STRNCASECMP && defined(_MSC_VER)
42 /* MSC has the version as _strnicmp */
43# define strncasecmp _strnicmp
44#elif !HAVE_STRNCASECMP
45# error You do not have strncasecmp on your system.
46#endif /* HAVE_STRNCASECMP */
47
Michael Clarka850f8e2007-03-13 08:26:26 +000048static const char* json_null_str = "null";
49static const char* json_true_str = "true";
50static const char* json_false_str = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000051
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060052// XXX after v0.10 this array will become static:
Michael Clarka850f8e2007-03-13 08:26:26 +000053const char* json_tokener_errors[] = {
54 "success",
55 "continue",
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -050056 "nesting too deep",
Michael Clarka850f8e2007-03-13 08:26:26 +000057 "unexpected end of data",
58 "unexpected character",
59 "null expected",
60 "boolean expected",
61 "number expected",
62 "array value separator ',' expected",
63 "quoted object property name expected",
64 "object property name separator ':' expected",
65 "object value separator ',' expected",
66 "invalid string sequence",
67 "expected comment",
68};
69
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060070const char *json_tokener_error_desc(enum json_tokener_error jerr)
71{
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060072 int jerr_int = (int)jerr;
73 if (jerr_int < 0 || jerr_int > (int)sizeof(json_tokener_errors))
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060074 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
75 return json_tokener_errors[jerr];
76}
77
78enum json_tokener_error json_tokener_get_error(json_tokener *tok)
79{
80 return tok->err;
81}
82
/* Stuff for decoding unicode sequences.
 *
 * The surrogate tests mask with 0xFFFFFC00 rather than 0xFC00 so that only
 * the 16-bit ranges D800-DBFF / DC00-DFFF match.  With the narrower mask a
 * code point that has already been combined from a surrogate pair (for
 * example 0x1D800) would be mistaken for a lone high surrogate.
 */
#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFFFFFC00) == 0xD800)
#define IS_LOW_SURROGATE(uc)  (((uc) & 0xFFFFFC00) == 0xDC00)
/* Combine the low 10 bits of each half and add the 0x10000 offset. */
#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
/* UTF-8 encoding of U+FFFD, emitted in place of undecodable escapes. */
static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
88
Remi Collet197cb1d2012-11-27 09:01:45 +010089struct json_tokener* json_tokener_new_ex(int depth)
Michael Clarkf0d08882007-03-13 08:26:18 +000090{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000091 struct json_tokener *tok;
92
93 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000094 if (!tok) return NULL;
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060095 tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
Remi Collet197cb1d2012-11-27 09:01:45 +010096 if (!tok->stack) {
97 free(tok);
98 return NULL;
99 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000100 tok->pb = printbuf_new();
Remi Collet197cb1d2012-11-27 09:01:45 +0100101 tok->max_depth = depth;
Michael Clarka850f8e2007-03-13 08:26:26 +0000102 json_tokener_reset(tok);
103 return tok;
104}
105
Remi Collet197cb1d2012-11-27 09:01:45 +0100106struct json_tokener* json_tokener_new(void)
107{
108 return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
109}
110
Michael Clarka850f8e2007-03-13 08:26:26 +0000111void json_tokener_free(struct json_tokener *tok)
112{
113 json_tokener_reset(tok);
Remi Collet197cb1d2012-11-27 09:01:45 +0100114 if (tok->pb) printbuf_free(tok->pb);
115 if (tok->stack) free(tok->stack);
Michael Clarka850f8e2007-03-13 08:26:26 +0000116 free(tok);
117}
118
119static void json_tokener_reset_level(struct json_tokener *tok, int depth)
120{
121 tok->stack[depth].state = json_tokener_state_eatws;
122 tok->stack[depth].saved_state = json_tokener_state_start;
123 json_object_put(tok->stack[depth].current);
124 tok->stack[depth].current = NULL;
125 free(tok->stack[depth].obj_field_name);
126 tok->stack[depth].obj_field_name = NULL;
127}
128
129void json_tokener_reset(struct json_tokener *tok)
130{
131 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000132 if (!tok)
133 return;
134
Michael Clarka850f8e2007-03-13 08:26:26 +0000135 for(i = tok->depth; i >= 0; i--)
136 json_tokener_reset_level(tok, i);
137 tok->depth = 0;
138 tok->err = json_tokener_success;
139}
140
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000141struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000142{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500143 enum json_tokener_error jerr_ignored;
144 struct json_object* obj;
145 obj = json_tokener_parse_verbose(str, &jerr_ignored);
146 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000147}
148
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000149struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
150{
151 struct json_tokener* tok;
152 struct json_object* obj;
153
154 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500155 if (!tok)
156 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000157 obj = json_tokener_parse_ex(tok, str, -1);
158 *error = tok->err;
159 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500160 if (obj != NULL)
161 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000162 obj = NULL;
163 }
164
165 json_tokener_free(tok);
166 return obj;
167}
168
Michael Clarka850f8e2007-03-13 08:26:26 +0000169
#if !HAVE_STRNDUP
/* CAW: compliant version of strndup()
 *
 * Duplicate at most n bytes of str into freshly malloc'ed, NUL-terminated
 * storage.  Only the first n bytes of str are ever examined, so str does
 * not have to be NUL-terminated as long as n bytes are readable.
 *
 * Returns NULL when str is NULL or the allocation fails; the caller owns
 * (and must free) the returned buffer.
 */
char* strndup(const char* str, size_t n)
{
  const char *terminator;
  size_t copy_len;
  char *dup;

  if (!str)
    return NULL;

  /* Bounded scan: strlen() would run past n on unterminated input. */
  terminator = (const char *)memchr(str, '\0', n);
  copy_len = terminator ? (size_t)(terminator - str) : n;

  dup = (char *)malloc(copy_len + 1);
  if (!dup)
    return NULL;

  memcpy(dup, str, copy_len);
  dup[copy_len] = '\0';
  return dup;
}
#endif
190
Michael Clarka850f8e2007-03-13 08:26:26 +0000191
192#define state tok->stack[tok->depth].state
193#define saved_state tok->stack[tok->depth].saved_state
194#define current tok->stack[tok->depth].current
195#define obj_field_name tok->stack[tok->depth].obj_field_name
196
Michael Clark95f55a72009-04-27 08:16:58 +0000197/* Optimization:
198 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
199 * iterating character-by character. A large performance boost is
200 * achieved by using tighter loops to locally handle units such as
201 * comments and strings. Loops that handle an entire token within
202 * their scope also gather entire strings and pass them to
203 * printbuf_memappend() in a single call, rather than calling
204 * printbuf_memappend() one char at a time.
205 *
206 * POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
207 * common to both the main loop and the tighter loops.
208 */
209
210/* POP_CHAR(dest, tok) macro:
211 * Not really a pop()...peeks at the current char and stores it in dest.
212 * Returns 1 on success, sets tok->err and returns 0 if no more chars.
213 * Implicit inputs: str, len vars
214 */
215#define POP_CHAR(dest, tok) \
216 (((tok)->char_offset == len) ? \
217 (((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
218 (((tok)->err = json_tokener_success), 0) \
219 : \
220 (((tok)->err = json_tokener_continue), 0) \
221 ) : \
222 (((dest) = *str), 1) \
223 )
224
225/* ADVANCE_CHAR() macro:
226 * Incrementes str & tok->char_offset.
227 * For convenience of existing conditionals, returns the old value of c (0 on eof)
228 * Implicit inputs: c var
229 */
230#define ADVANCE_CHAR(str, tok) \
231 ( ++(str), ((tok)->char_offset)++, c)
232
Brent Miller126ad952009-08-20 06:50:22 +0000233
Michael Clark95f55a72009-04-27 08:16:58 +0000234/* End optimization macro defs */
235
236
Michael Clarka850f8e2007-03-13 08:26:26 +0000237struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000238 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000239{
Michael Clarka850f8e2007-03-13 08:26:26 +0000240 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000241 char c = '\1';
Michael Clarkf0d08882007-03-13 08:26:18 +0000242
Michael Clarka850f8e2007-03-13 08:26:26 +0000243 tok->char_offset = 0;
244 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000245
Michael Clark95f55a72009-04-27 08:16:58 +0000246 while (POP_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000247
Michael Clarka850f8e2007-03-13 08:26:26 +0000248 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000249 switch(state) {
250
251 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000252 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000253 while (isspace((int)c)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000254 if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
255 goto out;
256 }
257 if(c == '/') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000258 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000259 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000260 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000261 } else {
262 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000263 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000264 }
265 break;
266
267 case json_tokener_state_start:
268 switch(c) {
269 case '{':
270 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000271 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000272 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000273 break;
274 case '[':
275 state = json_tokener_state_eatws;
276 saved_state = json_tokener_state_array;
277 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000278 break;
279 case 'N':
280 case 'n':
281 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000282 printbuf_reset(tok->pb);
283 tok->st_pos = 0;
284 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000285 case '"':
286 case '\'':
Michael Clarkf0d08882007-03-13 08:26:18 +0000287 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000288 printbuf_reset(tok->pb);
289 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000290 break;
291 case 'T':
292 case 't':
293 case 'F':
294 case 'f':
295 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000296 printbuf_reset(tok->pb);
297 tok->st_pos = 0;
298 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000299#if defined(__GNUC__)
300 case '0' ... '9':
301#else
302 case '0':
303 case '1':
304 case '2':
305 case '3':
306 case '4':
307 case '5':
308 case '6':
309 case '7':
310 case '8':
311 case '9':
312#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000313 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000314 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000315 printbuf_reset(tok->pb);
316 tok->is_double = 0;
317 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000318 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000319 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000320 goto out;
321 }
322 break;
323
324 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000325 if(tok->depth == 0) goto out;
326 obj = json_object_get(current);
327 json_tokener_reset_level(tok, tok->depth);
328 tok->depth--;
329 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000330
331 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000332 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000333 if(strncasecmp(json_null_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600334 json_min(tok->st_pos+1, (int)strlen(json_null_str))) == 0) {
335 if(tok->st_pos == (int)strlen(json_null_str)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000336 current = NULL;
337 saved_state = json_tokener_state_finish;
338 state = json_tokener_state_eatws;
339 goto redo_char;
340 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000341 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000342 tok->err = json_tokener_error_parse_null;
343 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000344 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000345 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000346 break;
347
348 case json_tokener_state_comment_start:
349 if(c == '*') {
350 state = json_tokener_state_comment;
351 } else if(c == '/') {
352 state = json_tokener_state_comment_eol;
353 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000354 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000355 goto out;
356 }
Michael Clark95f55a72009-04-27 08:16:58 +0000357 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000358 break;
359
360 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000361 {
362 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000363 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000364 while(c != '*') {
365 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
366 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
367 goto out;
368 }
369 }
370 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
371 state = json_tokener_state_comment_end;
372 }
373 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000374
375 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000376 {
377 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000378 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000379 while(c != '\n') {
380 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
381 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
382 goto out;
383 }
384 }
385 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000386 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000387 state = json_tokener_state_eatws;
388 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000389 break;
390
391 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000392 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000393 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000394 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000395 state = json_tokener_state_eatws;
396 } else {
397 state = json_tokener_state_comment;
398 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000399 break;
400
401 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000402 {
403 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000404 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000405 while(1) {
406 if(c == tok->quote_char) {
407 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Eric Haszlakiewicz4e4af932012-12-09 16:32:11 -0600408 current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
Michael Clark95f55a72009-04-27 08:16:58 +0000409 saved_state = json_tokener_state_finish;
410 state = json_tokener_state_eatws;
411 break;
412 } else if(c == '\\') {
413 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
414 saved_state = json_tokener_state_string;
415 state = json_tokener_state_string_escape;
416 break;
417 }
418 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
419 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
420 goto out;
421 }
422 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000423 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000424 break;
425
426 case json_tokener_state_string_escape:
427 switch(c) {
428 case '"':
429 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000430 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000431 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000432 state = saved_state;
433 break;
434 case 'b':
435 case 'n':
436 case 'r':
437 case 't':
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500438 case 'f':
Michael Clark95f55a72009-04-27 08:16:58 +0000439 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
440 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
441 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
442 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500443 else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000444 state = saved_state;
445 break;
446 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000447 tok->ucs_char = 0;
448 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000449 state = json_tokener_state_escape_unicode;
450 break;
451 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000452 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000453 goto out;
454 }
455 break;
456
457 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000458 {
Brent Miller126ad952009-08-20 06:50:22 +0000459 unsigned int got_hi_surrogate = 0;
460
461 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000462 while(1) {
463 if(strchr(json_hex_chars, c)) {
464 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
465 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000466 unsigned char unescaped_utf[4];
467
468 if (got_hi_surrogate) {
469 if (IS_LOW_SURROGATE(tok->ucs_char)) {
470 /* Recalculate the ucs_char, then fall thru to process normally */
471 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
472 } else {
473 /* Hi surrogate was not followed by a low surrogate */
474 /* Replace the hi and process the rest normally */
475 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
476 }
477 got_hi_surrogate = 0;
478 }
479
Michael Clark95f55a72009-04-27 08:16:58 +0000480 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000481 unescaped_utf[0] = tok->ucs_char;
482 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000483 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000484 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
485 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
486 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
487 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
488 /* Got a high surrogate. Remember it and look for the
489 * the beginning of another sequence, which should be the
490 * low surrogate.
491 */
492 got_hi_surrogate = tok->ucs_char;
493 /* Not at end, and the next two chars should be "\u" */
494 if ((tok->char_offset+1 != len) &&
495 (tok->char_offset+2 != len) &&
496 (str[1] == '\\') &&
497 (str[2] == 'u'))
498 {
499 ADVANCE_CHAR(str, tok);
500 ADVANCE_CHAR(str, tok);
501
502 /* Advance to the first char of the next sequence and
503 * continue processing with the next sequence.
504 */
505 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
506 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
507 goto out;
508 }
509 tok->ucs_char = 0;
510 tok->st_pos = 0;
511 continue; /* other json_tokener_state_escape_unicode */
512 } else {
513 /* Got a high surrogate without another sequence following
514 * it. Put a replacement char in for the hi surrogate
515 * and pretend we finished.
516 */
517 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
518 }
519 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
520 /* Got a low surrogate not preceded by a high */
521 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
522 } else if (tok->ucs_char < 0x10000) {
523 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
524 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
525 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
526 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
527 } else if (tok->ucs_char < 0x110000) {
528 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
529 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
530 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
531 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
532 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000533 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000534 /* Don't know what we got--insert the replacement char */
535 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
536 }
Michael Clark95f55a72009-04-27 08:16:58 +0000537 state = saved_state;
538 break;
539 }
540 } else {
541 tok->err = json_tokener_error_parse_string;
542 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000543 }
544 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
545 if (got_hi_surrogate) /* Clean up any pending chars */
546 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000547 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000548 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000549 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000550 }
551 break;
552
553 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000554 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000555 if(strncasecmp(json_true_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600556 json_min(tok->st_pos+1, (int)strlen(json_true_str))) == 0) {
557 if(tok->st_pos == (int)strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000558 current = json_object_new_boolean(1);
559 saved_state = json_tokener_state_finish;
560 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000561 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000562 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000563 } else if(strncasecmp(json_false_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600564 json_min(tok->st_pos+1, (int)strlen(json_false_str))) == 0) {
565 if(tok->st_pos == (int)strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000566 current = json_object_new_boolean(0);
567 saved_state = json_tokener_state_finish;
568 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000569 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000570 }
571 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000572 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000573 goto out;
574 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000575 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000576 break;
577
578 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000579 {
580 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000581 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000582 int case_len=0;
583 while(c && strchr(json_number_chars, c)) {
584 ++case_len;
Eric Haszlakiewiczf931f612012-04-24 22:17:13 -0500585 if(c == '.' || c == 'e' || c == 'E')
586 tok->is_double = 1;
Michael Clark95f55a72009-04-27 08:16:58 +0000587 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
588 printbuf_memappend_fast(tok->pb, case_start, case_len);
589 goto out;
590 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000591 }
Michael Clark95f55a72009-04-27 08:16:58 +0000592 if (case_len>0)
593 printbuf_memappend_fast(tok->pb, case_start, case_len);
594 }
595 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000596 int64_t num64;
597 double numd;
598 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
ehaszla252669c2010-12-07 18:15:35 +0000599 current = json_object_new_int64(num64);
Michael Clarkc4dceae2010-10-06 16:39:20 +0000600 } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
Michael Clark95f55a72009-04-27 08:16:58 +0000601 current = json_object_new_double(numd);
602 } else {
603 tok->err = json_tokener_error_parse_number;
604 goto out;
605 }
606 saved_state = json_tokener_state_finish;
607 state = json_tokener_state_eatws;
608 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000609 }
610 break;
611
612 case json_tokener_state_array:
613 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000614 saved_state = json_tokener_state_finish;
615 state = json_tokener_state_eatws;
616 } else {
Remi Collet197cb1d2012-11-27 09:01:45 +0100617 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000618 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000619 goto out;
620 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000621 state = json_tokener_state_array_add;
622 tok->depth++;
623 json_tokener_reset_level(tok, tok->depth);
624 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000625 }
626 break;
627
Michael Clarka850f8e2007-03-13 08:26:26 +0000628 case json_tokener_state_array_add:
629 json_object_array_add(current, obj);
630 saved_state = json_tokener_state_array_sep;
631 state = json_tokener_state_eatws;
632 goto redo_char;
633
Michael Clarkf0d08882007-03-13 08:26:18 +0000634 case json_tokener_state_array_sep:
635 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000636 saved_state = json_tokener_state_finish;
637 state = json_tokener_state_eatws;
638 } else if(c == ',') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000639 saved_state = json_tokener_state_array;
640 state = json_tokener_state_eatws;
641 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000642 tok->err = json_tokener_error_parse_array;
643 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000644 }
645 break;
646
Michael Clarkf0d08882007-03-13 08:26:18 +0000647 case json_tokener_state_object_field_start:
648 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000649 saved_state = json_tokener_state_finish;
650 state = json_tokener_state_eatws;
651 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000652 tok->quote_char = c;
653 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000654 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000655 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000656 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000657 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000658 }
659 break;
660
661 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000662 {
663 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000664 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000665 while(1) {
666 if(c == tok->quote_char) {
667 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
668 obj_field_name = strdup(tok->pb->buf);
669 saved_state = json_tokener_state_object_field_end;
670 state = json_tokener_state_eatws;
671 break;
672 } else if(c == '\\') {
673 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
674 saved_state = json_tokener_state_object_field;
675 state = json_tokener_state_string_escape;
676 break;
677 }
678 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
679 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
680 goto out;
681 }
682 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000683 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000684 break;
685
686 case json_tokener_state_object_field_end:
687 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000688 saved_state = json_tokener_state_object_value;
689 state = json_tokener_state_eatws;
690 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000691 tok->err = json_tokener_error_parse_object_key_sep;
692 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000693 }
694 break;
695
696 case json_tokener_state_object_value:
Remi Collet197cb1d2012-11-27 09:01:45 +0100697 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000698 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000699 goto out;
700 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000701 state = json_tokener_state_object_value_add;
702 tok->depth++;
703 json_tokener_reset_level(tok, tok->depth);
704 goto redo_char;
705
706 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000707 json_object_object_add(current, obj_field_name, obj);
708 free(obj_field_name);
709 obj_field_name = NULL;
710 saved_state = json_tokener_state_object_sep;
711 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000712 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000713
714 case json_tokener_state_object_sep:
715 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000716 saved_state = json_tokener_state_finish;
717 state = json_tokener_state_eatws;
718 } else if(c == ',') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000719 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000720 state = json_tokener_state_eatws;
721 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000722 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000723 goto out;
724 }
725 break;
726
727 }
Michael Clark95f55a72009-04-27 08:16:58 +0000728 if (!ADVANCE_CHAR(str, tok))
729 goto out;
730 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000731
732 out:
Michael Clark95f55a72009-04-27 08:16:58 +0000733 if (!c) { /* We hit an eof char (0) */
734 if(state != json_tokener_state_finish &&
735 saved_state != json_tokener_state_finish)
736 tok->err = json_tokener_error_parse_eof;
737 }
738
Eric Haszlakiewiczd809fa62012-03-31 22:53:43 -0500739 if (tok->err == json_tokener_success)
740 {
741 json_object *ret = json_object_get(current);
742 int ii;
743
744 /* Partially reset, so we parse additional objects on subsequent calls. */
745 for(ii = tok->depth; ii >= 0; ii--)
746 json_tokener_reset_level(tok, ii);
747 return ret;
748 }
749
Michael Clarkdfaf6702007-10-25 02:26:00 +0000750 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000751 json_tokener_errors[tok->err], tok->char_offset);
752 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000753}