blob: 47768f41dea31784a361fe3de0fd3bd75b5809ab [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Mateusz Loskota6f39a32012-05-21 23:22:36 +010034#if !HAVE_STRDUP && defined(_MSC_VER)
35 /* MSC has the version as _strdup */
36# define strdup _strdup
37#elif !HAVE_STRDUP
38# error You do not have strdup on your system.
39#endif /* HAVE_STRDUP */
40
Michael Clark837240f2007-03-13 08:26:25 +000041#if !HAVE_STRNCASECMP && defined(_MSC_VER)
42 /* MSC has the version as _strnicmp */
43# define strncasecmp _strnicmp
44#elif !HAVE_STRNCASECMP
45# error You do not have strncasecmp on your system.
46#endif /* HAVE_STRNCASECMP */
47
Michael Clarka850f8e2007-03-13 08:26:26 +000048static const char* json_null_str = "null";
49static const char* json_true_str = "true";
50static const char* json_false_str = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000051
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060052// XXX after v0.10 this array will become static:
Michael Clarka850f8e2007-03-13 08:26:26 +000053const char* json_tokener_errors[] = {
54 "success",
55 "continue",
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -050056 "nesting too deep",
Michael Clarka850f8e2007-03-13 08:26:26 +000057 "unexpected end of data",
58 "unexpected character",
59 "null expected",
60 "boolean expected",
61 "number expected",
62 "array value separator ',' expected",
63 "quoted object property name expected",
64 "object property name separator ':' expected",
65 "object value separator ',' expected",
66 "invalid string sequence",
67 "expected comment",
68};
69
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060070const char *json_tokener_error_desc(enum json_tokener_error jerr)
71{
72 if (jerr < 0 || jerr > sizeof(json_tokener_errors))
73 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
74 return json_tokener_errors[jerr];
75}
76
77enum json_tokener_error json_tokener_get_error(json_tokener *tok)
78{
79 return tok->err;
80}
81
Brent Miller126ad952009-08-20 06:50:22 +000082/* Stuff for decoding unicode sequences */
83#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
84#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00)
85#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
86static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
87
Michael Clarka850f8e2007-03-13 08:26:26 +000088
Michael Clarke8de0782009-02-25 01:45:00 +000089struct json_tokener* json_tokener_new(void)
Michael Clarkf0d08882007-03-13 08:26:18 +000090{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000091 struct json_tokener *tok;
92
93 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000094 if (!tok) return NULL;
Michael Clarka850f8e2007-03-13 08:26:26 +000095 tok->pb = printbuf_new();
96 json_tokener_reset(tok);
97 return tok;
98}
99
100void json_tokener_free(struct json_tokener *tok)
101{
102 json_tokener_reset(tok);
103 if(tok) printbuf_free(tok->pb);
104 free(tok);
105}
106
107static void json_tokener_reset_level(struct json_tokener *tok, int depth)
108{
109 tok->stack[depth].state = json_tokener_state_eatws;
110 tok->stack[depth].saved_state = json_tokener_state_start;
111 json_object_put(tok->stack[depth].current);
112 tok->stack[depth].current = NULL;
113 free(tok->stack[depth].obj_field_name);
114 tok->stack[depth].obj_field_name = NULL;
115}
116
117void json_tokener_reset(struct json_tokener *tok)
118{
119 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000120 if (!tok)
121 return;
122
Michael Clarka850f8e2007-03-13 08:26:26 +0000123 for(i = tok->depth; i >= 0; i--)
124 json_tokener_reset_level(tok, i);
125 tok->depth = 0;
126 tok->err = json_tokener_success;
127}
128
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000129struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000130{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500131 enum json_tokener_error jerr_ignored;
132 struct json_object* obj;
133 obj = json_tokener_parse_verbose(str, &jerr_ignored);
134 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000135}
136
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000137struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
138{
139 struct json_tokener* tok;
140 struct json_object* obj;
141
142 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500143 if (!tok)
144 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000145 obj = json_tokener_parse_ex(tok, str, -1);
146 *error = tok->err;
147 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500148 if (obj != NULL)
149 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000150 obj = NULL;
151 }
152
153 json_tokener_free(tok);
154 return obj;
155}
156
Michael Clarka850f8e2007-03-13 08:26:26 +0000157
#if !HAVE_STRNDUP
/* CAW: compliant version of strndup() */
/*
 * Fallback strndup() for platforms that lack one.
 *
 * Copies at most n bytes of str into freshly malloc()ed storage and
 * always NUL-terminates the copy.  Never reads more than n bytes of str,
 * so str does not have to be NUL-terminated as long as n bytes are
 * readable.
 *
 * Returns the new string (release with free()), or NULL when str is NULL
 * or the allocation fails.
 */
char* strndup(const char* str, size_t n)
{
  const char *terminator;
  size_t copy_len;
  char *s;

  if(!str)
    return NULL;

  /* Look for the terminator inside the first n bytes only; calling
   * strlen() here would run past n on unterminated input. */
  terminator = (const char*)memchr(str, '\0', n);
  copy_len = terminator ? (size_t)(terminator - str) : n;

  s = (char*)malloc(copy_len + 1);
  if(s) {
    memcpy(s, str, copy_len);
    s[copy_len] = '\0';
  }

  return s;
}
#endif
178
Michael Clarka850f8e2007-03-13 08:26:26 +0000179
180#define state tok->stack[tok->depth].state
181#define saved_state tok->stack[tok->depth].saved_state
182#define current tok->stack[tok->depth].current
183#define obj_field_name tok->stack[tok->depth].obj_field_name
184
Michael Clark95f55a72009-04-27 08:16:58 +0000185/* Optimization:
186 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
187 * iterating character-by character. A large performance boost is
188 * achieved by using tighter loops to locally handle units such as
189 * comments and strings. Loops that handle an entire token within
190 * their scope also gather entire strings and pass them to
191 * printbuf_memappend() in a single call, rather than calling
192 * printbuf_memappend() one char at a time.
193 *
194 * POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
195 * common to both the main loop and the tighter loops.
196 */
197
198/* POP_CHAR(dest, tok) macro:
199 * Not really a pop()...peeks at the current char and stores it in dest.
200 * Returns 1 on success, sets tok->err and returns 0 if no more chars.
201 * Implicit inputs: str, len vars
202 */
203#define POP_CHAR(dest, tok) \
204 (((tok)->char_offset == len) ? \
205 (((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
206 (((tok)->err = json_tokener_success), 0) \
207 : \
208 (((tok)->err = json_tokener_continue), 0) \
209 ) : \
210 (((dest) = *str), 1) \
211 )
212
213/* ADVANCE_CHAR() macro:
 * Increments str & tok->char_offset.
215 * For convenience of existing conditionals, returns the old value of c (0 on eof)
216 * Implicit inputs: c var
217 */
218#define ADVANCE_CHAR(str, tok) \
219 ( ++(str), ((tok)->char_offset)++, c)
220
Brent Miller126ad952009-08-20 06:50:22 +0000221
Michael Clark95f55a72009-04-27 08:16:58 +0000222/* End optimization macro defs */
223
224
Michael Clarka850f8e2007-03-13 08:26:26 +0000225struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000226 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000227{
Michael Clarka850f8e2007-03-13 08:26:26 +0000228 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000229 char c = '\1';
Michael Clarkf0d08882007-03-13 08:26:18 +0000230
Michael Clarka850f8e2007-03-13 08:26:26 +0000231 tok->char_offset = 0;
232 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000233
Michael Clark95f55a72009-04-27 08:16:58 +0000234 while (POP_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000235
Michael Clarka850f8e2007-03-13 08:26:26 +0000236 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000237 switch(state) {
238
239 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000240 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000241 while (isspace((int)c)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000242 if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
243 goto out;
244 }
245 if(c == '/') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000246 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000247 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000248 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000249 } else {
250 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000251 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000252 }
253 break;
254
255 case json_tokener_state_start:
256 switch(c) {
257 case '{':
258 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000259 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000260 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000261 break;
262 case '[':
263 state = json_tokener_state_eatws;
264 saved_state = json_tokener_state_array;
265 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000266 break;
267 case 'N':
268 case 'n':
269 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000270 printbuf_reset(tok->pb);
271 tok->st_pos = 0;
272 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000273 case '"':
274 case '\'':
Michael Clarkf0d08882007-03-13 08:26:18 +0000275 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000276 printbuf_reset(tok->pb);
277 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000278 break;
279 case 'T':
280 case 't':
281 case 'F':
282 case 'f':
283 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000284 printbuf_reset(tok->pb);
285 tok->st_pos = 0;
286 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000287#if defined(__GNUC__)
288 case '0' ... '9':
289#else
290 case '0':
291 case '1':
292 case '2':
293 case '3':
294 case '4':
295 case '5':
296 case '6':
297 case '7':
298 case '8':
299 case '9':
300#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000301 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000302 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000303 printbuf_reset(tok->pb);
304 tok->is_double = 0;
305 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000306 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000307 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000308 goto out;
309 }
310 break;
311
312 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000313 if(tok->depth == 0) goto out;
314 obj = json_object_get(current);
315 json_tokener_reset_level(tok, tok->depth);
316 tok->depth--;
317 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000318
319 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000320 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000321 if(strncasecmp(json_null_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000322 json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000323 if(tok->st_pos == strlen(json_null_str)) {
324 current = NULL;
325 saved_state = json_tokener_state_finish;
326 state = json_tokener_state_eatws;
327 goto redo_char;
328 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000329 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000330 tok->err = json_tokener_error_parse_null;
331 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000332 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000333 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000334 break;
335
336 case json_tokener_state_comment_start:
337 if(c == '*') {
338 state = json_tokener_state_comment;
339 } else if(c == '/') {
340 state = json_tokener_state_comment_eol;
341 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000342 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000343 goto out;
344 }
Michael Clark95f55a72009-04-27 08:16:58 +0000345 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000346 break;
347
348 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000349 {
350 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000351 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000352 while(c != '*') {
353 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
354 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
355 goto out;
356 }
357 }
358 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
359 state = json_tokener_state_comment_end;
360 }
361 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000362
363 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000364 {
365 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000366 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000367 while(c != '\n') {
368 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
369 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
370 goto out;
371 }
372 }
373 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000374 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000375 state = json_tokener_state_eatws;
376 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000377 break;
378
379 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000380 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000381 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000382 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000383 state = json_tokener_state_eatws;
384 } else {
385 state = json_tokener_state_comment;
386 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000387 break;
388
389 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000390 {
391 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000392 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000393 while(1) {
394 if(c == tok->quote_char) {
395 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
396 current = json_object_new_string(tok->pb->buf);
397 saved_state = json_tokener_state_finish;
398 state = json_tokener_state_eatws;
399 break;
400 } else if(c == '\\') {
401 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
402 saved_state = json_tokener_state_string;
403 state = json_tokener_state_string_escape;
404 break;
405 }
406 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
407 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
408 goto out;
409 }
410 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000411 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000412 break;
413
414 case json_tokener_state_string_escape:
415 switch(c) {
416 case '"':
417 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000418 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000419 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000420 state = saved_state;
421 break;
422 case 'b':
423 case 'n':
424 case 'r':
425 case 't':
Michael Clark95f55a72009-04-27 08:16:58 +0000426 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
427 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
428 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
429 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000430 state = saved_state;
431 break;
432 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000433 tok->ucs_char = 0;
434 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000435 state = json_tokener_state_escape_unicode;
436 break;
437 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000438 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000439 goto out;
440 }
441 break;
442
443 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000444 {
Brent Miller126ad952009-08-20 06:50:22 +0000445 unsigned int got_hi_surrogate = 0;
446
447 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000448 while(1) {
449 if(strchr(json_hex_chars, c)) {
450 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
451 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000452 unsigned char unescaped_utf[4];
453
454 if (got_hi_surrogate) {
455 if (IS_LOW_SURROGATE(tok->ucs_char)) {
456 /* Recalculate the ucs_char, then fall thru to process normally */
457 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
458 } else {
459 /* Hi surrogate was not followed by a low surrogate */
460 /* Replace the hi and process the rest normally */
461 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
462 }
463 got_hi_surrogate = 0;
464 }
465
Michael Clark95f55a72009-04-27 08:16:58 +0000466 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000467 unescaped_utf[0] = tok->ucs_char;
468 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000469 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000470 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
471 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
472 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
473 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
474 /* Got a high surrogate. Remember it and look for the
475 * the beginning of another sequence, which should be the
476 * low surrogate.
477 */
478 got_hi_surrogate = tok->ucs_char;
479 /* Not at end, and the next two chars should be "\u" */
480 if ((tok->char_offset+1 != len) &&
481 (tok->char_offset+2 != len) &&
482 (str[1] == '\\') &&
483 (str[2] == 'u'))
484 {
485 ADVANCE_CHAR(str, tok);
486 ADVANCE_CHAR(str, tok);
487
488 /* Advance to the first char of the next sequence and
489 * continue processing with the next sequence.
490 */
491 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
492 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
493 goto out;
494 }
495 tok->ucs_char = 0;
496 tok->st_pos = 0;
497 continue; /* other json_tokener_state_escape_unicode */
498 } else {
499 /* Got a high surrogate without another sequence following
500 * it. Put a replacement char in for the hi surrogate
501 * and pretend we finished.
502 */
503 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
504 }
505 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
506 /* Got a low surrogate not preceded by a high */
507 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
508 } else if (tok->ucs_char < 0x10000) {
509 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
510 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
511 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
512 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
513 } else if (tok->ucs_char < 0x110000) {
514 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
515 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
516 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
517 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
518 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000519 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000520 /* Don't know what we got--insert the replacement char */
521 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
522 }
Michael Clark95f55a72009-04-27 08:16:58 +0000523 state = saved_state;
524 break;
525 }
526 } else {
527 tok->err = json_tokener_error_parse_string;
528 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000529 }
530 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
531 if (got_hi_surrogate) /* Clean up any pending chars */
532 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000533 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000534 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000535 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000536 }
537 break;
538
539 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000540 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000541 if(strncasecmp(json_true_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000542 json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000543 if(tok->st_pos == strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000544 current = json_object_new_boolean(1);
545 saved_state = json_tokener_state_finish;
546 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000547 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000548 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000549 } else if(strncasecmp(json_false_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000550 json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000551 if(tok->st_pos == strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000552 current = json_object_new_boolean(0);
553 saved_state = json_tokener_state_finish;
554 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000555 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000556 }
557 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000558 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000559 goto out;
560 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000561 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000562 break;
563
564 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000565 {
566 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000567 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000568 int case_len=0;
569 while(c && strchr(json_number_chars, c)) {
570 ++case_len;
Eric Haszlakiewiczf931f612012-04-24 22:17:13 -0500571 if(c == '.' || c == 'e' || c == 'E')
572 tok->is_double = 1;
Michael Clark95f55a72009-04-27 08:16:58 +0000573 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
574 printbuf_memappend_fast(tok->pb, case_start, case_len);
575 goto out;
576 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000577 }
Michael Clark95f55a72009-04-27 08:16:58 +0000578 if (case_len>0)
579 printbuf_memappend_fast(tok->pb, case_start, case_len);
580 }
581 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000582 int64_t num64;
583 double numd;
584 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
ehaszla252669c2010-12-07 18:15:35 +0000585 current = json_object_new_int64(num64);
Michael Clarkc4dceae2010-10-06 16:39:20 +0000586 } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
Michael Clark95f55a72009-04-27 08:16:58 +0000587 current = json_object_new_double(numd);
588 } else {
589 tok->err = json_tokener_error_parse_number;
590 goto out;
591 }
592 saved_state = json_tokener_state_finish;
593 state = json_tokener_state_eatws;
594 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000595 }
596 break;
597
598 case json_tokener_state_array:
599 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000600 saved_state = json_tokener_state_finish;
601 state = json_tokener_state_eatws;
602 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000603 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
604 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000605 goto out;
606 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000607 state = json_tokener_state_array_add;
608 tok->depth++;
609 json_tokener_reset_level(tok, tok->depth);
610 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000611 }
612 break;
613
Michael Clarka850f8e2007-03-13 08:26:26 +0000614 case json_tokener_state_array_add:
615 json_object_array_add(current, obj);
616 saved_state = json_tokener_state_array_sep;
617 state = json_tokener_state_eatws;
618 goto redo_char;
619
Michael Clarkf0d08882007-03-13 08:26:18 +0000620 case json_tokener_state_array_sep:
621 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000622 saved_state = json_tokener_state_finish;
623 state = json_tokener_state_eatws;
624 } else if(c == ',') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000625 saved_state = json_tokener_state_array;
626 state = json_tokener_state_eatws;
627 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000628 tok->err = json_tokener_error_parse_array;
629 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000630 }
631 break;
632
Michael Clarkf0d08882007-03-13 08:26:18 +0000633 case json_tokener_state_object_field_start:
634 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000635 saved_state = json_tokener_state_finish;
636 state = json_tokener_state_eatws;
637 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000638 tok->quote_char = c;
639 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000640 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000641 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000642 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000643 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000644 }
645 break;
646
647 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000648 {
649 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000650 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000651 while(1) {
652 if(c == tok->quote_char) {
653 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
654 obj_field_name = strdup(tok->pb->buf);
655 saved_state = json_tokener_state_object_field_end;
656 state = json_tokener_state_eatws;
657 break;
658 } else if(c == '\\') {
659 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
660 saved_state = json_tokener_state_object_field;
661 state = json_tokener_state_string_escape;
662 break;
663 }
664 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
665 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
666 goto out;
667 }
668 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000669 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000670 break;
671
672 case json_tokener_state_object_field_end:
673 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000674 saved_state = json_tokener_state_object_value;
675 state = json_tokener_state_eatws;
676 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000677 tok->err = json_tokener_error_parse_object_key_sep;
678 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000679 }
680 break;
681
682 case json_tokener_state_object_value:
Michael Clarka850f8e2007-03-13 08:26:26 +0000683 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
684 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000685 goto out;
686 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000687 state = json_tokener_state_object_value_add;
688 tok->depth++;
689 json_tokener_reset_level(tok, tok->depth);
690 goto redo_char;
691
692 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000693 json_object_object_add(current, obj_field_name, obj);
694 free(obj_field_name);
695 obj_field_name = NULL;
696 saved_state = json_tokener_state_object_sep;
697 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000698 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000699
700 case json_tokener_state_object_sep:
701 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000702 saved_state = json_tokener_state_finish;
703 state = json_tokener_state_eatws;
704 } else if(c == ',') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000705 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000706 state = json_tokener_state_eatws;
707 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000708 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000709 goto out;
710 }
711 break;
712
713 }
Michael Clark95f55a72009-04-27 08:16:58 +0000714 if (!ADVANCE_CHAR(str, tok))
715 goto out;
716 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000717
718 out:
Michael Clark95f55a72009-04-27 08:16:58 +0000719 if (!c) { /* We hit an eof char (0) */
720 if(state != json_tokener_state_finish &&
721 saved_state != json_tokener_state_finish)
722 tok->err = json_tokener_error_parse_eof;
723 }
724
Eric Haszlakiewiczd809fa62012-03-31 22:53:43 -0500725 if (tok->err == json_tokener_success)
726 {
727 json_object *ret = json_object_get(current);
728 int ii;
729
730 /* Partially reset, so we parse additional objects on subsequent calls. */
731 for(ii = tok->depth; ii >= 0; ii--)
732 json_tokener_reset_level(tok, ii);
733 return ret;
734 }
735
Michael Clarkdfaf6702007-10-25 02:26:00 +0000736 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000737 json_tokener_errors[tok->err], tok->char_offset);
738 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000739}