blob: 05357fbebaa34952c0acdad85a729b469374b2b4 [file] [log] [blame]
/*
 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
 *
 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
 * Michael Clark <michael@metaparadigm.com>
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the MIT license. See COPYING for details.
 *
 *
 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
 * The copyrights to the contents of this file are licensed under the MIT License
 * (http://www.opensource.org/licenses/mit-license.php)
 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Mateusz Loskota6f39a32012-05-21 23:22:36 +010034#if !HAVE_STRDUP && defined(_MSC_VER)
35 /* MSC has the version as _strdup */
36# define strdup _strdup
37#elif !HAVE_STRDUP
38# error You do not have strdup on your system.
39#endif /* HAVE_STRDUP */
40
Michael Clark837240f2007-03-13 08:26:25 +000041#if !HAVE_STRNCASECMP && defined(_MSC_VER)
42 /* MSC has the version as _strnicmp */
43# define strncasecmp _strnicmp
44#elif !HAVE_STRNCASECMP
45# error You do not have strncasecmp on your system.
46#endif /* HAVE_STRNCASECMP */
47
/* Literal spellings the tokener matches (case-insensitively) for the
 * null and boolean tokens. */
static const char *json_null_str  = "null";
static const char *json_true_str  = "true";
static const char *json_false_str = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000051
// XXX after v0.10 this array will become static:
/*
 * Human-readable message for each tokener error code.
 * json_tokener_error_desc() and the debug trace in json_tokener_parse_ex()
 * index this table directly with the error value, so the entries are
 * presumably kept in the same order as enum json_tokener_error
 * (declared in json_tokener.h -- confirm when adding a new code).
 */
const char *json_tokener_errors[] = {
  "success",
  "continue",
  "nesting too deep",
  "unexpected end of data",
  "unexpected character",
  "null expected",
  "boolean expected",
  "number expected",
  "array value separator ',' expected",
  "quoted object property name expected",
  "object property name separator ':' expected",
  "object value separator ',' expected",
  "invalid string sequence",
  "expected comment",
};
69
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060070const char *json_tokener_error_desc(enum json_tokener_error jerr)
71{
72 if (jerr < 0 || jerr > sizeof(json_tokener_errors))
73 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
74 return json_tokener_errors[jerr];
75}
76
77enum json_tokener_error json_tokener_get_error(json_tokener *tok)
78{
79 return tok->err;
80}
81
/* Stuff for decoding unicode sequences */

/*
 * Surrogate tests: true only for values in D800..DBFF (high) or
 * DC00..DFFF (low).  The mask covers all 32 bits so that a code point
 * already combined by DECODE_SURROGATE_PAIR (>= 0x10000, e.g. U+1D800)
 * is not mistaken for a lone surrogate when it is tested again.
 */
#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFFFFFC00) == 0xD800)
#define IS_LOW_SURROGATE(uc)  (((uc) & 0xFFFFFC00) == 0xDC00)

/* Combine a high/low surrogate pair into the supplementary code point:
 * ten payload bits from each half, offset by 0x10000. */
#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)

/* UTF-8 encoding of U+FFFD, emitted in place of invalid escapes. */
static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
87
Michael Clarka850f8e2007-03-13 08:26:26 +000088
Michael Clarke8de0782009-02-25 01:45:00 +000089struct json_tokener* json_tokener_new(void)
Michael Clarkf0d08882007-03-13 08:26:18 +000090{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000091 struct json_tokener *tok;
92
93 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000094 if (!tok) return NULL;
Michael Clarka850f8e2007-03-13 08:26:26 +000095 tok->pb = printbuf_new();
96 json_tokener_reset(tok);
97 return tok;
98}
99
100void json_tokener_free(struct json_tokener *tok)
101{
102 json_tokener_reset(tok);
103 if(tok) printbuf_free(tok->pb);
104 free(tok);
105}
106
107static void json_tokener_reset_level(struct json_tokener *tok, int depth)
108{
109 tok->stack[depth].state = json_tokener_state_eatws;
110 tok->stack[depth].saved_state = json_tokener_state_start;
111 json_object_put(tok->stack[depth].current);
112 tok->stack[depth].current = NULL;
113 free(tok->stack[depth].obj_field_name);
114 tok->stack[depth].obj_field_name = NULL;
115}
116
117void json_tokener_reset(struct json_tokener *tok)
118{
119 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000120 if (!tok)
121 return;
122
Michael Clarka850f8e2007-03-13 08:26:26 +0000123 for(i = tok->depth; i >= 0; i--)
124 json_tokener_reset_level(tok, i);
125 tok->depth = 0;
126 tok->err = json_tokener_success;
127}
128
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000129struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000130{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500131 enum json_tokener_error jerr_ignored;
132 struct json_object* obj;
133 obj = json_tokener_parse_verbose(str, &jerr_ignored);
134 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000135}
136
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000137struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
138{
139 struct json_tokener* tok;
140 struct json_object* obj;
141
142 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500143 if (!tok)
144 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000145 obj = json_tokener_parse_ex(tok, str, -1);
146 *error = tok->err;
147 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500148 if (obj != NULL)
149 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000150 obj = NULL;
151 }
152
153 json_tokener_free(tok);
154 return obj;
155}
156
Michael Clarka850f8e2007-03-13 08:26:26 +0000157
#if !HAVE_STRNDUP
/* CAW: compliant version of strndup() */
/**
 * Duplicate at most n bytes of str into a newly allocated, NUL-terminated
 * string.
 *
 * Only the first n bytes of str are examined, so str does not have to be
 * NUL-terminated as long as at least n bytes are readable.
 *
 * @param str  source bytes; NULL yields NULL
 * @param n    maximum number of bytes to copy
 * @return malloc()ed copy (caller frees), or NULL if str is NULL or the
 *         allocation fails
 */
char* strndup(const char* str, size_t n)
{
  const char *nul;
  size_t copy_len;
  char *s;

  if (!str)
    return NULL;

  /* Bounded search for the terminator: never read past str[n-1]. */
  nul = (const char*)memchr(str, '\0', n);
  copy_len = nul ? (size_t)(nul - str) : n;

  s = (char*)malloc(copy_len + 1);
  if (!s)
    return NULL;

  memcpy(s, str, copy_len);
  s[copy_len] = '\0';
  return s;
}
#endif
178
Michael Clarka850f8e2007-03-13 08:26:26 +0000179
/* Shorthand for the fields of the stack entry at the current depth.
 * They expand to lvalues and need a variable named `tok` in scope. */
#define state           (tok->stack[tok->depth].state)
#define saved_state     (tok->stack[tok->depth].saved_state)
#define current         (tok->stack[tok->depth].current)
#define obj_field_name  (tok->stack[tok->depth].obj_field_name)

/* Optimization:
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character by character.  A large performance boost is
 * achieved by using tighter loops to locally handle units such as
 * comments and strings.  Loops that handle an entire token within
 * their scope also gather entire strings and pass them to
 * printbuf_memappend() in a single call, rather than calling
 * printbuf_memappend() one char at a time.
 *
 * POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
 * common to both the main loop and the tighter loops.
 */

/* POP_CHAR(dest, tok) macro:
 *   Not really a pop()... peeks at the current char and stores it in dest.
 *   Returns 1 on success.  When char_offset has reached len it returns 0
 *   and sets tok->err: json_tokener_success if a complete top-level value
 *   has already been parsed (depth 0, eating whitespace before "finish"),
 *   json_tokener_continue otherwise.
 *   Implicit inputs:  str, len vars
 */
#define POP_CHAR(dest, tok)                                             \
  (((tok)->char_offset == len)                                          \
     ? (((tok)->depth == 0 &&                                           \
         state == json_tokener_state_eatws &&                           \
         saved_state == json_tokener_state_finish)                      \
          ? (((tok)->err = json_tokener_success), 0)                    \
          : (((tok)->err = json_tokener_continue), 0))                  \
     : (((dest) = *str), 1))

/* ADVANCE_CHAR() macro:
 *   Increments str & tok->char_offset.
 *   For convenience of existing conditionals, returns the old value of c (0 on eof)
 *   Implicit inputs:  c var
 */
#define ADVANCE_CHAR(str, tok) \
  ( ++(str), ((tok)->char_offset)++, c)


/* End optimization macro defs */
223
224
Michael Clarka850f8e2007-03-13 08:26:26 +0000225struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000226 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000227{
Michael Clarka850f8e2007-03-13 08:26:26 +0000228 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000229 char c = '\1';
Michael Clarkf0d08882007-03-13 08:26:18 +0000230
Michael Clarka850f8e2007-03-13 08:26:26 +0000231 tok->char_offset = 0;
232 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000233
Michael Clark95f55a72009-04-27 08:16:58 +0000234 while (POP_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000235
Michael Clarka850f8e2007-03-13 08:26:26 +0000236 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000237 switch(state) {
238
239 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000240 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000241 while (isspace((int)c)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000242 if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
243 goto out;
244 }
245 if(c == '/') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000246 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000247 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000248 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000249 } else {
250 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000251 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000252 }
253 break;
254
255 case json_tokener_state_start:
256 switch(c) {
257 case '{':
258 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000259 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000260 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000261 break;
262 case '[':
263 state = json_tokener_state_eatws;
264 saved_state = json_tokener_state_array;
265 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000266 break;
267 case 'N':
268 case 'n':
269 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000270 printbuf_reset(tok->pb);
271 tok->st_pos = 0;
272 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000273 case '"':
274 case '\'':
Michael Clarkf0d08882007-03-13 08:26:18 +0000275 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000276 printbuf_reset(tok->pb);
277 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000278 break;
279 case 'T':
280 case 't':
281 case 'F':
282 case 'f':
283 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000284 printbuf_reset(tok->pb);
285 tok->st_pos = 0;
286 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000287#if defined(__GNUC__)
288 case '0' ... '9':
289#else
290 case '0':
291 case '1':
292 case '2':
293 case '3':
294 case '4':
295 case '5':
296 case '6':
297 case '7':
298 case '8':
299 case '9':
300#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000301 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000302 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000303 printbuf_reset(tok->pb);
304 tok->is_double = 0;
305 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000306 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000307 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000308 goto out;
309 }
310 break;
311
312 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000313 if(tok->depth == 0) goto out;
314 obj = json_object_get(current);
315 json_tokener_reset_level(tok, tok->depth);
316 tok->depth--;
317 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000318
319 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000320 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000321 if(strncasecmp(json_null_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000322 json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000323 if(tok->st_pos == strlen(json_null_str)) {
324 current = NULL;
325 saved_state = json_tokener_state_finish;
326 state = json_tokener_state_eatws;
327 goto redo_char;
328 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000329 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000330 tok->err = json_tokener_error_parse_null;
331 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000332 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000333 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000334 break;
335
336 case json_tokener_state_comment_start:
337 if(c == '*') {
338 state = json_tokener_state_comment;
339 } else if(c == '/') {
340 state = json_tokener_state_comment_eol;
341 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000342 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000343 goto out;
344 }
Michael Clark95f55a72009-04-27 08:16:58 +0000345 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000346 break;
347
348 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000349 {
350 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000351 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000352 while(c != '*') {
353 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
354 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
355 goto out;
356 }
357 }
358 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
359 state = json_tokener_state_comment_end;
360 }
361 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000362
363 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000364 {
365 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000366 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000367 while(c != '\n') {
368 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
369 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
370 goto out;
371 }
372 }
373 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000374 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000375 state = json_tokener_state_eatws;
376 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000377 break;
378
379 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000380 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000381 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000382 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000383 state = json_tokener_state_eatws;
384 } else {
385 state = json_tokener_state_comment;
386 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000387 break;
388
389 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000390 {
391 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000392 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000393 while(1) {
394 if(c == tok->quote_char) {
395 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Eric Haszlakiewicz4e4af932012-12-09 16:32:11 -0600396 current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
Michael Clark95f55a72009-04-27 08:16:58 +0000397 saved_state = json_tokener_state_finish;
398 state = json_tokener_state_eatws;
399 break;
400 } else if(c == '\\') {
401 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
402 saved_state = json_tokener_state_string;
403 state = json_tokener_state_string_escape;
404 break;
405 }
406 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
407 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
408 goto out;
409 }
410 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000411 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000412 break;
413
414 case json_tokener_state_string_escape:
415 switch(c) {
416 case '"':
417 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000418 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000419 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000420 state = saved_state;
421 break;
422 case 'b':
423 case 'n':
424 case 'r':
425 case 't':
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500426 case 'f':
Michael Clark95f55a72009-04-27 08:16:58 +0000427 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
428 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
429 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
430 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500431 else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000432 state = saved_state;
433 break;
434 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000435 tok->ucs_char = 0;
436 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000437 state = json_tokener_state_escape_unicode;
438 break;
439 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000440 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000441 goto out;
442 }
443 break;
444
445 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000446 {
Brent Miller126ad952009-08-20 06:50:22 +0000447 unsigned int got_hi_surrogate = 0;
448
449 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000450 while(1) {
451 if(strchr(json_hex_chars, c)) {
452 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
453 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000454 unsigned char unescaped_utf[4];
455
456 if (got_hi_surrogate) {
457 if (IS_LOW_SURROGATE(tok->ucs_char)) {
458 /* Recalculate the ucs_char, then fall thru to process normally */
459 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
460 } else {
461 /* Hi surrogate was not followed by a low surrogate */
462 /* Replace the hi and process the rest normally */
463 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
464 }
465 got_hi_surrogate = 0;
466 }
467
Michael Clark95f55a72009-04-27 08:16:58 +0000468 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000469 unescaped_utf[0] = tok->ucs_char;
470 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000471 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000472 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
473 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
474 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
475 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
476 /* Got a high surrogate. Remember it and look for the
477 * the beginning of another sequence, which should be the
478 * low surrogate.
479 */
480 got_hi_surrogate = tok->ucs_char;
481 /* Not at end, and the next two chars should be "\u" */
482 if ((tok->char_offset+1 != len) &&
483 (tok->char_offset+2 != len) &&
484 (str[1] == '\\') &&
485 (str[2] == 'u'))
486 {
487 ADVANCE_CHAR(str, tok);
488 ADVANCE_CHAR(str, tok);
489
490 /* Advance to the first char of the next sequence and
491 * continue processing with the next sequence.
492 */
493 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
494 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
495 goto out;
496 }
497 tok->ucs_char = 0;
498 tok->st_pos = 0;
499 continue; /* other json_tokener_state_escape_unicode */
500 } else {
501 /* Got a high surrogate without another sequence following
502 * it. Put a replacement char in for the hi surrogate
503 * and pretend we finished.
504 */
505 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
506 }
507 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
508 /* Got a low surrogate not preceded by a high */
509 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
510 } else if (tok->ucs_char < 0x10000) {
511 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
512 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
513 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
514 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
515 } else if (tok->ucs_char < 0x110000) {
516 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
517 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
518 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
519 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
520 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000521 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000522 /* Don't know what we got--insert the replacement char */
523 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
524 }
Michael Clark95f55a72009-04-27 08:16:58 +0000525 state = saved_state;
526 break;
527 }
528 } else {
529 tok->err = json_tokener_error_parse_string;
530 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000531 }
532 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
533 if (got_hi_surrogate) /* Clean up any pending chars */
534 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000535 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000536 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000537 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000538 }
539 break;
540
541 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000542 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000543 if(strncasecmp(json_true_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000544 json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000545 if(tok->st_pos == strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000546 current = json_object_new_boolean(1);
547 saved_state = json_tokener_state_finish;
548 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000549 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000550 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000551 } else if(strncasecmp(json_false_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000552 json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000553 if(tok->st_pos == strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000554 current = json_object_new_boolean(0);
555 saved_state = json_tokener_state_finish;
556 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000557 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000558 }
559 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000560 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000561 goto out;
562 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000563 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000564 break;
565
566 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000567 {
568 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000569 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000570 int case_len=0;
571 while(c && strchr(json_number_chars, c)) {
572 ++case_len;
Eric Haszlakiewiczf931f612012-04-24 22:17:13 -0500573 if(c == '.' || c == 'e' || c == 'E')
574 tok->is_double = 1;
Michael Clark95f55a72009-04-27 08:16:58 +0000575 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
576 printbuf_memappend_fast(tok->pb, case_start, case_len);
577 goto out;
578 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000579 }
Michael Clark95f55a72009-04-27 08:16:58 +0000580 if (case_len>0)
581 printbuf_memappend_fast(tok->pb, case_start, case_len);
582 }
583 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000584 int64_t num64;
585 double numd;
586 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
ehaszla252669c2010-12-07 18:15:35 +0000587 current = json_object_new_int64(num64);
Michael Clarkc4dceae2010-10-06 16:39:20 +0000588 } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
Michael Clark95f55a72009-04-27 08:16:58 +0000589 current = json_object_new_double(numd);
590 } else {
591 tok->err = json_tokener_error_parse_number;
592 goto out;
593 }
594 saved_state = json_tokener_state_finish;
595 state = json_tokener_state_eatws;
596 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000597 }
598 break;
599
600 case json_tokener_state_array:
601 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000602 saved_state = json_tokener_state_finish;
603 state = json_tokener_state_eatws;
604 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000605 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
606 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000607 goto out;
608 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000609 state = json_tokener_state_array_add;
610 tok->depth++;
611 json_tokener_reset_level(tok, tok->depth);
612 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000613 }
614 break;
615
Michael Clarka850f8e2007-03-13 08:26:26 +0000616 case json_tokener_state_array_add:
617 json_object_array_add(current, obj);
618 saved_state = json_tokener_state_array_sep;
619 state = json_tokener_state_eatws;
620 goto redo_char;
621
Michael Clarkf0d08882007-03-13 08:26:18 +0000622 case json_tokener_state_array_sep:
623 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000624 saved_state = json_tokener_state_finish;
625 state = json_tokener_state_eatws;
626 } else if(c == ',') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000627 saved_state = json_tokener_state_array;
628 state = json_tokener_state_eatws;
629 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000630 tok->err = json_tokener_error_parse_array;
631 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000632 }
633 break;
634
Michael Clarkf0d08882007-03-13 08:26:18 +0000635 case json_tokener_state_object_field_start:
636 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000637 saved_state = json_tokener_state_finish;
638 state = json_tokener_state_eatws;
639 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000640 tok->quote_char = c;
641 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000642 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000643 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000644 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000645 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000646 }
647 break;
648
649 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000650 {
651 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000652 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000653 while(1) {
654 if(c == tok->quote_char) {
655 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
656 obj_field_name = strdup(tok->pb->buf);
657 saved_state = json_tokener_state_object_field_end;
658 state = json_tokener_state_eatws;
659 break;
660 } else if(c == '\\') {
661 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
662 saved_state = json_tokener_state_object_field;
663 state = json_tokener_state_string_escape;
664 break;
665 }
666 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
667 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
668 goto out;
669 }
670 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000671 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000672 break;
673
674 case json_tokener_state_object_field_end:
675 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000676 saved_state = json_tokener_state_object_value;
677 state = json_tokener_state_eatws;
678 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000679 tok->err = json_tokener_error_parse_object_key_sep;
680 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000681 }
682 break;
683
684 case json_tokener_state_object_value:
Michael Clarka850f8e2007-03-13 08:26:26 +0000685 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
686 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000687 goto out;
688 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000689 state = json_tokener_state_object_value_add;
690 tok->depth++;
691 json_tokener_reset_level(tok, tok->depth);
692 goto redo_char;
693
694 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000695 json_object_object_add(current, obj_field_name, obj);
696 free(obj_field_name);
697 obj_field_name = NULL;
698 saved_state = json_tokener_state_object_sep;
699 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000700 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000701
702 case json_tokener_state_object_sep:
703 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000704 saved_state = json_tokener_state_finish;
705 state = json_tokener_state_eatws;
706 } else if(c == ',') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000707 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000708 state = json_tokener_state_eatws;
709 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000710 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000711 goto out;
712 }
713 break;
714
715 }
Michael Clark95f55a72009-04-27 08:16:58 +0000716 if (!ADVANCE_CHAR(str, tok))
717 goto out;
718 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000719
720 out:
Michael Clark95f55a72009-04-27 08:16:58 +0000721 if (!c) { /* We hit an eof char (0) */
722 if(state != json_tokener_state_finish &&
723 saved_state != json_tokener_state_finish)
724 tok->err = json_tokener_error_parse_eof;
725 }
726
Eric Haszlakiewiczd809fa62012-03-31 22:53:43 -0500727 if (tok->err == json_tokener_success)
728 {
729 json_object *ret = json_object_get(current);
730 int ii;
731
732 /* Partially reset, so we parse additional objects on subsequent calls. */
733 for(ii = tok->depth; ii >= 0; ii--)
734 json_tokener_reset_level(tok, ii);
735 return ret;
736 }
737
Michael Clarkdfaf6702007-10-25 02:26:00 +0000738 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000739 json_tokener_errors[tok->err], tok->char_offset);
740 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000741}