blob: a6924a1f579d7506a9e1bc84dc60fd463e574624 [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Remi Colleta01b6592012-12-13 09:47:33 +010034#ifdef HAVE_LOCALE_H
35#include <locale.h>
36#endif /* HAVE_LOCALE_H */
37
Mateusz Loskota6f39a32012-05-21 23:22:36 +010038#if !HAVE_STRDUP && defined(_MSC_VER)
39 /* MSC has the version as _strdup */
40# define strdup _strdup
41#elif !HAVE_STRDUP
42# error You do not have strdup on your system.
43#endif /* HAVE_STRDUP */
44
Michael Clark837240f2007-03-13 08:26:25 +000045#if !HAVE_STRNCASECMP && defined(_MSC_VER)
46 /* MSC has the version as _strnicmp */
47# define strncasecmp _strnicmp
48#elif !HAVE_STRNCASECMP
49# error You do not have strncasecmp on your system.
50#endif /* HAVE_STRNCASECMP */
51
/*
 * Literal spellings of the JSON keywords. The tokener compares the
 * characters it has gathered against these with strncasecmp(), so the
 * match is case-insensitive.
 */
static const char json_null_str[]  = "null";
static const char json_true_str[]  = "true";
static const char json_false_str[] = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000055
/*
 * Human-readable descriptions of tokener results. The table is indexed
 * directly by the numeric value of an enum json_tokener_error, so the
 * order of the entries must stay in step with that enum.
 */
// XXX after v0.10 this array will become static:
const char *json_tokener_errors[] = {
  /*  0 */ "success",
  /*  1 */ "continue",
  /*  2 */ "nesting too deep",
  /*  3 */ "unexpected end of data",
  /*  4 */ "unexpected character",
  /*  5 */ "null expected",
  /*  6 */ "boolean expected",
  /*  7 */ "number expected",
  /*  8 */ "array value separator ',' expected",
  /*  9 */ "quoted object property name expected",
  /* 10 */ "object property name separator ':' expected",
  /* 11 */ "object value separator ',' expected",
  /* 12 */ "invalid string sequence",
  /* 13 */ "expected comment",
};
73
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060074const char *json_tokener_error_desc(enum json_tokener_error jerr)
75{
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060076 int jerr_int = (int)jerr;
77 if (jerr_int < 0 || jerr_int > (int)sizeof(json_tokener_errors))
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060078 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
79 return json_tokener_errors[jerr];
80}
81
82enum json_tokener_error json_tokener_get_error(json_tokener *tok)
83{
84 return tok->err;
85}
86
/*
 * Helpers for decoding \uXXXX escape sequences.
 *
 * A UTF-16 surrogate is identified by its top six bits: 0xD800..0xDBFF is
 * a high (leading) surrogate, 0xDC00..0xDFFF a low (trailing) one. A pair
 * carries 10 payload bits in each half, offset by 0x10000.
 */
#define IS_HIGH_SURROGATE(uc) \
  (((uc) & 0xFC00) == 0xD800)
#define IS_LOW_SURROGATE(uc) \
  (((uc) & 0xFC00) == 0xDC00)
#define DECODE_SURROGATE_PAIR(hi,lo) \
  ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)

/* UTF-8 encoding of U+FFFD, appended in place of undecodable sequences. */
static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
92
Remi Collet197cb1d2012-11-27 09:01:45 +010093struct json_tokener* json_tokener_new_ex(int depth)
Michael Clarkf0d08882007-03-13 08:26:18 +000094{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000095 struct json_tokener *tok;
96
97 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000098 if (!tok) return NULL;
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060099 tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
Remi Collet197cb1d2012-11-27 09:01:45 +0100100 if (!tok->stack) {
101 free(tok);
102 return NULL;
103 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000104 tok->pb = printbuf_new();
Remi Collet197cb1d2012-11-27 09:01:45 +0100105 tok->max_depth = depth;
Michael Clarka850f8e2007-03-13 08:26:26 +0000106 json_tokener_reset(tok);
107 return tok;
108}
109
Remi Collet197cb1d2012-11-27 09:01:45 +0100110struct json_tokener* json_tokener_new(void)
111{
112 return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
113}
114
Michael Clarka850f8e2007-03-13 08:26:26 +0000115void json_tokener_free(struct json_tokener *tok)
116{
117 json_tokener_reset(tok);
Remi Collet197cb1d2012-11-27 09:01:45 +0100118 if (tok->pb) printbuf_free(tok->pb);
119 if (tok->stack) free(tok->stack);
Michael Clarka850f8e2007-03-13 08:26:26 +0000120 free(tok);
121}
122
123static void json_tokener_reset_level(struct json_tokener *tok, int depth)
124{
125 tok->stack[depth].state = json_tokener_state_eatws;
126 tok->stack[depth].saved_state = json_tokener_state_start;
127 json_object_put(tok->stack[depth].current);
128 tok->stack[depth].current = NULL;
129 free(tok->stack[depth].obj_field_name);
130 tok->stack[depth].obj_field_name = NULL;
131}
132
133void json_tokener_reset(struct json_tokener *tok)
134{
135 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000136 if (!tok)
137 return;
138
Michael Clarka850f8e2007-03-13 08:26:26 +0000139 for(i = tok->depth; i >= 0; i--)
140 json_tokener_reset_level(tok, i);
141 tok->depth = 0;
142 tok->err = json_tokener_success;
143}
144
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000145struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000146{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500147 enum json_tokener_error jerr_ignored;
148 struct json_object* obj;
149 obj = json_tokener_parse_verbose(str, &jerr_ignored);
150 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000151}
152
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000153struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
154{
155 struct json_tokener* tok;
156 struct json_object* obj;
157
158 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500159 if (!tok)
160 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000161 obj = json_tokener_parse_ex(tok, str, -1);
162 *error = tok->err;
163 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500164 if (obj != NULL)
165 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000166 obj = NULL;
167 }
168
169 json_tokener_free(tok);
170 return obj;
171}
172
Michael Clarka850f8e2007-03-13 08:26:26 +0000173
#if !HAVE_STRNDUP
/**
 * Fallback strndup() for platforms that lack one.
 *
 * Duplicates at most n bytes of str into freshly allocated, always
 * NUL-terminated storage. Unlike a strlen()-based implementation this
 * never reads beyond the first n bytes of str, so str need not be
 * NUL-terminated as long as n bytes are readable.
 *
 * @param str  source bytes; NULL yields NULL.
 * @param n    maximum number of bytes to copy.
 * @return a malloc()ed string the caller must free(), or NULL if str is
 *         NULL or allocation fails.
 */
char* strndup(const char* str, size_t n)
{
  const char *terminator;
  size_t copy_len;
  char *copy;

  if (!str)
    return NULL;

  /* Bounded search for the terminator: stops at n bytes at the latest. */
  terminator = memchr(str, '\0', n);
  copy_len = terminator ? (size_t)(terminator - str) : n;

  copy = malloc(copy_len + 1);
  if (!copy)
    return NULL;

  memcpy(copy, str, copy_len);
  copy[copy_len] = '\0';
  return copy;
}
#endif
194
Michael Clarka850f8e2007-03-13 08:26:26 +0000195
/*
 * Shorthand for the fields of the stack record at the current nesting
 * depth. Each expands to an lvalue and relies on a variable named `tok`
 * being in scope at the point of use.
 */
#define state          (tok->stack[tok->depth].state)
#define saved_state    (tok->stack[tok->depth].saved_state)
#define current        (tok->stack[tok->depth].current)
#define obj_field_name (tok->stack[tok->depth].obj_field_name)
200
Michael Clark95f55a72009-04-27 08:16:58 +0000201/* Optimization:
202 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
203 * iterating character-by character. A large performance boost is
204 * achieved by using tighter loops to locally handle units such as
William Dignaziobb492d42013-03-06 12:29:33 -0500205 * comments and strings. Loops that handle an entire token within
206 * their scope also gather entire strings and pass them to
Michael Clark95f55a72009-04-27 08:16:58 +0000207 * printbuf_memappend() in a single call, rather than calling
208 * printbuf_memappend() one char at a time.
209 *
William Dignaziobb492d42013-03-06 12:29:33 -0500210 * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
Michael Clark95f55a72009-04-27 08:16:58 +0000211 * common to both the main loop and the tighter loops.
212 */
213
/* PEEK_CHAR(dest, tok) macro:
 *   Looks at the current input character without consuming it.
 *   If input remains, stores *str in dest and evaluates to 1.
 *   If tok->char_offset has reached len, evaluates to 0 and sets tok->err:
 *   json_tokener_success when a complete top-level value has been parsed
 *   (depth 0, skipping whitespace, with the finish state pending), and
 *   json_tokener_continue otherwise.
 *   Implicit inputs: str, len vars (plus state/saved_state via tok)
 */
#define PEEK_CHAR(dest, tok)                                            \
  (((tok)->char_offset != len) ?                                        \
    (((dest) = *str), 1)                                                \
  :                                                                     \
    (((tok)->err = (((tok)->depth == 0 &&                               \
                     state == json_tokener_state_eatws &&               \
                     saved_state == json_tokener_state_finish)          \
                    ? json_tokener_success                              \
                    : json_tokener_continue)), 0)                       \
  )
William Dignaziobb492d42013-03-06 12:29:33 -0500228
/* ADVANCE_CHAR(str, tok) macro:
 *   Increments str and tok->char_offset, consuming one input character.
 *   For the convenience of existing conditionals it evaluates to the
 *   current value of c, i.e. the character that was just consumed when c
 *   was filled by PEEK_CHAR (0 if that character was a NUL byte).
 *   Implicit inputs: c var
 */
#define ADVANCE_CHAR(str, tok) \
  (++(str), ++((tok)->char_offset), c)
236
Brent Miller126ad952009-08-20 06:50:22 +0000237
Michael Clark95f55a72009-04-27 08:16:58 +0000238/* End optimization macro defs */
239
240
Michael Clarka850f8e2007-03-13 08:26:26 +0000241struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000242 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000243{
Michael Clarka850f8e2007-03-13 08:26:26 +0000244 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000245 char c = '\1';
Remi Colleta01b6592012-12-13 09:47:33 +0100246#ifdef HAVE_SETLOCALE
247 char *oldlocale=NULL, *tmplocale;
248
249 tmplocale = setlocale(LC_NUMERIC, NULL);
250 if (tmplocale) oldlocale = strdup(tmplocale);
251 setlocale(LC_NUMERIC, "C");
252#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000253
Michael Clarka850f8e2007-03-13 08:26:26 +0000254 tok->char_offset = 0;
255 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000256
William Dignaziobb492d42013-03-06 12:29:33 -0500257 while (PEEK_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000258
Michael Clarka850f8e2007-03-13 08:26:26 +0000259 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000260 switch(state) {
261
262 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000263 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000264 while (isspace((int)c)) {
William Dignaziobb492d42013-03-06 12:29:33 -0500265 if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
Michael Clark95f55a72009-04-27 08:16:58 +0000266 goto out;
267 }
268 if(c == '/') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000269 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000270 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000271 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000272 } else {
273 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000274 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000275 }
276 break;
277
278 case json_tokener_state_start:
279 switch(c) {
280 case '{':
281 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000282 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000283 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000284 break;
285 case '[':
286 state = json_tokener_state_eatws;
287 saved_state = json_tokener_state_array;
288 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000289 break;
290 case 'N':
291 case 'n':
292 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000293 printbuf_reset(tok->pb);
294 tok->st_pos = 0;
295 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000296 case '"':
297 case '\'':
Michael Clarkf0d08882007-03-13 08:26:18 +0000298 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000299 printbuf_reset(tok->pb);
300 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000301 break;
302 case 'T':
303 case 't':
304 case 'F':
305 case 'f':
306 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000307 printbuf_reset(tok->pb);
308 tok->st_pos = 0;
309 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000310#if defined(__GNUC__)
311 case '0' ... '9':
312#else
313 case '0':
314 case '1':
315 case '2':
316 case '3':
317 case '4':
318 case '5':
319 case '6':
320 case '7':
321 case '8':
322 case '9':
323#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000324 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000325 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000326 printbuf_reset(tok->pb);
327 tok->is_double = 0;
328 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000329 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000330 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000331 goto out;
332 }
333 break;
334
335 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000336 if(tok->depth == 0) goto out;
337 obj = json_object_get(current);
338 json_tokener_reset_level(tok, tok->depth);
339 tok->depth--;
340 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000341
342 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000343 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000344 if(strncasecmp(json_null_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600345 json_min(tok->st_pos+1, (int)strlen(json_null_str))) == 0) {
346 if(tok->st_pos == (int)strlen(json_null_str)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000347 current = NULL;
348 saved_state = json_tokener_state_finish;
349 state = json_tokener_state_eatws;
350 goto redo_char;
351 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000352 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000353 tok->err = json_tokener_error_parse_null;
354 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000355 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000356 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000357 break;
358
359 case json_tokener_state_comment_start:
360 if(c == '*') {
361 state = json_tokener_state_comment;
362 } else if(c == '/') {
363 state = json_tokener_state_comment_eol;
364 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000365 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000366 goto out;
367 }
Michael Clark95f55a72009-04-27 08:16:58 +0000368 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000369 break;
370
371 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000372 {
373 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000374 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000375 while(c != '*') {
William Dignaziobb492d42013-03-06 12:29:33 -0500376 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000377 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
378 goto out;
William Dignaziobb492d42013-03-06 12:29:33 -0500379 }
Michael Clark95f55a72009-04-27 08:16:58 +0000380 }
381 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
382 state = json_tokener_state_comment_end;
383 }
384 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000385
386 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000387 {
388 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000389 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000390 while(c != '\n') {
William Dignaziobb492d42013-03-06 12:29:33 -0500391 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000392 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
393 goto out;
394 }
395 }
396 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000397 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000398 state = json_tokener_state_eatws;
399 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000400 break;
401
402 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000403 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000404 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000405 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000406 state = json_tokener_state_eatws;
407 } else {
408 state = json_tokener_state_comment;
409 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000410 break;
411
412 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000413 {
414 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000415 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000416 while(1) {
417 if(c == tok->quote_char) {
418 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Eric Haszlakiewicz4e4af932012-12-09 16:32:11 -0600419 current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
Michael Clark95f55a72009-04-27 08:16:58 +0000420 saved_state = json_tokener_state_finish;
421 state = json_tokener_state_eatws;
422 break;
423 } else if(c == '\\') {
424 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
425 saved_state = json_tokener_state_string;
426 state = json_tokener_state_string_escape;
427 break;
428 }
William Dignaziobb492d42013-03-06 12:29:33 -0500429 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000430 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
431 goto out;
432 }
433 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000434 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000435 break;
436
437 case json_tokener_state_string_escape:
438 switch(c) {
439 case '"':
440 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000441 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000442 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000443 state = saved_state;
444 break;
445 case 'b':
446 case 'n':
447 case 'r':
448 case 't':
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500449 case 'f':
Michael Clark95f55a72009-04-27 08:16:58 +0000450 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
451 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
452 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
453 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500454 else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000455 state = saved_state;
456 break;
457 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000458 tok->ucs_char = 0;
459 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000460 state = json_tokener_state_escape_unicode;
461 break;
462 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000463 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000464 goto out;
465 }
466 break;
467
468 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000469 {
Brent Miller126ad952009-08-20 06:50:22 +0000470 unsigned int got_hi_surrogate = 0;
471
472 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000473 while(1) {
474 if(strchr(json_hex_chars, c)) {
475 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
476 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000477 unsigned char unescaped_utf[4];
478
479 if (got_hi_surrogate) {
480 if (IS_LOW_SURROGATE(tok->ucs_char)) {
481 /* Recalculate the ucs_char, then fall thru to process normally */
482 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
483 } else {
484 /* Hi surrogate was not followed by a low surrogate */
485 /* Replace the hi and process the rest normally */
486 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
487 }
488 got_hi_surrogate = 0;
489 }
490
Michael Clark95f55a72009-04-27 08:16:58 +0000491 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000492 unescaped_utf[0] = tok->ucs_char;
493 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000494 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000495 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
496 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
497 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
498 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
499 /* Got a high surrogate. Remember it and look for the
500 * the beginning of another sequence, which should be the
501 * low surrogate.
502 */
503 got_hi_surrogate = tok->ucs_char;
504 /* Not at end, and the next two chars should be "\u" */
505 if ((tok->char_offset+1 != len) &&
506 (tok->char_offset+2 != len) &&
507 (str[1] == '\\') &&
508 (str[2] == 'u'))
509 {
William Dignazio32eddd62013-03-06 20:18:14 -0500510 /* Advance through the 16 bit surrogate, and move on to the
511 * next sequence. The next step is to process the following
512 * characters.
513 */
514 if( !ADVANCE_CHAR(str, tok) || !ADVANCE_CHAR(str, tok) ) {
515 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
516 }
Brent Miller126ad952009-08-20 06:50:22 +0000517 /* Advance to the first char of the next sequence and
518 * continue processing with the next sequence.
519 */
William Dignaziobb492d42013-03-06 12:29:33 -0500520 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000521 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
522 goto out;
523 }
524 tok->ucs_char = 0;
525 tok->st_pos = 0;
526 continue; /* other json_tokener_state_escape_unicode */
527 } else {
528 /* Got a high surrogate without another sequence following
529 * it. Put a replacement char in for the hi surrogate
530 * and pretend we finished.
531 */
532 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
533 }
534 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
535 /* Got a low surrogate not preceded by a high */
536 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
537 } else if (tok->ucs_char < 0x10000) {
538 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
539 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
540 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
541 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
542 } else if (tok->ucs_char < 0x110000) {
543 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
544 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
545 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
546 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
547 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000548 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000549 /* Don't know what we got--insert the replacement char */
550 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
551 }
Michael Clark95f55a72009-04-27 08:16:58 +0000552 state = saved_state;
553 break;
554 }
555 } else {
556 tok->err = json_tokener_error_parse_string;
557 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000558 }
William Dignaziobb492d42013-03-06 12:29:33 -0500559 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000560 if (got_hi_surrogate) /* Clean up any pending chars */
561 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000562 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000563 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000564 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000565 }
566 break;
567
568 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000569 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000570 if(strncasecmp(json_true_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600571 json_min(tok->st_pos+1, (int)strlen(json_true_str))) == 0) {
572 if(tok->st_pos == (int)strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000573 current = json_object_new_boolean(1);
574 saved_state = json_tokener_state_finish;
575 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000576 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000577 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000578 } else if(strncasecmp(json_false_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600579 json_min(tok->st_pos+1, (int)strlen(json_false_str))) == 0) {
580 if(tok->st_pos == (int)strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000581 current = json_object_new_boolean(0);
582 saved_state = json_tokener_state_finish;
583 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000584 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000585 }
586 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000587 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000588 goto out;
589 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000590 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000591 break;
592
593 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000594 {
595 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000596 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000597 int case_len=0;
598 while(c && strchr(json_number_chars, c)) {
599 ++case_len;
Eric Haszlakiewiczf931f612012-04-24 22:17:13 -0500600 if(c == '.' || c == 'e' || c == 'E')
601 tok->is_double = 1;
William Dignaziobb492d42013-03-06 12:29:33 -0500602 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000603 printbuf_memappend_fast(tok->pb, case_start, case_len);
604 goto out;
605 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000606 }
Michael Clark95f55a72009-04-27 08:16:58 +0000607 if (case_len>0)
608 printbuf_memappend_fast(tok->pb, case_start, case_len);
609 }
610 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000611 int64_t num64;
612 double numd;
613 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
Remi Collete9ee4ae2013-06-13 13:40:01 +0200614 if (num64 && tok->pb->buf[0]=='0' && (tok->flags & JSON_TOKENER_STRICT)) {
Eric Haszlakiewiczd032aad2013-06-19 09:14:19 -0500615 /* in strict mode, number must not start with 0 */
Remi Collete9ee4ae2013-06-13 13:40:01 +0200616 tok->err = json_tokener_error_parse_number;
617 goto out;
618 }
ehaszla252669c2010-12-07 18:15:35 +0000619 current = json_object_new_int64(num64);
Remi Collet16a4a322012-11-27 11:06:49 +0100620 } else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0) {
Michael Clark95f55a72009-04-27 08:16:58 +0000621 current = json_object_new_double(numd);
622 } else {
623 tok->err = json_tokener_error_parse_number;
624 goto out;
625 }
626 saved_state = json_tokener_state_finish;
627 state = json_tokener_state_eatws;
628 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000629 }
630 break;
631
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500632 case json_tokener_state_array_after_sep:
Michael Clarkf0d08882007-03-13 08:26:18 +0000633 case json_tokener_state_array:
634 if(c == ']') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500635 if (state == json_tokener_state_array_after_sep &&
636 (tok->flags & JSON_TOKENER_STRICT))
637 {
638 tok->err = json_tokener_error_parse_unexpected;
639 goto out;
640 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000641 saved_state = json_tokener_state_finish;
642 state = json_tokener_state_eatws;
643 } else {
Remi Collet197cb1d2012-11-27 09:01:45 +0100644 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000645 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000646 goto out;
647 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000648 state = json_tokener_state_array_add;
649 tok->depth++;
650 json_tokener_reset_level(tok, tok->depth);
651 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000652 }
653 break;
654
Michael Clarka850f8e2007-03-13 08:26:26 +0000655 case json_tokener_state_array_add:
656 json_object_array_add(current, obj);
657 saved_state = json_tokener_state_array_sep;
658 state = json_tokener_state_eatws;
659 goto redo_char;
660
Michael Clarkf0d08882007-03-13 08:26:18 +0000661 case json_tokener_state_array_sep:
662 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000663 saved_state = json_tokener_state_finish;
664 state = json_tokener_state_eatws;
665 } else if(c == ',') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500666 saved_state = json_tokener_state_array_after_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000667 state = json_tokener_state_eatws;
668 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000669 tok->err = json_tokener_error_parse_array;
670 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000671 }
672 break;
673
Michael Clarkf0d08882007-03-13 08:26:18 +0000674 case json_tokener_state_object_field_start:
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500675 case json_tokener_state_object_field_start_after_sep:
Michael Clarkf0d08882007-03-13 08:26:18 +0000676 if(c == '}') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500677 if (state == json_tokener_state_object_field_start_after_sep &&
678 (tok->flags & JSON_TOKENER_STRICT))
679 {
680 tok->err = json_tokener_error_parse_unexpected;
681 goto out;
682 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000683 saved_state = json_tokener_state_finish;
684 state = json_tokener_state_eatws;
685 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000686 tok->quote_char = c;
687 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000688 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000689 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000690 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000691 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000692 }
693 break;
694
695 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000696 {
697 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000698 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000699 while(1) {
700 if(c == tok->quote_char) {
701 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
702 obj_field_name = strdup(tok->pb->buf);
703 saved_state = json_tokener_state_object_field_end;
704 state = json_tokener_state_eatws;
705 break;
706 } else if(c == '\\') {
707 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
708 saved_state = json_tokener_state_object_field;
709 state = json_tokener_state_string_escape;
710 break;
711 }
William Dignaziobb492d42013-03-06 12:29:33 -0500712 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000713 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
714 goto out;
715 }
716 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000717 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000718 break;
719
720 case json_tokener_state_object_field_end:
721 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000722 saved_state = json_tokener_state_object_value;
723 state = json_tokener_state_eatws;
724 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000725 tok->err = json_tokener_error_parse_object_key_sep;
726 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000727 }
728 break;
729
730 case json_tokener_state_object_value:
Remi Collet197cb1d2012-11-27 09:01:45 +0100731 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000732 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000733 goto out;
734 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000735 state = json_tokener_state_object_value_add;
736 tok->depth++;
737 json_tokener_reset_level(tok, tok->depth);
738 goto redo_char;
739
740 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000741 json_object_object_add(current, obj_field_name, obj);
742 free(obj_field_name);
743 obj_field_name = NULL;
744 saved_state = json_tokener_state_object_sep;
745 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000746 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000747
748 case json_tokener_state_object_sep:
749 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000750 saved_state = json_tokener_state_finish;
751 state = json_tokener_state_eatws;
752 } else if(c == ',') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500753 saved_state = json_tokener_state_object_field_start_after_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000754 state = json_tokener_state_eatws;
755 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000756 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000757 goto out;
758 }
759 break;
760
761 }
Michael Clark95f55a72009-04-27 08:16:58 +0000762 if (!ADVANCE_CHAR(str, tok))
763 goto out;
764 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000765
766 out:
Michael Clark95f55a72009-04-27 08:16:58 +0000767 if (!c) { /* We hit an eof char (0) */
768 if(state != json_tokener_state_finish &&
769 saved_state != json_tokener_state_finish)
770 tok->err = json_tokener_error_parse_eof;
771 }
772
Remi Colleta01b6592012-12-13 09:47:33 +0100773#ifdef HAVE_SETLOCALE
774 setlocale(LC_NUMERIC, oldlocale);
775 if (oldlocale) free(oldlocale);
776#endif
777
William Dignaziobb492d42013-03-06 12:29:33 -0500778 if (tok->err == json_tokener_success)
Eric Haszlakiewiczd809fa62012-03-31 22:53:43 -0500779 {
780 json_object *ret = json_object_get(current);
781 int ii;
782
783 /* Partially reset, so we parse additional objects on subsequent calls. */
784 for(ii = tok->depth; ii >= 0; ii--)
785 json_tokener_reset_level(tok, ii);
786 return ret;
787 }
788
Michael Clarkdfaf6702007-10-25 02:26:00 +0000789 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000790 json_tokener_errors[tok->err], tok->char_offset);
791 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000792}
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500793
794void json_tokener_set_flags(struct json_tokener *tok, int flags)
795{
796 tok->flags = flags;
797}