blob: 4ebe71208498fafa42ce428d2fe280d55157ade2 [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Remi Colleta01b6592012-12-13 09:47:33 +010034#ifdef HAVE_LOCALE_H
35#include <locale.h>
36#endif /* HAVE_LOCALE_H */
37
Mateusz Loskota6f39a32012-05-21 23:22:36 +010038#if !HAVE_STRDUP && defined(_MSC_VER)
39 /* MSC has the version as _strdup */
40# define strdup _strdup
41#elif !HAVE_STRDUP
42# error You do not have strdup on your system.
43#endif /* HAVE_STRDUP */
44
Michael Clark837240f2007-03-13 08:26:25 +000045#if !HAVE_STRNCASECMP && defined(_MSC_VER)
46 /* MSC has the version as _strnicmp */
47# define strncasecmp _strnicmp
48#elif !HAVE_STRNCASECMP
49# error You do not have strncasecmp on your system.
50#endif /* HAVE_STRNCASECMP */
51
/* Keyword spellings the tokener compares input against (via strncasecmp). */
static const char *json_null_str  = "null";
static const char *json_true_str  = "true";
static const char *json_false_str = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000055
/*
 * Human-readable error messages.  json_tokener_error_desc() indexes this
 * table with the numeric value of an enum json_tokener_error, so the order
 * of the entries must not change.
 *
 * XXX after v0.10 this array will become static:
 */
const char *json_tokener_errors[] = {
  "success",
  "continue",
  "nesting too deep",
  "unexpected end of data",
  "unexpected character",
  "null expected",
  "boolean expected",
  "number expected",
  "array value separator ',' expected",
  "quoted object property name expected",
  "object property name separator ':' expected",
  "object value separator ',' expected",
  "invalid string sequence",
  "expected comment",
};
73
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060074const char *json_tokener_error_desc(enum json_tokener_error jerr)
75{
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060076 int jerr_int = (int)jerr;
Eric Haszlakiewicz56df93d2014-02-11 23:16:53 -050077 if (jerr_int < 0 || jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060078 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
79 return json_tokener_errors[jerr];
80}
81
82enum json_tokener_error json_tokener_get_error(json_tokener *tok)
83{
84 return tok->err;
85}
86
/*
 * Helpers for decoding \uXXXX escape sequences.
 *
 * A UTF-16 code unit is a high surrogate when its top six bits are 110110
 * (0xD800-0xDBFF) and a low surrogate when they are 110111 (0xDC00-0xDFFF).
 * A high/low pair carries 10 payload bits each and encodes a code point at
 * or above 0x10000.
 */
#define IS_HIGH_SURROGATE(uc) (0xD800 == ((uc) & 0xFC00))
#define IS_LOW_SURROGATE(uc)  (0xDC00 == ((uc) & 0xFC00))
#define DECODE_SURROGATE_PAIR(hi,lo) (0x10000 + (((hi) & 0x3FF) << 10) + ((lo) & 0x3FF))

/* UTF-8 encoding of U+FFFD (REPLACEMENT CHARACTER), emitted for bad escapes. */
static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
92
Remi Collet197cb1d2012-11-27 09:01:45 +010093struct json_tokener* json_tokener_new_ex(int depth)
Michael Clarkf0d08882007-03-13 08:26:18 +000094{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000095 struct json_tokener *tok;
96
97 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000098 if (!tok) return NULL;
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060099 tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
Remi Collet197cb1d2012-11-27 09:01:45 +0100100 if (!tok->stack) {
101 free(tok);
102 return NULL;
103 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000104 tok->pb = printbuf_new();
Remi Collet197cb1d2012-11-27 09:01:45 +0100105 tok->max_depth = depth;
Michael Clarka850f8e2007-03-13 08:26:26 +0000106 json_tokener_reset(tok);
107 return tok;
108}
109
Remi Collet197cb1d2012-11-27 09:01:45 +0100110struct json_tokener* json_tokener_new(void)
111{
112 return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
113}
114
Michael Clarka850f8e2007-03-13 08:26:26 +0000115void json_tokener_free(struct json_tokener *tok)
116{
117 json_tokener_reset(tok);
Remi Collet197cb1d2012-11-27 09:01:45 +0100118 if (tok->pb) printbuf_free(tok->pb);
119 if (tok->stack) free(tok->stack);
Michael Clarka850f8e2007-03-13 08:26:26 +0000120 free(tok);
121}
122
123static void json_tokener_reset_level(struct json_tokener *tok, int depth)
124{
125 tok->stack[depth].state = json_tokener_state_eatws;
126 tok->stack[depth].saved_state = json_tokener_state_start;
127 json_object_put(tok->stack[depth].current);
128 tok->stack[depth].current = NULL;
129 free(tok->stack[depth].obj_field_name);
130 tok->stack[depth].obj_field_name = NULL;
131}
132
133void json_tokener_reset(struct json_tokener *tok)
134{
135 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000136 if (!tok)
137 return;
138
Michael Clarka850f8e2007-03-13 08:26:26 +0000139 for(i = tok->depth; i >= 0; i--)
140 json_tokener_reset_level(tok, i);
141 tok->depth = 0;
142 tok->err = json_tokener_success;
143}
144
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000145struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000146{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500147 enum json_tokener_error jerr_ignored;
148 struct json_object* obj;
149 obj = json_tokener_parse_verbose(str, &jerr_ignored);
150 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000151}
152
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000153struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
154{
155 struct json_tokener* tok;
156 struct json_object* obj;
157
158 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500159 if (!tok)
160 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000161 obj = json_tokener_parse_ex(tok, str, -1);
162 *error = tok->err;
163 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500164 if (obj != NULL)
165 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000166 obj = NULL;
167 }
168
169 json_tokener_free(tok);
170 return obj;
171}
172
/*
 * Shorthand for the fields of the stack record at the current nesting
 * depth.  Each name expands to an lvalue and requires a variable called
 * `tok` to be in scope at the point of use.
 */
#define state          tok->stack[tok->depth].state
#define saved_state    tok->stack[tok->depth].saved_state
#define current        tok->stack[tok->depth].current
#define obj_field_name tok->stack[tok->depth].obj_field_name
177
/* Optimization:
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character-by-character.  A large performance boost is
 * achieved by using tighter loops to locally handle units such as
 * comments and strings.  Loops that handle an entire token within
 * their scope also gather entire strings and pass them to
 * printbuf_memappend() in a single call, rather than calling
 * printbuf_memappend() one char at a time.
 *
 * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
 * common to both the main loop and the tighter loops.
 */
190
/* PEEK_CHAR(dest, tok) macro:
 *   Looks at the current input character without consuming it.
 *   While (tok)->char_offset differs from len, stores *str in dest and
 *   evaluates to 1.
 *   Once char_offset equals len, leaves dest untouched, evaluates to 0 and
 *   sets (tok)->err: json_tokener_success if the tokener is at depth 0 in
 *   the eatws state with a saved state of finish, json_tokener_continue
 *   otherwise.
 *   Implicit inputs: str, len vars (and a variable named tok, through the
 *   state/saved_state macros)
 */
#define PEEK_CHAR(dest, tok)                                                   \
  (((tok)->char_offset != len) ?                                               \
    (((dest) = *str), 1)                                                       \
  :                                                                            \
    (((tok)->err =                                                             \
        ((tok)->depth == 0 && state == json_tokener_state_eatws &&             \
         saved_state == json_tokener_state_finish) ?                           \
          json_tokener_success : json_tokener_continue), 0)                    \
  )
William Dignaziobb492d42013-03-06 12:29:33 -0500205
/* ADVANCE_CHAR(str, tok) macro:
 *   Increments str & tok->char_offset.
 *   For convenience of existing conditionals, evaluates to the value of c,
 *   which still holds the character that was just consumed (0 on eof).
 *   Implicit inputs: c var
 */
#define ADVANCE_CHAR(str, tok) \
  (++(str), ++((tok)->char_offset), c)
213
Brent Miller126ad952009-08-20 06:50:22 +0000214
Michael Clark95f55a72009-04-27 08:16:58 +0000215/* End optimization macro defs */
216
217
Michael Clarka850f8e2007-03-13 08:26:26 +0000218struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000219 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000220{
Michael Clarka850f8e2007-03-13 08:26:26 +0000221 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000222 char c = '\1';
Remi Colleta01b6592012-12-13 09:47:33 +0100223#ifdef HAVE_SETLOCALE
224 char *oldlocale=NULL, *tmplocale;
225
226 tmplocale = setlocale(LC_NUMERIC, NULL);
227 if (tmplocale) oldlocale = strdup(tmplocale);
228 setlocale(LC_NUMERIC, "C");
229#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000230
Michael Clarka850f8e2007-03-13 08:26:26 +0000231 tok->char_offset = 0;
232 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000233
William Dignaziobb492d42013-03-06 12:29:33 -0500234 while (PEEK_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000235
Michael Clarka850f8e2007-03-13 08:26:26 +0000236 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000237 switch(state) {
238
239 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000240 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000241 while (isspace((int)c)) {
William Dignaziobb492d42013-03-06 12:29:33 -0500242 if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
Michael Clark95f55a72009-04-27 08:16:58 +0000243 goto out;
244 }
Remi Collet87fa32d2013-08-21 15:41:40 +0200245 if(c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000246 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000247 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000248 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000249 } else {
250 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000251 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000252 }
253 break;
254
255 case json_tokener_state_start:
256 switch(c) {
257 case '{':
258 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000259 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000260 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000261 break;
262 case '[':
263 state = json_tokener_state_eatws;
264 saved_state = json_tokener_state_array;
265 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000266 break;
267 case 'N':
268 case 'n':
269 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000270 printbuf_reset(tok->pb);
271 tok->st_pos = 0;
272 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000273 case '\'':
Remi Colleta07ef3d2013-08-06 10:41:14 +0200274 if (tok->flags & JSON_TOKENER_STRICT) {
275 /* in STRICT mode only double-quote are allowed */
276 tok->err = json_tokener_error_parse_unexpected;
277 goto out;
278 }
279 case '"':
Michael Clarkf0d08882007-03-13 08:26:18 +0000280 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000281 printbuf_reset(tok->pb);
282 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000283 break;
284 case 'T':
285 case 't':
286 case 'F':
287 case 'f':
288 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000289 printbuf_reset(tok->pb);
290 tok->st_pos = 0;
291 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000292#if defined(__GNUC__)
293 case '0' ... '9':
294#else
295 case '0':
296 case '1':
297 case '2':
298 case '3':
299 case '4':
300 case '5':
301 case '6':
302 case '7':
303 case '8':
304 case '9':
305#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000306 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000307 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000308 printbuf_reset(tok->pb);
309 tok->is_double = 0;
310 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000311 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000312 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000313 goto out;
314 }
315 break;
316
317 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000318 if(tok->depth == 0) goto out;
319 obj = json_object_get(current);
320 json_tokener_reset_level(tok, tok->depth);
321 tok->depth--;
322 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000323
324 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000325 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000326 if(strncasecmp(json_null_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600327 json_min(tok->st_pos+1, (int)strlen(json_null_str))) == 0) {
328 if(tok->st_pos == (int)strlen(json_null_str)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000329 current = NULL;
330 saved_state = json_tokener_state_finish;
331 state = json_tokener_state_eatws;
332 goto redo_char;
333 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000334 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000335 tok->err = json_tokener_error_parse_null;
336 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000337 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000338 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000339 break;
340
341 case json_tokener_state_comment_start:
342 if(c == '*') {
343 state = json_tokener_state_comment;
344 } else if(c == '/') {
345 state = json_tokener_state_comment_eol;
346 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000347 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000348 goto out;
349 }
Michael Clark95f55a72009-04-27 08:16:58 +0000350 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000351 break;
352
353 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000354 {
355 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000356 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000357 while(c != '*') {
William Dignaziobb492d42013-03-06 12:29:33 -0500358 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000359 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
360 goto out;
William Dignaziobb492d42013-03-06 12:29:33 -0500361 }
Michael Clark95f55a72009-04-27 08:16:58 +0000362 }
363 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
364 state = json_tokener_state_comment_end;
365 }
366 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000367
368 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000369 {
370 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000371 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000372 while(c != '\n') {
William Dignaziobb492d42013-03-06 12:29:33 -0500373 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000374 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
375 goto out;
376 }
377 }
378 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000379 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000380 state = json_tokener_state_eatws;
381 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000382 break;
383
384 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000385 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000386 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000387 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000388 state = json_tokener_state_eatws;
389 } else {
390 state = json_tokener_state_comment;
391 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000392 break;
393
394 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000395 {
396 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000397 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000398 while(1) {
399 if(c == tok->quote_char) {
400 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Eric Haszlakiewicz4e4af932012-12-09 16:32:11 -0600401 current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
Michael Clark95f55a72009-04-27 08:16:58 +0000402 saved_state = json_tokener_state_finish;
403 state = json_tokener_state_eatws;
404 break;
405 } else if(c == '\\') {
406 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
407 saved_state = json_tokener_state_string;
408 state = json_tokener_state_string_escape;
409 break;
410 }
William Dignaziobb492d42013-03-06 12:29:33 -0500411 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000412 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
413 goto out;
414 }
415 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000416 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000417 break;
418
419 case json_tokener_state_string_escape:
420 switch(c) {
421 case '"':
422 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000423 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000424 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000425 state = saved_state;
426 break;
427 case 'b':
428 case 'n':
429 case 'r':
430 case 't':
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500431 case 'f':
Michael Clark95f55a72009-04-27 08:16:58 +0000432 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
433 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
434 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
435 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500436 else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000437 state = saved_state;
438 break;
439 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000440 tok->ucs_char = 0;
441 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000442 state = json_tokener_state_escape_unicode;
443 break;
444 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000445 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000446 goto out;
447 }
448 break;
449
450 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000451 {
Brent Miller126ad952009-08-20 06:50:22 +0000452 unsigned int got_hi_surrogate = 0;
453
454 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000455 while(1) {
456 if(strchr(json_hex_chars, c)) {
457 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
458 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000459 unsigned char unescaped_utf[4];
460
461 if (got_hi_surrogate) {
462 if (IS_LOW_SURROGATE(tok->ucs_char)) {
463 /* Recalculate the ucs_char, then fall thru to process normally */
464 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
465 } else {
466 /* Hi surrogate was not followed by a low surrogate */
467 /* Replace the hi and process the rest normally */
468 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
469 }
470 got_hi_surrogate = 0;
471 }
472
Michael Clark95f55a72009-04-27 08:16:58 +0000473 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000474 unescaped_utf[0] = tok->ucs_char;
475 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000476 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000477 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
478 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
479 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
480 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
481 /* Got a high surrogate. Remember it and look for the
482 * the beginning of another sequence, which should be the
483 * low surrogate.
484 */
485 got_hi_surrogate = tok->ucs_char;
486 /* Not at end, and the next two chars should be "\u" */
487 if ((tok->char_offset+1 != len) &&
488 (tok->char_offset+2 != len) &&
489 (str[1] == '\\') &&
490 (str[2] == 'u'))
491 {
William Dignazio32eddd62013-03-06 20:18:14 -0500492 /* Advance through the 16 bit surrogate, and move on to the
493 * next sequence. The next step is to process the following
494 * characters.
495 */
496 if( !ADVANCE_CHAR(str, tok) || !ADVANCE_CHAR(str, tok) ) {
497 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
498 }
Brent Miller126ad952009-08-20 06:50:22 +0000499 /* Advance to the first char of the next sequence and
500 * continue processing with the next sequence.
501 */
William Dignaziobb492d42013-03-06 12:29:33 -0500502 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000503 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
504 goto out;
505 }
506 tok->ucs_char = 0;
507 tok->st_pos = 0;
508 continue; /* other json_tokener_state_escape_unicode */
509 } else {
510 /* Got a high surrogate without another sequence following
511 * it. Put a replacement char in for the hi surrogate
512 * and pretend we finished.
513 */
514 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
515 }
516 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
517 /* Got a low surrogate not preceded by a high */
518 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
519 } else if (tok->ucs_char < 0x10000) {
520 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
521 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
522 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
523 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
524 } else if (tok->ucs_char < 0x110000) {
525 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
526 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
527 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
528 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
529 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000530 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000531 /* Don't know what we got--insert the replacement char */
532 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
533 }
Michael Clark95f55a72009-04-27 08:16:58 +0000534 state = saved_state;
535 break;
536 }
537 } else {
538 tok->err = json_tokener_error_parse_string;
539 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000540 }
William Dignaziobb492d42013-03-06 12:29:33 -0500541 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000542 if (got_hi_surrogate) /* Clean up any pending chars */
543 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000544 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000545 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000546 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000547 }
548 break;
549
550 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000551 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000552 if(strncasecmp(json_true_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600553 json_min(tok->st_pos+1, (int)strlen(json_true_str))) == 0) {
554 if(tok->st_pos == (int)strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000555 current = json_object_new_boolean(1);
556 saved_state = json_tokener_state_finish;
557 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000558 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000559 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000560 } else if(strncasecmp(json_false_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600561 json_min(tok->st_pos+1, (int)strlen(json_false_str))) == 0) {
562 if(tok->st_pos == (int)strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000563 current = json_object_new_boolean(0);
564 saved_state = json_tokener_state_finish;
565 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000566 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000567 }
568 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000569 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000570 goto out;
571 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000572 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000573 break;
574
575 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000576 {
577 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000578 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000579 int case_len=0;
580 while(c && strchr(json_number_chars, c)) {
581 ++case_len;
Eric Haszlakiewiczf931f612012-04-24 22:17:13 -0500582 if(c == '.' || c == 'e' || c == 'E')
583 tok->is_double = 1;
William Dignaziobb492d42013-03-06 12:29:33 -0500584 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000585 printbuf_memappend_fast(tok->pb, case_start, case_len);
586 goto out;
587 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000588 }
Michael Clark95f55a72009-04-27 08:16:58 +0000589 if (case_len>0)
590 printbuf_memappend_fast(tok->pb, case_start, case_len);
591 }
592 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000593 int64_t num64;
594 double numd;
595 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
Remi Collete9ee4ae2013-06-13 13:40:01 +0200596 if (num64 && tok->pb->buf[0]=='0' && (tok->flags & JSON_TOKENER_STRICT)) {
Eric Haszlakiewiczd032aad2013-06-19 09:14:19 -0500597 /* in strict mode, number must not start with 0 */
Remi Collete9ee4ae2013-06-13 13:40:01 +0200598 tok->err = json_tokener_error_parse_number;
599 goto out;
600 }
ehaszla252669c2010-12-07 18:15:35 +0000601 current = json_object_new_int64(num64);
Eric Haszlakiewicz51993c22013-09-11 20:27:39 -0500602 }
603 else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0)
604 {
605 current = json_object_new_double_s(numd, tok->pb->buf);
Michael Clark95f55a72009-04-27 08:16:58 +0000606 } else {
607 tok->err = json_tokener_error_parse_number;
608 goto out;
609 }
610 saved_state = json_tokener_state_finish;
611 state = json_tokener_state_eatws;
612 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000613 }
614 break;
615
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500616 case json_tokener_state_array_after_sep:
Michael Clarkf0d08882007-03-13 08:26:18 +0000617 case json_tokener_state_array:
618 if(c == ']') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500619 if (state == json_tokener_state_array_after_sep &&
620 (tok->flags & JSON_TOKENER_STRICT))
621 {
622 tok->err = json_tokener_error_parse_unexpected;
623 goto out;
624 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000625 saved_state = json_tokener_state_finish;
626 state = json_tokener_state_eatws;
627 } else {
Remi Collet197cb1d2012-11-27 09:01:45 +0100628 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000629 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000630 goto out;
631 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000632 state = json_tokener_state_array_add;
633 tok->depth++;
634 json_tokener_reset_level(tok, tok->depth);
635 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000636 }
637 break;
638
Michael Clarka850f8e2007-03-13 08:26:26 +0000639 case json_tokener_state_array_add:
640 json_object_array_add(current, obj);
641 saved_state = json_tokener_state_array_sep;
642 state = json_tokener_state_eatws;
643 goto redo_char;
644
Michael Clarkf0d08882007-03-13 08:26:18 +0000645 case json_tokener_state_array_sep:
646 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000647 saved_state = json_tokener_state_finish;
648 state = json_tokener_state_eatws;
649 } else if(c == ',') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500650 saved_state = json_tokener_state_array_after_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000651 state = json_tokener_state_eatws;
652 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000653 tok->err = json_tokener_error_parse_array;
654 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000655 }
656 break;
657
Michael Clarkf0d08882007-03-13 08:26:18 +0000658 case json_tokener_state_object_field_start:
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500659 case json_tokener_state_object_field_start_after_sep:
Michael Clarkf0d08882007-03-13 08:26:18 +0000660 if(c == '}') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500661 if (state == json_tokener_state_object_field_start_after_sep &&
662 (tok->flags & JSON_TOKENER_STRICT))
663 {
664 tok->err = json_tokener_error_parse_unexpected;
665 goto out;
666 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000667 saved_state = json_tokener_state_finish;
668 state = json_tokener_state_eatws;
669 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000670 tok->quote_char = c;
671 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000672 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000673 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000674 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000675 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000676 }
677 break;
678
679 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000680 {
681 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000682 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000683 while(1) {
684 if(c == tok->quote_char) {
685 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
686 obj_field_name = strdup(tok->pb->buf);
687 saved_state = json_tokener_state_object_field_end;
688 state = json_tokener_state_eatws;
689 break;
690 } else if(c == '\\') {
691 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
692 saved_state = json_tokener_state_object_field;
693 state = json_tokener_state_string_escape;
694 break;
695 }
William Dignaziobb492d42013-03-06 12:29:33 -0500696 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000697 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
698 goto out;
699 }
700 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000701 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000702 break;
703
704 case json_tokener_state_object_field_end:
705 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000706 saved_state = json_tokener_state_object_value;
707 state = json_tokener_state_eatws;
708 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000709 tok->err = json_tokener_error_parse_object_key_sep;
710 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000711 }
712 break;
713
714 case json_tokener_state_object_value:
Remi Collet197cb1d2012-11-27 09:01:45 +0100715 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000716 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000717 goto out;
718 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000719 state = json_tokener_state_object_value_add;
720 tok->depth++;
721 json_tokener_reset_level(tok, tok->depth);
722 goto redo_char;
723
724 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000725 json_object_object_add(current, obj_field_name, obj);
726 free(obj_field_name);
727 obj_field_name = NULL;
728 saved_state = json_tokener_state_object_sep;
729 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000730 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000731
732 case json_tokener_state_object_sep:
733 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000734 saved_state = json_tokener_state_finish;
735 state = json_tokener_state_eatws;
736 } else if(c == ',') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500737 saved_state = json_tokener_state_object_field_start_after_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000738 state = json_tokener_state_eatws;
739 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000740 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000741 goto out;
742 }
743 break;
744
745 }
Michael Clark95f55a72009-04-27 08:16:58 +0000746 if (!ADVANCE_CHAR(str, tok))
747 goto out;
748 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000749
750 out:
Remi Collet4039f912013-08-23 13:40:01 +0200751 if (c &&
752 (state == json_tokener_state_finish) &&
753 (tok->depth == 0) &&
754 (tok->flags & JSON_TOKENER_STRICT)) {
755 /* unexpected char after JSON data */
756 tok->err = json_tokener_error_parse_unexpected;
757 }
Michael Clark95f55a72009-04-27 08:16:58 +0000758 if (!c) { /* We hit an eof char (0) */
759 if(state != json_tokener_state_finish &&
760 saved_state != json_tokener_state_finish)
761 tok->err = json_tokener_error_parse_eof;
762 }
763
Remi Colleta01b6592012-12-13 09:47:33 +0100764#ifdef HAVE_SETLOCALE
765 setlocale(LC_NUMERIC, oldlocale);
766 if (oldlocale) free(oldlocale);
767#endif
768
William Dignaziobb492d42013-03-06 12:29:33 -0500769 if (tok->err == json_tokener_success)
Eric Haszlakiewiczd809fa62012-03-31 22:53:43 -0500770 {
771 json_object *ret = json_object_get(current);
772 int ii;
773
774 /* Partially reset, so we parse additional objects on subsequent calls. */
775 for(ii = tok->depth; ii >= 0; ii--)
776 json_tokener_reset_level(tok, ii);
777 return ret;
778 }
779
Michael Clarkdfaf6702007-10-25 02:26:00 +0000780 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000781 json_tokener_errors[tok->err], tok->char_offset);
782 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000783}
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500784
785void json_tokener_set_flags(struct json_tokener *tok, int flags)
786{
787 tok->flags = flags;
788}