blob: 0c800d8f85cfb01d43802b9511cef5f632311612 [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Eric Haszlakiewicz0eedf382014-03-09 16:41:33 -040018#include <math.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000019#include <stdio.h>
20#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000021#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000022#include <ctype.h>
23#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000024#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000025
26#include "bits.h"
27#include "debug.h"
28#include "printbuf.h"
29#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000030#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000031#include "json_object.h"
32#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000033#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000034
Remi Colleta01b6592012-12-13 09:47:33 +010035#ifdef HAVE_LOCALE_H
36#include <locale.h>
37#endif /* HAVE_LOCALE_H */
38
Mateusz Loskota6f39a32012-05-21 23:22:36 +010039#if !HAVE_STRDUP && defined(_MSC_VER)
40 /* MSC has the version as _strdup */
41# define strdup _strdup
42#elif !HAVE_STRDUP
43# error You do not have strdup on your system.
44#endif /* HAVE_STRDUP */
45
Michael Clark837240f2007-03-13 08:26:25 +000046#if !HAVE_STRNCASECMP && defined(_MSC_VER)
47 /* MSC has the version as _strnicmp */
48# define strncasecmp _strnicmp
49#elif !HAVE_STRNCASECMP
50# error You do not have strncasecmp on your system.
51#endif /* HAVE_STRNCASECMP */
52
/*
 * Keyword literals matched by the tokener.  Each *_len constant is the
 * length of the corresponding literal without its terminating NUL.
 */
static const char json_null_str[]  = "null";
static const char json_nan_str[]   = "NaN";
static const char json_true_str[]  = "true";
static const char json_false_str[] = "false";

static const int json_null_str_len  = (int)sizeof(json_null_str) - 1;
static const int json_nan_str_len   = (int)sizeof(json_nan_str) - 1;
static const int json_true_str_len  = (int)sizeof(json_true_str) - 1;
static const int json_false_str_len = (int)sizeof(json_false_str) - 1;
Michael Clarkf0d08882007-03-13 08:26:18 +000061
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060062// XXX after v0.10 this array will become static:
/*
 * Human-readable error descriptions.  json_tokener_error_desc() indexes
 * this table directly with the numeric value of the error code, so the
 * order of the entries is significant.
 */
const char *json_tokener_errors[] = {
  /*  0 */ "success",
  /*  1 */ "continue",
  /*  2 */ "nesting too deep",
  /*  3 */ "unexpected end of data",
  /*  4 */ "unexpected character",
  /*  5 */ "null expected",
  /*  6 */ "boolean expected",
  /*  7 */ "number expected",
  /*  8 */ "array value separator ',' expected",
  /*  9 */ "quoted object property name expected",
  /* 10 */ "object property name separator ':' expected",
  /* 11 */ "object value separator ',' expected",
  /* 12 */ "invalid string sequence",
  /* 13 */ "expected comment",
};
79
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060080const char *json_tokener_error_desc(enum json_tokener_error jerr)
81{
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060082 int jerr_int = (int)jerr;
Eric Haszlakiewicz56df93d2014-02-11 23:16:53 -050083 if (jerr_int < 0 || jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060084 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
85 return json_tokener_errors[jerr];
86}
87
88enum json_tokener_error json_tokener_get_error(json_tokener *tok)
89{
90 return tok->err;
91}
92
/*
 * Helpers for decoding \uXXXX escapes.
 *
 * The surrogate tests look only at bits 10..15 of the value: 0xD800 marks
 * a high (leading) surrogate, 0xDC00 a low (trailing) one.  A pair is
 * combined by taking the low 10 bits of each half and adding 0x10000.
 */
#define IS_HIGH_SURROGATE(uc) (0xD800 == ((uc) & 0xFC00))
#define IS_LOW_SURROGATE(uc)  (0xDC00 == ((uc) & 0xFC00))
#define DECODE_SURROGATE_PAIR(hi,lo) \
  (0x10000 + (((hi) & 0x3FF) << 10) + ((lo) & 0x3FF))

/* Three bytes (EF BF BD) appended in place of an undecodable escape. */
static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
98
Remi Collet197cb1d2012-11-27 09:01:45 +010099struct json_tokener* json_tokener_new_ex(int depth)
Michael Clarkf0d08882007-03-13 08:26:18 +0000100{
Michael Clarkaaec1ef2009-02-25 02:31:32 +0000101 struct json_tokener *tok;
102
103 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +0000104 if (!tok) return NULL;
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600105 tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
Remi Collet197cb1d2012-11-27 09:01:45 +0100106 if (!tok->stack) {
107 free(tok);
108 return NULL;
109 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000110 tok->pb = printbuf_new();
Remi Collet197cb1d2012-11-27 09:01:45 +0100111 tok->max_depth = depth;
Michael Clarka850f8e2007-03-13 08:26:26 +0000112 json_tokener_reset(tok);
113 return tok;
114}
115
Remi Collet197cb1d2012-11-27 09:01:45 +0100116struct json_tokener* json_tokener_new(void)
117{
118 return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
119}
120
Michael Clarka850f8e2007-03-13 08:26:26 +0000121void json_tokener_free(struct json_tokener *tok)
122{
123 json_tokener_reset(tok);
Remi Collet197cb1d2012-11-27 09:01:45 +0100124 if (tok->pb) printbuf_free(tok->pb);
125 if (tok->stack) free(tok->stack);
Michael Clarka850f8e2007-03-13 08:26:26 +0000126 free(tok);
127}
128
129static void json_tokener_reset_level(struct json_tokener *tok, int depth)
130{
131 tok->stack[depth].state = json_tokener_state_eatws;
132 tok->stack[depth].saved_state = json_tokener_state_start;
133 json_object_put(tok->stack[depth].current);
134 tok->stack[depth].current = NULL;
135 free(tok->stack[depth].obj_field_name);
136 tok->stack[depth].obj_field_name = NULL;
137}
138
139void json_tokener_reset(struct json_tokener *tok)
140{
141 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000142 if (!tok)
143 return;
144
Michael Clarka850f8e2007-03-13 08:26:26 +0000145 for(i = tok->depth; i >= 0; i--)
146 json_tokener_reset_level(tok, i);
147 tok->depth = 0;
148 tok->err = json_tokener_success;
149}
150
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000151struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000152{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500153 enum json_tokener_error jerr_ignored;
154 struct json_object* obj;
155 obj = json_tokener_parse_verbose(str, &jerr_ignored);
156 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000157}
158
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000159struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
160{
161 struct json_tokener* tok;
162 struct json_object* obj;
163
164 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500165 if (!tok)
166 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000167 obj = json_tokener_parse_ex(tok, str, -1);
168 *error = tok->err;
169 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500170 if (obj != NULL)
171 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000172 obj = NULL;
173 }
174
175 json_tokener_free(tok);
176 return obj;
177}
178
/*
 * Shorthand for the fields of the stack record at the current depth.
 * Each name expands to an lvalue and relies on a variable named `tok`
 * being in scope at the point of use.
 */
#define state          (tok->stack[tok->depth].state)
#define saved_state    (tok->stack[tok->depth].saved_state)
#define current        (tok->stack[tok->depth].current)
#define obj_field_name (tok->stack[tok->depth].obj_field_name)
183
Michael Clark95f55a72009-04-27 08:16:58 +0000184/* Optimization:
185 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
186 * iterating character-by character. A large performance boost is
187 * achieved by using tighter loops to locally handle units such as
William Dignaziobb492d42013-03-06 12:29:33 -0500188 * comments and strings. Loops that handle an entire token within
189 * their scope also gather entire strings and pass them to
Michael Clark95f55a72009-04-27 08:16:58 +0000190 * printbuf_memappend() in a single call, rather than calling
191 * printbuf_memappend() one char at a time.
192 *
William Dignaziobb492d42013-03-06 12:29:33 -0500193 * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
Michael Clark95f55a72009-04-27 08:16:58 +0000194 * common to both the main loop and the tighter loops.
195 */
196
/* PEEK_CHAR(dest, tok) macro:
 *   Looks at the current character without consuming it.
 *   If input remains, stores *str in dest and evaluates to 1.
 *   If char_offset has reached len, evaluates to 0 and sets tok->err:
 *     json_tokener_success  when at depth 0 in state eatws with saved
 *                           state finish (a complete value was parsed),
 *     json_tokener_continue otherwise (more input is needed).
 *   Implicit inputs: str, len vars (plus state / saved_state)
 */
#define PEEK_CHAR(dest, tok)                                        \
  (((tok)->char_offset != len)                                      \
     ? (((dest) = *str), 1)                                         \
     : (((tok)->err = (((tok)->depth == 0 &&                        \
                        state == json_tokener_state_eatws &&        \
                        saved_state == json_tokener_state_finish)   \
                         ? json_tokener_success                     \
                         : json_tokener_continue)),                 \
        0))
William Dignaziobb492d42013-03-06 12:29:33 -0500211
/* ADVANCE_CHAR(str, tok) macro:
 *   Increments str and tok->char_offset by one.
 *   Evaluates to the value of c (the character that was just consumed),
 *   so a NUL character reads as false in the callers' conditionals.
 *   Implicit inputs: c var
 */
#define ADVANCE_CHAR(str, tok) \
  ((str)++, ++((tok)->char_offset), c)
219
Brent Miller126ad952009-08-20 06:50:22 +0000220
Michael Clark95f55a72009-04-27 08:16:58 +0000221/* End optimization macro defs */
222
223
Michael Clarka850f8e2007-03-13 08:26:26 +0000224struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000225 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000226{
Michael Clarka850f8e2007-03-13 08:26:26 +0000227 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000228 char c = '\1';
Remi Colleta01b6592012-12-13 09:47:33 +0100229#ifdef HAVE_SETLOCALE
230 char *oldlocale=NULL, *tmplocale;
231
232 tmplocale = setlocale(LC_NUMERIC, NULL);
233 if (tmplocale) oldlocale = strdup(tmplocale);
234 setlocale(LC_NUMERIC, "C");
235#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000236
Michael Clarka850f8e2007-03-13 08:26:26 +0000237 tok->char_offset = 0;
238 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000239
William Dignaziobb492d42013-03-06 12:29:33 -0500240 while (PEEK_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000241
Michael Clarka850f8e2007-03-13 08:26:26 +0000242 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000243 switch(state) {
244
245 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000246 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000247 while (isspace((int)c)) {
William Dignaziobb492d42013-03-06 12:29:33 -0500248 if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
Michael Clark95f55a72009-04-27 08:16:58 +0000249 goto out;
250 }
Remi Collet87fa32d2013-08-21 15:41:40 +0200251 if(c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000252 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000253 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000254 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000255 } else {
256 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000257 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000258 }
259 break;
260
261 case json_tokener_state_start:
262 switch(c) {
263 case '{':
264 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000265 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000266 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000267 break;
268 case '[':
269 state = json_tokener_state_eatws;
270 saved_state = json_tokener_state_array;
271 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000272 break;
273 case 'N':
274 case 'n':
Eric Haszlakiewicz0eedf382014-03-09 16:41:33 -0400275 state = json_tokener_state_null; // or NaN
Michael Clarka850f8e2007-03-13 08:26:26 +0000276 printbuf_reset(tok->pb);
277 tok->st_pos = 0;
278 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000279 case '\'':
Remi Colleta07ef3d2013-08-06 10:41:14 +0200280 if (tok->flags & JSON_TOKENER_STRICT) {
281 /* in STRICT mode only double-quote are allowed */
282 tok->err = json_tokener_error_parse_unexpected;
283 goto out;
284 }
285 case '"':
Michael Clarkf0d08882007-03-13 08:26:18 +0000286 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000287 printbuf_reset(tok->pb);
288 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000289 break;
290 case 'T':
291 case 't':
292 case 'F':
293 case 'f':
294 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000295 printbuf_reset(tok->pb);
296 tok->st_pos = 0;
297 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000298#if defined(__GNUC__)
299 case '0' ... '9':
300#else
301 case '0':
302 case '1':
303 case '2':
304 case '3':
305 case '4':
306 case '5':
307 case '6':
308 case '7':
309 case '8':
310 case '9':
311#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000312 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000313 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000314 printbuf_reset(tok->pb);
315 tok->is_double = 0;
316 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000317 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000318 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000319 goto out;
320 }
321 break;
322
323 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000324 if(tok->depth == 0) goto out;
325 obj = json_object_get(current);
326 json_tokener_reset_level(tok, tok->depth);
327 tok->depth--;
328 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000329
330 case json_tokener_state_null:
Andrea Fauldsbda05402013-11-14 21:13:32 +0000331 {
332 int size;
Eric Haszlakiewicz0eedf382014-03-09 16:41:33 -0400333 int size_nan;
Andrea Fauldsbda05402013-11-14 21:13:32 +0000334 printbuf_memappend_fast(tok->pb, &c, 1);
Eric Haszlakiewicz0eedf382014-03-09 16:41:33 -0400335 size = json_min(tok->st_pos+1, json_null_str_len);
336 size_nan = json_min(tok->st_pos+1, json_nan_str_len);
Andrea Fauldsbda05402013-11-14 21:13:32 +0000337 if((!(tok->flags & JSON_TOKENER_STRICT) &&
338 strncasecmp(json_null_str, tok->pb->buf, size) == 0)
339 || (strncmp(json_null_str, tok->pb->buf, size) == 0)
340 ) {
Eric Haszlakiewicz0eedf382014-03-09 16:41:33 -0400341 if (tok->st_pos == json_null_str_len) {
Andrea Fauldsbda05402013-11-14 21:13:32 +0000342 current = NULL;
343 saved_state = json_tokener_state_finish;
344 state = json_tokener_state_eatws;
345 goto redo_char;
346 }
Eric Haszlakiewicz0eedf382014-03-09 16:41:33 -0400347 }
348 else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
349 strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||
350 (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0)
351 )
352 {
353 if (tok->st_pos == json_nan_str_len)
354 {
355 current = json_object_new_double(nan(""));
356 saved_state = json_tokener_state_finish;
357 state = json_tokener_state_eatws;
358 goto redo_char;
359 }
Andrea Fauldsbda05402013-11-14 21:13:32 +0000360 } else {
361 tok->err = json_tokener_error_parse_null;
362 goto out;
Michael Clarka850f8e2007-03-13 08:26:26 +0000363 }
Andrea Fauldsbda05402013-11-14 21:13:32 +0000364 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000365 }
366 break;
367
368 case json_tokener_state_comment_start:
369 if(c == '*') {
370 state = json_tokener_state_comment;
371 } else if(c == '/') {
372 state = json_tokener_state_comment_eol;
373 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000374 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000375 goto out;
376 }
Michael Clark95f55a72009-04-27 08:16:58 +0000377 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000378 break;
379
380 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000381 {
382 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000383 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000384 while(c != '*') {
William Dignaziobb492d42013-03-06 12:29:33 -0500385 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000386 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
387 goto out;
William Dignaziobb492d42013-03-06 12:29:33 -0500388 }
Michael Clark95f55a72009-04-27 08:16:58 +0000389 }
390 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
391 state = json_tokener_state_comment_end;
392 }
393 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000394
395 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000396 {
397 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000398 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000399 while(c != '\n') {
William Dignaziobb492d42013-03-06 12:29:33 -0500400 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000401 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
402 goto out;
403 }
404 }
405 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000406 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000407 state = json_tokener_state_eatws;
408 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000409 break;
410
411 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000412 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000413 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000414 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000415 state = json_tokener_state_eatws;
416 } else {
417 state = json_tokener_state_comment;
418 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000419 break;
420
421 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000422 {
423 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000424 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000425 while(1) {
426 if(c == tok->quote_char) {
427 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Eric Haszlakiewicz4e4af932012-12-09 16:32:11 -0600428 current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
Michael Clark95f55a72009-04-27 08:16:58 +0000429 saved_state = json_tokener_state_finish;
430 state = json_tokener_state_eatws;
431 break;
432 } else if(c == '\\') {
433 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
434 saved_state = json_tokener_state_string;
435 state = json_tokener_state_string_escape;
436 break;
437 }
William Dignaziobb492d42013-03-06 12:29:33 -0500438 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000439 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
440 goto out;
441 }
442 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000443 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000444 break;
445
446 case json_tokener_state_string_escape:
447 switch(c) {
448 case '"':
449 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000450 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000451 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000452 state = saved_state;
453 break;
454 case 'b':
455 case 'n':
456 case 'r':
457 case 't':
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500458 case 'f':
Michael Clark95f55a72009-04-27 08:16:58 +0000459 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
460 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
461 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
462 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500463 else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000464 state = saved_state;
465 break;
466 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000467 tok->ucs_char = 0;
468 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000469 state = json_tokener_state_escape_unicode;
470 break;
471 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000472 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000473 goto out;
474 }
475 break;
476
477 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000478 {
Brent Miller126ad952009-08-20 06:50:22 +0000479 unsigned int got_hi_surrogate = 0;
480
481 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000482 while(1) {
483 if(strchr(json_hex_chars, c)) {
484 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
485 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000486 unsigned char unescaped_utf[4];
487
488 if (got_hi_surrogate) {
489 if (IS_LOW_SURROGATE(tok->ucs_char)) {
490 /* Recalculate the ucs_char, then fall thru to process normally */
491 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
492 } else {
493 /* Hi surrogate was not followed by a low surrogate */
494 /* Replace the hi and process the rest normally */
495 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
496 }
497 got_hi_surrogate = 0;
498 }
499
Michael Clark95f55a72009-04-27 08:16:58 +0000500 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000501 unescaped_utf[0] = tok->ucs_char;
502 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000503 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000504 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
505 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
506 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
507 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
508 /* Got a high surrogate. Remember it and look for the
509 * the beginning of another sequence, which should be the
510 * low surrogate.
511 */
512 got_hi_surrogate = tok->ucs_char;
513 /* Not at end, and the next two chars should be "\u" */
514 if ((tok->char_offset+1 != len) &&
515 (tok->char_offset+2 != len) &&
516 (str[1] == '\\') &&
517 (str[2] == 'u'))
518 {
William Dignazio32eddd62013-03-06 20:18:14 -0500519 /* Advance through the 16 bit surrogate, and move on to the
520 * next sequence. The next step is to process the following
521 * characters.
522 */
523 if( !ADVANCE_CHAR(str, tok) || !ADVANCE_CHAR(str, tok) ) {
524 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
525 }
Brent Miller126ad952009-08-20 06:50:22 +0000526 /* Advance to the first char of the next sequence and
527 * continue processing with the next sequence.
528 */
William Dignaziobb492d42013-03-06 12:29:33 -0500529 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000530 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
531 goto out;
532 }
533 tok->ucs_char = 0;
534 tok->st_pos = 0;
535 continue; /* other json_tokener_state_escape_unicode */
536 } else {
537 /* Got a high surrogate without another sequence following
538 * it. Put a replacement char in for the hi surrogate
539 * and pretend we finished.
540 */
541 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
542 }
543 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
544 /* Got a low surrogate not preceded by a high */
545 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
546 } else if (tok->ucs_char < 0x10000) {
547 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
548 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
549 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
550 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
551 } else if (tok->ucs_char < 0x110000) {
552 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
553 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
554 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
555 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
556 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000557 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000558 /* Don't know what we got--insert the replacement char */
559 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
560 }
Michael Clark95f55a72009-04-27 08:16:58 +0000561 state = saved_state;
562 break;
563 }
564 } else {
565 tok->err = json_tokener_error_parse_string;
566 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000567 }
William Dignaziobb492d42013-03-06 12:29:33 -0500568 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000569 if (got_hi_surrogate) /* Clean up any pending chars */
570 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000571 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000572 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000573 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000574 }
575 break;
576
577 case json_tokener_state_boolean:
Andrea Fauldsbda05402013-11-14 21:13:32 +0000578 {
579 int size1, size2;
580 printbuf_memappend_fast(tok->pb, &c, 1);
Eric Haszlakiewicz0eedf382014-03-09 16:41:33 -0400581 size1 = json_min(tok->st_pos+1, json_true_str_len);
582 size2 = json_min(tok->st_pos+1, json_false_str_len);
Andrea Fauldsbda05402013-11-14 21:13:32 +0000583 if((!(tok->flags & JSON_TOKENER_STRICT) &&
584 strncasecmp(json_true_str, tok->pb->buf, size1) == 0)
585 || (strncmp(json_true_str, tok->pb->buf, size1) == 0)
586 ) {
Eric Haszlakiewicz0eedf382014-03-09 16:41:33 -0400587 if(tok->st_pos == json_true_str_len) {
Andrea Fauldsbda05402013-11-14 21:13:32 +0000588 current = json_object_new_boolean(1);
589 saved_state = json_tokener_state_finish;
590 state = json_tokener_state_eatws;
591 goto redo_char;
592 }
593 } else if((!(tok->flags & JSON_TOKENER_STRICT) &&
594 strncasecmp(json_false_str, tok->pb->buf, size2) == 0)
595 || (strncmp(json_false_str, tok->pb->buf, size2) == 0)) {
Eric Haszlakiewicz0eedf382014-03-09 16:41:33 -0400596 if(tok->st_pos == json_false_str_len) {
Andrea Fauldsbda05402013-11-14 21:13:32 +0000597 current = json_object_new_boolean(0);
598 saved_state = json_tokener_state_finish;
599 state = json_tokener_state_eatws;
600 goto redo_char;
601 }
602 } else {
603 tok->err = json_tokener_error_parse_boolean;
604 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000605 }
Andrea Fauldsbda05402013-11-14 21:13:32 +0000606 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000607 }
608 break;
609
610 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000611 {
612 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000613 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000614 int case_len=0;
615 while(c && strchr(json_number_chars, c)) {
616 ++case_len;
Eric Haszlakiewiczf931f612012-04-24 22:17:13 -0500617 if(c == '.' || c == 'e' || c == 'E')
618 tok->is_double = 1;
William Dignaziobb492d42013-03-06 12:29:33 -0500619 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000620 printbuf_memappend_fast(tok->pb, case_start, case_len);
621 goto out;
622 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000623 }
Michael Clark95f55a72009-04-27 08:16:58 +0000624 if (case_len>0)
625 printbuf_memappend_fast(tok->pb, case_start, case_len);
626 }
627 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000628 int64_t num64;
629 double numd;
630 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
Remi Collete9ee4ae2013-06-13 13:40:01 +0200631 if (num64 && tok->pb->buf[0]=='0' && (tok->flags & JSON_TOKENER_STRICT)) {
Eric Haszlakiewiczd032aad2013-06-19 09:14:19 -0500632 /* in strict mode, number must not start with 0 */
Remi Collete9ee4ae2013-06-13 13:40:01 +0200633 tok->err = json_tokener_error_parse_number;
634 goto out;
635 }
ehaszla252669c2010-12-07 18:15:35 +0000636 current = json_object_new_int64(num64);
Eric Haszlakiewicz51993c22013-09-11 20:27:39 -0500637 }
638 else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0)
639 {
640 current = json_object_new_double_s(numd, tok->pb->buf);
Michael Clark95f55a72009-04-27 08:16:58 +0000641 } else {
642 tok->err = json_tokener_error_parse_number;
643 goto out;
644 }
645 saved_state = json_tokener_state_finish;
646 state = json_tokener_state_eatws;
647 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000648 }
649 break;
650
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500651 case json_tokener_state_array_after_sep:
Michael Clarkf0d08882007-03-13 08:26:18 +0000652 case json_tokener_state_array:
653 if(c == ']') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500654 if (state == json_tokener_state_array_after_sep &&
655 (tok->flags & JSON_TOKENER_STRICT))
656 {
657 tok->err = json_tokener_error_parse_unexpected;
658 goto out;
659 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000660 saved_state = json_tokener_state_finish;
661 state = json_tokener_state_eatws;
662 } else {
Remi Collet197cb1d2012-11-27 09:01:45 +0100663 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000664 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000665 goto out;
666 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000667 state = json_tokener_state_array_add;
668 tok->depth++;
669 json_tokener_reset_level(tok, tok->depth);
670 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000671 }
672 break;
673
Michael Clarka850f8e2007-03-13 08:26:26 +0000674 case json_tokener_state_array_add:
675 json_object_array_add(current, obj);
676 saved_state = json_tokener_state_array_sep;
677 state = json_tokener_state_eatws;
678 goto redo_char;
679
Michael Clarkf0d08882007-03-13 08:26:18 +0000680 case json_tokener_state_array_sep:
681 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000682 saved_state = json_tokener_state_finish;
683 state = json_tokener_state_eatws;
684 } else if(c == ',') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500685 saved_state = json_tokener_state_array_after_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000686 state = json_tokener_state_eatws;
687 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000688 tok->err = json_tokener_error_parse_array;
689 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000690 }
691 break;
692
Michael Clarkf0d08882007-03-13 08:26:18 +0000693 case json_tokener_state_object_field_start:
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500694 case json_tokener_state_object_field_start_after_sep:
Michael Clarkf0d08882007-03-13 08:26:18 +0000695 if(c == '}') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500696 if (state == json_tokener_state_object_field_start_after_sep &&
697 (tok->flags & JSON_TOKENER_STRICT))
698 {
699 tok->err = json_tokener_error_parse_unexpected;
700 goto out;
701 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000702 saved_state = json_tokener_state_finish;
703 state = json_tokener_state_eatws;
704 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000705 tok->quote_char = c;
706 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000707 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000708 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000709 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000710 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000711 }
712 break;
713
714 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000715 {
716 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000717 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000718 while(1) {
719 if(c == tok->quote_char) {
720 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
721 obj_field_name = strdup(tok->pb->buf);
722 saved_state = json_tokener_state_object_field_end;
723 state = json_tokener_state_eatws;
724 break;
725 } else if(c == '\\') {
726 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
727 saved_state = json_tokener_state_object_field;
728 state = json_tokener_state_string_escape;
729 break;
730 }
William Dignaziobb492d42013-03-06 12:29:33 -0500731 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000732 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
733 goto out;
734 }
735 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000736 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000737 break;
738
739 case json_tokener_state_object_field_end:
740 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000741 saved_state = json_tokener_state_object_value;
742 state = json_tokener_state_eatws;
743 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000744 tok->err = json_tokener_error_parse_object_key_sep;
745 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000746 }
747 break;
748
749 case json_tokener_state_object_value:
Remi Collet197cb1d2012-11-27 09:01:45 +0100750 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000751 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000752 goto out;
753 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000754 state = json_tokener_state_object_value_add;
755 tok->depth++;
756 json_tokener_reset_level(tok, tok->depth);
757 goto redo_char;
758
759 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000760 json_object_object_add(current, obj_field_name, obj);
761 free(obj_field_name);
762 obj_field_name = NULL;
763 saved_state = json_tokener_state_object_sep;
764 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000765 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000766
767 case json_tokener_state_object_sep:
768 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000769 saved_state = json_tokener_state_finish;
770 state = json_tokener_state_eatws;
771 } else if(c == ',') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500772 saved_state = json_tokener_state_object_field_start_after_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000773 state = json_tokener_state_eatws;
774 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000775 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000776 goto out;
777 }
778 break;
779
780 }
Michael Clark95f55a72009-04-27 08:16:58 +0000781 if (!ADVANCE_CHAR(str, tok))
782 goto out;
783 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000784
785 out:
Remi Collet4039f912013-08-23 13:40:01 +0200786 if (c &&
787 (state == json_tokener_state_finish) &&
788 (tok->depth == 0) &&
789 (tok->flags & JSON_TOKENER_STRICT)) {
790 /* unexpected char after JSON data */
791 tok->err = json_tokener_error_parse_unexpected;
792 }
Michael Clark95f55a72009-04-27 08:16:58 +0000793 if (!c) { /* We hit an eof char (0) */
794 if(state != json_tokener_state_finish &&
795 saved_state != json_tokener_state_finish)
796 tok->err = json_tokener_error_parse_eof;
797 }
798
Remi Colleta01b6592012-12-13 09:47:33 +0100799#ifdef HAVE_SETLOCALE
800 setlocale(LC_NUMERIC, oldlocale);
801 if (oldlocale) free(oldlocale);
802#endif
803
William Dignaziobb492d42013-03-06 12:29:33 -0500804 if (tok->err == json_tokener_success)
Eric Haszlakiewiczd809fa62012-03-31 22:53:43 -0500805 {
806 json_object *ret = json_object_get(current);
807 int ii;
808
809 /* Partially reset, so we parse additional objects on subsequent calls. */
810 for(ii = tok->depth; ii >= 0; ii--)
811 json_tokener_reset_level(tok, ii);
812 return ret;
813 }
814
Michael Clarkdfaf6702007-10-25 02:26:00 +0000815 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000816 json_tokener_errors[tok->err], tok->char_offset);
817 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000818}
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500819
820void json_tokener_set_flags(struct json_tokener *tok, int flags)
821{
822 tok->flags = flags;
823}