blob: 7c596032930d3e9f04c6f2a731a15518160de395 [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Remi Colleta01b6592012-12-13 09:47:33 +010034#ifdef HAVE_LOCALE_H
35#include <locale.h>
36#endif /* HAVE_LOCALE_H */
37
Mateusz Loskota6f39a32012-05-21 23:22:36 +010038#if !HAVE_STRDUP && defined(_MSC_VER)
39 /* MSC has the version as _strdup */
40# define strdup _strdup
41#elif !HAVE_STRDUP
42# error You do not have strdup on your system.
43#endif /* HAVE_STRDUP */
44
Michael Clark837240f2007-03-13 08:26:25 +000045#if !HAVE_STRNCASECMP && defined(_MSC_VER)
46 /* MSC has the version as _strnicmp */
47# define strncasecmp _strnicmp
48#elif !HAVE_STRNCASECMP
49# error You do not have strncasecmp on your system.
50#endif /* HAVE_STRNCASECMP */
51
/* Keyword spellings that the null and boolean parser states compare the
 * accumulated input against. */
static const char *json_null_str = "null";
static const char *json_true_str = "true";
static const char *json_false_str = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000055
/*
 * Human-readable descriptions of the tokener error codes.  The table is
 * indexed by the numeric value of the error code (see
 * json_tokener_error_desc()), so the order of the entries matters.
 *
 * XXX after v0.10 this array will become static:
 */
const char *json_tokener_errors[] = {
  /*  0 */ "success",
  /*  1 */ "continue",
  /*  2 */ "nesting too deep",
  /*  3 */ "unexpected end of data",
  /*  4 */ "unexpected character",
  /*  5 */ "null expected",
  /*  6 */ "boolean expected",
  /*  7 */ "number expected",
  /*  8 */ "array value separator ',' expected",
  /*  9 */ "quoted object property name expected",
  /* 10 */ "object property name separator ':' expected",
  /* 11 */ "object value separator ',' expected",
  /* 12 */ "invalid string sequence",
  /* 13 */ "expected comment",
};
73
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060074const char *json_tokener_error_desc(enum json_tokener_error jerr)
75{
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060076 int jerr_int = (int)jerr;
Even Rouault86dd55a2013-09-08 11:31:38 +020077 if (jerr_int < 0 || jerr_int > (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060078 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
79 return json_tokener_errors[jerr];
80}
81
82enum json_tokener_error json_tokener_get_error(json_tokener *tok)
83{
84 return tok->err;
85}
86
/*
 * Helpers for decoding \uXXXX escape sequences.
 *
 * IS_HIGH_SURROGATE / IS_LOW_SURROGATE look only at the bits selected by
 * the 0xFC00 mask; for a 16-bit value that means 0xD800-0xDBFF and
 * 0xDC00-0xDFFF respectively.
 * DECODE_SURROGATE_PAIR combines the low 10 bits of each half and adds
 * 0x10000 to obtain the code point.
 */
#define IS_HIGH_SURROGATE(cp) (((cp) & 0xFC00) == 0xD800)
#define IS_LOW_SURROGATE(cp) (((cp) & 0xFC00) == 0xDC00)
#define DECODE_SURROGATE_PAIR(high, low) \
  ((((high) & 0x3FF) << 10) + ((low) & 0x3FF) + 0x10000)

/* The three bytes EF BF BD: U+FFFD (replacement character) encoded as UTF-8. */
static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
92
Remi Collet197cb1d2012-11-27 09:01:45 +010093struct json_tokener* json_tokener_new_ex(int depth)
Michael Clarkf0d08882007-03-13 08:26:18 +000094{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000095 struct json_tokener *tok;
96
97 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000098 if (!tok) return NULL;
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060099 tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
Remi Collet197cb1d2012-11-27 09:01:45 +0100100 if (!tok->stack) {
101 free(tok);
102 return NULL;
103 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000104 tok->pb = printbuf_new();
Remi Collet197cb1d2012-11-27 09:01:45 +0100105 tok->max_depth = depth;
Michael Clarka850f8e2007-03-13 08:26:26 +0000106 json_tokener_reset(tok);
107 return tok;
108}
109
Remi Collet197cb1d2012-11-27 09:01:45 +0100110struct json_tokener* json_tokener_new(void)
111{
112 return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
113}
114
Michael Clarka850f8e2007-03-13 08:26:26 +0000115void json_tokener_free(struct json_tokener *tok)
116{
117 json_tokener_reset(tok);
Remi Collet197cb1d2012-11-27 09:01:45 +0100118 if (tok->pb) printbuf_free(tok->pb);
119 if (tok->stack) free(tok->stack);
Michael Clarka850f8e2007-03-13 08:26:26 +0000120 free(tok);
121}
122
123static void json_tokener_reset_level(struct json_tokener *tok, int depth)
124{
125 tok->stack[depth].state = json_tokener_state_eatws;
126 tok->stack[depth].saved_state = json_tokener_state_start;
127 json_object_put(tok->stack[depth].current);
128 tok->stack[depth].current = NULL;
129 free(tok->stack[depth].obj_field_name);
130 tok->stack[depth].obj_field_name = NULL;
131}
132
133void json_tokener_reset(struct json_tokener *tok)
134{
135 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000136 if (!tok)
137 return;
138
Michael Clarka850f8e2007-03-13 08:26:26 +0000139 for(i = tok->depth; i >= 0; i--)
140 json_tokener_reset_level(tok, i);
141 tok->depth = 0;
142 tok->err = json_tokener_success;
143}
144
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000145struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000146{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500147 enum json_tokener_error jerr_ignored;
148 struct json_object* obj;
149 obj = json_tokener_parse_verbose(str, &jerr_ignored);
150 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000151}
152
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000153struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
154{
155 struct json_tokener* tok;
156 struct json_object* obj;
157
158 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500159 if (!tok)
160 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000161 obj = json_tokener_parse_ex(tok, str, -1);
162 *error = tok->err;
163 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500164 if (obj != NULL)
165 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000166 obj = NULL;
167 }
168
169 json_tokener_free(tok);
170 return obj;
171}
172
/*
 * Shorthand for the fields of the stack record at the current depth.
 * Each macro expands to an lvalue and relies on a variable named `tok`
 * being in scope at the point of use.  From here on, these four words
 * cannot be used as ordinary identifiers in this file.
 */
#define state           tok->stack[tok->depth].state
#define saved_state     tok->stack[tok->depth].saved_state
#define current         tok->stack[tok->depth].current
#define obj_field_name  tok->stack[tok->depth].obj_field_name
177
/* Optimization:
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character-by-character.  A large performance boost is
 * achieved by using tighter loops to locally handle units such as
 * comments and strings.  Loops that handle an entire token within
 * their scope also gather entire strings and pass them to
 * printbuf_memappend() in a single call, rather than calling
 * printbuf_memappend() one char at a time.
 *
 * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
 * common to both the main loop and the tighter loops.
 */
190
/* PEEK_CHAR(dest, tok) macro:
 *   Looks at the current input character without consuming it.
 *   - If tok->char_offset has not reached len, stores *str in dest and
 *     evaluates to 1.
 *   - Otherwise evaluates to 0 and sets tok->err: json_tokener_success
 *     when the parser is at depth 0 in the eatws state with finish as
 *     the saved state, json_tokener_continue in every other case.
 *   Implicit inputs: the str and len variables, plus the state and
 *   saved_state names.
 */
#define PEEK_CHAR(dest, tok)                                                  \
  (((tok)->char_offset == len) ?                                              \
   (((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
    (((tok)->err = json_tokener_success), 0)                                  \
    :                                                                         \
    (((tok)->err = json_tokener_continue), 0)                                 \
   ) :                                                                        \
   (((dest) = *str), 1)                                                       \
  )
William Dignaziobb492d42013-03-06 12:29:33 -0500205
/* ADVANCE_CHAR(str, tok) macro:
 *   Increments str and tok->char_offset by one.
 *   For the convenience of existing conditionals the expression
 *   evaluates to the variable c, which is not modified here; callers
 *   treat a zero value as end of input.
 *   Implicit input: the c variable.
 */
#define ADVANCE_CHAR(str, tok) \
  ( ++(str), ((tok)->char_offset)++, c)
213
Brent Miller126ad952009-08-20 06:50:22 +0000214
Michael Clark95f55a72009-04-27 08:16:58 +0000215/* End optimization macro defs */
216
217
Michael Clarka850f8e2007-03-13 08:26:26 +0000218struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000219 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000220{
Michael Clarka850f8e2007-03-13 08:26:26 +0000221 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000222 char c = '\1';
Remi Colleta01b6592012-12-13 09:47:33 +0100223#ifdef HAVE_SETLOCALE
224 char *oldlocale=NULL, *tmplocale;
225
226 tmplocale = setlocale(LC_NUMERIC, NULL);
227 if (tmplocale) oldlocale = strdup(tmplocale);
228 setlocale(LC_NUMERIC, "C");
229#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000230
Michael Clarka850f8e2007-03-13 08:26:26 +0000231 tok->char_offset = 0;
232 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000233
William Dignaziobb492d42013-03-06 12:29:33 -0500234 while (PEEK_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000235
Michael Clarka850f8e2007-03-13 08:26:26 +0000236 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000237 switch(state) {
238
239 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000240 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000241 while (isspace((int)c)) {
William Dignaziobb492d42013-03-06 12:29:33 -0500242 if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
Michael Clark95f55a72009-04-27 08:16:58 +0000243 goto out;
244 }
Remi Collet87fa32d2013-08-21 15:41:40 +0200245 if(c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000246 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000247 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000248 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000249 } else {
250 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000251 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000252 }
253 break;
254
255 case json_tokener_state_start:
256 switch(c) {
257 case '{':
258 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000259 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000260 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000261 break;
262 case '[':
263 state = json_tokener_state_eatws;
264 saved_state = json_tokener_state_array;
265 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000266 break;
267 case 'N':
268 case 'n':
269 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000270 printbuf_reset(tok->pb);
271 tok->st_pos = 0;
272 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000273 case '\'':
Remi Colleta07ef3d2013-08-06 10:41:14 +0200274 if (tok->flags & JSON_TOKENER_STRICT) {
275 /* in STRICT mode only double-quote are allowed */
276 tok->err = json_tokener_error_parse_unexpected;
277 goto out;
278 }
279 case '"':
Michael Clarkf0d08882007-03-13 08:26:18 +0000280 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000281 printbuf_reset(tok->pb);
282 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000283 break;
284 case 'T':
285 case 't':
286 case 'F':
287 case 'f':
288 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000289 printbuf_reset(tok->pb);
290 tok->st_pos = 0;
291 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000292#if defined(__GNUC__)
293 case '0' ... '9':
294#else
295 case '0':
296 case '1':
297 case '2':
298 case '3':
299 case '4':
300 case '5':
301 case '6':
302 case '7':
303 case '8':
304 case '9':
305#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000306 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000307 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000308 printbuf_reset(tok->pb);
309 tok->is_double = 0;
310 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000311 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000312 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000313 goto out;
314 }
315 break;
316
317 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000318 if(tok->depth == 0) goto out;
319 obj = json_object_get(current);
320 json_tokener_reset_level(tok, tok->depth);
321 tok->depth--;
322 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000323
324 case json_tokener_state_null:
Andrea Fauldsbda05402013-11-14 21:13:32 +0000325 {
326 int size;
327 printbuf_memappend_fast(tok->pb, &c, 1);
328 size = json_min(tok->st_pos+1, (int)strlen(json_null_str));
329 if((!(tok->flags & JSON_TOKENER_STRICT) &&
330 strncasecmp(json_null_str, tok->pb->buf, size) == 0)
331 || (strncmp(json_null_str, tok->pb->buf, size) == 0)
332 ) {
333 if(tok->st_pos == (int)strlen(json_null_str)) {
334 current = NULL;
335 saved_state = json_tokener_state_finish;
336 state = json_tokener_state_eatws;
337 goto redo_char;
338 }
339 } else {
340 tok->err = json_tokener_error_parse_null;
341 goto out;
Michael Clarka850f8e2007-03-13 08:26:26 +0000342 }
Andrea Fauldsbda05402013-11-14 21:13:32 +0000343 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000344 }
345 break;
346
347 case json_tokener_state_comment_start:
348 if(c == '*') {
349 state = json_tokener_state_comment;
350 } else if(c == '/') {
351 state = json_tokener_state_comment_eol;
352 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000353 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000354 goto out;
355 }
Michael Clark95f55a72009-04-27 08:16:58 +0000356 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000357 break;
358
359 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000360 {
361 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000362 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000363 while(c != '*') {
William Dignaziobb492d42013-03-06 12:29:33 -0500364 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000365 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
366 goto out;
William Dignaziobb492d42013-03-06 12:29:33 -0500367 }
Michael Clark95f55a72009-04-27 08:16:58 +0000368 }
369 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
370 state = json_tokener_state_comment_end;
371 }
372 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000373
374 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000375 {
376 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000377 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000378 while(c != '\n') {
William Dignaziobb492d42013-03-06 12:29:33 -0500379 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000380 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
381 goto out;
382 }
383 }
384 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000385 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000386 state = json_tokener_state_eatws;
387 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000388 break;
389
390 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000391 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000392 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000393 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000394 state = json_tokener_state_eatws;
395 } else {
396 state = json_tokener_state_comment;
397 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000398 break;
399
400 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000401 {
402 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000403 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000404 while(1) {
405 if(c == tok->quote_char) {
406 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Eric Haszlakiewicz4e4af932012-12-09 16:32:11 -0600407 current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
Michael Clark95f55a72009-04-27 08:16:58 +0000408 saved_state = json_tokener_state_finish;
409 state = json_tokener_state_eatws;
410 break;
411 } else if(c == '\\') {
412 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
413 saved_state = json_tokener_state_string;
414 state = json_tokener_state_string_escape;
415 break;
416 }
William Dignaziobb492d42013-03-06 12:29:33 -0500417 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000418 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
419 goto out;
420 }
421 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000422 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000423 break;
424
425 case json_tokener_state_string_escape:
426 switch(c) {
427 case '"':
428 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000429 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000430 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000431 state = saved_state;
432 break;
433 case 'b':
434 case 'n':
435 case 'r':
436 case 't':
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500437 case 'f':
Michael Clark95f55a72009-04-27 08:16:58 +0000438 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
439 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
440 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
441 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500442 else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000443 state = saved_state;
444 break;
445 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000446 tok->ucs_char = 0;
447 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000448 state = json_tokener_state_escape_unicode;
449 break;
450 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000451 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000452 goto out;
453 }
454 break;
455
456 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000457 {
Brent Miller126ad952009-08-20 06:50:22 +0000458 unsigned int got_hi_surrogate = 0;
459
460 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000461 while(1) {
462 if(strchr(json_hex_chars, c)) {
463 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
464 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000465 unsigned char unescaped_utf[4];
466
467 if (got_hi_surrogate) {
468 if (IS_LOW_SURROGATE(tok->ucs_char)) {
469 /* Recalculate the ucs_char, then fall thru to process normally */
470 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
471 } else {
472 /* Hi surrogate was not followed by a low surrogate */
473 /* Replace the hi and process the rest normally */
474 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
475 }
476 got_hi_surrogate = 0;
477 }
478
Michael Clark95f55a72009-04-27 08:16:58 +0000479 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000480 unescaped_utf[0] = tok->ucs_char;
481 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000482 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000483 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
484 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
485 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
486 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
487 /* Got a high surrogate. Remember it and look for the
488 * the beginning of another sequence, which should be the
489 * low surrogate.
490 */
491 got_hi_surrogate = tok->ucs_char;
492 /* Not at end, and the next two chars should be "\u" */
493 if ((tok->char_offset+1 != len) &&
494 (tok->char_offset+2 != len) &&
495 (str[1] == '\\') &&
496 (str[2] == 'u'))
497 {
William Dignazio32eddd62013-03-06 20:18:14 -0500498 /* Advance through the 16 bit surrogate, and move on to the
499 * next sequence. The next step is to process the following
500 * characters.
501 */
502 if( !ADVANCE_CHAR(str, tok) || !ADVANCE_CHAR(str, tok) ) {
503 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
504 }
Brent Miller126ad952009-08-20 06:50:22 +0000505 /* Advance to the first char of the next sequence and
506 * continue processing with the next sequence.
507 */
William Dignaziobb492d42013-03-06 12:29:33 -0500508 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000509 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
510 goto out;
511 }
512 tok->ucs_char = 0;
513 tok->st_pos = 0;
514 continue; /* other json_tokener_state_escape_unicode */
515 } else {
516 /* Got a high surrogate without another sequence following
517 * it. Put a replacement char in for the hi surrogate
518 * and pretend we finished.
519 */
520 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
521 }
522 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
523 /* Got a low surrogate not preceded by a high */
524 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
525 } else if (tok->ucs_char < 0x10000) {
526 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
527 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
528 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
529 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
530 } else if (tok->ucs_char < 0x110000) {
531 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
532 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
533 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
534 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
535 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000536 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000537 /* Don't know what we got--insert the replacement char */
538 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
539 }
Michael Clark95f55a72009-04-27 08:16:58 +0000540 state = saved_state;
541 break;
542 }
543 } else {
544 tok->err = json_tokener_error_parse_string;
545 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000546 }
William Dignaziobb492d42013-03-06 12:29:33 -0500547 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000548 if (got_hi_surrogate) /* Clean up any pending chars */
549 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000550 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000551 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000552 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000553 }
554 break;
555
556 case json_tokener_state_boolean:
Andrea Fauldsbda05402013-11-14 21:13:32 +0000557 {
558 int size1, size2;
559 printbuf_memappend_fast(tok->pb, &c, 1);
560 size1 = json_min(tok->st_pos+1, (int)strlen(json_true_str));
561 size2 = json_min(tok->st_pos+1, (int)strlen(json_false_str));
562 if((!(tok->flags & JSON_TOKENER_STRICT) &&
563 strncasecmp(json_true_str, tok->pb->buf, size1) == 0)
564 || (strncmp(json_true_str, tok->pb->buf, size1) == 0)
565 ) {
566 if(tok->st_pos == (int)strlen(json_true_str)) {
567 current = json_object_new_boolean(1);
568 saved_state = json_tokener_state_finish;
569 state = json_tokener_state_eatws;
570 goto redo_char;
571 }
572 } else if((!(tok->flags & JSON_TOKENER_STRICT) &&
573 strncasecmp(json_false_str, tok->pb->buf, size2) == 0)
574 || (strncmp(json_false_str, tok->pb->buf, size2) == 0)) {
575 if(tok->st_pos == (int)strlen(json_false_str)) {
576 current = json_object_new_boolean(0);
577 saved_state = json_tokener_state_finish;
578 state = json_tokener_state_eatws;
579 goto redo_char;
580 }
581 } else {
582 tok->err = json_tokener_error_parse_boolean;
583 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000584 }
Andrea Fauldsbda05402013-11-14 21:13:32 +0000585 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000586 }
587 break;
588
589 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000590 {
591 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000592 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000593 int case_len=0;
594 while(c && strchr(json_number_chars, c)) {
595 ++case_len;
Eric Haszlakiewiczf931f612012-04-24 22:17:13 -0500596 if(c == '.' || c == 'e' || c == 'E')
597 tok->is_double = 1;
William Dignaziobb492d42013-03-06 12:29:33 -0500598 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000599 printbuf_memappend_fast(tok->pb, case_start, case_len);
600 goto out;
601 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000602 }
Michael Clark95f55a72009-04-27 08:16:58 +0000603 if (case_len>0)
604 printbuf_memappend_fast(tok->pb, case_start, case_len);
605 }
606 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000607 int64_t num64;
608 double numd;
609 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
Remi Collete9ee4ae2013-06-13 13:40:01 +0200610 if (num64 && tok->pb->buf[0]=='0' && (tok->flags & JSON_TOKENER_STRICT)) {
Eric Haszlakiewiczd032aad2013-06-19 09:14:19 -0500611 /* in strict mode, number must not start with 0 */
Remi Collete9ee4ae2013-06-13 13:40:01 +0200612 tok->err = json_tokener_error_parse_number;
613 goto out;
614 }
ehaszla252669c2010-12-07 18:15:35 +0000615 current = json_object_new_int64(num64);
Eric Haszlakiewicz51993c22013-09-11 20:27:39 -0500616 }
617 else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0)
618 {
619 current = json_object_new_double_s(numd, tok->pb->buf);
Michael Clark95f55a72009-04-27 08:16:58 +0000620 } else {
621 tok->err = json_tokener_error_parse_number;
622 goto out;
623 }
624 saved_state = json_tokener_state_finish;
625 state = json_tokener_state_eatws;
626 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000627 }
628 break;
629
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500630 case json_tokener_state_array_after_sep:
Michael Clarkf0d08882007-03-13 08:26:18 +0000631 case json_tokener_state_array:
632 if(c == ']') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500633 if (state == json_tokener_state_array_after_sep &&
634 (tok->flags & JSON_TOKENER_STRICT))
635 {
636 tok->err = json_tokener_error_parse_unexpected;
637 goto out;
638 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000639 saved_state = json_tokener_state_finish;
640 state = json_tokener_state_eatws;
641 } else {
Remi Collet197cb1d2012-11-27 09:01:45 +0100642 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000643 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000644 goto out;
645 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000646 state = json_tokener_state_array_add;
647 tok->depth++;
648 json_tokener_reset_level(tok, tok->depth);
649 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000650 }
651 break;
652
Michael Clarka850f8e2007-03-13 08:26:26 +0000653 case json_tokener_state_array_add:
654 json_object_array_add(current, obj);
655 saved_state = json_tokener_state_array_sep;
656 state = json_tokener_state_eatws;
657 goto redo_char;
658
Michael Clarkf0d08882007-03-13 08:26:18 +0000659 case json_tokener_state_array_sep:
660 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000661 saved_state = json_tokener_state_finish;
662 state = json_tokener_state_eatws;
663 } else if(c == ',') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500664 saved_state = json_tokener_state_array_after_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000665 state = json_tokener_state_eatws;
666 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000667 tok->err = json_tokener_error_parse_array;
668 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000669 }
670 break;
671
Michael Clarkf0d08882007-03-13 08:26:18 +0000672 case json_tokener_state_object_field_start:
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500673 case json_tokener_state_object_field_start_after_sep:
Michael Clarkf0d08882007-03-13 08:26:18 +0000674 if(c == '}') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500675 if (state == json_tokener_state_object_field_start_after_sep &&
676 (tok->flags & JSON_TOKENER_STRICT))
677 {
678 tok->err = json_tokener_error_parse_unexpected;
679 goto out;
680 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000681 saved_state = json_tokener_state_finish;
682 state = json_tokener_state_eatws;
683 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000684 tok->quote_char = c;
685 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000686 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000687 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000688 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000689 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000690 }
691 break;
692
693 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000694 {
695 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000696 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000697 while(1) {
698 if(c == tok->quote_char) {
699 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
700 obj_field_name = strdup(tok->pb->buf);
701 saved_state = json_tokener_state_object_field_end;
702 state = json_tokener_state_eatws;
703 break;
704 } else if(c == '\\') {
705 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
706 saved_state = json_tokener_state_object_field;
707 state = json_tokener_state_string_escape;
708 break;
709 }
William Dignaziobb492d42013-03-06 12:29:33 -0500710 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000711 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
712 goto out;
713 }
714 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000715 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000716 break;
717
718 case json_tokener_state_object_field_end:
719 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000720 saved_state = json_tokener_state_object_value;
721 state = json_tokener_state_eatws;
722 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000723 tok->err = json_tokener_error_parse_object_key_sep;
724 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000725 }
726 break;
727
728 case json_tokener_state_object_value:
Remi Collet197cb1d2012-11-27 09:01:45 +0100729 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000730 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000731 goto out;
732 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000733 state = json_tokener_state_object_value_add;
734 tok->depth++;
735 json_tokener_reset_level(tok, tok->depth);
736 goto redo_char;
737
738 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000739 json_object_object_add(current, obj_field_name, obj);
740 free(obj_field_name);
741 obj_field_name = NULL;
742 saved_state = json_tokener_state_object_sep;
743 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000744 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000745
746 case json_tokener_state_object_sep:
747 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000748 saved_state = json_tokener_state_finish;
749 state = json_tokener_state_eatws;
750 } else if(c == ',') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500751 saved_state = json_tokener_state_object_field_start_after_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000752 state = json_tokener_state_eatws;
753 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000754 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000755 goto out;
756 }
757 break;
758
759 }
Michael Clark95f55a72009-04-27 08:16:58 +0000760 if (!ADVANCE_CHAR(str, tok))
761 goto out;
762 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000763
764 out:
Remi Collet4039f912013-08-23 13:40:01 +0200765 if (c &&
766 (state == json_tokener_state_finish) &&
767 (tok->depth == 0) &&
768 (tok->flags & JSON_TOKENER_STRICT)) {
769 /* unexpected char after JSON data */
770 tok->err = json_tokener_error_parse_unexpected;
771 }
Michael Clark95f55a72009-04-27 08:16:58 +0000772 if (!c) { /* We hit an eof char (0) */
773 if(state != json_tokener_state_finish &&
774 saved_state != json_tokener_state_finish)
775 tok->err = json_tokener_error_parse_eof;
776 }
777
Remi Colleta01b6592012-12-13 09:47:33 +0100778#ifdef HAVE_SETLOCALE
779 setlocale(LC_NUMERIC, oldlocale);
780 if (oldlocale) free(oldlocale);
781#endif
782
William Dignaziobb492d42013-03-06 12:29:33 -0500783 if (tok->err == json_tokener_success)
Eric Haszlakiewiczd809fa62012-03-31 22:53:43 -0500784 {
785 json_object *ret = json_object_get(current);
786 int ii;
787
788 /* Partially reset, so we parse additional objects on subsequent calls. */
789 for(ii = tok->depth; ii >= 0; ii--)
790 json_tokener_reset_level(tok, ii);
791 return ret;
792 }
793
Michael Clarkdfaf6702007-10-25 02:26:00 +0000794 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000795 json_tokener_errors[tok->err], tok->char_offset);
796 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000797}
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500798
799void json_tokener_set_flags(struct json_tokener *tok, int flags)
800{
801 tok->flags = flags;
802}