/*
 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
 *
 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
 * Michael Clark <michael@metaparadigm.com>
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the MIT license. See COPYING for details.
 *
 *
 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
 * The copyrights to the contents of this file are licensed under the MIT License
 * (http://www.opensource.org/licenses/mit-license.php)
 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Remi Colleta01b6592012-12-13 09:47:33 +010034#ifdef HAVE_LOCALE_H
35#include <locale.h>
36#endif /* HAVE_LOCALE_H */
37
Mateusz Loskota6f39a32012-05-21 23:22:36 +010038#if !HAVE_STRDUP && defined(_MSC_VER)
39 /* MSC has the version as _strdup */
40# define strdup _strdup
41#elif !HAVE_STRDUP
42# error You do not have strdup on your system.
43#endif /* HAVE_STRDUP */
44
Michael Clark837240f2007-03-13 08:26:25 +000045#if !HAVE_STRNCASECMP && defined(_MSC_VER)
46 /* MSC has the version as _strnicmp */
47# define strncasecmp _strnicmp
48#elif !HAVE_STRNCASECMP
49# error You do not have strncasecmp on your system.
50#endif /* HAVE_STRNCASECMP */
51
/* Literal spellings that the tokener compares input against with
 * strncasecmp() when reading null / boolean values. */
static const char *json_null_str  = "null";
static const char *json_true_str  = "true";
static const char *json_false_str = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000055
/* Human-readable error messages.  json_tokener_error_desc() indexes this
 * table with the numeric value of an enum json_tokener_error.
 * XXX after v0.10 this array will become static. */
const char *json_tokener_errors[] = {
  /*  0 */ "success",
  /*  1 */ "continue",
  /*  2 */ "nesting too deep",
  /*  3 */ "unexpected end of data",
  /*  4 */ "unexpected character",
  /*  5 */ "null expected",
  /*  6 */ "boolean expected",
  /*  7 */ "number expected",
  /*  8 */ "array value separator ',' expected",
  /*  9 */ "quoted object property name expected",
  /* 10 */ "object property name separator ':' expected",
  /* 11 */ "object value separator ',' expected",
  /* 12 */ "invalid string sequence",
  /* 13 */ "expected comment",
};
73
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060074const char *json_tokener_error_desc(enum json_tokener_error jerr)
75{
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060076 int jerr_int = (int)jerr;
77 if (jerr_int < 0 || jerr_int > (int)sizeof(json_tokener_errors))
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060078 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
79 return json_tokener_errors[jerr];
80}
81
82enum json_tokener_error json_tokener_get_error(json_tokener *tok)
83{
84 return tok->err;
85}
86
/* Helpers for decoding \uXXXX escapes, including UTF-16 surrogate pairs */

/* True when the top six bits of the 16-bit value are 110110 (0xD800..0xDBFF) */
#define IS_HIGH_SURROGATE(uc) (0xD800 == ((uc) & 0xFC00))
/* True when the top six bits of the 16-bit value are 110111 (0xDC00..0xDFFF) */
#define IS_LOW_SURROGATE(uc) (0xDC00 == ((uc) & 0xFC00))
/* Combine the low 10 bits of each half and add the 0x10000 offset */
#define DECODE_SURROGATE_PAIR(hi,lo) (0x10000 + ((lo) & 0x3FF) + (((hi) & 0x3FF) << 10))
/* Three bytes 0xEF 0xBF 0xBD, appended wherever an escape cannot be decoded */
static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
92
Remi Collet197cb1d2012-11-27 09:01:45 +010093struct json_tokener* json_tokener_new_ex(int depth)
Michael Clarkf0d08882007-03-13 08:26:18 +000094{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000095 struct json_tokener *tok;
96
97 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000098 if (!tok) return NULL;
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060099 tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
Remi Collet197cb1d2012-11-27 09:01:45 +0100100 if (!tok->stack) {
101 free(tok);
102 return NULL;
103 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000104 tok->pb = printbuf_new();
Remi Collet197cb1d2012-11-27 09:01:45 +0100105 tok->max_depth = depth;
Michael Clarka850f8e2007-03-13 08:26:26 +0000106 json_tokener_reset(tok);
107 return tok;
108}
109
Remi Collet197cb1d2012-11-27 09:01:45 +0100110struct json_tokener* json_tokener_new(void)
111{
112 return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
113}
114
Michael Clarka850f8e2007-03-13 08:26:26 +0000115void json_tokener_free(struct json_tokener *tok)
116{
117 json_tokener_reset(tok);
Remi Collet197cb1d2012-11-27 09:01:45 +0100118 if (tok->pb) printbuf_free(tok->pb);
119 if (tok->stack) free(tok->stack);
Michael Clarka850f8e2007-03-13 08:26:26 +0000120 free(tok);
121}
122
123static void json_tokener_reset_level(struct json_tokener *tok, int depth)
124{
125 tok->stack[depth].state = json_tokener_state_eatws;
126 tok->stack[depth].saved_state = json_tokener_state_start;
127 json_object_put(tok->stack[depth].current);
128 tok->stack[depth].current = NULL;
129 free(tok->stack[depth].obj_field_name);
130 tok->stack[depth].obj_field_name = NULL;
131}
132
133void json_tokener_reset(struct json_tokener *tok)
134{
135 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000136 if (!tok)
137 return;
138
Michael Clarka850f8e2007-03-13 08:26:26 +0000139 for(i = tok->depth; i >= 0; i--)
140 json_tokener_reset_level(tok, i);
141 tok->depth = 0;
142 tok->err = json_tokener_success;
143}
144
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000145struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000146{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500147 enum json_tokener_error jerr_ignored;
148 struct json_object* obj;
149 obj = json_tokener_parse_verbose(str, &jerr_ignored);
150 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000151}
152
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000153struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
154{
155 struct json_tokener* tok;
156 struct json_object* obj;
157
158 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500159 if (!tok)
160 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000161 obj = json_tokener_parse_ex(tok, str, -1);
162 *error = tok->err;
163 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500164 if (obj != NULL)
165 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000166 obj = NULL;
167 }
168
169 json_tokener_free(tok);
170 return obj;
171}
172
Michael Clarka850f8e2007-03-13 08:26:26 +0000173
#if !HAVE_STRNDUP
/* CAW: compliant version of strndup()
 *
 * Duplicate at most n bytes of str into freshly malloc()ed storage and
 * NUL-terminate the copy.
 *
 * str: source bytes; need not be NUL-terminated as long as at least n
 *      bytes are readable.  NULL yields NULL.
 * n:   maximum number of bytes to copy.
 * Returns the new string (caller frees it), or NULL if str is NULL or
 * the allocation fails.
 */
char* strndup(const char* str, size_t n)
{
  size_t len = 0;
  char *copy;

  if (!str)
    return NULL;

  /* Bounded scan: never look beyond str[n-1], so an input that has no
   * terminator within the first n bytes is not over-read (strlen() would
   * keep reading until it happened to find a NUL). */
  while (len < n && str[len] != '\0')
    len++;

  copy = malloc(len + 1);
  if (!copy)
    return NULL;

  memcpy(copy, str, len);
  copy[len] = '\0';
  return copy;
}
#endif
194
Michael Clarka850f8e2007-03-13 08:26:26 +0000195
/* Shorthand for the fields of the stack record at the current depth.
 * Each one expands to an lvalue and relies on a variable named `tok`
 * being in scope at the point of use. */
#define state           (tok->stack[tok->depth].state)
#define saved_state     (tok->stack[tok->depth].saved_state)
#define current         (tok->stack[tok->depth].current)
#define obj_field_name  (tok->stack[tok->depth].obj_field_name)
200
/* Optimization:
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character-by-character.  A large performance boost is
 * achieved by using tighter loops to locally handle units such as
 * comments and strings.  Loops that handle an entire token within
 * their scope also gather entire strings and pass them to
 * printbuf_memappend() in a single call, rather than calling
 * printbuf_memappend() one char at a time.
 *
 * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
 * common to both the main loop and the tighter loops.
 */
213
/* PEEK_CHAR(dest, tok) macro:
 *   Looks at the current input char without consuming it.
 *   If input remains: stores *str in dest and evaluates to 1.
 *   If tok->char_offset has reached len: evaluates to 0 and sets tok->err
 *   to json_tokener_success when the parser is at depth 0 in the eatws
 *   state with finish as the saved state, or to json_tokener_continue
 *   otherwise.
 *   Implicit inputs: str, len vars (and the state / saved_state macros)
 */
#define PEEK_CHAR(dest, tok)                                        \
  (((tok)->char_offset != len) ?                                    \
    (((dest) = *str), 1)                                            \
  :                                                                 \
    (((tok)->err =                                                  \
        (((tok)->depth == 0 &&                                      \
          state == json_tokener_state_eatws &&                      \
          saved_state == json_tokener_state_finish) ?               \
            json_tokener_success : json_tokener_continue)), 0)      \
  )
William Dignaziobb492d42013-03-06 12:29:33 -0500228
/* ADVANCE_CHAR(str, tok) macro:
 *   Increments str and tok->char_offset by one.
 *   For the convenience of existing conditionals the expression evaluates
 *   to the value of c, which is not modified here (so it is 0 when the
 *   char that was just stepped over is the terminating NUL).
 *   Implicit inputs: c var
 */
#define ADVANCE_CHAR(str, tok) \
  ( ++(str), ++((tok)->char_offset), c )
236
Brent Miller126ad952009-08-20 06:50:22 +0000237
Michael Clark95f55a72009-04-27 08:16:58 +0000238/* End optimization macro defs */
239
240
Michael Clarka850f8e2007-03-13 08:26:26 +0000241struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000242 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000243{
Michael Clarka850f8e2007-03-13 08:26:26 +0000244 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000245 char c = '\1';
Remi Colleta01b6592012-12-13 09:47:33 +0100246#ifdef HAVE_SETLOCALE
247 char *oldlocale=NULL, *tmplocale;
248
249 tmplocale = setlocale(LC_NUMERIC, NULL);
250 if (tmplocale) oldlocale = strdup(tmplocale);
251 setlocale(LC_NUMERIC, "C");
252#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000253
Michael Clarka850f8e2007-03-13 08:26:26 +0000254 tok->char_offset = 0;
255 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000256
William Dignaziobb492d42013-03-06 12:29:33 -0500257 while (PEEK_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000258
Michael Clarka850f8e2007-03-13 08:26:26 +0000259 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000260 switch(state) {
261
262 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000263 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000264 while (isspace((int)c)) {
William Dignaziobb492d42013-03-06 12:29:33 -0500265 if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
Michael Clark95f55a72009-04-27 08:16:58 +0000266 goto out;
267 }
Remi Collet87fa32d2013-08-21 15:41:40 +0200268 if(c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000269 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000270 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000271 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000272 } else {
273 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000274 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000275 }
276 break;
277
278 case json_tokener_state_start:
279 switch(c) {
280 case '{':
281 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000282 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000283 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000284 break;
285 case '[':
286 state = json_tokener_state_eatws;
287 saved_state = json_tokener_state_array;
288 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000289 break;
290 case 'N':
291 case 'n':
292 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000293 printbuf_reset(tok->pb);
294 tok->st_pos = 0;
295 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000296 case '\'':
Remi Colleta07ef3d2013-08-06 10:41:14 +0200297 if (tok->flags & JSON_TOKENER_STRICT) {
298 /* in STRICT mode only double-quote are allowed */
299 tok->err = json_tokener_error_parse_unexpected;
300 goto out;
301 }
302 case '"':
Michael Clarkf0d08882007-03-13 08:26:18 +0000303 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000304 printbuf_reset(tok->pb);
305 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000306 break;
307 case 'T':
308 case 't':
309 case 'F':
310 case 'f':
311 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000312 printbuf_reset(tok->pb);
313 tok->st_pos = 0;
314 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000315#if defined(__GNUC__)
316 case '0' ... '9':
317#else
318 case '0':
319 case '1':
320 case '2':
321 case '3':
322 case '4':
323 case '5':
324 case '6':
325 case '7':
326 case '8':
327 case '9':
328#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000329 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000330 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000331 printbuf_reset(tok->pb);
332 tok->is_double = 0;
333 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000334 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000335 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000336 goto out;
337 }
338 break;
339
340 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000341 if(tok->depth == 0) goto out;
342 obj = json_object_get(current);
343 json_tokener_reset_level(tok, tok->depth);
344 tok->depth--;
345 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000346
347 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000348 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000349 if(strncasecmp(json_null_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600350 json_min(tok->st_pos+1, (int)strlen(json_null_str))) == 0) {
351 if(tok->st_pos == (int)strlen(json_null_str)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000352 current = NULL;
353 saved_state = json_tokener_state_finish;
354 state = json_tokener_state_eatws;
355 goto redo_char;
356 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000357 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000358 tok->err = json_tokener_error_parse_null;
359 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000360 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000361 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000362 break;
363
364 case json_tokener_state_comment_start:
365 if(c == '*') {
366 state = json_tokener_state_comment;
367 } else if(c == '/') {
368 state = json_tokener_state_comment_eol;
369 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000370 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000371 goto out;
372 }
Michael Clark95f55a72009-04-27 08:16:58 +0000373 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000374 break;
375
376 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000377 {
378 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000379 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000380 while(c != '*') {
William Dignaziobb492d42013-03-06 12:29:33 -0500381 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000382 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
383 goto out;
William Dignaziobb492d42013-03-06 12:29:33 -0500384 }
Michael Clark95f55a72009-04-27 08:16:58 +0000385 }
386 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
387 state = json_tokener_state_comment_end;
388 }
389 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000390
391 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000392 {
393 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000394 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000395 while(c != '\n') {
William Dignaziobb492d42013-03-06 12:29:33 -0500396 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000397 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
398 goto out;
399 }
400 }
401 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000402 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000403 state = json_tokener_state_eatws;
404 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000405 break;
406
407 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000408 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000409 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000410 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000411 state = json_tokener_state_eatws;
412 } else {
413 state = json_tokener_state_comment;
414 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000415 break;
416
417 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000418 {
419 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000420 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000421 while(1) {
422 if(c == tok->quote_char) {
423 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Eric Haszlakiewicz4e4af932012-12-09 16:32:11 -0600424 current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
Michael Clark95f55a72009-04-27 08:16:58 +0000425 saved_state = json_tokener_state_finish;
426 state = json_tokener_state_eatws;
427 break;
428 } else if(c == '\\') {
429 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
430 saved_state = json_tokener_state_string;
431 state = json_tokener_state_string_escape;
432 break;
433 }
William Dignaziobb492d42013-03-06 12:29:33 -0500434 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000435 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
436 goto out;
437 }
438 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000439 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000440 break;
441
442 case json_tokener_state_string_escape:
443 switch(c) {
444 case '"':
445 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000446 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000447 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000448 state = saved_state;
449 break;
450 case 'b':
451 case 'n':
452 case 'r':
453 case 't':
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500454 case 'f':
Michael Clark95f55a72009-04-27 08:16:58 +0000455 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
456 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
457 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
458 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500459 else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000460 state = saved_state;
461 break;
462 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000463 tok->ucs_char = 0;
464 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000465 state = json_tokener_state_escape_unicode;
466 break;
467 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000468 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000469 goto out;
470 }
471 break;
472
473 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000474 {
Brent Miller126ad952009-08-20 06:50:22 +0000475 unsigned int got_hi_surrogate = 0;
476
477 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000478 while(1) {
479 if(strchr(json_hex_chars, c)) {
480 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
481 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000482 unsigned char unescaped_utf[4];
483
484 if (got_hi_surrogate) {
485 if (IS_LOW_SURROGATE(tok->ucs_char)) {
486 /* Recalculate the ucs_char, then fall thru to process normally */
487 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
488 } else {
489 /* Hi surrogate was not followed by a low surrogate */
490 /* Replace the hi and process the rest normally */
491 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
492 }
493 got_hi_surrogate = 0;
494 }
495
Michael Clark95f55a72009-04-27 08:16:58 +0000496 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000497 unescaped_utf[0] = tok->ucs_char;
498 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000499 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000500 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
501 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
502 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
503 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
504 /* Got a high surrogate. Remember it and look for the
505 * the beginning of another sequence, which should be the
506 * low surrogate.
507 */
508 got_hi_surrogate = tok->ucs_char;
509 /* Not at end, and the next two chars should be "\u" */
510 if ((tok->char_offset+1 != len) &&
511 (tok->char_offset+2 != len) &&
512 (str[1] == '\\') &&
513 (str[2] == 'u'))
514 {
William Dignazio32eddd62013-03-06 20:18:14 -0500515 /* Advance through the 16 bit surrogate, and move on to the
516 * next sequence. The next step is to process the following
517 * characters.
518 */
519 if( !ADVANCE_CHAR(str, tok) || !ADVANCE_CHAR(str, tok) ) {
520 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
521 }
Brent Miller126ad952009-08-20 06:50:22 +0000522 /* Advance to the first char of the next sequence and
523 * continue processing with the next sequence.
524 */
William Dignaziobb492d42013-03-06 12:29:33 -0500525 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000526 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
527 goto out;
528 }
529 tok->ucs_char = 0;
530 tok->st_pos = 0;
531 continue; /* other json_tokener_state_escape_unicode */
532 } else {
533 /* Got a high surrogate without another sequence following
534 * it. Put a replacement char in for the hi surrogate
535 * and pretend we finished.
536 */
537 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
538 }
539 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
540 /* Got a low surrogate not preceded by a high */
541 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
542 } else if (tok->ucs_char < 0x10000) {
543 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
544 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
545 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
546 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
547 } else if (tok->ucs_char < 0x110000) {
548 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
549 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
550 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
551 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
552 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000553 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000554 /* Don't know what we got--insert the replacement char */
555 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
556 }
Michael Clark95f55a72009-04-27 08:16:58 +0000557 state = saved_state;
558 break;
559 }
560 } else {
561 tok->err = json_tokener_error_parse_string;
562 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000563 }
William Dignaziobb492d42013-03-06 12:29:33 -0500564 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000565 if (got_hi_surrogate) /* Clean up any pending chars */
566 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000567 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000568 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000569 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000570 }
571 break;
572
573 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000574 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000575 if(strncasecmp(json_true_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600576 json_min(tok->st_pos+1, (int)strlen(json_true_str))) == 0) {
577 if(tok->st_pos == (int)strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000578 current = json_object_new_boolean(1);
579 saved_state = json_tokener_state_finish;
580 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000581 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000582 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000583 } else if(strncasecmp(json_false_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600584 json_min(tok->st_pos+1, (int)strlen(json_false_str))) == 0) {
585 if(tok->st_pos == (int)strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000586 current = json_object_new_boolean(0);
587 saved_state = json_tokener_state_finish;
588 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000589 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000590 }
591 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000592 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000593 goto out;
594 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000595 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000596 break;
597
598 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000599 {
600 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000601 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000602 int case_len=0;
603 while(c && strchr(json_number_chars, c)) {
604 ++case_len;
Eric Haszlakiewiczf931f612012-04-24 22:17:13 -0500605 if(c == '.' || c == 'e' || c == 'E')
606 tok->is_double = 1;
William Dignaziobb492d42013-03-06 12:29:33 -0500607 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000608 printbuf_memappend_fast(tok->pb, case_start, case_len);
609 goto out;
610 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000611 }
Michael Clark95f55a72009-04-27 08:16:58 +0000612 if (case_len>0)
613 printbuf_memappend_fast(tok->pb, case_start, case_len);
614 }
615 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000616 int64_t num64;
617 double numd;
618 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
Remi Collete9ee4ae2013-06-13 13:40:01 +0200619 if (num64 && tok->pb->buf[0]=='0' && (tok->flags & JSON_TOKENER_STRICT)) {
Eric Haszlakiewiczd032aad2013-06-19 09:14:19 -0500620 /* in strict mode, number must not start with 0 */
Remi Collete9ee4ae2013-06-13 13:40:01 +0200621 tok->err = json_tokener_error_parse_number;
622 goto out;
623 }
ehaszla252669c2010-12-07 18:15:35 +0000624 current = json_object_new_int64(num64);
Remi Collet16a4a322012-11-27 11:06:49 +0100625 } else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0) {
Michael Clark95f55a72009-04-27 08:16:58 +0000626 current = json_object_new_double(numd);
627 } else {
628 tok->err = json_tokener_error_parse_number;
629 goto out;
630 }
631 saved_state = json_tokener_state_finish;
632 state = json_tokener_state_eatws;
633 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000634 }
635 break;
636
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500637 case json_tokener_state_array_after_sep:
Michael Clarkf0d08882007-03-13 08:26:18 +0000638 case json_tokener_state_array:
639 if(c == ']') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500640 if (state == json_tokener_state_array_after_sep &&
641 (tok->flags & JSON_TOKENER_STRICT))
642 {
643 tok->err = json_tokener_error_parse_unexpected;
644 goto out;
645 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000646 saved_state = json_tokener_state_finish;
647 state = json_tokener_state_eatws;
648 } else {
Remi Collet197cb1d2012-11-27 09:01:45 +0100649 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000650 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000651 goto out;
652 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000653 state = json_tokener_state_array_add;
654 tok->depth++;
655 json_tokener_reset_level(tok, tok->depth);
656 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000657 }
658 break;
659
Michael Clarka850f8e2007-03-13 08:26:26 +0000660 case json_tokener_state_array_add:
661 json_object_array_add(current, obj);
662 saved_state = json_tokener_state_array_sep;
663 state = json_tokener_state_eatws;
664 goto redo_char;
665
Michael Clarkf0d08882007-03-13 08:26:18 +0000666 case json_tokener_state_array_sep:
667 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000668 saved_state = json_tokener_state_finish;
669 state = json_tokener_state_eatws;
670 } else if(c == ',') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500671 saved_state = json_tokener_state_array_after_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000672 state = json_tokener_state_eatws;
673 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000674 tok->err = json_tokener_error_parse_array;
675 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000676 }
677 break;
678
Michael Clarkf0d08882007-03-13 08:26:18 +0000679 case json_tokener_state_object_field_start:
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500680 case json_tokener_state_object_field_start_after_sep:
Michael Clarkf0d08882007-03-13 08:26:18 +0000681 if(c == '}') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500682 if (state == json_tokener_state_object_field_start_after_sep &&
683 (tok->flags & JSON_TOKENER_STRICT))
684 {
685 tok->err = json_tokener_error_parse_unexpected;
686 goto out;
687 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000688 saved_state = json_tokener_state_finish;
689 state = json_tokener_state_eatws;
690 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000691 tok->quote_char = c;
692 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000693 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000694 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000695 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000696 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000697 }
698 break;
699
700 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000701 {
702 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000703 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000704 while(1) {
705 if(c == tok->quote_char) {
706 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
707 obj_field_name = strdup(tok->pb->buf);
708 saved_state = json_tokener_state_object_field_end;
709 state = json_tokener_state_eatws;
710 break;
711 } else if(c == '\\') {
712 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
713 saved_state = json_tokener_state_object_field;
714 state = json_tokener_state_string_escape;
715 break;
716 }
William Dignaziobb492d42013-03-06 12:29:33 -0500717 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000718 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
719 goto out;
720 }
721 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000722 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000723 break;
724
725 case json_tokener_state_object_field_end:
726 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000727 saved_state = json_tokener_state_object_value;
728 state = json_tokener_state_eatws;
729 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000730 tok->err = json_tokener_error_parse_object_key_sep;
731 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000732 }
733 break;
734
735 case json_tokener_state_object_value:
Remi Collet197cb1d2012-11-27 09:01:45 +0100736 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000737 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000738 goto out;
739 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000740 state = json_tokener_state_object_value_add;
741 tok->depth++;
742 json_tokener_reset_level(tok, tok->depth);
743 goto redo_char;
744
745 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000746 json_object_object_add(current, obj_field_name, obj);
747 free(obj_field_name);
748 obj_field_name = NULL;
749 saved_state = json_tokener_state_object_sep;
750 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000751 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000752
753 case json_tokener_state_object_sep:
754 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000755 saved_state = json_tokener_state_finish;
756 state = json_tokener_state_eatws;
757 } else if(c == ',') {
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500758 saved_state = json_tokener_state_object_field_start_after_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000759 state = json_tokener_state_eatws;
760 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000761 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000762 goto out;
763 }
764 break;
765
766 }
Michael Clark95f55a72009-04-27 08:16:58 +0000767 if (!ADVANCE_CHAR(str, tok))
768 goto out;
769 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000770
771 out:
Michael Clark95f55a72009-04-27 08:16:58 +0000772 if (!c) { /* We hit an eof char (0) */
773 if(state != json_tokener_state_finish &&
774 saved_state != json_tokener_state_finish)
775 tok->err = json_tokener_error_parse_eof;
776 }
777
Remi Colleta01b6592012-12-13 09:47:33 +0100778#ifdef HAVE_SETLOCALE
779 setlocale(LC_NUMERIC, oldlocale);
780 if (oldlocale) free(oldlocale);
781#endif
782
William Dignaziobb492d42013-03-06 12:29:33 -0500783 if (tok->err == json_tokener_success)
Eric Haszlakiewiczd809fa62012-03-31 22:53:43 -0500784 {
785 json_object *ret = json_object_get(current);
786 int ii;
787
788 /* Partially reset, so we parse additional objects on subsequent calls. */
789 for(ii = tok->depth; ii >= 0; ii--)
790 json_tokener_reset_level(tok, ii);
791 return ret;
792 }
793
Michael Clarkdfaf6702007-10-25 02:26:00 +0000794 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000795 json_tokener_errors[tok->err], tok->char_offset);
796 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000797}
Eric Haszlakiewicze8161a12013-03-31 20:05:36 -0500798
799void json_tokener_set_flags(struct json_tokener *tok, int flags)
800{
801 tok->flags = flags;
802}