blob: afb7bb2b9198a4ffacfa7e7b25c0f43f28835c2b [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Remi Colleta01b6592012-12-13 09:47:33 +010034#ifdef HAVE_LOCALE_H
35#include <locale.h>
36#endif /* HAVE_LOCALE_H */
37
Mateusz Loskota6f39a32012-05-21 23:22:36 +010038#if !HAVE_STRDUP && defined(_MSC_VER)
39 /* MSC has the version as _strdup */
40# define strdup _strdup
41#elif !HAVE_STRDUP
42# error You do not have strdup on your system.
43#endif /* HAVE_STRDUP */
44
Michael Clark837240f2007-03-13 08:26:25 +000045#if !HAVE_STRNCASECMP && defined(_MSC_VER)
46 /* MSC has the version as _strnicmp */
47# define strncasecmp _strnicmp
48#elif !HAVE_STRNCASECMP
49# error You do not have strncasecmp on your system.
50#endif /* HAVE_STRNCASECMP */
51
Michael Clarka850f8e2007-03-13 08:26:26 +000052static const char* json_null_str = "null";
53static const char* json_true_str = "true";
54static const char* json_false_str = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000055
/*
 * Human-readable messages for tokener error codes.
 *
 * json_tokener_error_desc() and the MC_DEBUG() call at the end of
 * json_tokener_parse_ex() both index this table directly with an
 * enum json_tokener_error value, so entry N is the text for error code N.
 * NOTE(review): the entry order presumably has to mirror the enum
 * declaration in json_tokener.h -- confirm whenever an error code is added.
 */
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060056// XXX after v0.10 this array will become static:
Michael Clarka850f8e2007-03-13 08:26:26 +000057const char* json_tokener_errors[] = {
58 "success",
59 "continue",
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -050060 "nesting too deep",
Michael Clarka850f8e2007-03-13 08:26:26 +000061 "unexpected end of data",
62 "unexpected character",
63 "null expected",
64 "boolean expected",
65 "number expected",
66 "array value separator ',' expected",
67 "quoted object property name expected",
68 "object property name separator ':' expected",
69 "object value separator ',' expected",
70 "invalid string sequence",
71 "expected comment",
72};
73
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060074const char *json_tokener_error_desc(enum json_tokener_error jerr)
75{
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060076 int jerr_int = (int)jerr;
77 if (jerr_int < 0 || jerr_int > (int)sizeof(json_tokener_errors))
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060078 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
79 return json_tokener_errors[jerr];
80}
81
82enum json_tokener_error json_tokener_get_error(json_tokener *tok)
83{
84 return tok->err;
85}
86
Brent Miller126ad952009-08-20 06:50:22 +000087/* Stuff for decoding unicode sequences */
/* True when the top six bits of a 16-bit unit are 110110, i.e. 0xD800..0xDBFF. */
88#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
/* True when the top six bits of a 16-bit unit are 110111, i.e. 0xDC00..0xDFFF. */
89#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00)
/*
 * Combine a high/low surrogate pair into one code point: the low 10 bits of
 * hi become bits 10..19, the low 10 bits of lo become bits 0..9, and
 * 0x10000 is added.
 */
90#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
/*
 * Three bytes EF BF BD (the UTF-8 encoding of U+FFFD).  The tokener appends
 * them in place of \u escapes it cannot turn into a valid character.
 */
91static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
92
Remi Collet197cb1d2012-11-27 09:01:45 +010093struct json_tokener* json_tokener_new_ex(int depth)
Michael Clarkf0d08882007-03-13 08:26:18 +000094{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000095 struct json_tokener *tok;
96
97 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000098 if (!tok) return NULL;
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -060099 tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
Remi Collet197cb1d2012-11-27 09:01:45 +0100100 if (!tok->stack) {
101 free(tok);
102 return NULL;
103 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000104 tok->pb = printbuf_new();
Remi Collet197cb1d2012-11-27 09:01:45 +0100105 tok->max_depth = depth;
Michael Clarka850f8e2007-03-13 08:26:26 +0000106 json_tokener_reset(tok);
107 return tok;
108}
109
Remi Collet197cb1d2012-11-27 09:01:45 +0100110struct json_tokener* json_tokener_new(void)
111{
112 return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
113}
114
Michael Clarka850f8e2007-03-13 08:26:26 +0000115void json_tokener_free(struct json_tokener *tok)
116{
117 json_tokener_reset(tok);
Remi Collet197cb1d2012-11-27 09:01:45 +0100118 if (tok->pb) printbuf_free(tok->pb);
119 if (tok->stack) free(tok->stack);
Michael Clarka850f8e2007-03-13 08:26:26 +0000120 free(tok);
121}
122
123static void json_tokener_reset_level(struct json_tokener *tok, int depth)
124{
125 tok->stack[depth].state = json_tokener_state_eatws;
126 tok->stack[depth].saved_state = json_tokener_state_start;
127 json_object_put(tok->stack[depth].current);
128 tok->stack[depth].current = NULL;
129 free(tok->stack[depth].obj_field_name);
130 tok->stack[depth].obj_field_name = NULL;
131}
132
133void json_tokener_reset(struct json_tokener *tok)
134{
135 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000136 if (!tok)
137 return;
138
Michael Clarka850f8e2007-03-13 08:26:26 +0000139 for(i = tok->depth; i >= 0; i--)
140 json_tokener_reset_level(tok, i);
141 tok->depth = 0;
142 tok->err = json_tokener_success;
143}
144
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000145struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000146{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500147 enum json_tokener_error jerr_ignored;
148 struct json_object* obj;
149 obj = json_tokener_parse_verbose(str, &jerr_ignored);
150 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000151}
152
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000153struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
154{
155 struct json_tokener* tok;
156 struct json_object* obj;
157
158 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500159 if (!tok)
160 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000161 obj = json_tokener_parse_ex(tok, str, -1);
162 *error = tok->err;
163 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500164 if (obj != NULL)
165 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000166 obj = NULL;
167 }
168
169 json_tokener_free(tok);
170 return obj;
171}
172
Michael Clarka850f8e2007-03-13 08:26:26 +0000173
#if !HAVE_STRNDUP
/*
 * CAW: compliant version of strndup()
 *
 * Duplicate at most n bytes of str into freshly malloc()ed, always
 * NUL-terminated storage.
 *
 * str: source bytes; may be NULL.  It need not be NUL-terminated as long as
 *      at least n bytes are readable.
 * n:   maximum number of bytes to copy.
 *
 * Returns the new string (caller frees it), or NULL when str is NULL or the
 * allocation fails.
 */
char* strndup(const char* str, size_t n)
{
	size_t len = 0;
	char *copy;

	if (!str)
		return NULL;

	/*
	 * Bounded length scan: never look at more than n bytes.  strlen()
	 * would keep reading past n when the source has no terminator in
	 * range.
	 */
	while (len < n && str[len] != '\0')
		len++;

	copy = (char*)malloc(len + 1);
	if (!copy)
		return NULL;

	memcpy(copy, str, len);
	copy[len] = '\0';
	return copy;
}
#endif
194
Michael Clarka850f8e2007-03-13 08:26:26 +0000195
/*
 * Shorthand for the fields of the stack entry at the current nesting depth.
 * Each macro expands to an expression on a variable named `tok`, so they are
 * only usable where such a variable is in scope.  From here to the end of
 * the file any identifier spelled state, saved_state, current or
 * obj_field_name is rewritten by the preprocessor -- do not reuse those
 * names for locals.
 */
196#define state tok->stack[tok->depth].state
197#define saved_state tok->stack[tok->depth].saved_state
198#define current tok->stack[tok->depth].current
199#define obj_field_name tok->stack[tok->depth].obj_field_name
200
Michael Clark95f55a72009-04-27 08:16:58 +0000201/* Optimization:
202 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
203 * iterating character-by-character. A large performance boost is
204 * achieved by using tighter loops to locally handle units such as
William Dignaziobb492d42013-03-06 12:29:33 -0500205 * comments and strings. Loops that handle an entire token within
206 * their scope also gather entire strings and pass them to
Michael Clark95f55a72009-04-27 08:16:58 +0000207 * printbuf_memappend() in a single call, rather than calling
208 * printbuf_memappend() one char at a time.
209 *
William Dignaziobb492d42013-03-06 12:29:33 -0500210 * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
Michael Clark95f55a72009-04-27 08:16:58 +0000211 * common to both the main loop and the tighter loops.
212 */
213
William Dignaziobb492d42013-03-06 12:29:33 -0500214/* PEEK_CHAR(dest, tok) macro:
215 * Peeks at the current char and stores it in dest.
Michael Clark95f55a72009-04-27 08:16:58 +0000216 * Returns 1 on success, sets tok->err and returns 0 if no more chars.
217 * Implicit inputs: str, len vars
 *
 * "No more chars" means tok->char_offset == len.  In that case tok->err is
 * set to json_tokener_success only when the tokener is at depth 0, eating
 * whitespace, with json_tokener_state_finish as the saved state (a complete
 * value has been read); in every other situation it is set to
 * json_tokener_continue.  dest is left untouched when 0 is returned.
 * The macro also relies on the state / saved_state macros and therefore on
 * a variable named tok being in scope.
218 */
William Dignaziobb492d42013-03-06 12:29:33 -0500219#define PEEK_CHAR(dest, tok) \
Michael Clark95f55a72009-04-27 08:16:58 +0000220 (((tok)->char_offset == len) ? \
221 (((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
222 (((tok)->err = json_tokener_success), 0) \
223 : \
224 (((tok)->err = json_tokener_continue), 0) \
225 ) : \
226 (((dest) = *str), 1) \
227 )
William Dignaziobb492d42013-03-06 12:29:33 -0500228
Michael Clark95f55a72009-04-27 08:16:58 +0000229/* ADVANCE_CHAR() macro:
230 * Increments str & tok->char_offset.
231 * For convenience of existing conditionals, returns the old value of c (0 on eof)
232 * Implicit inputs: c var
 *
 * The macro does not reload c: its value is whatever the last PEEK_CHAR()
 * stored, i.e. the character that is being stepped over.
233 */
234#define ADVANCE_CHAR(str, tok) \
235 ( ++(str), ((tok)->char_offset)++, c)
236
Brent Miller126ad952009-08-20 06:50:22 +0000237
Michael Clark95f55a72009-04-27 08:16:58 +0000238/* End optimization macro defs */
239
240
Michael Clarka850f8e2007-03-13 08:26:26 +0000241struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000242 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000243{
Michael Clarka850f8e2007-03-13 08:26:26 +0000244 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000245 char c = '\1';
Remi Colleta01b6592012-12-13 09:47:33 +0100246#ifdef HAVE_SETLOCALE
247 char *oldlocale=NULL, *tmplocale;
248
249 tmplocale = setlocale(LC_NUMERIC, NULL);
250 if (tmplocale) oldlocale = strdup(tmplocale);
251 setlocale(LC_NUMERIC, "C");
252#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000253
Michael Clarka850f8e2007-03-13 08:26:26 +0000254 tok->char_offset = 0;
255 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000256
William Dignaziobb492d42013-03-06 12:29:33 -0500257 while (PEEK_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000258
Michael Clarka850f8e2007-03-13 08:26:26 +0000259 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000260 switch(state) {
261
262 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000263 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000264 while (isspace((int)c)) {
William Dignaziobb492d42013-03-06 12:29:33 -0500265 if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
Michael Clark95f55a72009-04-27 08:16:58 +0000266 goto out;
267 }
268 if(c == '/') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000269 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000270 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000271 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000272 } else {
273 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000274 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000275 }
276 break;
277
278 case json_tokener_state_start:
279 switch(c) {
280 case '{':
281 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000282 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000283 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000284 break;
285 case '[':
286 state = json_tokener_state_eatws;
287 saved_state = json_tokener_state_array;
288 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000289 break;
290 case 'N':
291 case 'n':
292 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000293 printbuf_reset(tok->pb);
294 tok->st_pos = 0;
295 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000296 case '"':
297 case '\'':
Michael Clarkf0d08882007-03-13 08:26:18 +0000298 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000299 printbuf_reset(tok->pb);
300 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000301 break;
302 case 'T':
303 case 't':
304 case 'F':
305 case 'f':
306 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000307 printbuf_reset(tok->pb);
308 tok->st_pos = 0;
309 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000310#if defined(__GNUC__)
311 case '0' ... '9':
312#else
313 case '0':
314 case '1':
315 case '2':
316 case '3':
317 case '4':
318 case '5':
319 case '6':
320 case '7':
321 case '8':
322 case '9':
323#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000324 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000325 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000326 printbuf_reset(tok->pb);
327 tok->is_double = 0;
328 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000329 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000330 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000331 goto out;
332 }
333 break;
334
335 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000336 if(tok->depth == 0) goto out;
337 obj = json_object_get(current);
338 json_tokener_reset_level(tok, tok->depth);
339 tok->depth--;
340 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000341
342 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000343 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000344 if(strncasecmp(json_null_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600345 json_min(tok->st_pos+1, (int)strlen(json_null_str))) == 0) {
346 if(tok->st_pos == (int)strlen(json_null_str)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000347 current = NULL;
348 saved_state = json_tokener_state_finish;
349 state = json_tokener_state_eatws;
350 goto redo_char;
351 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000352 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000353 tok->err = json_tokener_error_parse_null;
354 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000355 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000356 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000357 break;
358
359 case json_tokener_state_comment_start:
360 if(c == '*') {
361 state = json_tokener_state_comment;
362 } else if(c == '/') {
363 state = json_tokener_state_comment_eol;
364 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000365 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000366 goto out;
367 }
Michael Clark95f55a72009-04-27 08:16:58 +0000368 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000369 break;
370
371 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000372 {
373 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000374 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000375 while(c != '*') {
William Dignaziobb492d42013-03-06 12:29:33 -0500376 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000377 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
378 goto out;
William Dignaziobb492d42013-03-06 12:29:33 -0500379 }
Michael Clark95f55a72009-04-27 08:16:58 +0000380 }
381 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
382 state = json_tokener_state_comment_end;
383 }
384 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000385
386 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000387 {
388 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000389 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000390 while(c != '\n') {
William Dignaziobb492d42013-03-06 12:29:33 -0500391 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000392 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
393 goto out;
394 }
395 }
396 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000397 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000398 state = json_tokener_state_eatws;
399 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000400 break;
401
402 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000403 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000404 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000405 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000406 state = json_tokener_state_eatws;
407 } else {
408 state = json_tokener_state_comment;
409 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000410 break;
411
412 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000413 {
414 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000415 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000416 while(1) {
417 if(c == tok->quote_char) {
418 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Eric Haszlakiewicz4e4af932012-12-09 16:32:11 -0600419 current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
Michael Clark95f55a72009-04-27 08:16:58 +0000420 saved_state = json_tokener_state_finish;
421 state = json_tokener_state_eatws;
422 break;
423 } else if(c == '\\') {
424 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
425 saved_state = json_tokener_state_string;
426 state = json_tokener_state_string_escape;
427 break;
428 }
William Dignaziobb492d42013-03-06 12:29:33 -0500429 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000430 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
431 goto out;
432 }
433 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000434 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000435 break;
436
437 case json_tokener_state_string_escape:
438 switch(c) {
439 case '"':
440 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000441 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000442 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000443 state = saved_state;
444 break;
445 case 'b':
446 case 'n':
447 case 'r':
448 case 't':
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500449 case 'f':
Michael Clark95f55a72009-04-27 08:16:58 +0000450 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
451 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
452 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
453 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500454 else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000455 state = saved_state;
456 break;
457 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000458 tok->ucs_char = 0;
459 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000460 state = json_tokener_state_escape_unicode;
461 break;
462 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000463 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000464 goto out;
465 }
466 break;
467
468 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000469 {
Brent Miller126ad952009-08-20 06:50:22 +0000470 unsigned int got_hi_surrogate = 0;
471
472 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000473 while(1) {
474 if(strchr(json_hex_chars, c)) {
475 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
476 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000477 unsigned char unescaped_utf[4];
478
479 if (got_hi_surrogate) {
480 if (IS_LOW_SURROGATE(tok->ucs_char)) {
481 /* Recalculate the ucs_char, then fall thru to process normally */
482 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
483 } else {
484 /* Hi surrogate was not followed by a low surrogate */
485 /* Replace the hi and process the rest normally */
486 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
487 }
488 got_hi_surrogate = 0;
489 }
490
Michael Clark95f55a72009-04-27 08:16:58 +0000491 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000492 unescaped_utf[0] = tok->ucs_char;
493 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000494 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000495 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
496 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
497 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
498 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
499 /* Got a high surrogate. Remember it and look for the
500 * the beginning of another sequence, which should be the
501 * low surrogate.
502 */
503 got_hi_surrogate = tok->ucs_char;
504 /* Not at end, and the next two chars should be "\u" */
505 if ((tok->char_offset+1 != len) &&
506 (tok->char_offset+2 != len) &&
507 (str[1] == '\\') &&
508 (str[2] == 'u'))
509 {
510 ADVANCE_CHAR(str, tok);
511 ADVANCE_CHAR(str, tok);
512
513 /* Advance to the first char of the next sequence and
514 * continue processing with the next sequence.
515 */
William Dignaziobb492d42013-03-06 12:29:33 -0500516 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000517 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
518 goto out;
519 }
520 tok->ucs_char = 0;
521 tok->st_pos = 0;
522 continue; /* other json_tokener_state_escape_unicode */
523 } else {
524 /* Got a high surrogate without another sequence following
525 * it. Put a replacement char in for the hi surrogate
526 * and pretend we finished.
527 */
528 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
529 }
530 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
531 /* Got a low surrogate not preceded by a high */
532 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
533 } else if (tok->ucs_char < 0x10000) {
534 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
535 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
536 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
537 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
538 } else if (tok->ucs_char < 0x110000) {
539 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
540 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
541 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
542 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
543 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000544 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000545 /* Don't know what we got--insert the replacement char */
546 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
547 }
Michael Clark95f55a72009-04-27 08:16:58 +0000548 state = saved_state;
549 break;
550 }
551 } else {
552 tok->err = json_tokener_error_parse_string;
553 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000554 }
William Dignaziobb492d42013-03-06 12:29:33 -0500555 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Brent Miller126ad952009-08-20 06:50:22 +0000556 if (got_hi_surrogate) /* Clean up any pending chars */
557 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000558 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000559 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000560 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000561 }
562 break;
563
564 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000565 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000566 if(strncasecmp(json_true_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600567 json_min(tok->st_pos+1, (int)strlen(json_true_str))) == 0) {
568 if(tok->st_pos == (int)strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000569 current = json_object_new_boolean(1);
570 saved_state = json_tokener_state_finish;
571 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000572 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000573 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000574 } else if(strncasecmp(json_false_str, tok->pb->buf,
Eric Haszlakiewiczca8b27d2013-02-09 16:35:24 -0600575 json_min(tok->st_pos+1, (int)strlen(json_false_str))) == 0) {
576 if(tok->st_pos == (int)strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000577 current = json_object_new_boolean(0);
578 saved_state = json_tokener_state_finish;
579 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000580 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000581 }
582 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000583 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000584 goto out;
585 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000586 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000587 break;
588
589 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000590 {
591 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000592 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000593 int case_len=0;
594 while(c && strchr(json_number_chars, c)) {
595 ++case_len;
Eric Haszlakiewiczf931f612012-04-24 22:17:13 -0500596 if(c == '.' || c == 'e' || c == 'E')
597 tok->is_double = 1;
William Dignaziobb492d42013-03-06 12:29:33 -0500598 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000599 printbuf_memappend_fast(tok->pb, case_start, case_len);
600 goto out;
601 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000602 }
Michael Clark95f55a72009-04-27 08:16:58 +0000603 if (case_len>0)
604 printbuf_memappend_fast(tok->pb, case_start, case_len);
605 }
606 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000607 int64_t num64;
608 double numd;
609 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
ehaszla252669c2010-12-07 18:15:35 +0000610 current = json_object_new_int64(num64);
Remi Collet16a4a322012-11-27 11:06:49 +0100611 } else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0) {
Michael Clark95f55a72009-04-27 08:16:58 +0000612 current = json_object_new_double(numd);
613 } else {
614 tok->err = json_tokener_error_parse_number;
615 goto out;
616 }
617 saved_state = json_tokener_state_finish;
618 state = json_tokener_state_eatws;
619 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000620 }
621 break;
622
623 case json_tokener_state_array:
624 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000625 saved_state = json_tokener_state_finish;
626 state = json_tokener_state_eatws;
627 } else {
Remi Collet197cb1d2012-11-27 09:01:45 +0100628 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000629 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000630 goto out;
631 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000632 state = json_tokener_state_array_add;
633 tok->depth++;
634 json_tokener_reset_level(tok, tok->depth);
635 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000636 }
637 break;
638
Michael Clarka850f8e2007-03-13 08:26:26 +0000639 case json_tokener_state_array_add:
640 json_object_array_add(current, obj);
641 saved_state = json_tokener_state_array_sep;
642 state = json_tokener_state_eatws;
643 goto redo_char;
644
Michael Clarkf0d08882007-03-13 08:26:18 +0000645 case json_tokener_state_array_sep:
646 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000647 saved_state = json_tokener_state_finish;
648 state = json_tokener_state_eatws;
649 } else if(c == ',') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000650 saved_state = json_tokener_state_array;
651 state = json_tokener_state_eatws;
652 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000653 tok->err = json_tokener_error_parse_array;
654 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000655 }
656 break;
657
Michael Clarkf0d08882007-03-13 08:26:18 +0000658 case json_tokener_state_object_field_start:
659 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000660 saved_state = json_tokener_state_finish;
661 state = json_tokener_state_eatws;
662 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000663 tok->quote_char = c;
664 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000665 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000666 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000667 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000668 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000669 }
670 break;
671
672 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000673 {
674 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000675 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000676 while(1) {
677 if(c == tok->quote_char) {
678 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
679 obj_field_name = strdup(tok->pb->buf);
680 saved_state = json_tokener_state_object_field_end;
681 state = json_tokener_state_eatws;
682 break;
683 } else if(c == '\\') {
684 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
685 saved_state = json_tokener_state_object_field;
686 state = json_tokener_state_string_escape;
687 break;
688 }
William Dignaziobb492d42013-03-06 12:29:33 -0500689 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000690 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
691 goto out;
692 }
693 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000694 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000695 break;
696
697 case json_tokener_state_object_field_end:
698 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000699 saved_state = json_tokener_state_object_value;
700 state = json_tokener_state_eatws;
701 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000702 tok->err = json_tokener_error_parse_object_key_sep;
703 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000704 }
705 break;
706
707 case json_tokener_state_object_value:
Remi Collet197cb1d2012-11-27 09:01:45 +0100708 if(tok->depth >= tok->max_depth-1) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000709 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000710 goto out;
711 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000712 state = json_tokener_state_object_value_add;
713 tok->depth++;
714 json_tokener_reset_level(tok, tok->depth);
715 goto redo_char;
716
717 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000718 json_object_object_add(current, obj_field_name, obj);
719 free(obj_field_name);
720 obj_field_name = NULL;
721 saved_state = json_tokener_state_object_sep;
722 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000723 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000724
725 case json_tokener_state_object_sep:
726 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000727 saved_state = json_tokener_state_finish;
728 state = json_tokener_state_eatws;
729 } else if(c == ',') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000730 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000731 state = json_tokener_state_eatws;
732 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000733 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000734 goto out;
735 }
736 break;
737
738 }
Michael Clark95f55a72009-04-27 08:16:58 +0000739 if (!ADVANCE_CHAR(str, tok))
740 goto out;
741 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000742
743 out:
Michael Clark95f55a72009-04-27 08:16:58 +0000744 if (!c) { /* We hit an eof char (0) */
745 if(state != json_tokener_state_finish &&
746 saved_state != json_tokener_state_finish)
747 tok->err = json_tokener_error_parse_eof;
748 }
749
Remi Colleta01b6592012-12-13 09:47:33 +0100750#ifdef HAVE_SETLOCALE
751 setlocale(LC_NUMERIC, oldlocale);
752 if (oldlocale) free(oldlocale);
753#endif
754
William Dignaziobb492d42013-03-06 12:29:33 -0500755 if (tok->err == json_tokener_success)
Eric Haszlakiewiczd809fa62012-03-31 22:53:43 -0500756 {
757 json_object *ret = json_object_get(current);
758 int ii;
759
760 /* Partially reset, so we parse additional objects on subsequent calls. */
761 for(ii = tok->depth; ii >= 0; ii--)
762 json_tokener_reset_level(tok, ii);
763 return ret;
764 }
765
Michael Clarkdfaf6702007-10-25 02:26:00 +0000766 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000767 json_tokener_errors[tok->err], tok->char_offset);
768 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000769}