blob: 63bb41b397fe0bc7058219dbcd8d667c1f8aaf07 [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Remi Colleta01b6592012-12-13 09:47:33 +010034#ifdef HAVE_LOCALE_H
35#include <locale.h>
36#endif /* HAVE_LOCALE_H */
37
Mateusz Loskota6f39a32012-05-21 23:22:36 +010038#if !HAVE_STRDUP && defined(_MSC_VER)
39 /* MSC has the version as _strdup */
40# define strdup _strdup
41#elif !HAVE_STRDUP
42# error You do not have strdup on your system.
43#endif /* HAVE_STRDUP */
44
Michael Clark837240f2007-03-13 08:26:25 +000045#if !HAVE_STRNCASECMP && defined(_MSC_VER)
46 /* MSC has the version as _strnicmp */
47# define strncasecmp _strnicmp
48#elif !HAVE_STRNCASECMP
49# error You do not have strncasecmp on your system.
50#endif /* HAVE_STRNCASECMP */
51
/* Literal spellings matched (case-insensitively) by the null/boolean states
 * of json_tokener_parse_ex(). */
static const char* json_null_str  = "null";
static const char* json_true_str  = "true";
static const char* json_false_str = "false";

/*
 * Human-readable messages, indexed by enum json_tokener_error value
 * (json_tokener_error_desc() and the parser's debug output index this
 * table directly with the error code).
 */
// XXX after v0.10 this array will become static:
const char* json_tokener_errors[] = {
  /*  0 */ "success",
  /*  1 */ "continue",
  /*  2 */ "nesting too deep",
  /*  3 */ "unexpected end of data",
  /*  4 */ "unexpected character",
  /*  5 */ "null expected",
  /*  6 */ "boolean expected",
  /*  7 */ "number expected",
  /*  8 */ "array value separator ',' expected",
  /*  9 */ "quoted object property name expected",
  /* 10 */ "object property name separator ':' expected",
  /* 11 */ "object value separator ',' expected",
  /* 12 */ "invalid string sequence",
  /* 13 */ "expected comment",
};
73
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060074const char *json_tokener_error_desc(enum json_tokener_error jerr)
75{
76 if (jerr < 0 || jerr > sizeof(json_tokener_errors))
77 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
78 return json_tokener_errors[jerr];
79}
80
81enum json_tokener_error json_tokener_get_error(json_tokener *tok)
82{
83 return tok->err;
84}
85
/*
 * Helpers for decoding \uXXXX escapes.
 *
 * A UTF-16 high surrogate has its top six bits equal to 110110 (0xD800..0xDBFF),
 * a low surrogate has them equal to 110111 (0xDC00..0xDFFF).  A pair encodes
 * a code point above 0xFFFF: ten bits from each half, offset by 0x10000.
 */
#define IS_HIGH_SURROGATE(uc) (0xD800 == ((uc) & 0xFC00))
#define IS_LOW_SURROGATE(uc)  (0xDC00 == ((uc) & 0xFC00))
#define DECODE_SURROGATE_PAIR(hi,lo) \
  (0x10000 + (((hi) & 0x3FF) << 10) + ((lo) & 0x3FF))

/* UTF-8 encoding of U+FFFD, emitted in place of malformed surrogate escapes. */
static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
91
Michael Clarka850f8e2007-03-13 08:26:26 +000092
Michael Clarke8de0782009-02-25 01:45:00 +000093struct json_tokener* json_tokener_new(void)
Michael Clarkf0d08882007-03-13 08:26:18 +000094{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000095 struct json_tokener *tok;
96
97 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000098 if (!tok) return NULL;
Michael Clarka850f8e2007-03-13 08:26:26 +000099 tok->pb = printbuf_new();
100 json_tokener_reset(tok);
101 return tok;
102}
103
104void json_tokener_free(struct json_tokener *tok)
105{
106 json_tokener_reset(tok);
107 if(tok) printbuf_free(tok->pb);
108 free(tok);
109}
110
111static void json_tokener_reset_level(struct json_tokener *tok, int depth)
112{
113 tok->stack[depth].state = json_tokener_state_eatws;
114 tok->stack[depth].saved_state = json_tokener_state_start;
115 json_object_put(tok->stack[depth].current);
116 tok->stack[depth].current = NULL;
117 free(tok->stack[depth].obj_field_name);
118 tok->stack[depth].obj_field_name = NULL;
119}
120
121void json_tokener_reset(struct json_tokener *tok)
122{
123 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000124 if (!tok)
125 return;
126
Michael Clarka850f8e2007-03-13 08:26:26 +0000127 for(i = tok->depth; i >= 0; i--)
128 json_tokener_reset_level(tok, i);
129 tok->depth = 0;
130 tok->err = json_tokener_success;
131}
132
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000133struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000134{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500135 enum json_tokener_error jerr_ignored;
136 struct json_object* obj;
137 obj = json_tokener_parse_verbose(str, &jerr_ignored);
138 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000139}
140
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000141struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
142{
143 struct json_tokener* tok;
144 struct json_object* obj;
145
146 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500147 if (!tok)
148 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000149 obj = json_tokener_parse_ex(tok, str, -1);
150 *error = tok->err;
151 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500152 if (obj != NULL)
153 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000154 obj = NULL;
155 }
156
157 json_tokener_free(tok);
158 return obj;
159}
160
Michael Clarka850f8e2007-03-13 08:26:26 +0000161
#if !HAVE_STRNDUP
/**
 * Fallback strndup() for platforms that lack one.
 *
 * Copies at most n bytes of str into freshly malloc()ed storage and
 * NUL-terminates the copy.  The source is scanned for its terminator only
 * within the first n bytes, so str does not have to be NUL-terminated as
 * long as n bytes are readable.
 *
 * @param str source bytes; NULL yields NULL.
 * @param n   maximum number of bytes to copy.
 * @return the new string (caller frees), or NULL on NULL input or
 *         allocation failure.
 */
char* strndup(const char* str, size_t n)
{
  size_t len = 0;
  char *copy;

  if (!str)
    return NULL;

  /* Bounded length scan: never look at str[n] or beyond. */
  while (len < n && str[len] != '\0')
    len++;

  copy = (char*)malloc(len + 1);
  if (!copy)
    return NULL;

  memcpy(copy, str, len);
  copy[len] = '\0';
  return copy;
}
#endif
182
Michael Clarka850f8e2007-03-13 08:26:26 +0000183
/*
 * Shorthands for the fields of the parse-stack entry at the current depth.
 * They expand in terms of a variable named `tok`, so they are only usable
 * where such a variable is in scope.
 */
#define state           tok->stack[tok->depth].state
#define saved_state     tok->stack[tok->depth].saved_state
#define current         tok->stack[tok->depth].current
#define obj_field_name  tok->stack[tok->depth].obj_field_name

/* Optimization:
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character by character.  A large performance boost is
 * achieved by using tighter loops to locally handle units such as
 * comments and strings.  Loops that handle an entire token within
 * their scope also gather entire strings and pass them to
 * printbuf_memappend() in a single call, rather than calling
 * printbuf_memappend() one char at a time.
 *
 * The POP_CHAR() and ADVANCE_CHAR() macros hold the code that is
 * common to both the main loop and the tighter loops.
 */

/* POP_CHAR(dest, tok) macro:
 *   Not really a pop(): peeks at the current char and stores it in dest.
 *   Evaluates to 1 on success.  When char_offset has reached len it
 *   evaluates to 0 and sets tok->err: json_tokener_success if a complete
 *   top-level value has been parsed (depth 0, eating whitespace before the
 *   finish state), json_tokener_continue otherwise.
 *   Implicit inputs: str, len vars
 */
#define POP_CHAR(dest, tok)                                              \
  (((tok)->char_offset == len)                                           \
    ? (((tok)->err = (((tok)->depth == 0 &&                              \
                       state == json_tokener_state_eatws &&              \
                       saved_state == json_tokener_state_finish)         \
                        ? json_tokener_success                           \
                        : json_tokener_continue)),                       \
       0)                                                                \
    : (((dest) = *str), 1))

/* ADVANCE_CHAR() macro:
 *   Increments str and tok->char_offset.
 *   For convenience of existing conditionals, evaluates to the value of c,
 *   i.e. the char that was just consumed (0 on eof).
 *   Implicit inputs: c var
 */
#define ADVANCE_CHAR(str, tok) \
  (++(str), ++((tok)->char_offset), c)


/* End optimization macro defs */
227
228
Michael Clarka850f8e2007-03-13 08:26:26 +0000229struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000230 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000231{
Michael Clarka850f8e2007-03-13 08:26:26 +0000232 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000233 char c = '\1';
Remi Colleta01b6592012-12-13 09:47:33 +0100234#ifdef HAVE_SETLOCALE
235 char *oldlocale=NULL, *tmplocale;
236
237 tmplocale = setlocale(LC_NUMERIC, NULL);
238 if (tmplocale) oldlocale = strdup(tmplocale);
239 setlocale(LC_NUMERIC, "C");
240#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000241
Michael Clarka850f8e2007-03-13 08:26:26 +0000242 tok->char_offset = 0;
243 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000244
Michael Clark95f55a72009-04-27 08:16:58 +0000245 while (POP_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000246
Michael Clarka850f8e2007-03-13 08:26:26 +0000247 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000248 switch(state) {
249
250 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000251 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000252 while (isspace((int)c)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000253 if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
254 goto out;
255 }
256 if(c == '/') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000257 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000258 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000259 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000260 } else {
261 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000262 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000263 }
264 break;
265
266 case json_tokener_state_start:
267 switch(c) {
268 case '{':
269 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000270 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000271 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000272 break;
273 case '[':
274 state = json_tokener_state_eatws;
275 saved_state = json_tokener_state_array;
276 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000277 break;
278 case 'N':
279 case 'n':
280 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000281 printbuf_reset(tok->pb);
282 tok->st_pos = 0;
283 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000284 case '"':
285 case '\'':
Michael Clarkf0d08882007-03-13 08:26:18 +0000286 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000287 printbuf_reset(tok->pb);
288 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000289 break;
290 case 'T':
291 case 't':
292 case 'F':
293 case 'f':
294 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000295 printbuf_reset(tok->pb);
296 tok->st_pos = 0;
297 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000298#if defined(__GNUC__)
299 case '0' ... '9':
300#else
301 case '0':
302 case '1':
303 case '2':
304 case '3':
305 case '4':
306 case '5':
307 case '6':
308 case '7':
309 case '8':
310 case '9':
311#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000312 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000313 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000314 printbuf_reset(tok->pb);
315 tok->is_double = 0;
316 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000317 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000318 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000319 goto out;
320 }
321 break;
322
323 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000324 if(tok->depth == 0) goto out;
325 obj = json_object_get(current);
326 json_tokener_reset_level(tok, tok->depth);
327 tok->depth--;
328 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000329
330 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000331 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000332 if(strncasecmp(json_null_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000333 json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000334 if(tok->st_pos == strlen(json_null_str)) {
335 current = NULL;
336 saved_state = json_tokener_state_finish;
337 state = json_tokener_state_eatws;
338 goto redo_char;
339 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000340 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000341 tok->err = json_tokener_error_parse_null;
342 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000343 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000344 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000345 break;
346
347 case json_tokener_state_comment_start:
348 if(c == '*') {
349 state = json_tokener_state_comment;
350 } else if(c == '/') {
351 state = json_tokener_state_comment_eol;
352 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000353 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000354 goto out;
355 }
Michael Clark95f55a72009-04-27 08:16:58 +0000356 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000357 break;
358
359 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000360 {
361 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000362 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000363 while(c != '*') {
364 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
365 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
366 goto out;
367 }
368 }
369 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
370 state = json_tokener_state_comment_end;
371 }
372 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000373
374 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000375 {
376 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000377 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000378 while(c != '\n') {
379 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
380 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
381 goto out;
382 }
383 }
384 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000385 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000386 state = json_tokener_state_eatws;
387 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000388 break;
389
390 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000391 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000392 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000393 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000394 state = json_tokener_state_eatws;
395 } else {
396 state = json_tokener_state_comment;
397 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000398 break;
399
400 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000401 {
402 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000403 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000404 while(1) {
405 if(c == tok->quote_char) {
406 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
407 current = json_object_new_string(tok->pb->buf);
408 saved_state = json_tokener_state_finish;
409 state = json_tokener_state_eatws;
410 break;
411 } else if(c == '\\') {
412 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
413 saved_state = json_tokener_state_string;
414 state = json_tokener_state_string_escape;
415 break;
416 }
417 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
418 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
419 goto out;
420 }
421 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000422 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000423 break;
424
425 case json_tokener_state_string_escape:
426 switch(c) {
427 case '"':
428 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000429 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000430 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000431 state = saved_state;
432 break;
433 case 'b':
434 case 'n':
435 case 'r':
436 case 't':
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500437 case 'f':
Michael Clark95f55a72009-04-27 08:16:58 +0000438 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
439 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
440 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
441 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Eric Haszlakiewicz92f31bd2012-07-29 12:31:07 -0500442 else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000443 state = saved_state;
444 break;
445 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000446 tok->ucs_char = 0;
447 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000448 state = json_tokener_state_escape_unicode;
449 break;
450 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000451 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000452 goto out;
453 }
454 break;
455
456 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000457 {
Brent Miller126ad952009-08-20 06:50:22 +0000458 unsigned int got_hi_surrogate = 0;
459
460 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000461 while(1) {
462 if(strchr(json_hex_chars, c)) {
463 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
464 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000465 unsigned char unescaped_utf[4];
466
467 if (got_hi_surrogate) {
468 if (IS_LOW_SURROGATE(tok->ucs_char)) {
469 /* Recalculate the ucs_char, then fall thru to process normally */
470 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
471 } else {
472 /* Hi surrogate was not followed by a low surrogate */
473 /* Replace the hi and process the rest normally */
474 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
475 }
476 got_hi_surrogate = 0;
477 }
478
Michael Clark95f55a72009-04-27 08:16:58 +0000479 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000480 unescaped_utf[0] = tok->ucs_char;
481 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000482 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000483 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
484 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
485 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
486 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
487 /* Got a high surrogate. Remember it and look for the
488 * the beginning of another sequence, which should be the
489 * low surrogate.
490 */
491 got_hi_surrogate = tok->ucs_char;
492 /* Not at end, and the next two chars should be "\u" */
493 if ((tok->char_offset+1 != len) &&
494 (tok->char_offset+2 != len) &&
495 (str[1] == '\\') &&
496 (str[2] == 'u'))
497 {
498 ADVANCE_CHAR(str, tok);
499 ADVANCE_CHAR(str, tok);
500
501 /* Advance to the first char of the next sequence and
502 * continue processing with the next sequence.
503 */
504 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
505 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
506 goto out;
507 }
508 tok->ucs_char = 0;
509 tok->st_pos = 0;
510 continue; /* other json_tokener_state_escape_unicode */
511 } else {
512 /* Got a high surrogate without another sequence following
513 * it. Put a replacement char in for the hi surrogate
514 * and pretend we finished.
515 */
516 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
517 }
518 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
519 /* Got a low surrogate not preceded by a high */
520 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
521 } else if (tok->ucs_char < 0x10000) {
522 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
523 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
524 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
525 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
526 } else if (tok->ucs_char < 0x110000) {
527 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
528 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
529 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
530 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
531 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000532 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000533 /* Don't know what we got--insert the replacement char */
534 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
535 }
Michael Clark95f55a72009-04-27 08:16:58 +0000536 state = saved_state;
537 break;
538 }
539 } else {
540 tok->err = json_tokener_error_parse_string;
541 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000542 }
543 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
544 if (got_hi_surrogate) /* Clean up any pending chars */
545 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000546 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000547 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000548 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000549 }
550 break;
551
552 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000553 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000554 if(strncasecmp(json_true_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000555 json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000556 if(tok->st_pos == strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000557 current = json_object_new_boolean(1);
558 saved_state = json_tokener_state_finish;
559 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000560 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000561 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000562 } else if(strncasecmp(json_false_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000563 json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000564 if(tok->st_pos == strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000565 current = json_object_new_boolean(0);
566 saved_state = json_tokener_state_finish;
567 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000568 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000569 }
570 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000571 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000572 goto out;
573 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000574 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000575 break;
576
577 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000578 {
579 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000580 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000581 int case_len=0;
582 while(c && strchr(json_number_chars, c)) {
583 ++case_len;
Eric Haszlakiewiczf931f612012-04-24 22:17:13 -0500584 if(c == '.' || c == 'e' || c == 'E')
585 tok->is_double = 1;
Michael Clark95f55a72009-04-27 08:16:58 +0000586 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
587 printbuf_memappend_fast(tok->pb, case_start, case_len);
588 goto out;
589 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000590 }
Michael Clark95f55a72009-04-27 08:16:58 +0000591 if (case_len>0)
592 printbuf_memappend_fast(tok->pb, case_start, case_len);
593 }
594 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000595 int64_t num64;
596 double numd;
597 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
ehaszla252669c2010-12-07 18:15:35 +0000598 current = json_object_new_int64(num64);
Remi Collet16a4a322012-11-27 11:06:49 +0100599 } else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0) {
Michael Clark95f55a72009-04-27 08:16:58 +0000600 current = json_object_new_double(numd);
601 } else {
602 tok->err = json_tokener_error_parse_number;
603 goto out;
604 }
605 saved_state = json_tokener_state_finish;
606 state = json_tokener_state_eatws;
607 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000608 }
609 break;
610
611 case json_tokener_state_array:
612 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000613 saved_state = json_tokener_state_finish;
614 state = json_tokener_state_eatws;
615 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000616 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
617 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000618 goto out;
619 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000620 state = json_tokener_state_array_add;
621 tok->depth++;
622 json_tokener_reset_level(tok, tok->depth);
623 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000624 }
625 break;
626
Michael Clarka850f8e2007-03-13 08:26:26 +0000627 case json_tokener_state_array_add:
628 json_object_array_add(current, obj);
629 saved_state = json_tokener_state_array_sep;
630 state = json_tokener_state_eatws;
631 goto redo_char;
632
Michael Clarkf0d08882007-03-13 08:26:18 +0000633 case json_tokener_state_array_sep:
634 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000635 saved_state = json_tokener_state_finish;
636 state = json_tokener_state_eatws;
637 } else if(c == ',') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000638 saved_state = json_tokener_state_array;
639 state = json_tokener_state_eatws;
640 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000641 tok->err = json_tokener_error_parse_array;
642 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000643 }
644 break;
645
Michael Clarkf0d08882007-03-13 08:26:18 +0000646 case json_tokener_state_object_field_start:
647 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000648 saved_state = json_tokener_state_finish;
649 state = json_tokener_state_eatws;
650 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000651 tok->quote_char = c;
652 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000653 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000654 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000655 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000656 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000657 }
658 break;
659
660 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000661 {
662 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000663 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000664 while(1) {
665 if(c == tok->quote_char) {
666 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
667 obj_field_name = strdup(tok->pb->buf);
668 saved_state = json_tokener_state_object_field_end;
669 state = json_tokener_state_eatws;
670 break;
671 } else if(c == '\\') {
672 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
673 saved_state = json_tokener_state_object_field;
674 state = json_tokener_state_string_escape;
675 break;
676 }
677 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
678 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
679 goto out;
680 }
681 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000682 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000683 break;
684
685 case json_tokener_state_object_field_end:
686 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000687 saved_state = json_tokener_state_object_value;
688 state = json_tokener_state_eatws;
689 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000690 tok->err = json_tokener_error_parse_object_key_sep;
691 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000692 }
693 break;
694
695 case json_tokener_state_object_value:
Michael Clarka850f8e2007-03-13 08:26:26 +0000696 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
697 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000698 goto out;
699 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000700 state = json_tokener_state_object_value_add;
701 tok->depth++;
702 json_tokener_reset_level(tok, tok->depth);
703 goto redo_char;
704
705 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000706 json_object_object_add(current, obj_field_name, obj);
707 free(obj_field_name);
708 obj_field_name = NULL;
709 saved_state = json_tokener_state_object_sep;
710 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000711 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000712
713 case json_tokener_state_object_sep:
714 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000715 saved_state = json_tokener_state_finish;
716 state = json_tokener_state_eatws;
717 } else if(c == ',') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000718 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000719 state = json_tokener_state_eatws;
720 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000721 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000722 goto out;
723 }
724 break;
725
726 }
Michael Clark95f55a72009-04-27 08:16:58 +0000727 if (!ADVANCE_CHAR(str, tok))
728 goto out;
729 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000730
731 out:
Michael Clark95f55a72009-04-27 08:16:58 +0000732 if (!c) { /* We hit an eof char (0) */
733 if(state != json_tokener_state_finish &&
734 saved_state != json_tokener_state_finish)
735 tok->err = json_tokener_error_parse_eof;
736 }
737
Remi Colleta01b6592012-12-13 09:47:33 +0100738#ifdef HAVE_SETLOCALE
739 setlocale(LC_NUMERIC, oldlocale);
740 if (oldlocale) free(oldlocale);
741#endif
742
Eric Haszlakiewiczd809fa62012-03-31 22:53:43 -0500743 if (tok->err == json_tokener_success)
744 {
745 json_object *ret = json_object_get(current);
746 int ii;
747
748 /* Partially reset, so we parse additional objects on subsequent calls. */
749 for(ii = tok->depth; ii >= 0; ii--)
750 json_tokener_reset_level(tok, ii);
751 return ret;
752 }
753
Michael Clarkdfaf6702007-10-25 02:26:00 +0000754 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000755 json_tokener_errors[tok->err], tok->char_offset);
756 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000757}