blob: 04950b5081360b4ec135d29926b35b0f324dcf58 [file] [log] [blame]
Michael Clarkf0d08882007-03-13 08:26:18 +00001/*
Michael Clarka850f8e2007-03-13 08:26:26 +00002 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
Michael Clarkf0d08882007-03-13 08:26:18 +00003 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00004 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
Michael Clarkf0d08882007-03-13 08:26:18 +00005 * Michael Clark <michael@metaparadigm.com>
6 *
Michael Clarkf6a6e482007-03-13 08:26:23 +00007 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
Michael Clarkf0d08882007-03-13 08:26:18 +00009 *
Michael Clark95f55a72009-04-27 08:16:58 +000010 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
Michael Clarkf0d08882007-03-13 08:26:18 +000014 */
15
Michael Clark4504df72007-03-13 08:26:20 +000016#include "config.h"
17
Michael Clarkf0d08882007-03-13 08:26:18 +000018#include <stdio.h>
19#include <stdlib.h>
Michael Clarkc8f4a6e2007-12-07 02:44:24 +000020#include <stddef.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <ctype.h>
22#include <string.h>
Michael Clarkc4dceae2010-10-06 16:39:20 +000023#include <limits.h>
Michael Clarkf0d08882007-03-13 08:26:18 +000024
25#include "bits.h"
26#include "debug.h"
27#include "printbuf.h"
28#include "arraylist.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000029#include "json_inttypes.h"
Michael Clarkf0d08882007-03-13 08:26:18 +000030#include "json_object.h"
31#include "json_tokener.h"
Michael Clarkc4dceae2010-10-06 16:39:20 +000032#include "json_util.h"
Michael Clarka850f8e2007-03-13 08:26:26 +000033
Michael Clark837240f2007-03-13 08:26:25 +000034#if !HAVE_STRNCASECMP && defined(_MSC_VER)
35 /* MSC has the version as _strnicmp */
36# define strncasecmp _strnicmp
37#elif !HAVE_STRNCASECMP
38# error You do not have strncasecmp on your system.
39#endif /* HAVE_STRNCASECMP */
40
41
/* Literal spellings of the JSON keywords; the tokener compares input
 * against these with strncasecmp(). */
static const char* json_null_str = "null";
static const char* json_true_str = "true";
static const char* json_false_str = "false";
Michael Clarkf0d08882007-03-13 08:26:18 +000045
/* Human-readable messages, indexed by tok->err (see the MC_DEBUG call in
 * json_tokener_parse_ex()) and by json_tokener_error_desc().
 * NOTE(review): the order presumably mirrors enum json_tokener_error in
 * json_tokener.h -- confirm when adding entries.
 */
// XXX after v0.10 this array will become static:
const char* json_tokener_errors[] = {
  "success",
  "continue",
  "nesting too deep",
  "unexpected end of data",
  "unexpected character",
  "null expected",
  "boolean expected",
  "number expected",
  "array value separator ',' expected",
  "quoted object property name expected",
  "object property name separator ':' expected",
  "object value separator ',' expected",
  "invalid string sequence",
  "expected comment",
};
63
Eric Haszlakiewicz2f9091f2012-02-22 08:24:40 -060064const char *json_tokener_error_desc(enum json_tokener_error jerr)
65{
66 if (jerr < 0 || jerr > sizeof(json_tokener_errors))
67 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
68 return json_tokener_errors[jerr];
69}
70
71enum json_tokener_error json_tokener_get_error(json_tokener *tok)
72{
73 return tok->err;
74}
75
/* Stuff for decoding unicode sequences */
/* True when uc lies in 0xD800..0xDBFF (first half of a UTF-16 surrogate pair). */
#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
/* True when uc lies in 0xDC00..0xDFFF (second half of a UTF-16 surrogate pair). */
#define IS_LOW_SURROGATE(uc)  (((uc) & 0xFC00) == 0xDC00)
/* Combine the low 10 bits of each half and add the 0x10000 offset. */
#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
/* UTF-8 encoding of U+FFFD, emitted in place of unpaired surrogates. */
static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
81
Michael Clarka850f8e2007-03-13 08:26:26 +000082
Michael Clarke8de0782009-02-25 01:45:00 +000083struct json_tokener* json_tokener_new(void)
Michael Clarkf0d08882007-03-13 08:26:18 +000084{
Michael Clarkaaec1ef2009-02-25 02:31:32 +000085 struct json_tokener *tok;
86
87 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
Christopher Watford543bb142009-07-08 03:46:10 +000088 if (!tok) return NULL;
Michael Clarka850f8e2007-03-13 08:26:26 +000089 tok->pb = printbuf_new();
90 json_tokener_reset(tok);
91 return tok;
92}
93
94void json_tokener_free(struct json_tokener *tok)
95{
96 json_tokener_reset(tok);
97 if(tok) printbuf_free(tok->pb);
98 free(tok);
99}
100
101static void json_tokener_reset_level(struct json_tokener *tok, int depth)
102{
103 tok->stack[depth].state = json_tokener_state_eatws;
104 tok->stack[depth].saved_state = json_tokener_state_start;
105 json_object_put(tok->stack[depth].current);
106 tok->stack[depth].current = NULL;
107 free(tok->stack[depth].obj_field_name);
108 tok->stack[depth].obj_field_name = NULL;
109}
110
111void json_tokener_reset(struct json_tokener *tok)
112{
113 int i;
Michael Clark22dee7c2009-02-25 01:51:40 +0000114 if (!tok)
115 return;
116
Michael Clarka850f8e2007-03-13 08:26:26 +0000117 for(i = tok->depth; i >= 0; i--)
118 json_tokener_reset_level(tok, i);
119 tok->depth = 0;
120 tok->err = json_tokener_success;
121}
122
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000123struct json_object* json_tokener_parse(const char *str)
Michael Clarka850f8e2007-03-13 08:26:26 +0000124{
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500125 enum json_tokener_error jerr_ignored;
126 struct json_object* obj;
127 obj = json_tokener_parse_verbose(str, &jerr_ignored);
128 return obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000129}
130
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000131struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
132{
133 struct json_tokener* tok;
134 struct json_object* obj;
135
136 tok = json_tokener_new();
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500137 if (!tok)
138 return NULL;
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000139 obj = json_tokener_parse_ex(tok, str, -1);
140 *error = tok->err;
141 if(tok->err != json_tokener_success) {
Eric Haszlakiewicz3620cba2012-03-31 12:52:59 -0500142 if (obj != NULL)
143 json_object_put(obj);
Jehiah Czebotara503ee82010-12-08 03:52:07 +0000144 obj = NULL;
145 }
146
147 json_tokener_free(tok);
148 return obj;
149}
150
Michael Clarka850f8e2007-03-13 08:26:26 +0000151
#if !HAVE_STRNDUP
/* CAW: compliant version of strndup()
 *
 * Duplicate at most n bytes of str into freshly malloc()ed storage and
 * NUL-terminate the copy.
 *
 * str: source bytes; need not be NUL-terminated as long as at least n
 *      bytes are readable.  NULL yields NULL.
 * n:   maximum number of bytes to copy.
 *
 * Returns the new string (caller frees), or NULL when str is NULL or the
 * allocation fails.
 */
char* strndup(const char* str, size_t n)
{
  const char *nul;
  size_t nn;
  char *s;

  if(!str)
    return NULL;

  /* Look for the terminator within the first n bytes only; strlen()
   * would read past n when the buffer is not NUL-terminated there. */
  nul = (const char*)memchr(str, '\0', n);
  nn = nul ? (size_t)(nul - str) : n;

  s = (char*)malloc(nn + 1);
  if(s) {
    memcpy(s, str, nn);
    s[nn] = '\0';
  }

  return s;
}
#endif
172
Michael Clarka850f8e2007-03-13 08:26:26 +0000173
/* Shorthand for the fields of the stack level at the current depth.
 * Each expands to an lvalue and needs a variable named `tok` in scope. */
#define state  tok->stack[tok->depth].state
#define saved_state  tok->stack[tok->depth].saved_state
#define current tok->stack[tok->depth].current
#define obj_field_name tok->stack[tok->depth].obj_field_name
178
/* Optimization:
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character by character.  A large performance boost is
 * achieved by using tighter loops to locally handle units such as
 * comments and strings.  Loops that handle an entire token within
 * their scope also gather entire strings and pass them to
 * printbuf_memappend() in a single call, rather than calling
 * printbuf_memappend() one char at a time.
 *
 * POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
 * common to both the main loop and the tighter loops.
 */
191
/* POP_CHAR(dest, tok) macro:
 *   Not really a pop()...peeks at the current char and stores it in dest.
 *   Returns 1 on success, sets tok->err and returns 0 if no more chars.
 *   When the input is exhausted, tok->err becomes json_tokener_success
 *   only if the tokener is at depth 0, eating whitespace with the finish
 *   state saved; otherwise it becomes json_tokener_continue.
 *   Implicit inputs:  str, len vars
 */
#define POP_CHAR(dest, tok) \
  (((tok)->char_offset == len) ? \
   (((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
    (((tok)->err = json_tokener_success), 0) \
    : \
    (((tok)->err = json_tokener_continue), 0) \
   ) : \
   (((dest) = *str), 1) \
  )
206
/* ADVANCE_CHAR() macro:
 *   Increments str & tok->char_offset.
 *   For convenience of existing conditionals, returns the old value of c (0 on eof)
 *   Implicit inputs:  c var
 */
#define ADVANCE_CHAR(str, tok) \
  ( ++(str), ((tok)->char_offset)++, c)
214
Brent Miller126ad952009-08-20 06:50:22 +0000215
Michael Clark95f55a72009-04-27 08:16:58 +0000216/* End optimization macro defs */
217
218
Michael Clarka850f8e2007-03-13 08:26:26 +0000219struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000220 const char *str, int len)
Michael Clarkf0d08882007-03-13 08:26:18 +0000221{
Michael Clarka850f8e2007-03-13 08:26:26 +0000222 struct json_object *obj = NULL;
Michael Clark95f55a72009-04-27 08:16:58 +0000223 char c = '\1';
Michael Clarkf0d08882007-03-13 08:26:18 +0000224
Michael Clarka850f8e2007-03-13 08:26:26 +0000225 tok->char_offset = 0;
226 tok->err = json_tokener_success;
Michael Clarkf0d08882007-03-13 08:26:18 +0000227
Michael Clark95f55a72009-04-27 08:16:58 +0000228 while (POP_CHAR(c, tok)) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000229
Michael Clarka850f8e2007-03-13 08:26:26 +0000230 redo_char:
Michael Clarkf0d08882007-03-13 08:26:18 +0000231 switch(state) {
232
233 case json_tokener_state_eatws:
Michael Clark95f55a72009-04-27 08:16:58 +0000234 /* Advance until we change state */
ehaszla252669c2010-12-07 18:15:35 +0000235 while (isspace((int)c)) {
Michael Clark95f55a72009-04-27 08:16:58 +0000236 if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
237 goto out;
238 }
239 if(c == '/') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000240 printbuf_reset(tok->pb);
Michael Clark95f55a72009-04-27 08:16:58 +0000241 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000242 state = json_tokener_state_comment_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000243 } else {
244 state = saved_state;
Michael Clarka850f8e2007-03-13 08:26:26 +0000245 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000246 }
247 break;
248
249 case json_tokener_state_start:
250 switch(c) {
251 case '{':
252 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000253 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000254 current = json_object_new_object();
Michael Clarkf0d08882007-03-13 08:26:18 +0000255 break;
256 case '[':
257 state = json_tokener_state_eatws;
258 saved_state = json_tokener_state_array;
259 current = json_object_new_array();
Michael Clarkf0d08882007-03-13 08:26:18 +0000260 break;
261 case 'N':
262 case 'n':
263 state = json_tokener_state_null;
Michael Clarka850f8e2007-03-13 08:26:26 +0000264 printbuf_reset(tok->pb);
265 tok->st_pos = 0;
266 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000267 case '"':
268 case '\'':
Michael Clarkf0d08882007-03-13 08:26:18 +0000269 state = json_tokener_state_string;
Michael Clarka850f8e2007-03-13 08:26:26 +0000270 printbuf_reset(tok->pb);
271 tok->quote_char = c;
Michael Clarkf0d08882007-03-13 08:26:18 +0000272 break;
273 case 'T':
274 case 't':
275 case 'F':
276 case 'f':
277 state = json_tokener_state_boolean;
Michael Clarka850f8e2007-03-13 08:26:26 +0000278 printbuf_reset(tok->pb);
279 tok->st_pos = 0;
280 goto redo_char;
Michael Clark4504df72007-03-13 08:26:20 +0000281#if defined(__GNUC__)
282 case '0' ... '9':
283#else
284 case '0':
285 case '1':
286 case '2':
287 case '3':
288 case '4':
289 case '5':
290 case '6':
291 case '7':
292 case '8':
293 case '9':
294#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000295 case '-':
Michael Clarkf0d08882007-03-13 08:26:18 +0000296 state = json_tokener_state_number;
Michael Clarka850f8e2007-03-13 08:26:26 +0000297 printbuf_reset(tok->pb);
298 tok->is_double = 0;
299 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000300 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000301 tok->err = json_tokener_error_parse_unexpected;
Michael Clarkf0d08882007-03-13 08:26:18 +0000302 goto out;
303 }
304 break;
305
306 case json_tokener_state_finish:
Michael Clarka850f8e2007-03-13 08:26:26 +0000307 if(tok->depth == 0) goto out;
308 obj = json_object_get(current);
309 json_tokener_reset_level(tok, tok->depth);
310 tok->depth--;
311 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000312
313 case json_tokener_state_null:
Michael Clark95f55a72009-04-27 08:16:58 +0000314 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000315 if(strncasecmp(json_null_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000316 json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000317 if(tok->st_pos == strlen(json_null_str)) {
318 current = NULL;
319 saved_state = json_tokener_state_finish;
320 state = json_tokener_state_eatws;
321 goto redo_char;
322 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000323 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000324 tok->err = json_tokener_error_parse_null;
325 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000326 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000327 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000328 break;
329
330 case json_tokener_state_comment_start:
331 if(c == '*') {
332 state = json_tokener_state_comment;
333 } else if(c == '/') {
334 state = json_tokener_state_comment_eol;
335 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000336 tok->err = json_tokener_error_parse_comment;
Michael Clarkf0d08882007-03-13 08:26:18 +0000337 goto out;
338 }
Michael Clark95f55a72009-04-27 08:16:58 +0000339 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000340 break;
341
342 case json_tokener_state_comment:
Michael Clark95f55a72009-04-27 08:16:58 +0000343 {
344 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000345 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000346 while(c != '*') {
347 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
348 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
349 goto out;
350 }
351 }
352 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
353 state = json_tokener_state_comment_end;
354 }
355 break;
Michael Clarkf0d08882007-03-13 08:26:18 +0000356
357 case json_tokener_state_comment_eol:
Michael Clark95f55a72009-04-27 08:16:58 +0000358 {
359 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000360 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000361 while(c != '\n') {
362 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
363 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
364 goto out;
365 }
366 }
367 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000368 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000369 state = json_tokener_state_eatws;
370 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000371 break;
372
373 case json_tokener_state_comment_end:
Michael Clark95f55a72009-04-27 08:16:58 +0000374 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000375 if(c == '/') {
Michael Clarkdfaf6702007-10-25 02:26:00 +0000376 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
Michael Clarkf0d08882007-03-13 08:26:18 +0000377 state = json_tokener_state_eatws;
378 } else {
379 state = json_tokener_state_comment;
380 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000381 break;
382
383 case json_tokener_state_string:
Michael Clark95f55a72009-04-27 08:16:58 +0000384 {
385 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000386 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000387 while(1) {
388 if(c == tok->quote_char) {
389 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
390 current = json_object_new_string(tok->pb->buf);
391 saved_state = json_tokener_state_finish;
392 state = json_tokener_state_eatws;
393 break;
394 } else if(c == '\\') {
395 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
396 saved_state = json_tokener_state_string;
397 state = json_tokener_state_string_escape;
398 break;
399 }
400 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
401 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
402 goto out;
403 }
404 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000405 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000406 break;
407
408 case json_tokener_state_string_escape:
409 switch(c) {
410 case '"':
411 case '\\':
Michael Clarka850f8e2007-03-13 08:26:26 +0000412 case '/':
Michael Clark95f55a72009-04-27 08:16:58 +0000413 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000414 state = saved_state;
415 break;
416 case 'b':
417 case 'n':
418 case 'r':
419 case 't':
Michael Clark95f55a72009-04-27 08:16:58 +0000420 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
421 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
422 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
423 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000424 state = saved_state;
425 break;
426 case 'u':
Michael Clarka850f8e2007-03-13 08:26:26 +0000427 tok->ucs_char = 0;
428 tok->st_pos = 0;
Michael Clarkf0d08882007-03-13 08:26:18 +0000429 state = json_tokener_state_escape_unicode;
430 break;
431 default:
Michael Clarka850f8e2007-03-13 08:26:26 +0000432 tok->err = json_tokener_error_parse_string;
Michael Clarkf0d08882007-03-13 08:26:18 +0000433 goto out;
434 }
435 break;
436
437 case json_tokener_state_escape_unicode:
Michael Clark95f55a72009-04-27 08:16:58 +0000438 {
Brent Miller126ad952009-08-20 06:50:22 +0000439 unsigned int got_hi_surrogate = 0;
440
441 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
Michael Clark95f55a72009-04-27 08:16:58 +0000442 while(1) {
443 if(strchr(json_hex_chars, c)) {
444 tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
445 if(tok->st_pos == 4) {
Brent Miller126ad952009-08-20 06:50:22 +0000446 unsigned char unescaped_utf[4];
447
448 if (got_hi_surrogate) {
449 if (IS_LOW_SURROGATE(tok->ucs_char)) {
450 /* Recalculate the ucs_char, then fall thru to process normally */
451 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
452 } else {
453 /* Hi surrogate was not followed by a low surrogate */
454 /* Replace the hi and process the rest normally */
455 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
456 }
457 got_hi_surrogate = 0;
458 }
459
Michael Clark95f55a72009-04-27 08:16:58 +0000460 if (tok->ucs_char < 0x80) {
Brent Miller126ad952009-08-20 06:50:22 +0000461 unescaped_utf[0] = tok->ucs_char;
462 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
Michael Clark95f55a72009-04-27 08:16:58 +0000463 } else if (tok->ucs_char < 0x800) {
Brent Miller126ad952009-08-20 06:50:22 +0000464 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
465 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
466 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
467 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
468 /* Got a high surrogate. Remember it and look for the
469 * the beginning of another sequence, which should be the
470 * low surrogate.
471 */
472 got_hi_surrogate = tok->ucs_char;
473 /* Not at end, and the next two chars should be "\u" */
474 if ((tok->char_offset+1 != len) &&
475 (tok->char_offset+2 != len) &&
476 (str[1] == '\\') &&
477 (str[2] == 'u'))
478 {
479 ADVANCE_CHAR(str, tok);
480 ADVANCE_CHAR(str, tok);
481
482 /* Advance to the first char of the next sequence and
483 * continue processing with the next sequence.
484 */
485 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
486 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
487 goto out;
488 }
489 tok->ucs_char = 0;
490 tok->st_pos = 0;
491 continue; /* other json_tokener_state_escape_unicode */
492 } else {
493 /* Got a high surrogate without another sequence following
494 * it. Put a replacement char in for the hi surrogate
495 * and pretend we finished.
496 */
497 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
498 }
499 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
500 /* Got a low surrogate not preceded by a high */
501 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
502 } else if (tok->ucs_char < 0x10000) {
503 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
504 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
505 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
506 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
507 } else if (tok->ucs_char < 0x110000) {
508 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
509 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
510 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
511 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
512 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
Michael Clark95f55a72009-04-27 08:16:58 +0000513 } else {
Brent Miller126ad952009-08-20 06:50:22 +0000514 /* Don't know what we got--insert the replacement char */
515 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
516 }
Michael Clark95f55a72009-04-27 08:16:58 +0000517 state = saved_state;
518 break;
519 }
520 } else {
521 tok->err = json_tokener_error_parse_string;
522 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000523 }
524 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
525 if (got_hi_surrogate) /* Clean up any pending chars */
526 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
Michael Clark95f55a72009-04-27 08:16:58 +0000527 goto out;
Brent Miller126ad952009-08-20 06:50:22 +0000528 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000529 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000530 }
531 break;
532
533 case json_tokener_state_boolean:
Michael Clark95f55a72009-04-27 08:16:58 +0000534 printbuf_memappend_fast(tok->pb, &c, 1);
Michael Clarka850f8e2007-03-13 08:26:26 +0000535 if(strncasecmp(json_true_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000536 json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000537 if(tok->st_pos == strlen(json_true_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000538 current = json_object_new_boolean(1);
539 saved_state = json_tokener_state_finish;
540 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000541 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000542 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000543 } else if(strncasecmp(json_false_str, tok->pb->buf,
Michael Clark7fb9b032009-07-25 00:13:44 +0000544 json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
Michael Clarka850f8e2007-03-13 08:26:26 +0000545 if(tok->st_pos == strlen(json_false_str)) {
Michael Clarkf0d08882007-03-13 08:26:18 +0000546 current = json_object_new_boolean(0);
547 saved_state = json_tokener_state_finish;
548 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000549 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000550 }
551 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000552 tok->err = json_tokener_error_parse_boolean;
Michael Clarkf0d08882007-03-13 08:26:18 +0000553 goto out;
554 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000555 tok->st_pos++;
Michael Clarkf0d08882007-03-13 08:26:18 +0000556 break;
557
558 case json_tokener_state_number:
Michael Clark95f55a72009-04-27 08:16:58 +0000559 {
560 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000561 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000562 int case_len=0;
563 while(c && strchr(json_number_chars, c)) {
564 ++case_len;
565 if(c == '.' || c == 'e') tok->is_double = 1;
566 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
567 printbuf_memappend_fast(tok->pb, case_start, case_len);
568 goto out;
569 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000570 }
Michael Clark95f55a72009-04-27 08:16:58 +0000571 if (case_len>0)
572 printbuf_memappend_fast(tok->pb, case_start, case_len);
573 }
574 {
Michael Clarkc4dceae2010-10-06 16:39:20 +0000575 int64_t num64;
576 double numd;
577 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
ehaszla252669c2010-12-07 18:15:35 +0000578 current = json_object_new_int64(num64);
Michael Clarkc4dceae2010-10-06 16:39:20 +0000579 } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
Michael Clark95f55a72009-04-27 08:16:58 +0000580 current = json_object_new_double(numd);
581 } else {
582 tok->err = json_tokener_error_parse_number;
583 goto out;
584 }
585 saved_state = json_tokener_state_finish;
586 state = json_tokener_state_eatws;
587 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000588 }
589 break;
590
591 case json_tokener_state_array:
592 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000593 saved_state = json_tokener_state_finish;
594 state = json_tokener_state_eatws;
595 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000596 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
597 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000598 goto out;
599 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000600 state = json_tokener_state_array_add;
601 tok->depth++;
602 json_tokener_reset_level(tok, tok->depth);
603 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000604 }
605 break;
606
Michael Clarka850f8e2007-03-13 08:26:26 +0000607 case json_tokener_state_array_add:
608 json_object_array_add(current, obj);
609 saved_state = json_tokener_state_array_sep;
610 state = json_tokener_state_eatws;
611 goto redo_char;
612
Michael Clarkf0d08882007-03-13 08:26:18 +0000613 case json_tokener_state_array_sep:
614 if(c == ']') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000615 saved_state = json_tokener_state_finish;
616 state = json_tokener_state_eatws;
617 } else if(c == ',') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000618 saved_state = json_tokener_state_array;
619 state = json_tokener_state_eatws;
620 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000621 tok->err = json_tokener_error_parse_array;
622 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000623 }
624 break;
625
Michael Clarkf0d08882007-03-13 08:26:18 +0000626 case json_tokener_state_object_field_start:
627 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000628 saved_state = json_tokener_state_finish;
629 state = json_tokener_state_eatws;
630 } else if (c == '"' || c == '\'') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000631 tok->quote_char = c;
632 printbuf_reset(tok->pb);
Michael Clarkf0d08882007-03-13 08:26:18 +0000633 state = json_tokener_state_object_field;
Michael Clark0370baa2007-03-13 08:26:22 +0000634 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000635 tok->err = json_tokener_error_parse_object_key_name;
Michael Clark0370baa2007-03-13 08:26:22 +0000636 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000637 }
638 break;
639
640 case json_tokener_state_object_field:
Michael Clark95f55a72009-04-27 08:16:58 +0000641 {
642 /* Advance until we change state */
Christopher Watfordb1a22ac2009-07-08 04:02:05 +0000643 const char *case_start = str;
Michael Clark95f55a72009-04-27 08:16:58 +0000644 while(1) {
645 if(c == tok->quote_char) {
646 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
647 obj_field_name = strdup(tok->pb->buf);
648 saved_state = json_tokener_state_object_field_end;
649 state = json_tokener_state_eatws;
650 break;
651 } else if(c == '\\') {
652 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
653 saved_state = json_tokener_state_object_field;
654 state = json_tokener_state_string_escape;
655 break;
656 }
657 if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
658 printbuf_memappend_fast(tok->pb, case_start, str-case_start);
659 goto out;
660 }
661 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000662 }
Michael Clarkf0d08882007-03-13 08:26:18 +0000663 break;
664
665 case json_tokener_state_object_field_end:
666 if(c == ':') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000667 saved_state = json_tokener_state_object_value;
668 state = json_tokener_state_eatws;
669 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000670 tok->err = json_tokener_error_parse_object_key_sep;
671 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000672 }
673 break;
674
675 case json_tokener_state_object_value:
Michael Clarka850f8e2007-03-13 08:26:26 +0000676 if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
677 tok->err = json_tokener_error_depth;
Michael Clarkf0d08882007-03-13 08:26:18 +0000678 goto out;
679 }
Michael Clarka850f8e2007-03-13 08:26:26 +0000680 state = json_tokener_state_object_value_add;
681 tok->depth++;
682 json_tokener_reset_level(tok, tok->depth);
683 goto redo_char;
684
685 case json_tokener_state_object_value_add:
Michael Clarkf0d08882007-03-13 08:26:18 +0000686 json_object_object_add(current, obj_field_name, obj);
687 free(obj_field_name);
688 obj_field_name = NULL;
689 saved_state = json_tokener_state_object_sep;
690 state = json_tokener_state_eatws;
Michael Clarka850f8e2007-03-13 08:26:26 +0000691 goto redo_char;
Michael Clarkf0d08882007-03-13 08:26:18 +0000692
693 case json_tokener_state_object_sep:
694 if(c == '}') {
Michael Clarkf0d08882007-03-13 08:26:18 +0000695 saved_state = json_tokener_state_finish;
696 state = json_tokener_state_eatws;
697 } else if(c == ',') {
Michael Clarka850f8e2007-03-13 08:26:26 +0000698 saved_state = json_tokener_state_object_field_start;
Michael Clarkf0d08882007-03-13 08:26:18 +0000699 state = json_tokener_state_eatws;
700 } else {
Michael Clarka850f8e2007-03-13 08:26:26 +0000701 tok->err = json_tokener_error_parse_object_value_sep;
Michael Clarkf0d08882007-03-13 08:26:18 +0000702 goto out;
703 }
704 break;
705
706 }
Michael Clark95f55a72009-04-27 08:16:58 +0000707 if (!ADVANCE_CHAR(str, tok))
708 goto out;
709 } /* while(POP_CHAR) */
Michael Clarkf0d08882007-03-13 08:26:18 +0000710
711 out:
Michael Clark95f55a72009-04-27 08:16:58 +0000712 if (!c) { /* We hit an eof char (0) */
713 if(state != json_tokener_state_finish &&
714 saved_state != json_tokener_state_finish)
715 tok->err = json_tokener_error_parse_eof;
716 }
717
Michael Clarka850f8e2007-03-13 08:26:26 +0000718 if(tok->err == json_tokener_success) return json_object_get(current);
Michael Clarkdfaf6702007-10-25 02:26:00 +0000719 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
Michael Clarka850f8e2007-03-13 08:26:26 +0000720 json_tokener_errors[tok->err], tok->char_offset);
721 return NULL;
Michael Clarkf0d08882007-03-13 08:26:18 +0000722}