blob: 6a1ea893a615bf8ee018cbc447007c6179085aae [file] [log] [blame]
/*
 * $Id: json_tokener.c,v 1.18 2006/01/26 02:16:28 mclark Exp $
 *
 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
 * Michael Clark <michael@metaparadigm.com>
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the MIT license. See COPYING for details.
 *
 */
11
Michael Clark4504df72007-03-13 08:26:20 +000012#include "config.h"
13
Michael Clarkf0d08882007-03-13 08:26:18 +000014#include <stdio.h>
15#include <stdlib.h>
16#include <ctype.h>
17#include <string.h>
18
19#include "bits.h"
20#include "debug.h"
21#include "printbuf.h"
22#include "arraylist.h"
23#include "json_object.h"
24#include "json_tokener.h"
25
Michael Clarkf0d08882007-03-13 08:26:18 +000026static struct json_object* json_tokener_do_parse(struct json_tokener *this);
27
28struct json_object* json_tokener_parse(char * s)
29{
30 struct json_tokener tok;
31 struct json_object* obj;
32
33 tok.source = s;
34 tok.pos = 0;
35 tok.pb = printbuf_new();
36 obj = json_tokener_do_parse(&tok);
37 printbuf_free(tok.pb);
38 return obj;
39}
40
Michael Clark4504df72007-03-13 08:26:20 +000041#if !HAVE_STRNDUP
/*
 * Fallback strndup() for platforms that do not provide one.
 *
 * Returns a newly malloc()ed, NUL-terminated copy of at most n bytes of
 * str; the caller owns the result and must free() it.  Returns NULL when
 * str is NULL or when the allocation fails.
 *
 * Only the first n bytes of str are ever examined, so str does not have to
 * be NUL-terminated as long as it holds at least n readable bytes, and the
 * cost is bounded by n rather than by the length of the whole source.
 */
char* strndup(const char* str, size_t n)
{
  const char *nul;
  size_t len;
  char *copy;

  if(!str) return NULL;

  /* memchr stops at the first NUL and never looks beyond n bytes. */
  nul = memchr(str, '\0', n);
  len = nul ? (size_t)(nul - str) : n;

  copy = malloc(len + 1);
  if(copy) {
    memcpy(copy, str, len);
    copy[len] = '\0';
  }

  return copy;
}
60#endif
61
Michael Clarkf0d08882007-03-13 08:26:18 +000062static struct json_object* json_tokener_do_parse(struct json_tokener *this)
63{
64 enum json_tokener_state state, saved_state;
65 enum json_tokener_error err = json_tokener_success;
66 struct json_object *current = NULL, *obj;
67 char *obj_field_name = NULL;
68 char quote_char;
69 int deemed_double, start_offset;
Michael Clark4504df72007-03-13 08:26:20 +000070 char c;
Michael Clarkf0d08882007-03-13 08:26:18 +000071
72 state = json_tokener_state_eatws;
73 saved_state = json_tokener_state_start;
74
Michael Clarkf0d08882007-03-13 08:26:18 +000075 do {
76 c = this->source[this->pos];
77 switch(state) {
78
79 case json_tokener_state_eatws:
80 if(isspace(c)) {
81 this->pos++;
82 } else if(c == '/') {
83 state = json_tokener_state_comment_start;
84 start_offset = this->pos++;
85 } else {
86 state = saved_state;
87 }
88 break;
89
90 case json_tokener_state_start:
91 switch(c) {
92 case '{':
93 state = json_tokener_state_eatws;
94 saved_state = json_tokener_state_object;
95 current = json_object_new_object();
96 this->pos++;
97 break;
98 case '[':
99 state = json_tokener_state_eatws;
100 saved_state = json_tokener_state_array;
101 current = json_object_new_array();
102 this->pos++;
103 break;
104 case 'N':
105 case 'n':
106 state = json_tokener_state_null;
107 start_offset = this->pos++;
108 break;
109 case '"':
110 case '\'':
111 quote_char = c;
112 printbuf_reset(this->pb);
113 state = json_tokener_state_string;
114 start_offset = ++this->pos;
115 break;
116 case 'T':
117 case 't':
118 case 'F':
119 case 'f':
120 state = json_tokener_state_boolean;
121 start_offset = this->pos++;
122 break;
Michael Clark4504df72007-03-13 08:26:20 +0000123#if defined(__GNUC__)
124 case '0' ... '9':
125#else
126 case '0':
127 case '1':
128 case '2':
129 case '3':
130 case '4':
131 case '5':
132 case '6':
133 case '7':
134 case '8':
135 case '9':
136#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000137 case '-':
138 deemed_double = 0;
139 state = json_tokener_state_number;
140 start_offset = this->pos++;
141 break;
142 default:
143 err = json_tokener_error_parse_unexpected;
144 goto out;
145 }
146 break;
147
148 case json_tokener_state_finish:
149 goto out;
150
151 case json_tokener_state_null:
152 if(strncasecmp("null", this->source + start_offset,
153 this->pos - start_offset))
154 return error_ptr(-json_tokener_error_parse_null);
155 if(this->pos - start_offset == 4) {
156 current = NULL;
157 saved_state = json_tokener_state_finish;
158 state = json_tokener_state_eatws;
159 } else {
160 this->pos++;
161 }
162 break;
163
164 case json_tokener_state_comment_start:
165 if(c == '*') {
166 state = json_tokener_state_comment;
167 } else if(c == '/') {
168 state = json_tokener_state_comment_eol;
169 } else {
170 err = json_tokener_error_parse_comment;
171 goto out;
172 }
173 this->pos++;
174 break;
175
176 case json_tokener_state_comment:
177 if(c == '*') state = json_tokener_state_comment_end;
178 this->pos++;
179 break;
180
181 case json_tokener_state_comment_eol:
182 if(c == '\n') {
183 if(mc_get_debug()) {
184 char *tmp = strndup(this->source + start_offset,
185 this->pos - start_offset);
186 mc_debug("json_tokener_comment: %s\n", tmp);
187 free(tmp);
188 }
189 state = json_tokener_state_eatws;
190 }
191 this->pos++;
192 break;
193
194 case json_tokener_state_comment_end:
195 if(c == '/') {
196 if(mc_get_debug()) {
197 char *tmp = strndup(this->source + start_offset,
198 this->pos - start_offset + 1);
199 mc_debug("json_tokener_comment: %s\n", tmp);
200 free(tmp);
201 }
202 state = json_tokener_state_eatws;
203 } else {
204 state = json_tokener_state_comment;
205 }
206 this->pos++;
207 break;
208
209 case json_tokener_state_string:
210 if(c == quote_char) {
211 printbuf_memappend(this->pb, this->source + start_offset,
212 this->pos - start_offset);
213 current = json_object_new_string(this->pb->buf);
214 saved_state = json_tokener_state_finish;
215 state = json_tokener_state_eatws;
216 } else if(c == '\\') {
217 saved_state = json_tokener_state_string;
218 state = json_tokener_state_string_escape;
219 }
220 this->pos++;
221 break;
222
223 case json_tokener_state_string_escape:
224 switch(c) {
225 case '"':
226 case '\\':
227 printbuf_memappend(this->pb, this->source + start_offset,
228 this->pos - start_offset - 1);
229 start_offset = this->pos++;
230 state = saved_state;
231 break;
232 case 'b':
233 case 'n':
234 case 'r':
235 case 't':
236 printbuf_memappend(this->pb, this->source + start_offset,
237 this->pos - start_offset - 1);
238 if(c == 'b') printbuf_memappend(this->pb, "\b", 1);
239 else if(c == 'n') printbuf_memappend(this->pb, "\n", 1);
240 else if(c == 'r') printbuf_memappend(this->pb, "\r", 1);
241 else if(c == 't') printbuf_memappend(this->pb, "\t", 1);
242 start_offset = ++this->pos;
243 state = saved_state;
244 break;
245 case 'u':
246 printbuf_memappend(this->pb, this->source + start_offset,
247 this->pos - start_offset - 1);
248 start_offset = ++this->pos;
249 state = json_tokener_state_escape_unicode;
250 break;
251 default:
252 err = json_tokener_error_parse_string;
253 goto out;
254 }
255 break;
256
257 case json_tokener_state_escape_unicode:
258 if(strchr(json_hex_chars, c)) {
259 this->pos++;
260 if(this->pos - start_offset == 4) {
261 unsigned char utf_out[3];
262 unsigned int ucs_char =
263 (hexdigit(*(this->source + start_offset)) << 12) +
264 (hexdigit(*(this->source + start_offset + 1)) << 8) +
265 (hexdigit(*(this->source + start_offset + 2)) << 4) +
266 hexdigit(*(this->source + start_offset + 3));
267 if (ucs_char < 0x80) {
268 utf_out[0] = ucs_char;
Michael Clark7b899b62007-03-13 08:26:21 +0000269 printbuf_memappend(this->pb, (char*)utf_out, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000270 } else if (ucs_char < 0x800) {
271 utf_out[0] = 0xc0 | (ucs_char >> 6);
272 utf_out[1] = 0x80 | (ucs_char & 0x3f);
Michael Clark7b899b62007-03-13 08:26:21 +0000273 printbuf_memappend(this->pb, (char*)utf_out, 2);
Michael Clarkf0d08882007-03-13 08:26:18 +0000274 } else {
275 utf_out[0] = 0xe0 | (ucs_char >> 12);
276 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
277 utf_out[2] = 0x80 | (ucs_char & 0x3f);
Michael Clark7b899b62007-03-13 08:26:21 +0000278 printbuf_memappend(this->pb, (char*)utf_out, 3);
Michael Clarkf0d08882007-03-13 08:26:18 +0000279 }
280 start_offset = this->pos;
281 state = saved_state;
282 }
283 } else {
284 err = json_tokener_error_parse_string;
285 goto out;
286 }
287 break;
288
289 case json_tokener_state_boolean:
290 if(strncasecmp("true", this->source + start_offset,
291 this->pos - start_offset) == 0) {
292 if(this->pos - start_offset == 4) {
293 current = json_object_new_boolean(1);
294 saved_state = json_tokener_state_finish;
295 state = json_tokener_state_eatws;
296 } else {
297 this->pos++;
298 }
299 } else if(strncasecmp("false", this->source + start_offset,
300 this->pos - start_offset) == 0) {
301 if(this->pos - start_offset == 5) {
302 current = json_object_new_boolean(0);
303 saved_state = json_tokener_state_finish;
304 state = json_tokener_state_eatws;
305 } else {
306 this->pos++;
307 }
308 } else {
309 err = json_tokener_error_parse_boolean;
310 goto out;
311 }
312 break;
313
314 case json_tokener_state_number:
315 if(!c || !strchr(json_number_chars, c)) {
316 int numi;
317 double numd;
318 char *tmp = strndup(this->source + start_offset,
319 this->pos - start_offset);
320 if(!deemed_double && sscanf(tmp, "%d", &numi) == 1) {
321 current = json_object_new_int(numi);
322 } else if(deemed_double && sscanf(tmp, "%lf", &numd) == 1) {
323 current = json_object_new_double(numd);
324 } else {
325 free(tmp);
326 err = json_tokener_error_parse_number;
327 goto out;
328 }
329 free(tmp);
330 saved_state = json_tokener_state_finish;
331 state = json_tokener_state_eatws;
332 } else {
333 if(c == '.' || c == 'e') deemed_double = 1;
334 this->pos++;
335 }
336 break;
337
338 case json_tokener_state_array:
339 if(c == ']') {
340 this->pos++;
341 saved_state = json_tokener_state_finish;
342 state = json_tokener_state_eatws;
343 } else {
344 obj = json_tokener_do_parse(this);
345 if(is_error(obj)) {
Michael Clark0370baa2007-03-13 08:26:22 +0000346 err = -(enum json_tokener_error)obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000347 goto out;
348 }
349 json_object_array_add(current, obj);
350 saved_state = json_tokener_state_array_sep;
351 state = json_tokener_state_eatws;
352 }
353 break;
354
355 case json_tokener_state_array_sep:
356 if(c == ']') {
357 this->pos++;
358 saved_state = json_tokener_state_finish;
359 state = json_tokener_state_eatws;
360 } else if(c == ',') {
361 this->pos++;
362 saved_state = json_tokener_state_array;
363 state = json_tokener_state_eatws;
364 } else {
365 json_object_put(current);
366 return error_ptr(-json_tokener_error_parse_array);
367 }
368 break;
369
370 case json_tokener_state_object:
371 state = json_tokener_state_object_field_start;
372 start_offset = this->pos;
373 break;
374
375 case json_tokener_state_object_field_start:
376 if(c == '}') {
377 this->pos++;
378 saved_state = json_tokener_state_finish;
379 state = json_tokener_state_eatws;
380 } else if (c == '"' || c == '\'') {
381 quote_char = c;
382 printbuf_reset(this->pb);
383 state = json_tokener_state_object_field;
384 start_offset = ++this->pos;
Michael Clark0370baa2007-03-13 08:26:22 +0000385 } else {
386 err = json_tokener_error_parse_object;
387 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000388 }
389 break;
390
391 case json_tokener_state_object_field:
392 if(c == quote_char) {
393 printbuf_memappend(this->pb, this->source + start_offset,
394 this->pos - start_offset);
395 obj_field_name = strdup(this->pb->buf);
396 saved_state = json_tokener_state_object_field_end;
397 state = json_tokener_state_eatws;
398 } else if(c == '\\') {
399 saved_state = json_tokener_state_object_field;
400 state = json_tokener_state_string_escape;
401 }
402 this->pos++;
403 break;
404
405 case json_tokener_state_object_field_end:
406 if(c == ':') {
407 this->pos++;
408 saved_state = json_tokener_state_object_value;
409 state = json_tokener_state_eatws;
410 } else {
411 return error_ptr(-json_tokener_error_parse_object);
412 }
413 break;
414
415 case json_tokener_state_object_value:
416 obj = json_tokener_do_parse(this);
417 if(is_error(obj)) {
Michael Clark0370baa2007-03-13 08:26:22 +0000418 err = -(enum json_tokener_error)obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000419 goto out;
420 }
421 json_object_object_add(current, obj_field_name, obj);
422 free(obj_field_name);
423 obj_field_name = NULL;
424 saved_state = json_tokener_state_object_sep;
425 state = json_tokener_state_eatws;
426 break;
427
428 case json_tokener_state_object_sep:
429 if(c == '}') {
430 this->pos++;
431 saved_state = json_tokener_state_finish;
432 state = json_tokener_state_eatws;
433 } else if(c == ',') {
434 this->pos++;
435 saved_state = json_tokener_state_object;
436 state = json_tokener_state_eatws;
437 } else {
438 err = json_tokener_error_parse_object;
439 goto out;
440 }
441 break;
442
443 }
444 } while(c);
445
446 if(state != json_tokener_state_finish &&
447 saved_state != json_tokener_state_finish)
448 err = json_tokener_error_parse_eof;
449
450 out:
451 free(obj_field_name);
452 if(err == json_tokener_success) return current;
453 mc_debug("json_tokener_do_parse: error=%d state=%d char=%c\n",
454 err, state, c);
455 json_object_put(current);
Michael Clark0370baa2007-03-13 08:26:22 +0000456 return error_ptr(-err);
Michael Clarkf0d08882007-03-13 08:26:18 +0000457}