blob: a3ebf10878b2619151e8325c21fbca9ac7dcb981 [file] [log] [blame]
/*
 * $Id: json_tokener.c,v 1.19 2006/01/30 23:07:57 mclark Exp $
 *
 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
 * Michael Clark <michael@metaparadigm.com>
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the MIT license. See COPYING for details.
 *
 */
11
Michael Clark4504df72007-03-13 08:26:20 +000012#include "config.h"
13
Michael Clarkf0d08882007-03-13 08:26:18 +000014#include <stdio.h>
15#include <stdlib.h>
16#include <ctype.h>
17#include <string.h>
18
19#include "bits.h"
20#include "debug.h"
21#include "printbuf.h"
22#include "arraylist.h"
23#include "json_object.h"
24#include "json_tokener.h"
25
Michael Clark837240f2007-03-13 08:26:25 +000026#if !HAVE_STRNCASECMP && defined(_MSC_VER)
27 /* MSC has the version as _strnicmp */
28# define strncasecmp _strnicmp
29#elif !HAVE_STRNCASECMP
30# error You do not have strncasecmp on your system.
31#endif /* HAVE_STRNCASECMP */
32
33
/*
 * Forward declaration: json_tokener_parse() below calls the parser core
 * before its definition appears in this file.
 */
static struct json_object* json_tokener_do_parse(struct json_tokener *this);
35
36struct json_object* json_tokener_parse(char * s)
37{
38 struct json_tokener tok;
39 struct json_object* obj;
40
41 tok.source = s;
42 tok.pos = 0;
43 tok.pb = printbuf_new();
44 obj = json_tokener_do_parse(&tok);
45 printbuf_free(tok.pb);
46 return obj;
47}
48
#if !HAVE_STRNDUP
/*
 * Fallback strndup() for systems that lack one.
 *
 * Copies at most n bytes of str into a newly malloc()ed, NUL-terminated
 * buffer. Only the first n bytes of str are ever inspected, so str does
 * not need to be NUL-terminated when it holds at least n bytes.
 *
 * Returns the new string (the caller must free() it), or NULL when str is
 * NULL or the allocation fails.
 */
char* strndup(const char* str, size_t n)
{
  const char *end;
  size_t len;
  char *copy;

  if(!str) return NULL;

  /* Bounded length: look for the terminator within the first n bytes only. */
  end = memchr(str, '\0', n);
  len = end ? (size_t)(end - str) : n;

  copy = malloc(len + 1);
  if(!copy) return NULL;

  memcpy(copy, str, len);
  copy[len] = '\0';
  return copy;
}
#endif
69
Michael Clarkf0d08882007-03-13 08:26:18 +000070static struct json_object* json_tokener_do_parse(struct json_tokener *this)
71{
72 enum json_tokener_state state, saved_state;
73 enum json_tokener_error err = json_tokener_success;
74 struct json_object *current = NULL, *obj;
75 char *obj_field_name = NULL;
76 char quote_char;
77 int deemed_double, start_offset;
Michael Clark4504df72007-03-13 08:26:20 +000078 char c;
Michael Clarkf0d08882007-03-13 08:26:18 +000079
80 state = json_tokener_state_eatws;
81 saved_state = json_tokener_state_start;
82
Michael Clarkf0d08882007-03-13 08:26:18 +000083 do {
84 c = this->source[this->pos];
85 switch(state) {
86
87 case json_tokener_state_eatws:
88 if(isspace(c)) {
89 this->pos++;
90 } else if(c == '/') {
91 state = json_tokener_state_comment_start;
92 start_offset = this->pos++;
93 } else {
94 state = saved_state;
95 }
96 break;
97
98 case json_tokener_state_start:
99 switch(c) {
100 case '{':
101 state = json_tokener_state_eatws;
102 saved_state = json_tokener_state_object;
103 current = json_object_new_object();
104 this->pos++;
105 break;
106 case '[':
107 state = json_tokener_state_eatws;
108 saved_state = json_tokener_state_array;
109 current = json_object_new_array();
110 this->pos++;
111 break;
112 case 'N':
113 case 'n':
114 state = json_tokener_state_null;
115 start_offset = this->pos++;
116 break;
117 case '"':
118 case '\'':
119 quote_char = c;
120 printbuf_reset(this->pb);
121 state = json_tokener_state_string;
122 start_offset = ++this->pos;
123 break;
124 case 'T':
125 case 't':
126 case 'F':
127 case 'f':
128 state = json_tokener_state_boolean;
129 start_offset = this->pos++;
130 break;
Michael Clark4504df72007-03-13 08:26:20 +0000131#if defined(__GNUC__)
132 case '0' ... '9':
133#else
134 case '0':
135 case '1':
136 case '2':
137 case '3':
138 case '4':
139 case '5':
140 case '6':
141 case '7':
142 case '8':
143 case '9':
144#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000145 case '-':
146 deemed_double = 0;
147 state = json_tokener_state_number;
148 start_offset = this->pos++;
149 break;
150 default:
151 err = json_tokener_error_parse_unexpected;
152 goto out;
153 }
154 break;
155
156 case json_tokener_state_finish:
157 goto out;
158
159 case json_tokener_state_null:
160 if(strncasecmp("null", this->source + start_offset,
161 this->pos - start_offset))
162 return error_ptr(-json_tokener_error_parse_null);
163 if(this->pos - start_offset == 4) {
164 current = NULL;
165 saved_state = json_tokener_state_finish;
166 state = json_tokener_state_eatws;
167 } else {
168 this->pos++;
169 }
170 break;
171
172 case json_tokener_state_comment_start:
173 if(c == '*') {
174 state = json_tokener_state_comment;
175 } else if(c == '/') {
176 state = json_tokener_state_comment_eol;
177 } else {
178 err = json_tokener_error_parse_comment;
179 goto out;
180 }
181 this->pos++;
182 break;
183
184 case json_tokener_state_comment:
185 if(c == '*') state = json_tokener_state_comment_end;
186 this->pos++;
187 break;
188
189 case json_tokener_state_comment_eol:
190 if(c == '\n') {
191 if(mc_get_debug()) {
192 char *tmp = strndup(this->source + start_offset,
193 this->pos - start_offset);
194 mc_debug("json_tokener_comment: %s\n", tmp);
195 free(tmp);
196 }
197 state = json_tokener_state_eatws;
198 }
199 this->pos++;
200 break;
201
202 case json_tokener_state_comment_end:
203 if(c == '/') {
204 if(mc_get_debug()) {
205 char *tmp = strndup(this->source + start_offset,
206 this->pos - start_offset + 1);
207 mc_debug("json_tokener_comment: %s\n", tmp);
208 free(tmp);
209 }
210 state = json_tokener_state_eatws;
211 } else {
212 state = json_tokener_state_comment;
213 }
214 this->pos++;
215 break;
216
217 case json_tokener_state_string:
218 if(c == quote_char) {
219 printbuf_memappend(this->pb, this->source + start_offset,
220 this->pos - start_offset);
221 current = json_object_new_string(this->pb->buf);
222 saved_state = json_tokener_state_finish;
223 state = json_tokener_state_eatws;
224 } else if(c == '\\') {
225 saved_state = json_tokener_state_string;
226 state = json_tokener_state_string_escape;
227 }
228 this->pos++;
229 break;
230
231 case json_tokener_state_string_escape:
232 switch(c) {
233 case '"':
234 case '\\':
235 printbuf_memappend(this->pb, this->source + start_offset,
236 this->pos - start_offset - 1);
237 start_offset = this->pos++;
238 state = saved_state;
239 break;
240 case 'b':
241 case 'n':
242 case 'r':
243 case 't':
244 printbuf_memappend(this->pb, this->source + start_offset,
245 this->pos - start_offset - 1);
246 if(c == 'b') printbuf_memappend(this->pb, "\b", 1);
247 else if(c == 'n') printbuf_memappend(this->pb, "\n", 1);
248 else if(c == 'r') printbuf_memappend(this->pb, "\r", 1);
249 else if(c == 't') printbuf_memappend(this->pb, "\t", 1);
250 start_offset = ++this->pos;
251 state = saved_state;
252 break;
253 case 'u':
254 printbuf_memappend(this->pb, this->source + start_offset,
255 this->pos - start_offset - 1);
256 start_offset = ++this->pos;
257 state = json_tokener_state_escape_unicode;
258 break;
259 default:
260 err = json_tokener_error_parse_string;
261 goto out;
262 }
263 break;
264
265 case json_tokener_state_escape_unicode:
266 if(strchr(json_hex_chars, c)) {
267 this->pos++;
268 if(this->pos - start_offset == 4) {
269 unsigned char utf_out[3];
270 unsigned int ucs_char =
271 (hexdigit(*(this->source + start_offset)) << 12) +
272 (hexdigit(*(this->source + start_offset + 1)) << 8) +
273 (hexdigit(*(this->source + start_offset + 2)) << 4) +
274 hexdigit(*(this->source + start_offset + 3));
275 if (ucs_char < 0x80) {
276 utf_out[0] = ucs_char;
Michael Clark7b899b62007-03-13 08:26:21 +0000277 printbuf_memappend(this->pb, (char*)utf_out, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000278 } else if (ucs_char < 0x800) {
279 utf_out[0] = 0xc0 | (ucs_char >> 6);
280 utf_out[1] = 0x80 | (ucs_char & 0x3f);
Michael Clark7b899b62007-03-13 08:26:21 +0000281 printbuf_memappend(this->pb, (char*)utf_out, 2);
Michael Clarkf0d08882007-03-13 08:26:18 +0000282 } else {
283 utf_out[0] = 0xe0 | (ucs_char >> 12);
284 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
285 utf_out[2] = 0x80 | (ucs_char & 0x3f);
Michael Clark7b899b62007-03-13 08:26:21 +0000286 printbuf_memappend(this->pb, (char*)utf_out, 3);
Michael Clarkf0d08882007-03-13 08:26:18 +0000287 }
288 start_offset = this->pos;
289 state = saved_state;
290 }
291 } else {
292 err = json_tokener_error_parse_string;
293 goto out;
294 }
295 break;
296
297 case json_tokener_state_boolean:
298 if(strncasecmp("true", this->source + start_offset,
299 this->pos - start_offset) == 0) {
300 if(this->pos - start_offset == 4) {
301 current = json_object_new_boolean(1);
302 saved_state = json_tokener_state_finish;
303 state = json_tokener_state_eatws;
304 } else {
305 this->pos++;
306 }
307 } else if(strncasecmp("false", this->source + start_offset,
308 this->pos - start_offset) == 0) {
309 if(this->pos - start_offset == 5) {
310 current = json_object_new_boolean(0);
311 saved_state = json_tokener_state_finish;
312 state = json_tokener_state_eatws;
313 } else {
314 this->pos++;
315 }
316 } else {
317 err = json_tokener_error_parse_boolean;
318 goto out;
319 }
320 break;
321
322 case json_tokener_state_number:
323 if(!c || !strchr(json_number_chars, c)) {
324 int numi;
325 double numd;
326 char *tmp = strndup(this->source + start_offset,
327 this->pos - start_offset);
328 if(!deemed_double && sscanf(tmp, "%d", &numi) == 1) {
329 current = json_object_new_int(numi);
330 } else if(deemed_double && sscanf(tmp, "%lf", &numd) == 1) {
331 current = json_object_new_double(numd);
332 } else {
333 free(tmp);
334 err = json_tokener_error_parse_number;
335 goto out;
336 }
337 free(tmp);
338 saved_state = json_tokener_state_finish;
339 state = json_tokener_state_eatws;
340 } else {
341 if(c == '.' || c == 'e') deemed_double = 1;
342 this->pos++;
343 }
344 break;
345
346 case json_tokener_state_array:
347 if(c == ']') {
348 this->pos++;
349 saved_state = json_tokener_state_finish;
350 state = json_tokener_state_eatws;
351 } else {
352 obj = json_tokener_do_parse(this);
353 if(is_error(obj)) {
Michael Clark0370baa2007-03-13 08:26:22 +0000354 err = -(enum json_tokener_error)obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000355 goto out;
356 }
357 json_object_array_add(current, obj);
358 saved_state = json_tokener_state_array_sep;
359 state = json_tokener_state_eatws;
360 }
361 break;
362
363 case json_tokener_state_array_sep:
364 if(c == ']') {
365 this->pos++;
366 saved_state = json_tokener_state_finish;
367 state = json_tokener_state_eatws;
368 } else if(c == ',') {
369 this->pos++;
370 saved_state = json_tokener_state_array;
371 state = json_tokener_state_eatws;
372 } else {
373 json_object_put(current);
374 return error_ptr(-json_tokener_error_parse_array);
375 }
376 break;
377
378 case json_tokener_state_object:
379 state = json_tokener_state_object_field_start;
380 start_offset = this->pos;
381 break;
382
383 case json_tokener_state_object_field_start:
384 if(c == '}') {
385 this->pos++;
386 saved_state = json_tokener_state_finish;
387 state = json_tokener_state_eatws;
388 } else if (c == '"' || c == '\'') {
389 quote_char = c;
390 printbuf_reset(this->pb);
391 state = json_tokener_state_object_field;
392 start_offset = ++this->pos;
Michael Clark0370baa2007-03-13 08:26:22 +0000393 } else {
394 err = json_tokener_error_parse_object;
395 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000396 }
397 break;
398
399 case json_tokener_state_object_field:
400 if(c == quote_char) {
401 printbuf_memappend(this->pb, this->source + start_offset,
402 this->pos - start_offset);
403 obj_field_name = strdup(this->pb->buf);
404 saved_state = json_tokener_state_object_field_end;
405 state = json_tokener_state_eatws;
406 } else if(c == '\\') {
407 saved_state = json_tokener_state_object_field;
408 state = json_tokener_state_string_escape;
409 }
410 this->pos++;
411 break;
412
413 case json_tokener_state_object_field_end:
414 if(c == ':') {
415 this->pos++;
416 saved_state = json_tokener_state_object_value;
417 state = json_tokener_state_eatws;
418 } else {
419 return error_ptr(-json_tokener_error_parse_object);
420 }
421 break;
422
423 case json_tokener_state_object_value:
424 obj = json_tokener_do_parse(this);
425 if(is_error(obj)) {
Michael Clark0370baa2007-03-13 08:26:22 +0000426 err = -(enum json_tokener_error)obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000427 goto out;
428 }
429 json_object_object_add(current, obj_field_name, obj);
430 free(obj_field_name);
431 obj_field_name = NULL;
432 saved_state = json_tokener_state_object_sep;
433 state = json_tokener_state_eatws;
434 break;
435
436 case json_tokener_state_object_sep:
437 if(c == '}') {
438 this->pos++;
439 saved_state = json_tokener_state_finish;
440 state = json_tokener_state_eatws;
441 } else if(c == ',') {
442 this->pos++;
443 saved_state = json_tokener_state_object;
444 state = json_tokener_state_eatws;
445 } else {
446 err = json_tokener_error_parse_object;
447 goto out;
448 }
449 break;
450
451 }
452 } while(c);
453
454 if(state != json_tokener_state_finish &&
455 saved_state != json_tokener_state_finish)
456 err = json_tokener_error_parse_eof;
457
458 out:
459 free(obj_field_name);
460 if(err == json_tokener_success) return current;
461 mc_debug("json_tokener_do_parse: error=%d state=%d char=%c\n",
462 err, state, c);
463 json_object_put(current);
Michael Clark0370baa2007-03-13 08:26:22 +0000464 return error_ptr(-err);
Michael Clarkf0d08882007-03-13 08:26:18 +0000465}