blob: fdb51135b0a4a0dd42e4c61eb637a2d26de5a238 [file] [log] [blame]
/*
 * $Id: json_tokener.c,v 1.17 2005/07/26 07:49:11 mclark Exp $
 *
 * Copyright Metaparadigm Pte. Ltd. 2004.
 * Michael Clark <michael@metaparadigm.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public (LGPL)
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details: http://www.gnu.org/
 *
 */
18
Michael Clark4504df72007-03-13 08:26:20 +000019#include "config.h"
20
Michael Clarkf0d08882007-03-13 08:26:18 +000021#include <stdio.h>
22#include <stdlib.h>
23#include <ctype.h>
24#include <string.h>
25
26#include "bits.h"
27#include "debug.h"
28#include "printbuf.h"
29#include "arraylist.h"
30#include "json_object.h"
31#include "json_tokener.h"
32
Michael Clarkf0d08882007-03-13 08:26:18 +000033static struct json_object* json_tokener_do_parse(struct json_tokener *this);
34
35struct json_object* json_tokener_parse(char * s)
36{
37 struct json_tokener tok;
38 struct json_object* obj;
39
40 tok.source = s;
41 tok.pos = 0;
42 tok.pb = printbuf_new();
43 obj = json_tokener_do_parse(&tok);
44 printbuf_free(tok.pb);
45 return obj;
46}
47
#if !HAVE_STRNDUP
/*
 * Fallback strndup() for platforms whose C library does not provide one.
 *
 * Returns a freshly malloc()ed, NUL-terminated copy of at most `n` bytes of
 * `str`; the caller owns the result and must free() it.  Returns NULL when
 * `str` is NULL or when the allocation fails.
 *
 * At most `n` bytes of `str` are ever examined, so the input does not have
 * to be NUL-terminated as long as `n` bytes are readable, and the cost is
 * bounded by `n` rather than by the length of the whole input.
 */
char* strndup(const char* str, size_t n)
{
  const char *nul;
  size_t len;
  char *copy;

  if(!str) return NULL;

  /* Search for the terminator inside the first n bytes only; strlen()
   * would walk the entire string and may run off an unterminated buffer. */
  nul = memchr(str, '\0', n);
  len = nul ? (size_t)(nul - str) : n;

  copy = malloc(len + 1);
  if(copy) {
    memcpy(copy, str, len);
    copy[len] = '\0';
  }

  return copy;
}
#endif
68
Michael Clarkf0d08882007-03-13 08:26:18 +000069static struct json_object* json_tokener_do_parse(struct json_tokener *this)
70{
71 enum json_tokener_state state, saved_state;
72 enum json_tokener_error err = json_tokener_success;
73 struct json_object *current = NULL, *obj;
74 char *obj_field_name = NULL;
75 char quote_char;
76 int deemed_double, start_offset;
Michael Clark4504df72007-03-13 08:26:20 +000077 char c;
Michael Clarkf0d08882007-03-13 08:26:18 +000078
79 state = json_tokener_state_eatws;
80 saved_state = json_tokener_state_start;
81
Michael Clarkf0d08882007-03-13 08:26:18 +000082 do {
83 c = this->source[this->pos];
84 switch(state) {
85
86 case json_tokener_state_eatws:
87 if(isspace(c)) {
88 this->pos++;
89 } else if(c == '/') {
90 state = json_tokener_state_comment_start;
91 start_offset = this->pos++;
92 } else {
93 state = saved_state;
94 }
95 break;
96
97 case json_tokener_state_start:
98 switch(c) {
99 case '{':
100 state = json_tokener_state_eatws;
101 saved_state = json_tokener_state_object;
102 current = json_object_new_object();
103 this->pos++;
104 break;
105 case '[':
106 state = json_tokener_state_eatws;
107 saved_state = json_tokener_state_array;
108 current = json_object_new_array();
109 this->pos++;
110 break;
111 case 'N':
112 case 'n':
113 state = json_tokener_state_null;
114 start_offset = this->pos++;
115 break;
116 case '"':
117 case '\'':
118 quote_char = c;
119 printbuf_reset(this->pb);
120 state = json_tokener_state_string;
121 start_offset = ++this->pos;
122 break;
123 case 'T':
124 case 't':
125 case 'F':
126 case 'f':
127 state = json_tokener_state_boolean;
128 start_offset = this->pos++;
129 break;
Michael Clark4504df72007-03-13 08:26:20 +0000130#if defined(__GNUC__)
131 case '0' ... '9':
132#else
133 case '0':
134 case '1':
135 case '2':
136 case '3':
137 case '4':
138 case '5':
139 case '6':
140 case '7':
141 case '8':
142 case '9':
143#endif
Michael Clarkf0d08882007-03-13 08:26:18 +0000144 case '-':
145 deemed_double = 0;
146 state = json_tokener_state_number;
147 start_offset = this->pos++;
148 break;
149 default:
150 err = json_tokener_error_parse_unexpected;
151 goto out;
152 }
153 break;
154
155 case json_tokener_state_finish:
156 goto out;
157
158 case json_tokener_state_null:
159 if(strncasecmp("null", this->source + start_offset,
160 this->pos - start_offset))
161 return error_ptr(-json_tokener_error_parse_null);
162 if(this->pos - start_offset == 4) {
163 current = NULL;
164 saved_state = json_tokener_state_finish;
165 state = json_tokener_state_eatws;
166 } else {
167 this->pos++;
168 }
169 break;
170
171 case json_tokener_state_comment_start:
172 if(c == '*') {
173 state = json_tokener_state_comment;
174 } else if(c == '/') {
175 state = json_tokener_state_comment_eol;
176 } else {
177 err = json_tokener_error_parse_comment;
178 goto out;
179 }
180 this->pos++;
181 break;
182
183 case json_tokener_state_comment:
184 if(c == '*') state = json_tokener_state_comment_end;
185 this->pos++;
186 break;
187
188 case json_tokener_state_comment_eol:
189 if(c == '\n') {
190 if(mc_get_debug()) {
191 char *tmp = strndup(this->source + start_offset,
192 this->pos - start_offset);
193 mc_debug("json_tokener_comment: %s\n", tmp);
194 free(tmp);
195 }
196 state = json_tokener_state_eatws;
197 }
198 this->pos++;
199 break;
200
201 case json_tokener_state_comment_end:
202 if(c == '/') {
203 if(mc_get_debug()) {
204 char *tmp = strndup(this->source + start_offset,
205 this->pos - start_offset + 1);
206 mc_debug("json_tokener_comment: %s\n", tmp);
207 free(tmp);
208 }
209 state = json_tokener_state_eatws;
210 } else {
211 state = json_tokener_state_comment;
212 }
213 this->pos++;
214 break;
215
216 case json_tokener_state_string:
217 if(c == quote_char) {
218 printbuf_memappend(this->pb, this->source + start_offset,
219 this->pos - start_offset);
220 current = json_object_new_string(this->pb->buf);
221 saved_state = json_tokener_state_finish;
222 state = json_tokener_state_eatws;
223 } else if(c == '\\') {
224 saved_state = json_tokener_state_string;
225 state = json_tokener_state_string_escape;
226 }
227 this->pos++;
228 break;
229
230 case json_tokener_state_string_escape:
231 switch(c) {
232 case '"':
233 case '\\':
234 printbuf_memappend(this->pb, this->source + start_offset,
235 this->pos - start_offset - 1);
236 start_offset = this->pos++;
237 state = saved_state;
238 break;
239 case 'b':
240 case 'n':
241 case 'r':
242 case 't':
243 printbuf_memappend(this->pb, this->source + start_offset,
244 this->pos - start_offset - 1);
245 if(c == 'b') printbuf_memappend(this->pb, "\b", 1);
246 else if(c == 'n') printbuf_memappend(this->pb, "\n", 1);
247 else if(c == 'r') printbuf_memappend(this->pb, "\r", 1);
248 else if(c == 't') printbuf_memappend(this->pb, "\t", 1);
249 start_offset = ++this->pos;
250 state = saved_state;
251 break;
252 case 'u':
253 printbuf_memappend(this->pb, this->source + start_offset,
254 this->pos - start_offset - 1);
255 start_offset = ++this->pos;
256 state = json_tokener_state_escape_unicode;
257 break;
258 default:
259 err = json_tokener_error_parse_string;
260 goto out;
261 }
262 break;
263
264 case json_tokener_state_escape_unicode:
265 if(strchr(json_hex_chars, c)) {
266 this->pos++;
267 if(this->pos - start_offset == 4) {
268 unsigned char utf_out[3];
269 unsigned int ucs_char =
270 (hexdigit(*(this->source + start_offset)) << 12) +
271 (hexdigit(*(this->source + start_offset + 1)) << 8) +
272 (hexdigit(*(this->source + start_offset + 2)) << 4) +
273 hexdigit(*(this->source + start_offset + 3));
274 if (ucs_char < 0x80) {
275 utf_out[0] = ucs_char;
Michael Clark7b899b62007-03-13 08:26:21 +0000276 printbuf_memappend(this->pb, (char*)utf_out, 1);
Michael Clarkf0d08882007-03-13 08:26:18 +0000277 } else if (ucs_char < 0x800) {
278 utf_out[0] = 0xc0 | (ucs_char >> 6);
279 utf_out[1] = 0x80 | (ucs_char & 0x3f);
Michael Clark7b899b62007-03-13 08:26:21 +0000280 printbuf_memappend(this->pb, (char*)utf_out, 2);
Michael Clarkf0d08882007-03-13 08:26:18 +0000281 } else {
282 utf_out[0] = 0xe0 | (ucs_char >> 12);
283 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
284 utf_out[2] = 0x80 | (ucs_char & 0x3f);
Michael Clark7b899b62007-03-13 08:26:21 +0000285 printbuf_memappend(this->pb, (char*)utf_out, 3);
Michael Clarkf0d08882007-03-13 08:26:18 +0000286 }
287 start_offset = this->pos;
288 state = saved_state;
289 }
290 } else {
291 err = json_tokener_error_parse_string;
292 goto out;
293 }
294 break;
295
296 case json_tokener_state_boolean:
297 if(strncasecmp("true", this->source + start_offset,
298 this->pos - start_offset) == 0) {
299 if(this->pos - start_offset == 4) {
300 current = json_object_new_boolean(1);
301 saved_state = json_tokener_state_finish;
302 state = json_tokener_state_eatws;
303 } else {
304 this->pos++;
305 }
306 } else if(strncasecmp("false", this->source + start_offset,
307 this->pos - start_offset) == 0) {
308 if(this->pos - start_offset == 5) {
309 current = json_object_new_boolean(0);
310 saved_state = json_tokener_state_finish;
311 state = json_tokener_state_eatws;
312 } else {
313 this->pos++;
314 }
315 } else {
316 err = json_tokener_error_parse_boolean;
317 goto out;
318 }
319 break;
320
321 case json_tokener_state_number:
322 if(!c || !strchr(json_number_chars, c)) {
323 int numi;
324 double numd;
325 char *tmp = strndup(this->source + start_offset,
326 this->pos - start_offset);
327 if(!deemed_double && sscanf(tmp, "%d", &numi) == 1) {
328 current = json_object_new_int(numi);
329 } else if(deemed_double && sscanf(tmp, "%lf", &numd) == 1) {
330 current = json_object_new_double(numd);
331 } else {
332 free(tmp);
333 err = json_tokener_error_parse_number;
334 goto out;
335 }
336 free(tmp);
337 saved_state = json_tokener_state_finish;
338 state = json_tokener_state_eatws;
339 } else {
340 if(c == '.' || c == 'e') deemed_double = 1;
341 this->pos++;
342 }
343 break;
344
345 case json_tokener_state_array:
346 if(c == ']') {
347 this->pos++;
348 saved_state = json_tokener_state_finish;
349 state = json_tokener_state_eatws;
350 } else {
351 obj = json_tokener_do_parse(this);
352 if(is_error(obj)) {
Michael Clark0370baa2007-03-13 08:26:22 +0000353 err = -(enum json_tokener_error)obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000354 goto out;
355 }
356 json_object_array_add(current, obj);
357 saved_state = json_tokener_state_array_sep;
358 state = json_tokener_state_eatws;
359 }
360 break;
361
362 case json_tokener_state_array_sep:
363 if(c == ']') {
364 this->pos++;
365 saved_state = json_tokener_state_finish;
366 state = json_tokener_state_eatws;
367 } else if(c == ',') {
368 this->pos++;
369 saved_state = json_tokener_state_array;
370 state = json_tokener_state_eatws;
371 } else {
372 json_object_put(current);
373 return error_ptr(-json_tokener_error_parse_array);
374 }
375 break;
376
377 case json_tokener_state_object:
378 state = json_tokener_state_object_field_start;
379 start_offset = this->pos;
380 break;
381
382 case json_tokener_state_object_field_start:
383 if(c == '}') {
384 this->pos++;
385 saved_state = json_tokener_state_finish;
386 state = json_tokener_state_eatws;
387 } else if (c == '"' || c == '\'') {
388 quote_char = c;
389 printbuf_reset(this->pb);
390 state = json_tokener_state_object_field;
391 start_offset = ++this->pos;
Michael Clark0370baa2007-03-13 08:26:22 +0000392 } else {
393 err = json_tokener_error_parse_object;
394 goto out;
Michael Clarkf0d08882007-03-13 08:26:18 +0000395 }
396 break;
397
398 case json_tokener_state_object_field:
399 if(c == quote_char) {
400 printbuf_memappend(this->pb, this->source + start_offset,
401 this->pos - start_offset);
402 obj_field_name = strdup(this->pb->buf);
403 saved_state = json_tokener_state_object_field_end;
404 state = json_tokener_state_eatws;
405 } else if(c == '\\') {
406 saved_state = json_tokener_state_object_field;
407 state = json_tokener_state_string_escape;
408 }
409 this->pos++;
410 break;
411
412 case json_tokener_state_object_field_end:
413 if(c == ':') {
414 this->pos++;
415 saved_state = json_tokener_state_object_value;
416 state = json_tokener_state_eatws;
417 } else {
418 return error_ptr(-json_tokener_error_parse_object);
419 }
420 break;
421
422 case json_tokener_state_object_value:
423 obj = json_tokener_do_parse(this);
424 if(is_error(obj)) {
Michael Clark0370baa2007-03-13 08:26:22 +0000425 err = -(enum json_tokener_error)obj;
Michael Clarkf0d08882007-03-13 08:26:18 +0000426 goto out;
427 }
428 json_object_object_add(current, obj_field_name, obj);
429 free(obj_field_name);
430 obj_field_name = NULL;
431 saved_state = json_tokener_state_object_sep;
432 state = json_tokener_state_eatws;
433 break;
434
435 case json_tokener_state_object_sep:
436 if(c == '}') {
437 this->pos++;
438 saved_state = json_tokener_state_finish;
439 state = json_tokener_state_eatws;
440 } else if(c == ',') {
441 this->pos++;
442 saved_state = json_tokener_state_object;
443 state = json_tokener_state_eatws;
444 } else {
445 err = json_tokener_error_parse_object;
446 goto out;
447 }
448 break;
449
450 }
451 } while(c);
452
453 if(state != json_tokener_state_finish &&
454 saved_state != json_tokener_state_finish)
455 err = json_tokener_error_parse_eof;
456
457 out:
458 free(obj_field_name);
459 if(err == json_tokener_success) return current;
460 mc_debug("json_tokener_do_parse: error=%d state=%d char=%c\n",
461 err, state, c);
462 json_object_put(current);
Michael Clark0370baa2007-03-13 08:26:22 +0000463 return error_ptr(-err);
Michael Clarkf0d08882007-03-13 08:26:18 +0000464}