json_tokener.c - platform/external/json-c - Gitiles

 /*
  * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
  *
  * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
  * Michael Clark <michael@metaparadigm.com>
  *
  * This library is free software; you can redistribute it and/or modify
  * it under the terms of the MIT license. See COPYING for details.
  *
  *
  * Copyright (c) 2008-2009 Yahoo! Inc.  All rights reserved.
  * The copyrights to the contents of this file are licensed under the MIT License
  * (http://www.opensource.org/licenses/mit-license.php)
  */

 #include "config.h"

 #include <stdio.h>
 #include <stdlib.h>
 #include <stddef.h>
 #include <ctype.h>
 #include <string.h>
 #include <limits.h>

 #include "bits.h"
 #include "debug.h"
 #include "printbuf.h"
 #include "arraylist.h"
 #include "json_inttypes.h"
 #include "json_object.h"
 #include "json_tokener.h"
 #include "json_util.h"

 #if !HAVE_STRNCASECMP && defined(_MSC_VER)
   /* MSC has the version as _strnicmp */
 # define strncasecmp _strnicmp
 #elif !HAVE_STRNCASECMP
 # error You do not have strncasecmp on your system.
 #endif /* HAVE_STRNCASECMP */


 static const char* json_null_str = "null";
 static const char* json_true_str = "true";
 static const char* json_false_str = "false";

 // XXX after v0.10 this array will become static:
 const char* json_tokener_errors[] = {
   "success",
   "continue",
   "nesting too deep",
   "unexpected end of data",
   "unexpected character",
   "null expected",
   "boolean expected",
   "number expected",
   "array value separator ',' expected",
   "quoted object property name expected",
   "object property name separator ':' expected",
   "object value separator ',' expected",
   "invalid string sequence",
   "expected comment",
 };

 const char *json_tokener_error_desc(enum json_tokener_error jerr)
 {
 	if (jerr < 0 || jerr > sizeof(json_tokener_errors))
 		return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
 	return json_tokener_errors[jerr];
 }

 enum json_tokener_error json_tokener_get_error(json_tokener *tok)
 {
 	return tok->err;
 }

 /* Stuff for decoding unicode sequences */
 #define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
 #define IS_LOW_SURROGATE(uc)  (((uc) & 0xFC00) == 0xDC00)
 #define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
 static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };


 struct json_tokener* json_tokener_new(void)
 {
   struct json_tokener *tok;

   tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
   if (!tok) return NULL;
   tok->pb = printbuf_new();
   json_tokener_reset(tok);
   return tok;
 }

 void json_tokener_free(struct json_tokener *tok)
 {
   json_tokener_reset(tok);
   if(tok) printbuf_free(tok->pb);
   free(tok);
 }

 static void json_tokener_reset_level(struct json_tokener *tok, int depth)
 {
   tok->stack[depth].state = json_tokener_state_eatws;
   tok->stack[depth].saved_state = json_tokener_state_start;
   json_object_put(tok->stack[depth].current);
   tok->stack[depth].current = NULL;
   free(tok->stack[depth].obj_field_name);
   tok->stack[depth].obj_field_name = NULL;
 }

 void json_tokener_reset(struct json_tokener *tok)
 {
   int i;
   if (!tok)
     return;

   for(i = tok->depth; i >= 0; i--)
     json_tokener_reset_level(tok, i);
   tok->depth = 0;
   tok->err = json_tokener_success;
 }

 struct json_object* json_tokener_parse(const char *str)
 {
     enum json_tokener_error jerr_ignored;
     struct json_object* obj;
     obj = json_tokener_parse_verbose(str, &jerr_ignored);
     return obj;
 }

 struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
 {
     struct json_tokener* tok;
     struct json_object* obj;

     tok = json_tokener_new();
     if (!tok)
       return NULL;
     obj = json_tokener_parse_ex(tok, str, -1);
     *error = tok->err;
     if(tok->err != json_tokener_success) {
 		if (obj != NULL)
 			json_object_put(obj);
         obj = NULL;
     }

     json_tokener_free(tok);
     return obj;
 }


 #if !HAVE_STRNDUP
 /* CAW: compliant version of strndup() */
 char* strndup(const char* str, size_t n)
 {
   if(str) {
     size_t len = strlen(str);
     size_t nn = json_min(len,n);
     char* s = (char*)malloc(sizeof(char) * (nn + 1));

     if(s) {
       memcpy(s, str, nn);
       s[nn] = '\0';
     }

     return s;
   }

   return NULL;
 }
 #endif


 #define state  tok->stack[tok->depth].state
 #define saved_state  tok->stack[tok->depth].saved_state
 #define current tok->stack[tok->depth].current
 #define obj_field_name tok->stack[tok->depth].obj_field_name

 /* Optimization:
  * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
  * iterating character-by character.  A large performance boost is
  * achieved by using tighter loops to locally handle units such as
  * comments and strings.  Loops that handle an entire token within
  * their scope also gather entire strings and pass them to
  * printbuf_memappend() in a single call, rather than calling
  * printbuf_memappend() one char at a time.
  *
  * POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
  * common to both the main loop and the tighter loops.
  */

 /* POP_CHAR(dest, tok) macro:
  *   Not really a pop()...peeks at the current char and stores it in dest.
  *   Returns 1 on success, sets tok->err and returns 0 if no more chars.
  *   Implicit inputs:  str, len vars
  */
 #define POP_CHAR(dest, tok)                                                  \
   (((tok)->char_offset == len) ?                                          \
    (((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
     (((tok)->err = json_tokener_success), 0)                              \
     :                                                                   \
     (((tok)->err = json_tokener_continue), 0)                             \
     ) :                                                                 \
    (((dest) = *str), 1)                                                 \
    )

 /* ADVANCE_CHAR() macro:
  *   Incrementes str & tok->char_offset.
  *   For convenience of existing conditionals, returns the old value of c (0 on eof)
  *   Implicit inputs:  c var
  */
 #define ADVANCE_CHAR(str, tok) \
   ( ++(str), ((tok)->char_offset)++, c)


 /* End optimization macro defs */


 struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
 					  const char *str, int len)
 {
   struct json_object *obj = NULL;
   char c = '\1';

   tok->char_offset = 0;
   tok->err = json_tokener_success;

   while (POP_CHAR(c, tok)) {

   redo_char:
     switch(state) {

     case json_tokener_state_eatws:
       /* Advance until we change state */
       while (isspace((int)c)) {
 	if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
 	  goto out;
       }
       if(c == '/') {
 	printbuf_reset(tok->pb);
 	printbuf_memappend_fast(tok->pb, &c, 1);
 	state = json_tokener_state_comment_start;
       } else {
 	state = saved_state;
 	goto redo_char;
       }
       break;

     case json_tokener_state_start:
       switch(c) {
       case '{':
 	state = json_tokener_state_eatws;
 	saved_state = json_tokener_state_object_field_start;
 	current = json_object_new_object();
 	break;
       case '[':
 	state = json_tokener_state_eatws;
 	saved_state = json_tokener_state_array;
 	current = json_object_new_array();
 	break;
       case 'N':
       case 'n':
 	state = json_tokener_state_null;
 	printbuf_reset(tok->pb);
 	tok->st_pos = 0;
 	goto redo_char;
       case '"':
       case '\'':
 	state = json_tokener_state_string;
 	printbuf_reset(tok->pb);
 	tok->quote_char = c;
 	break;
       case 'T':
       case 't':
       case 'F':
       case 'f':
 	state = json_tokener_state_boolean;
 	printbuf_reset(tok->pb);
 	tok->st_pos = 0;
 	goto redo_char;
 #if defined(__GNUC__)
 	  case '0' ... '9':
 #else
 	  case '0':
       case '1':
       case '2':
       case '3':
       case '4':
       case '5':
       case '6':
       case '7':
       case '8':
       case '9':
 #endif
       case '-':
 	state = json_tokener_state_number;
 	printbuf_reset(tok->pb);
 	tok->is_double = 0;
 	goto redo_char;
       default:
 	tok->err = json_tokener_error_parse_unexpected;
 	goto out;
       }
       break;

     case json_tokener_state_finish:
       if(tok->depth == 0) goto out;
       obj = json_object_get(current);
       json_tokener_reset_level(tok, tok->depth);
       tok->depth--;
       goto redo_char;

     case json_tokener_state_null:
       printbuf_memappend_fast(tok->pb, &c, 1);
       if(strncasecmp(json_null_str, tok->pb->buf,
 		     json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
 	if(tok->st_pos == strlen(json_null_str)) {
 	  current = NULL;
 	  saved_state = json_tokener_state_finish;
 	  state = json_tokener_state_eatws;
 	  goto redo_char;
 	}
       } else {
 	tok->err = json_tokener_error_parse_null;
 	goto out;
       }
       tok->st_pos++;
       break;

     case json_tokener_state_comment_start:
       if(c == '*') {
 	state = json_tokener_state_comment;
       } else if(c == '/') {
 	state = json_tokener_state_comment_eol;
       } else {
 	tok->err = json_tokener_error_parse_comment;
 	goto out;
       }
       printbuf_memappend_fast(tok->pb, &c, 1);
       break;

     case json_tokener_state_comment:
               {
           /* Advance until we change state */
           const char *case_start = str;
           while(c != '*') {
             if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
               printbuf_memappend_fast(tok->pb, case_start, str-case_start);
               goto out;
             }
           }
           printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
           state = json_tokener_state_comment_end;
         }
             break;

     case json_tokener_state_comment_eol:
       {
 	/* Advance until we change state */
 	const char *case_start = str;
 	while(c != '\n') {
 	  if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
 	    printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 	    goto out;
 	  }
 	}
 	printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 	MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
 	state = json_tokener_state_eatws;
       }
       break;

     case json_tokener_state_comment_end:
       printbuf_memappend_fast(tok->pb, &c, 1);
       if(c == '/') {
 	MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
 	state = json_tokener_state_eatws;
       } else {
 	state = json_tokener_state_comment;
       }
       break;

     case json_tokener_state_string:
       {
 	/* Advance until we change state */
 	const char *case_start = str;
 	while(1) {
 	  if(c == tok->quote_char) {
 	    printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 	    current = json_object_new_string(tok->pb->buf);
 	    saved_state = json_tokener_state_finish;
 	    state = json_tokener_state_eatws;
 	    break;
 	  } else if(c == '\\') {
 	    printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 	    saved_state = json_tokener_state_string;
 	    state = json_tokener_state_string_escape;
 	    break;
 	  }
 	  if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
 	    printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 	    goto out;
 	  }
 	}
       }
       break;

     case json_tokener_state_string_escape:
       switch(c) {
       case '"':
       case '\\':
       case '/':
 	printbuf_memappend_fast(tok->pb, &c, 1);
 	state = saved_state;
 	break;
       case 'b':
       case 'n':
       case 'r':
       case 't':
 	if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
 	else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
 	else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
 	else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
 	state = saved_state;
 	break;
       case 'u':
 	tok->ucs_char = 0;
 	tok->st_pos = 0;
 	state = json_tokener_state_escape_unicode;
 	break;
       default:
 	tok->err = json_tokener_error_parse_string;
 	goto out;
       }
       break;

     case json_tokener_state_escape_unicode:
 	{
           unsigned int got_hi_surrogate = 0;

 	  /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
 	  while(1) {
 	    if(strchr(json_hex_chars, c)) {
 	      tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
 	      if(tok->st_pos == 4) {
 		unsigned char unescaped_utf[4];

                 if (got_hi_surrogate) {
 		  if (IS_LOW_SURROGATE(tok->ucs_char)) {
                     /* Recalculate the ucs_char, then fall thru to process normally */
                     tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
                   } else {
                     /* Hi surrogate was not followed by a low surrogate */
                     /* Replace the hi and process the rest normally */
 		    printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
                   }
                   got_hi_surrogate = 0;
                 }

 		if (tok->ucs_char < 0x80) {
 		  unescaped_utf[0] = tok->ucs_char;
 		  printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
 		} else if (tok->ucs_char < 0x800) {
 		  unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
 		  unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
 		  printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
 		} else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
                   /* Got a high surrogate.  Remember it and look for the
                    * the beginning of another sequence, which should be the
                    * low surrogate.
                    */
                   got_hi_surrogate = tok->ucs_char;
                   /* Not at end, and the next two chars should be "\u" */
                   if ((tok->char_offset+1 != len) &&
                       (tok->char_offset+2 != len) &&
                       (str[1] == '\\') &&
                       (str[2] == 'u'))
                   {
 	            ADVANCE_CHAR(str, tok);
 	            ADVANCE_CHAR(str, tok);

                     /* Advance to the first char of the next sequence and
                      * continue processing with the next sequence.
                      */
 	            if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
 	              printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
 	              goto out;
                     }
 	            tok->ucs_char = 0;
                     tok->st_pos = 0;
                     continue; /* other json_tokener_state_escape_unicode */
                   } else {
                     /* Got a high surrogate without another sequence following
                      * it.  Put a replacement char in for the hi surrogate
                      * and pretend we finished.
                      */
 		    printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
                   }
 		} else if (IS_LOW_SURROGATE(tok->ucs_char)) {
                   /* Got a low surrogate not preceded by a high */
 		  printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
                 } else if (tok->ucs_char < 0x10000) {
 		  unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
 		  unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
 		  unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
 		  printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
 		} else if (tok->ucs_char < 0x110000) {
 		  unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
 		  unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
 		  unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
 		  unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
 		  printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
 		} else {
                   /* Don't know what we got--insert the replacement char */
 		  printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
                 }
 		state = saved_state;
 		break;
 	      }
 	    } else {
 	      tok->err = json_tokener_error_parse_string;
 	      goto out;
 	    }
 	  if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
             if (got_hi_surrogate) /* Clean up any pending chars */
 	      printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
 	    goto out;
 	  }
 	}
       }
       break;

     case json_tokener_state_boolean:
       printbuf_memappend_fast(tok->pb, &c, 1);
       if(strncasecmp(json_true_str, tok->pb->buf,
 		     json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
 	if(tok->st_pos == strlen(json_true_str)) {
 	  current = json_object_new_boolean(1);
 	  saved_state = json_tokener_state_finish;
 	  state = json_tokener_state_eatws;
 	  goto redo_char;
 	}
       } else if(strncasecmp(json_false_str, tok->pb->buf,
 			    json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
 	if(tok->st_pos == strlen(json_false_str)) {
 	  current = json_object_new_boolean(0);
 	  saved_state = json_tokener_state_finish;
 	  state = json_tokener_state_eatws;
 	  goto redo_char;
 	}
       } else {
 	tok->err = json_tokener_error_parse_boolean;
 	goto out;
       }
       tok->st_pos++;
       break;

     case json_tokener_state_number:
       {
 	/* Advance until we change state */
 	const char *case_start = str;
 	int case_len=0;
 	while(c && strchr(json_number_chars, c)) {
 	  ++case_len;
 	  if(c == '.' || c == 'e') tok->is_double = 1;
 	  if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
 	    printbuf_memappend_fast(tok->pb, case_start, case_len);
 	    goto out;
 	  }
 	}
         if (case_len>0)
           printbuf_memappend_fast(tok->pb, case_start, case_len);
       }
       {
 	int64_t num64;
 	double  numd;
 	if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
 		current = json_object_new_int64(num64);
 	} else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
           current = json_object_new_double(numd);
         } else {
           tok->err = json_tokener_error_parse_number;
           goto out;
         }
         saved_state = json_tokener_state_finish;
         state = json_tokener_state_eatws;
         goto redo_char;
       }
       break;

     case json_tokener_state_array:
       if(c == ']') {
 	saved_state = json_tokener_state_finish;
 	state = json_tokener_state_eatws;
       } else {
 	if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
 	  tok->err = json_tokener_error_depth;
 	  goto out;
 	}
 	state = json_tokener_state_array_add;
 	tok->depth++;
 	json_tokener_reset_level(tok, tok->depth);
 	goto redo_char;
       }
       break;

     case json_tokener_state_array_add:
       json_object_array_add(current, obj);
       saved_state = json_tokener_state_array_sep;
       state = json_tokener_state_eatws;
       goto redo_char;

     case json_tokener_state_array_sep:
       if(c == ']') {
 	saved_state = json_tokener_state_finish;
 	state = json_tokener_state_eatws;
       } else if(c == ',') {
 	saved_state = json_tokener_state_array;
 	state = json_tokener_state_eatws;
       } else {
 	tok->err = json_tokener_error_parse_array;
 	goto out;
       }
       break;

     case json_tokener_state_object_field_start:
       if(c == '}') {
 	saved_state = json_tokener_state_finish;
 	state = json_tokener_state_eatws;
       } else if (c == '"' || c == '\'') {
 	tok->quote_char = c;
 	printbuf_reset(tok->pb);
 	state = json_tokener_state_object_field;
       } else {
 	tok->err = json_tokener_error_parse_object_key_name;
 	goto out;
       }
       break;

     case json_tokener_state_object_field:
       {
 	/* Advance until we change state */
 	const char *case_start = str;
 	while(1) {
 	  if(c == tok->quote_char) {
 	    printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 	    obj_field_name = strdup(tok->pb->buf);
 	    saved_state = json_tokener_state_object_field_end;
 	    state = json_tokener_state_eatws;
 	    break;
 	  } else if(c == '\\') {
 	    printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 	    saved_state = json_tokener_state_object_field;
 	    state = json_tokener_state_string_escape;
 	    break;
 	  }
 	  if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
 	    printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 	    goto out;
 	  }
 	}
       }
       break;

     case json_tokener_state_object_field_end:
       if(c == ':') {
 	saved_state = json_tokener_state_object_value;
 	state = json_tokener_state_eatws;
       } else {
 	tok->err = json_tokener_error_parse_object_key_sep;
 	goto out;
       }
       break;

     case json_tokener_state_object_value:
       if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
 	tok->err = json_tokener_error_depth;
 	goto out;
       }
       state = json_tokener_state_object_value_add;
       tok->depth++;
       json_tokener_reset_level(tok, tok->depth);
       goto redo_char;

     case json_tokener_state_object_value_add:
       json_object_object_add(current, obj_field_name, obj);
       free(obj_field_name);
       obj_field_name = NULL;
       saved_state = json_tokener_state_object_sep;
       state = json_tokener_state_eatws;
       goto redo_char;

     case json_tokener_state_object_sep:
       if(c == '}') {
 	saved_state = json_tokener_state_finish;
 	state = json_tokener_state_eatws;
       } else if(c == ',') {
 	saved_state = json_tokener_state_object_field_start;
 	state = json_tokener_state_eatws;
       } else {
 	tok->err = json_tokener_error_parse_object_value_sep;
 	goto out;
       }
       break;

     }
     if (!ADVANCE_CHAR(str, tok))
       goto out;
   } /* while(POP_CHAR) */

  out:
   if (!c) { /* We hit an eof char (0) */
     if(state != json_tokener_state_finish &&
        saved_state != json_tokener_state_finish)
       tok->err = json_tokener_error_parse_eof;
   }

   if (tok->err == json_tokener_success)
   {
     json_object *ret = json_object_get(current);
 	int ii;

 	/* Partially reset, so we parse additional objects on subsequent calls. */
     for(ii = tok->depth; ii >= 0; ii--)
       json_tokener_reset_level(tok, ii);
     return ret;
   }

   MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
 	   json_tokener_errors[tok->err], tok->char_offset);
   return NULL;
 }
	/*
	* $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
	*
	* Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
	* Michael Clark <michael@metaparadigm.com>
	*
	* This library is free software; you can redistribute it and/or modify
	* it under the terms of the MIT license. See COPYING for details.
	*
	*
	* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
	* The copyrights to the contents of this file are licensed under the MIT License
	* (http://www.opensource.org/licenses/mit-license.php)
	*/

	#include "config.h"

	#include <stdio.h>
	#include <stdlib.h>
	#include <stddef.h>
	#include <ctype.h>
	#include <string.h>
	#include <limits.h>

	#include "bits.h"
	#include "debug.h"
	#include "printbuf.h"
	#include "arraylist.h"
	#include "json_inttypes.h"
	#include "json_object.h"
	#include "json_tokener.h"
	#include "json_util.h"

	#if !HAVE_STRNCASECMP && defined(_MSC_VER)
	/* MSC has the version as _strnicmp */
	# define strncasecmp _strnicmp
	#elif !HAVE_STRNCASECMP
	# error You do not have strncasecmp on your system.
	#endif /* HAVE_STRNCASECMP */


	static const char* json_null_str = "null";
	static const char* json_true_str = "true";
	static const char* json_false_str = "false";

	// XXX after v0.10 this array will become static:
	const char* json_tokener_errors[] = {
	"success",
	"continue",
	"nesting too deep",
	"unexpected end of data",
	"unexpected character",
	"null expected",
	"boolean expected",
	"number expected",
	"array value separator ',' expected",
	"quoted object property name expected",
	"object property name separator ':' expected",
	"object value separator ',' expected",
	"invalid string sequence",
	"expected comment",
	};

	const char *json_tokener_error_desc(enum json_tokener_error jerr)
	{
	if (jerr < 0 \|\| jerr > sizeof(json_tokener_errors))
	return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()";
	return json_tokener_errors[jerr];
	}

	enum json_tokener_error json_tokener_get_error(json_tokener *tok)
	{
	return tok->err;
	}

	/* Stuff for decoding unicode sequences */
	#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
	#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00)
	#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
	static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };


	struct json_tokener* json_tokener_new(void)
	{
	struct json_tokener *tok;

	tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
	if (!tok) return NULL;
	tok->pb = printbuf_new();
	json_tokener_reset(tok);
	return tok;
	}

	void json_tokener_free(struct json_tokener *tok)
	{
	json_tokener_reset(tok);
	if(tok) printbuf_free(tok->pb);
	free(tok);
	}

	static void json_tokener_reset_level(struct json_tokener *tok, int depth)
	{
	tok->stack[depth].state = json_tokener_state_eatws;
	tok->stack[depth].saved_state = json_tokener_state_start;
	json_object_put(tok->stack[depth].current);
	tok->stack[depth].current = NULL;
	free(tok->stack[depth].obj_field_name);
	tok->stack[depth].obj_field_name = NULL;
	}

	void json_tokener_reset(struct json_tokener *tok)
	{
	int i;
	if (!tok)
	return;

	for(i = tok->depth; i >= 0; i--)
	json_tokener_reset_level(tok, i);
	tok->depth = 0;
	tok->err = json_tokener_success;
	}

	struct json_object* json_tokener_parse(const char *str)
	{
	enum json_tokener_error jerr_ignored;
	struct json_object* obj;
	obj = json_tokener_parse_verbose(str, &jerr_ignored);
	return obj;
	}

	struct json_object* json_tokener_parse_verbose(const char str, enum json_tokener_error error)
	{
	struct json_tokener* tok;
	struct json_object* obj;

	tok = json_tokener_new();
	if (!tok)
	return NULL;
	obj = json_tokener_parse_ex(tok, str, -1);
	*error = tok->err;
	if(tok->err != json_tokener_success) {
	if (obj != NULL)
	json_object_put(obj);
	obj = NULL;
	}

	json_tokener_free(tok);
	return obj;
	}


	#if !HAVE_STRNDUP
	/* CAW: compliant version of strndup() */
	char* strndup(const char* str, size_t n)
	{
	if(str) {
	size_t len = strlen(str);
	size_t nn = json_min(len,n);
	char* s = (char)malloc(sizeof(char) (nn + 1));

	if(s) {
	memcpy(s, str, nn);
	s[nn] = '\0';
	}

	return s;
	}

	return NULL;
	}
	#endif


	#define state tok->stack[tok->depth].state
	#define saved_state tok->stack[tok->depth].saved_state
	#define current tok->stack[tok->depth].current
	#define obj_field_name tok->stack[tok->depth].obj_field_name

	/* Optimization:
	* json_tokener_parse_ex() consumed a lot of CPU in its main loop,
	* iterating character-by character. A large performance boost is
	* achieved by using tighter loops to locally handle units such as
	* comments and strings. Loops that handle an entire token within
	* their scope also gather entire strings and pass them to
	* printbuf_memappend() in a single call, rather than calling
	* printbuf_memappend() one char at a time.
	*
	* POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
	* common to both the main loop and the tighter loops.
	*/

	/* POP_CHAR(dest, tok) macro:
	* Not really a pop()...peeks at the current char and stores it in dest.
	* Returns 1 on success, sets tok->err and returns 0 if no more chars.
	* Implicit inputs: str, len vars
	*/
	#define POP_CHAR(dest, tok) \
	(((tok)->char_offset == len) ? \
	(((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
	(((tok)->err = json_tokener_success), 0) \
	: \
	(((tok)->err = json_tokener_continue), 0) \
	) : \
	(((dest) = *str), 1) \
	)

	/* ADVANCE_CHAR() macro:
	* Incrementes str & tok->char_offset.
	* For convenience of existing conditionals, returns the old value of c (0 on eof)
	* Implicit inputs: c var
	*/
	#define ADVANCE_CHAR(str, tok) \
	( ++(str), ((tok)->char_offset)++, c)


	/* End optimization macro defs */


	struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
	const char *str, int len)
	{
	struct json_object *obj = NULL;
	char c = '\1';

	tok->char_offset = 0;
	tok->err = json_tokener_success;

	while (POP_CHAR(c, tok)) {

	redo_char:
	switch(state) {

	case json_tokener_state_eatws:
	/* Advance until we change state */
	while (isspace((int)c)) {
	if ((!ADVANCE_CHAR(str, tok)) \|\| (!POP_CHAR(c, tok)))
	goto out;
	}
	if(c == '/') {
	printbuf_reset(tok->pb);
	printbuf_memappend_fast(tok->pb, &c, 1);
	state = json_tokener_state_comment_start;
	} else {
	state = saved_state;
	goto redo_char;
	}
	break;

	case json_tokener_state_start:
	switch(c) {
	case '{':
	state = json_tokener_state_eatws;
	saved_state = json_tokener_state_object_field_start;
	current = json_object_new_object();
	break;
	case '[':
	state = json_tokener_state_eatws;
	saved_state = json_tokener_state_array;
	current = json_object_new_array();
	break;
	case 'N':
	case 'n':
	state = json_tokener_state_null;
	printbuf_reset(tok->pb);
	tok->st_pos = 0;
	goto redo_char;
	case '"':
	case '\'':
	state = json_tokener_state_string;
	printbuf_reset(tok->pb);
	tok->quote_char = c;
	break;
	case 'T':
	case 't':
	case 'F':
	case 'f':
	state = json_tokener_state_boolean;
	printbuf_reset(tok->pb);
	tok->st_pos = 0;
	goto redo_char;
	#if defined(__GNUC__)
	case '0' ... '9':
	#else
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
	#endif
	case '-':
	state = json_tokener_state_number;
	printbuf_reset(tok->pb);
	tok->is_double = 0;
	goto redo_char;
	default:
	tok->err = json_tokener_error_parse_unexpected;
	goto out;
	}
	break;

	case json_tokener_state_finish:
	if(tok->depth == 0) goto out;
	obj = json_object_get(current);
	json_tokener_reset_level(tok, tok->depth);
	tok->depth--;
	goto redo_char;

	case json_tokener_state_null:
	printbuf_memappend_fast(tok->pb, &c, 1);
	if(strncasecmp(json_null_str, tok->pb->buf,
	json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
	if(tok->st_pos == strlen(json_null_str)) {
	current = NULL;
	saved_state = json_tokener_state_finish;
	state = json_tokener_state_eatws;
	goto redo_char;
	}
	} else {
	tok->err = json_tokener_error_parse_null;
	goto out;
	}
	tok->st_pos++;
	break;

	case json_tokener_state_comment_start:
	if(c == '*') {
	state = json_tokener_state_comment;
	} else if(c == '/') {
	state = json_tokener_state_comment_eol;
	} else {
	tok->err = json_tokener_error_parse_comment;
	goto out;
	}
	printbuf_memappend_fast(tok->pb, &c, 1);
	break;

	case json_tokener_state_comment:
	{
	/* Advance until we change state */
	const char *case_start = str;
	while(c != '*') {
	if (!ADVANCE_CHAR(str, tok) \|\| !POP_CHAR(c, tok)) {
	printbuf_memappend_fast(tok->pb, case_start, str-case_start);
	goto out;
	}
	}
	printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
	state = json_tokener_state_comment_end;
	}
	break;

	case json_tokener_state_comment_eol:
	{
	/* Advance until we change state */
	const char *case_start = str;
	while(c != '\n') {
	if (!ADVANCE_CHAR(str, tok) \|\| !POP_CHAR(c, tok)) {
	printbuf_memappend_fast(tok->pb, case_start, str-case_start);
	goto out;
	}
	}
	printbuf_memappend_fast(tok->pb, case_start, str-case_start);
	MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
	state = json_tokener_state_eatws;
	}
	break;

	case json_tokener_state_comment_end:
	printbuf_memappend_fast(tok->pb, &c, 1);
	if(c == '/') {
	MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
	state = json_tokener_state_eatws;
	} else {
	state = json_tokener_state_comment;
	}
	break;

	case json_tokener_state_string:
	{
	/* Advance until we change state */
	const char *case_start = str;
	while(1) {
	if(c == tok->quote_char) {
	printbuf_memappend_fast(tok->pb, case_start, str-case_start);
	current = json_object_new_string(tok->pb->buf);
	saved_state = json_tokener_state_finish;
	state = json_tokener_state_eatws;
	break;
	} else if(c == '\\') {
	printbuf_memappend_fast(tok->pb, case_start, str-case_start);
	saved_state = json_tokener_state_string;
	state = json_tokener_state_string_escape;
	break;
	}
	if (!ADVANCE_CHAR(str, tok) \|\| !POP_CHAR(c, tok)) {
	printbuf_memappend_fast(tok->pb, case_start, str-case_start);
	goto out;
	}
	}
	}
	break;

	case json_tokener_state_string_escape:
	switch(c) {
	case '"':
	case '\\':
	case '/':
	printbuf_memappend_fast(tok->pb, &c, 1);
	state = saved_state;
	break;
	case 'b':
	case 'n':
	case 'r':
	case 't':
	if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
	else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
	else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
	else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
	state = saved_state;
	break;
	case 'u':
	tok->ucs_char = 0;
	tok->st_pos = 0;
	state = json_tokener_state_escape_unicode;
	break;
	default:
	tok->err = json_tokener_error_parse_string;
	goto out;
	}
	break;

	case json_tokener_state_escape_unicode:
	{
	unsigned int got_hi_surrogate = 0;

	/* Handle a 4-byte sequence, or two sequences if a surrogate pair */
	while(1) {
	if(strchr(json_hex_chars, c)) {
	tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
	if(tok->st_pos == 4) {
	unsigned char unescaped_utf[4];

	if (got_hi_surrogate) {
	if (IS_LOW_SURROGATE(tok->ucs_char)) {
	/* Recalculate the ucs_char, then fall thru to process normally */
	tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
	} else {
	/* Hi surrogate was not followed by a low surrogate */
	/* Replace the hi and process the rest normally */
	printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
	}
	got_hi_surrogate = 0;
	}

	if (tok->ucs_char < 0x80) {
	unescaped_utf[0] = tok->ucs_char;
	printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
	} else if (tok->ucs_char < 0x800) {
	unescaped_utf[0] = 0xc0 \| (tok->ucs_char >> 6);
	unescaped_utf[1] = 0x80 \| (tok->ucs_char & 0x3f);
	printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
	} else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
	/* Got a high surrogate. Remember it and look for the
	* the beginning of another sequence, which should be the
	* low surrogate.
	*/
	got_hi_surrogate = tok->ucs_char;
	/* Not at end, and the next two chars should be "\u" */
	if ((tok->char_offset+1 != len) &&
	(tok->char_offset+2 != len) &&
	(str[1] == '\\') &&
	(str[2] == 'u'))
	{
	ADVANCE_CHAR(str, tok);
	ADVANCE_CHAR(str, tok);

	/* Advance to the first char of the next sequence and
	* continue processing with the next sequence.
	*/
	if (!ADVANCE_CHAR(str, tok) \|\| !POP_CHAR(c, tok)) {
	printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
	goto out;
	}
	tok->ucs_char = 0;
	tok->st_pos = 0;
	continue; /* other json_tokener_state_escape_unicode */
	} else {
	/* Got a high surrogate without another sequence following
	* it. Put a replacement char in for the hi surrogate
	* and pretend we finished.
	*/
	printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
	}
	} else if (IS_LOW_SURROGATE(tok->ucs_char)) {
	/* Got a low surrogate not preceded by a high */
	printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
	} else if (tok->ucs_char < 0x10000) {
	unescaped_utf[0] = 0xe0 \| (tok->ucs_char >> 12);
	unescaped_utf[1] = 0x80 \| ((tok->ucs_char >> 6) & 0x3f);
	unescaped_utf[2] = 0x80 \| (tok->ucs_char & 0x3f);
	printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
	} else if (tok->ucs_char < 0x110000) {
	unescaped_utf[0] = 0xf0 \| ((tok->ucs_char >> 18) & 0x07);
	unescaped_utf[1] = 0x80 \| ((tok->ucs_char >> 12) & 0x3f);
	unescaped_utf[2] = 0x80 \| ((tok->ucs_char >> 6) & 0x3f);
	unescaped_utf[3] = 0x80 \| (tok->ucs_char & 0x3f);
	printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
	} else {
	/* Don't know what we got--insert the replacement char */
	printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
	}
	state = saved_state;
	break;
	}
	} else {
	tok->err = json_tokener_error_parse_string;
	goto out;
	}
	if (!ADVANCE_CHAR(str, tok) \|\| !POP_CHAR(c, tok)) {
	if (got_hi_surrogate) /* Clean up any pending chars */
	printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
	goto out;
	}
	}
	}
	break;

	case json_tokener_state_boolean:
	printbuf_memappend_fast(tok->pb, &c, 1);
	if(strncasecmp(json_true_str, tok->pb->buf,
	json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
	if(tok->st_pos == strlen(json_true_str)) {
	current = json_object_new_boolean(1);
	saved_state = json_tokener_state_finish;
	state = json_tokener_state_eatws;
	goto redo_char;
	}
	} else if(strncasecmp(json_false_str, tok->pb->buf,
	json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
	if(tok->st_pos == strlen(json_false_str)) {
	current = json_object_new_boolean(0);
	saved_state = json_tokener_state_finish;
	state = json_tokener_state_eatws;
	goto redo_char;
	}
	} else {
	tok->err = json_tokener_error_parse_boolean;
	goto out;
	}
	tok->st_pos++;
	break;

	case json_tokener_state_number:
	{
	/* Advance until we change state */
	const char *case_start = str;
	int case_len=0;
	while(c && strchr(json_number_chars, c)) {
	++case_len;
	if(c == '.' \|\| c == 'e') tok->is_double = 1;
	if (!ADVANCE_CHAR(str, tok) \|\| !POP_CHAR(c, tok)) {
	printbuf_memappend_fast(tok->pb, case_start, case_len);
	goto out;
	}
	}
	if (case_len>0)
	printbuf_memappend_fast(tok->pb, case_start, case_len);
	}
	{
	int64_t num64;
	double numd;
	if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
	current = json_object_new_int64(num64);
	} else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
	current = json_object_new_double(numd);
	} else {
	tok->err = json_tokener_error_parse_number;
	goto out;
	}
	saved_state = json_tokener_state_finish;
	state = json_tokener_state_eatws;
	goto redo_char;
	}
	break;

	case json_tokener_state_array:
	if(c == ']') {
	saved_state = json_tokener_state_finish;
	state = json_tokener_state_eatws;
	} else {
	if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
	tok->err = json_tokener_error_depth;
	goto out;
	}
	state = json_tokener_state_array_add;
	tok->depth++;
	json_tokener_reset_level(tok, tok->depth);
	goto redo_char;
	}
	break;

	case json_tokener_state_array_add:
	json_object_array_add(current, obj);
	saved_state = json_tokener_state_array_sep;
	state = json_tokener_state_eatws;
	goto redo_char;

	case json_tokener_state_array_sep:
	if(c == ']') {
	saved_state = json_tokener_state_finish;
	state = json_tokener_state_eatws;
	} else if(c == ',') {
	saved_state = json_tokener_state_array;
	state = json_tokener_state_eatws;
	} else {
	tok->err = json_tokener_error_parse_array;
	goto out;
	}
	break;

	case json_tokener_state_object_field_start:
	if(c == '}') {
	saved_state = json_tokener_state_finish;
	state = json_tokener_state_eatws;
	} else if (c == '"' \|\| c == '\'') {
	tok->quote_char = c;
	printbuf_reset(tok->pb);
	state = json_tokener_state_object_field;
	} else {
	tok->err = json_tokener_error_parse_object_key_name;
	goto out;
	}
	break;

	case json_tokener_state_object_field:
	{
	/* Advance until we change state */
	const char *case_start = str;
	while(1) {
	if(c == tok->quote_char) {
	printbuf_memappend_fast(tok->pb, case_start, str-case_start);
	obj_field_name = strdup(tok->pb->buf);
	saved_state = json_tokener_state_object_field_end;
	state = json_tokener_state_eatws;
	break;
	} else if(c == '\\') {
	printbuf_memappend_fast(tok->pb, case_start, str-case_start);
	saved_state = json_tokener_state_object_field;
	state = json_tokener_state_string_escape;
	break;
	}
	if (!ADVANCE_CHAR(str, tok) \|\| !POP_CHAR(c, tok)) {
	printbuf_memappend_fast(tok->pb, case_start, str-case_start);
	goto out;
	}
	}
	}
	break;

	case json_tokener_state_object_field_end:
	if(c == ':') {
	saved_state = json_tokener_state_object_value;
	state = json_tokener_state_eatws;
	} else {
	tok->err = json_tokener_error_parse_object_key_sep;
	goto out;
	}
	break;

	case json_tokener_state_object_value:
	if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
	tok->err = json_tokener_error_depth;
	goto out;
	}
	state = json_tokener_state_object_value_add;
	tok->depth++;
	json_tokener_reset_level(tok, tok->depth);
	goto redo_char;

	case json_tokener_state_object_value_add:
	json_object_object_add(current, obj_field_name, obj);
	free(obj_field_name);
	obj_field_name = NULL;
	saved_state = json_tokener_state_object_sep;
	state = json_tokener_state_eatws;
	goto redo_char;

	case json_tokener_state_object_sep:
	if(c == '}') {
	saved_state = json_tokener_state_finish;
	state = json_tokener_state_eatws;
	} else if(c == ',') {
	saved_state = json_tokener_state_object_field_start;
	state = json_tokener_state_eatws;
	} else {
	tok->err = json_tokener_error_parse_object_value_sep;
	goto out;
	}
	break;

	}
	if (!ADVANCE_CHAR(str, tok))
	goto out;
	} /* while(POP_CHAR) */

	out:
	if (!c) { /* We hit an eof char (0) */
	if(state != json_tokener_state_finish &&
	saved_state != json_tokener_state_finish)
	tok->err = json_tokener_error_parse_eof;
	}

	if (tok->err == json_tokener_success)
	{
	json_object *ret = json_object_get(current);
	int ii;

	/* Partially reset, so we parse additional objects on subsequent calls. */
	for(ii = tok->depth; ii >= 0; ii--)
	json_tokener_reset_level(tok, ii);
	return ret;
	}

	MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
	json_tokener_errors[tok->err], tok->char_offset);
	return NULL;
	}