Enforce XML_PARSER_EOF state handling throughout the parser
That state is raised when the parser must stop processing immediately,
without continuing even to report further errors. It is best to test
for it after most GROW calls, especially within loops.
diff --git a/parser.c b/parser.c
index 7ec33b9..5bff84e 100644
--- a/parser.c
+++ b/parser.c
@@ -2157,6 +2157,8 @@
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
}
ret = inputPush(ctxt, input);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
GROW;
return(ret);
}
@@ -2193,6 +2195,8 @@
if (count++ > 20) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(0);
}
if ((RAW >= '0') && (RAW <= '9'))
val = val * 16 + (CUR - '0');
@@ -2224,6 +2228,8 @@
if (count++ > 20) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(0);
}
if ((RAW >= '0') && (RAW <= '9'))
val = val * 10 + (CUR - '0');
@@ -2572,6 +2578,8 @@
* the amount of data in the buffer.
*/
GROW
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
if ((ctxt->input->end - ctxt->input->cur)>=4) {
start[0] = RAW;
start[1] = NXT(1);
@@ -3190,6 +3198,8 @@
* Handler for more complex cases
*/
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
c = CUR_CHAR(l);
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
/*
@@ -3241,6 +3251,8 @@
if (count++ > 100) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
}
len += l;
NEXTL(l);
@@ -3265,6 +3277,8 @@
if (count++ > 100) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
}
len += l;
NEXTL(l);
@@ -3373,6 +3387,8 @@
}
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
}
len += l;
NEXTL(l);
@@ -3463,6 +3479,8 @@
const xmlChar *ret;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
in = ctxt->input->cur;
while (*in != 0 && *in == *cmp) {
@@ -3602,6 +3620,8 @@
#endif
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
c = CUR_CHAR(l);
while (xmlIsNameChar(ctxt, c)) {
@@ -3630,6 +3650,10 @@
if (count++ > 100) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buffer);
+ return(NULL);
+ }
}
if (len + 10 > max) {
xmlChar *tmp;
@@ -3711,6 +3735,10 @@
ctxt->instate = XML_PARSER_ENTITY_VALUE;
input = ctxt->input;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
NEXT;
c = CUR_CHAR(l);
/*
@@ -3722,8 +3750,8 @@
* In practice it means we stop the loop only when back at parsing
* the initial entity and the quote is found
*/
- while ((IS_CHAR(c)) && ((c != stop) || /* checked */
- (ctxt->input != input))) {
+ while (((IS_CHAR(c)) && ((c != stop) || /* checked */
+ (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
if (len + 5 >= size) {
xmlChar *tmp;
@@ -3752,6 +3780,10 @@
}
}
buf[len] = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
/*
* Raise problem w.r.t. '&' and '%' being used in non-entities
@@ -3799,12 +3831,12 @@
*/
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
0, 0, 0);
- if (orig != NULL)
+ if (orig != NULL)
*orig = buf;
else
xmlFree(buf);
}
-
+
return(ret);
}
@@ -3855,8 +3887,9 @@
* OK loop until we reach one of the ending char or a size limit.
*/
c = CUR_CHAR(l);
- while ((NXT(0) != limit) && /* checked */
- (IS_CHAR(c)) && (c != '<')) {
+ while (((NXT(0) != limit) && /* checked */
+ (IS_CHAR(c)) && (c != '<')) &&
+ (ctxt->instate != XML_PARSER_EOF)) {
/*
* Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
* special option is given
@@ -4001,6 +4034,9 @@
GROW;
c = CUR_CHAR(l);
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto error;
+
if ((in_space) && (normalize)) {
while (buf[len - 1] == 0x20) len--;
}
@@ -4033,6 +4069,7 @@
mem_error:
xmlErrMemory(ctxt, NULL);
+error:
if (buf != NULL)
xmlFree(buf);
if (rep != NULL)
@@ -4145,6 +4182,10 @@
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
}
COPY_BUF(l,buf,len,cur);
NEXTL(l);
@@ -4228,6 +4269,10 @@
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
}
NEXT;
cur = CUR;
@@ -4434,6 +4479,8 @@
}
SHRINK;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
in = ctxt->input->cur;
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
nbchar = 0;
@@ -4502,6 +4549,8 @@
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
}
NEXTL(l);
cur = CUR_CHAR(l);
@@ -4713,6 +4762,10 @@
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
}
NEXTL(l);
cur = CUR_CHAR(l);
@@ -4871,6 +4924,10 @@
}
SHRINK;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
in = ctxt->input->cur;
if (*in == '-') {
if (in[1] == '-') {
@@ -5108,6 +5165,10 @@
count++;
if (count > 50) {
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
count = 0;
if ((len > XML_MAX_TEXT_LENGTH) &&
((ctxt->options & XML_PARSE_HUGE) == 0)) {
@@ -5864,7 +5925,7 @@
}
SKIP_BLANKS;
GROW;
- while (RAW != '>') {
+ while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
int type;
int def;
@@ -6013,7 +6074,7 @@
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
if (ret == NULL) return(NULL);
}
- while (RAW == '|') {
+ while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
NEXT;
if (elem == NULL) {
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
@@ -6157,7 +6218,7 @@
}
SKIP_BLANKS;
SHRINK;
- while (RAW != ')') {
+ while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
/*
* Each loop we parse one separator and one element.
*/
@@ -6436,6 +6497,8 @@
}
NEXT;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
SKIP_BLANKS;
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
@@ -6603,8 +6666,8 @@
"Entering INCLUDE Conditional Section\n");
}
- while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
- (NXT(2) != '>'))) {
+ while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
+ (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
@@ -6672,7 +6735,8 @@
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
ctxt->instate = XML_PARSER_IGNORE;
- while ((depth >= 0) && (RAW != 0)) {
+ while (((depth >= 0) && (RAW != 0)) &&
+ (ctxt->instate != XML_PARSER_EOF)) {
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
depth++;
SKIP(3);
@@ -6943,7 +7007,7 @@
break;
}
}
-
+
if (RAW != 0) {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
}
@@ -7395,6 +7459,8 @@
xmlEntityPtr ent = NULL;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (RAW != '&')
return(NULL);
@@ -7925,6 +7991,10 @@
if (count++ > 100) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlBufferFree(buf);
+ return(-1);
+ }
}
NEXTL(l);
c = CUR_CHAR(l);
@@ -8158,7 +8228,7 @@
* PEReferences.
* Subsequence (markupdecl | PEReference | S)*
*/
- while (RAW != ']') {
+ while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
@@ -8344,9 +8414,9 @@
SKIP_BLANKS;
GROW;
- while ((RAW != '>') &&
+ while (((RAW != '>') &&
((RAW != '/') || (NXT(1) != '>')) &&
- (IS_BYTE_CHAR(RAW))) {
+ (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *q = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
@@ -8770,6 +8840,8 @@
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
@@ -8790,6 +8862,8 @@
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
@@ -8816,6 +8890,8 @@
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
@@ -8845,6 +8921,8 @@
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
@@ -9088,9 +9166,9 @@
GROW;
if (ctxt->input->base != base) goto base_changed;
- while ((RAW != '>') &&
+ while (((RAW != '>') &&
((RAW != '/') || (NXT(1) != '>')) &&
- (IS_BYTE_CHAR(RAW))) {
+ (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *q = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
int len = -1, alloc = 0;
@@ -9261,6 +9339,8 @@
failed:
GROW
+ if (ctxt->instate == XML_PARSER_EOF)
+ break;
if (ctxt->input->base != base) goto base_changed;
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
break;
@@ -9498,6 +9578,8 @@
* We should definitely be at the ending "S? '>'" part
*/
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
SKIP_BLANKS;
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
@@ -9613,6 +9695,10 @@
count++;
if (count > 50) {
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
count = 0;
}
NEXTL(l);
@@ -10383,9 +10469,10 @@
void
xmlParseMisc(xmlParserCtxtPtr ctxt) {
- while (((RAW == '<') && (NXT(1) == '?')) ||
- (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
- IS_BLANK_CH(CUR)) {
+ while ((ctxt->instate != XML_PARSER_EOF) &&
+ (((RAW == '<') && (NXT(1) == '?')) ||
+ (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
+ IS_BLANK_CH(CUR))) {
if ((RAW == '<') && (NXT(1) == '?')) {
xmlParsePI(ctxt);
} else if (IS_BLANK_CH(CUR)) {
@@ -11913,6 +12000,8 @@
return(XML_ERR_INTERNAL_ERROR);
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
return(ctxt->errNo);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
if (ctxt->instate == XML_PARSER_START)
xmlDetectSAX2(ctxt);
if ((size > 0) && (chunk != NULL) && (!terminate) &&