- valid.c: removed a state explosion exhibited by RSS
- test/valid/rss.xml result/valid/rss.xml*: added the testcase
from bug #51872
Daniel
diff --git a/ChangeLog b/ChangeLog
index 9d557c0..42b3b08 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Fri Apr 20 15:46:04 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
+
+ * valid.c: removed a state explosion exhibited by RSS
+ * test/valid/rss.xml result/valid/rss.xml*: added the testcase
+ from bug #51872
+
Fri Apr 20 14:52:44 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
* valid.[ch] tree.h: worked *hard* to get non-determinist content
diff --git a/result/valid/rss.xml b/result/valid/rss.xml
new file mode 100644
index 0000000..ee5145f
--- /dev/null
+++ b/result/valid/rss.xml
@@ -0,0 +1,170 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE rss [
+<!--
+
+ Rich Site Summary (RSS) 0.91 official DTD, proposed.
+
+ RSS is an XML vocabulary for describing
+ metadata about websites, and enabling the display of
+ "channels" on the "My Netscape" website.
+
+ RSS Info can be found at http://my.netscape.com/publish/
+ XML Info can be found at http://www.w3.org/XML/
+
+ copyright Netscape Communications, 1999
+
+ Dan Libby - danda@netscape.com
+
+ Based on RSS DTD originally created by
+ Lars Marius Garshol - larsga@ifi.uio.no.
+ $Id: rss-0.91.dtd,v 1.1 1999/07/25 07:59:31 danda Exp $
+
+--><!ELEMENT rss (channel)>
+<!ATTLIST rss version CDATA #REQUIRED>
+<!-- must be "0.91"> --><!ELEMENT channel (title | description | link | language | item+ | rating? | image? | textinput? | copyright? | pubDate? | lastBuildDate? | docs? | managingEditor? | webMaster? | skipHours? | skipDays?)*>
+<!ELEMENT title (#PCDATA)>
+<!ELEMENT description (#PCDATA)>
+<!ELEMENT link (#PCDATA)>
+<!ELEMENT image (title | url | link | width? | height? | description?)*>
+<!ELEMENT url (#PCDATA)>
+<!ELEMENT item (title | link | description)*>
+<!ELEMENT textinput (title | description | name | link)*>
+<!ELEMENT name (#PCDATA)>
+<!ELEMENT rating (#PCDATA)>
+<!ELEMENT language (#PCDATA)>
+<!ELEMENT width (#PCDATA)>
+<!ELEMENT height (#PCDATA)>
+<!ELEMENT copyright (#PCDATA)>
+<!ELEMENT pubDate (#PCDATA)>
+<!ELEMENT lastBuildDate (#PCDATA)>
+<!ELEMENT docs (#PCDATA)>
+<!ELEMENT managingEditor (#PCDATA)>
+<!ELEMENT webMaster (#PCDATA)>
+<!ELEMENT hour (#PCDATA)>
+<!ELEMENT day (#PCDATA)>
+<!ELEMENT skipHours (hour)+>
+<!ELEMENT skipDays (day)+>
+<!--
+ Copied from HTML 3.2 DTD, with modifications (removed CDATA)
+ http://www.w3.org/TR/REC-html32.html#dtd
+ =============== BEGIN ===================
+--><!--
+ Character Entities for ISO Latin-1
+
+ (C) International Organization for Standardization 1986
+ Permission to copy in any form is granted for use with
+ conforming SGML systems and applications as defined in
+ ISO 8879, provided this notice is included in all copies.
+ This has been extended for use with HTML to cover the full
+ set of codes in the range 160-255 decimal.
+--><!-- Character entity set. Typical invocation:
+ <!ENTITY % ISOlat1 PUBLIC
+ "ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML">
+ %ISOlat1;
+--><!ENTITY nbsp " ">
+<!-- no-break space --><!ENTITY iexcl "¡">
+<!-- inverted exclamation mark --><!ENTITY cent "¢">
+<!-- cent sign --><!ENTITY pound "£">
+<!-- pound sterling sign --><!ENTITY curren "¤">
+<!-- general currency sign --><!ENTITY yen "¥">
+<!-- yen sign --><!ENTITY brvbar "¦">
+<!-- broken (vertical) bar --><!ENTITY sect "§">
+<!-- section sign --><!ENTITY uml "¨">
+<!-- umlaut (dieresis) --><!ENTITY copy "©">
+<!-- copyright sign --><!ENTITY ordf "ª">
+<!-- ordinal indicator, feminine --><!ENTITY laquo "«">
+<!-- angle quotation mark, left --><!ENTITY not "¬">
+<!-- not sign --><!ENTITY shy "­">
+<!-- soft hyphen --><!ENTITY reg "®">
+<!-- registered sign --><!ENTITY macr "¯">
+<!-- macron --><!ENTITY deg "°">
+<!-- degree sign --><!ENTITY plusmn "±">
+<!-- plus-or-minus sign --><!ENTITY sup2 "²">
+<!-- superscript two --><!ENTITY sup3 "³">
+<!-- superscript three --><!ENTITY acute "´">
+<!-- acute accent --><!ENTITY micro "µ">
+<!-- micro sign --><!ENTITY para "¶">
+<!-- pilcrow (paragraph sign) --><!ENTITY middot "·">
+<!-- middle dot --><!ENTITY cedil "¸">
+<!-- cedilla --><!ENTITY sup1 "¹">
+<!-- superscript one --><!ENTITY ordm "º">
+<!-- ordinal indicator, masculine --><!ENTITY raquo "»">
+<!-- angle quotation mark, right --><!ENTITY frac14 "¼">
+<!-- fraction one-quarter --><!ENTITY frac12 "½">
+<!-- fraction one-half --><!ENTITY frac34 "¾">
+<!-- fraction three-quarters --><!ENTITY iquest "¿">
+<!-- inverted question mark --><!ENTITY Agrave "À">
+<!-- capital A, grave accent --><!ENTITY Aacute "Á">
+<!-- capital A, acute accent --><!ENTITY Acirc "Â">
+<!-- capital A, circumflex accent --><!ENTITY Atilde "Ã">
+<!-- capital A, tilde --><!ENTITY Auml "Ä">
+<!-- capital A, dieresis or umlaut mark --><!ENTITY Aring "Å">
+<!-- capital A, ring --><!ENTITY AElig "Æ">
+<!-- capital AE diphthong (ligature) --><!ENTITY Ccedil "Ç">
+<!-- capital C, cedilla --><!ENTITY Egrave "È">
+<!-- capital E, grave accent --><!ENTITY Eacute "É">
+<!-- capital E, acute accent --><!ENTITY Ecirc "Ê">
+<!-- capital E, circumflex accent --><!ENTITY Euml "Ë">
+<!-- capital E, dieresis or umlaut mark --><!ENTITY Igrave "Ì">
+<!-- capital I, grave accent --><!ENTITY Iacute "Í">
+<!-- capital I, acute accent --><!ENTITY Icirc "Î">
+<!-- capital I, circumflex accent --><!ENTITY Iuml "Ï">
+<!-- capital I, dieresis or umlaut mark --><!ENTITY ETH "Ð">
+<!-- capital Eth, Icelandic --><!ENTITY Ntilde "Ñ">
+<!-- capital N, tilde --><!ENTITY Ograve "Ò">
+<!-- capital O, grave accent --><!ENTITY Oacute "Ó">
+<!-- capital O, acute accent --><!ENTITY Ocirc "Ô">
+<!-- capital O, circumflex accent --><!ENTITY Otilde "Õ">
+<!-- capital O, tilde --><!ENTITY Ouml "Ö">
+<!-- capital O, dieresis or umlaut mark --><!ENTITY times "×">
+<!-- multiply sign --><!ENTITY Oslash "Ø">
+<!-- capital O, slash --><!ENTITY Ugrave "Ù">
+<!-- capital U, grave accent --><!ENTITY Uacute "Ú">
+<!-- capital U, acute accent --><!ENTITY Ucirc "Û">
+<!-- capital U, circumflex accent --><!ENTITY Uuml "Ü">
+<!-- capital U, dieresis or umlaut mark --><!ENTITY Yacute "Ý">
+<!-- capital Y, acute accent --><!ENTITY THORN "Þ">
+<!-- capital THORN, Icelandic --><!ENTITY szlig "ß">
+<!-- small sharp s, German (sz ligature) --><!ENTITY agrave "à">
+<!-- small a, grave accent --><!ENTITY aacute "á">
+<!-- small a, acute accent --><!ENTITY acirc "â">
+<!-- small a, circumflex accent --><!ENTITY atilde "ã">
+<!-- small a, tilde --><!ENTITY auml "ä">
+<!-- small a, dieresis or umlaut mark --><!ENTITY aring "å">
+<!-- small a, ring --><!ENTITY aelig "æ">
+<!-- small ae diphthong (ligature) --><!ENTITY ccedil "ç">
+<!-- small c, cedilla --><!ENTITY egrave "è">
+<!-- small e, grave accent --><!ENTITY eacute "é">
+<!-- small e, acute accent --><!ENTITY ecirc "ê">
+<!-- small e, circumflex accent --><!ENTITY euml "ë">
+<!-- small e, dieresis or umlaut mark --><!ENTITY igrave "ì">
+<!-- small i, grave accent --><!ENTITY iacute "í">
+<!-- small i, acute accent --><!ENTITY icirc "î">
+<!-- small i, circumflex accent --><!ENTITY iuml "ï">
+<!-- small i, dieresis or umlaut mark --><!ENTITY eth "ð">
+<!-- small eth, Icelandic --><!ENTITY ntilde "ñ">
+<!-- small n, tilde --><!ENTITY ograve "ò">
+<!-- small o, grave accent --><!ENTITY oacute "ó">
+<!-- small o, acute accent --><!ENTITY ocirc "ô">
+<!-- small o, circumflex accent --><!ENTITY otilde "õ">
+<!-- small o, tilde --><!ENTITY ouml "ö">
+<!-- small o, dieresis or umlaut mark --><!ENTITY divide "÷">
+<!-- divide sign --><!ENTITY oslash "ø">
+<!-- small o, slash --><!ENTITY ugrave "ù">
+<!-- small u, grave accent --><!ENTITY uacute "ú">
+<!-- small u, acute accent --><!ENTITY ucirc "û">
+<!-- small u, circumflex accent --><!ENTITY uuml "ü">
+<!-- small u, dieresis or umlaut mark --><!ENTITY yacute "ý">
+<!-- small y, acute accent --><!ENTITY thorn "þ">
+<!-- small thorn, Icelandic --><!ENTITY yuml "ÿ">
+<!-- small y, dieresis or umlaut mark --><!--
+ Copied from HTML 3.2 DTD, with modifications (removed CDATA)
+ http://www.w3.org/TR/REC-html32.html#dtd
+ ================= END ===================
+-->]>
+<rss>
+<channel>
+<image/>
+<title>PP</title>
+</channel>
+</rss>
diff --git a/result/valid/rss.xml.err b/result/valid/rss.xml.err
new file mode 100644
index 0000000..307a8cc
--- /dev/null
+++ b/result/valid/rss.xml.err
@@ -0,0 +1,3 @@
+./test/valid/rss.xml:178: validity error: Element rss doesn't carry attribute version
+</rss>
+ ^
diff --git a/test/valid/rss.xml b/test/valid/rss.xml
new file mode 100644
index 0000000..429e0fe
--- /dev/null
+++ b/test/valid/rss.xml
@@ -0,0 +1,178 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE rss [
+<!--
+
+ Rich Site Summary (RSS) 0.91 official DTD, proposed.
+
+ RSS is an XML vocabulary for describing
+ metadata about websites, and enabling the display of
+ "channels" on the "My Netscape" website.
+
+ RSS Info can be found at http://my.netscape.com/publish/
+ XML Info can be found at http://www.w3.org/XML/
+
+ copyright Netscape Communications, 1999
+
+ Dan Libby - danda@netscape.com
+
+ Based on RSS DTD originally created by
+ Lars Marius Garshol - larsga@ifi.uio.no.
+ $Id: rss-0.91.dtd,v 1.1 1999/07/25 07:59:31 danda Exp $
+
+-->
+<!ELEMENT rss (channel)>
+<!ATTLIST rss version CDATA #REQUIRED>
+<!-- must be "0.91"> --><!ELEMENT channel (title | description | link
+| language | item+ | rating? | image? | textinput? | copyright? |
+pubDate? | lastBuildDate? | docs? | managingEditor? | webMaster? |
+skipHours? | skipDays?)*>
+<!ELEMENT title (#PCDATA)>
+<!ELEMENT description (#PCDATA)>
+<!ELEMENT link (#PCDATA)>
+<!ELEMENT image (title | url | link | width? | height? | description?)*>
+<!ELEMENT url (#PCDATA)>
+<!ELEMENT item (title | link | description)*>
+<!ELEMENT textinput (title | description | name | link)*>
+<!ELEMENT name (#PCDATA)>
+<!ELEMENT rating (#PCDATA)>
+<!ELEMENT language (#PCDATA)>
+<!ELEMENT width (#PCDATA)>
+<!ELEMENT height (#PCDATA)>
+<!ELEMENT copyright (#PCDATA)>
+<!ELEMENT pubDate (#PCDATA)>
+<!ELEMENT lastBuildDate (#PCDATA)>
+<!ELEMENT docs (#PCDATA)>
+<!ELEMENT managingEditor (#PCDATA)>
+<!ELEMENT webMaster (#PCDATA)>
+<!ELEMENT hour (#PCDATA)>
+<!ELEMENT day (#PCDATA)>
+<!ELEMENT skipHours (hour)+>
+<!ELEMENT skipDays (day)+>
+<!--
+ Copied from HTML 3.2 DTD, with modifications (removed CDATA)
+ http://www.w3.org/TR/REC-html32.html#dtd
+ =============== BEGIN ===================
+-->
+<!--
+ Character Entities for ISO Latin-1
+
+ (C) International Organization for Standardization 1986
+ Permission to copy in any form is granted for use with
+ conforming SGML systems and applications as defined in
+ ISO 8879, provided this notice is included in all copies.
+ This has been extended for use with HTML to cover the full
+ set of codes in the range 160-255 decimal.
+-->
+<!-- Character entity set. Typical invocation:
+ <!ENTITY % ISOlat1 PUBLIC
+ "ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML">
+ %ISOlat1;
+-->
+<!ENTITY nbsp " "> <!-- no-break space -->
+<!ENTITY iexcl "¡">
+<!-- inverted exclamation mark --><!ENTITY cent "¢">
+<!-- cent sign --><!ENTITY pound "£">
+<!-- pound sterling sign --><!ENTITY curren "¤">
+<!-- general currency sign --><!ENTITY yen "¥">
+<!-- yen sign --><!ENTITY brvbar "¦">
+<!-- broken (vertical) bar --><!ENTITY sect "§">
+<!-- section sign --><!ENTITY uml "¨">
+<!-- umlaut (dieresis) --><!ENTITY copy "©">
+<!-- copyright sign --><!ENTITY ordf "ª">
+<!-- ordinal indicator, feminine --><!ENTITY laquo "«">
+<!-- angle quotation mark, left --><!ENTITY not "¬">
+<!-- not sign --><!ENTITY shy "­">
+<!-- soft hyphen --><!ENTITY reg "®">
+<!-- registered sign --><!ENTITY macr "¯">
+<!-- macron --><!ENTITY deg "°">
+<!-- degree sign --><!ENTITY plusmn "±">
+<!-- plus-or-minus sign --><!ENTITY sup2 "²">
+<!-- superscript two --><!ENTITY sup3 "³">
+<!-- superscript three --><!ENTITY acute "´">
+<!-- acute accent --><!ENTITY micro "µ">
+<!-- micro sign --><!ENTITY para "¶">
+<!-- pilcrow (paragraph sign) --><!ENTITY middot "·">
+<!-- middle dot --><!ENTITY cedil "¸">
+<!-- cedilla --><!ENTITY sup1 "¹">
+<!-- superscript one --><!ENTITY ordm "º">
+<!-- ordinal indicator, masculine --><!ENTITY raquo "»">
+<!-- angle quotation mark, right --><!ENTITY frac14 "¼">
+<!-- fraction one-quarter --><!ENTITY frac12 "½">
+<!-- fraction one-half --><!ENTITY frac34 "¾">
+<!-- fraction three-quarters --><!ENTITY iquest "¿">
+<!-- inverted question mark --><!ENTITY Agrave "À">
+<!-- capital A, grave accent --><!ENTITY Aacute "Á">
+<!-- capital A, acute accent --><!ENTITY Acirc "Â">
+<!-- capital A, circumflex accent --><!ENTITY Atilde "Ã">
+<!-- capital A, tilde --><!ENTITY Auml "Ä">
+<!-- capital A, dieresis or umlaut mark --><!ENTITY Aring "Å">
+<!-- capital A, ring --><!ENTITY AElig "Æ">
+<!-- capital AE diphthong (ligature) --><!ENTITY Ccedil "Ç">
+<!-- capital C, cedilla --><!ENTITY Egrave "È">
+<!-- capital E, grave accent --><!ENTITY Eacute "É">
+<!-- capital E, acute accent --><!ENTITY Ecirc "Ê">
+<!-- capital E, circumflex accent --><!ENTITY Euml "Ë">
+<!-- capital E, dieresis or umlaut mark --><!ENTITY Igrave "Ì">
+<!-- capital I, grave accent --><!ENTITY Iacute "Í">
+<!-- capital I, acute accent --><!ENTITY Icirc "Î">
+<!-- capital I, circumflex accent --><!ENTITY Iuml "Ï">
+<!-- capital I, dieresis or umlaut mark --><!ENTITY ETH "Ð">
+<!-- capital Eth, Icelandic --><!ENTITY Ntilde "Ñ">
+<!-- capital N, tilde --><!ENTITY Ograve "Ò">
+<!-- capital O, grave accent --><!ENTITY Oacute "Ó">
+<!-- capital O, acute accent --><!ENTITY Ocirc "Ô">
+<!-- capital O, circumflex accent --><!ENTITY Otilde "Õ">
+<!-- capital O, tilde --><!ENTITY Ouml "Ö">
+<!-- capital O, dieresis or umlaut mark --><!ENTITY times "×">
+<!-- multiply sign --><!ENTITY Oslash "Ø">
+<!-- capital O, slash --><!ENTITY Ugrave "Ù">
+<!-- capital U, grave accent --><!ENTITY Uacute "Ú">
+<!-- capital U, acute accent --><!ENTITY Ucirc "Û">
+<!-- capital U, circumflex accent --><!ENTITY Uuml "Ü">
+<!-- capital U, dieresis or umlaut mark --><!ENTITY Yacute "Ý">
+<!-- capital Y, acute accent --><!ENTITY THORN "Þ">
+<!-- capital THORN, Icelandic --><!ENTITY szlig "ß">
+<!-- small sharp s, German (sz ligature) --><!ENTITY agrave "à">
+<!-- small a, grave accent --><!ENTITY aacute "á">
+<!-- small a, acute accent --><!ENTITY acirc "â">
+<!-- small a, circumflex accent --><!ENTITY atilde "ã">
+<!-- small a, tilde --><!ENTITY auml "ä">
+<!-- small a, dieresis or umlaut mark --><!ENTITY aring "å">
+<!-- small a, ring --><!ENTITY aelig "æ">
+<!-- small ae diphthong (ligature) --><!ENTITY ccedil "ç">
+<!-- small c, cedilla --><!ENTITY egrave "è">
+<!-- small e, grave accent --><!ENTITY eacute "é">
+<!-- small e, acute accent --><!ENTITY ecirc "ê">
+<!-- small e, circumflex accent --><!ENTITY euml "ë">
+<!-- small e, dieresis or umlaut mark --><!ENTITY igrave "ì">
+<!-- small i, grave accent --><!ENTITY iacute "í">
+<!-- small i, acute accent --><!ENTITY icirc "î">
+<!-- small i, circumflex accent --><!ENTITY iuml "ï">
+<!-- small i, dieresis or umlaut mark --><!ENTITY eth "ð">
+<!-- small eth, Icelandic --><!ENTITY ntilde "ñ">
+<!-- small n, tilde --><!ENTITY ograve "ò">
+<!-- small o, grave accent --><!ENTITY oacute "ó">
+<!-- small o, acute accent --><!ENTITY ocirc "ô">
+<!-- small o, circumflex accent --><!ENTITY otilde "õ">
+<!-- small o, tilde --><!ENTITY ouml "ö">
+<!-- small o, dieresis or umlaut mark --><!ENTITY divide "÷">
+<!-- divide sign --><!ENTITY oslash "ø">
+<!-- small o, slash --><!ENTITY ugrave "ù">
+<!-- small u, grave accent --><!ENTITY uacute "ú">
+<!-- small u, acute accent --><!ENTITY ucirc "û">
+<!-- small u, circumflex accent --><!ENTITY uuml "ü">
+<!-- small u, dieresis or umlaut mark --><!ENTITY yacute "ý">
+<!-- small y, acute accent --><!ENTITY thorn "þ">
+<!-- small thorn, Icelandic --><!ENTITY yuml "ÿ">
+<!-- small y, dieresis or umlaut mark --><!--
+ Copied from HTML 3.2 DTD, with modifications (removed CDATA)
+ http://www.w3.org/TR/REC-html32.html#dtd
+ ================= END ===================
+-->
+]>
+<rss>
+<channel>
+<image/>
+<title>PP</title>
+</channel>
+</rss>
diff --git a/valid.c b/valid.c
index c36930f..b8077b4 100644
--- a/valid.c
+++ b/valid.c
@@ -3655,6 +3655,18 @@
break;
case XML_ELEMENT_CONTENT_OR:
/*
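* Small optimization: when the first 'or' branch is a plain element that cannot match NODE, go directly to the second branch.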
+ */
+ if (CONT->c1->type == XML_ELEMENT_CONTENT_ELEMENT) {
+ if ((NODE == NULL) ||
+ (!xmlStrEqual(NODE->name, CONT->c1->name))) {
+ DEPTH++;
+ CONT = CONT->c2;
+ goto cont;
+ }
+ }
+
+ /*
* save the second branch 'or' branch
*/
DEBUG_VALID_MSG("saving 'or' branch");