The last of the Kxml correctness fixes for Expat compatibility.

With this change we should be able to drop the Expat pull parser
and use Kxml exclusively. I'm deferring that change until after
the current release.

Change-Id: I7c6d6dfe6c1e9ae9417c48603068ddd4ade78b76
http://b/3090550
diff --git a/expectations/knownfailures.txt b/expectations/knownfailures.txt
index 5b806ab..54cf6ad 100644
--- a/expectations/knownfailures.txt
+++ b/expectations/knownfailures.txt
@@ -3,6 +3,54 @@
  */
 [
 {
+  description: "KxmlParser doesn't expose DTD text",
+  name: "libcore.xml.KxmlPullParserDtdTest#testDoctypeWithNextToken",
+  bug: 3241492
+},
+{
+  description: "Expat relaxed is different from Kxml relaxed",
+  names: [
+    "libcore.xml.ExpatPullParserTest#testAttributeNoValueWithRelaxed",
+    "libcore.xml.ExpatPullParserTest#testAttributeUnquotedValueWithRelaxed",
+    "libcore.xml.ExpatPullParserTest#testMissingEntitiesWithRelaxed",
+    "libcore.xml.ExpatPullParserTest#testUnterminatedEntityWithRelaxed"
+  ],
+  bug: 3090550
+},
+{
+  description: "ExpatPullParser doesn't support nextToken() or line numbers",
+  names: [
+    "libcore.xml.ExpatPullParserDtdTest#testDoctypeWithNextToken",
+    "libcore.xml.ExpatPullParserTest#testCdataUsingNextToken",
+    "libcore.xml.ExpatPullParserTest#testCommentUsingNextToken",
+    "libcore.xml.ExpatPullParserTest#testCustomEntitiesAreNotEvaluated",
+    "libcore.xml.ExpatPullParserTest#testCustomEntitiesUsingNext",
+    "libcore.xml.ExpatPullParserTest#testCustomEntitiesUsingNextToken",
+    "libcore.xml.ExpatPullParserTest#testEmptyCdataUsingNextToken",
+    "libcore.xml.ExpatPullParserTest#testEmptyComment",
+    "libcore.xml.ExpatPullParserTest#testEmptyEntityReferenceUsingNext",
+    "libcore.xml.ExpatPullParserTest#testEmptyEntityReferenceUsingNextToken",
+    "libcore.xml.ExpatPullParserTest#testEntityInAttributeUsingNextToken",
+    "libcore.xml.ExpatPullParserTest#testLinesAndColumns",
+    "libcore.xml.ExpatPullParserTest#testMissingEntitiesInAttributesUsingNextToken",
+    "libcore.xml.ExpatPullParserTest#testMissingEntitiesInAttributesUsingNextTokenWithRelaxed",
+    "libcore.xml.ExpatPullParserTest#testMissingEntitiesInAttributesUsingNextWithRelaxed",
+    "libcore.xml.ExpatPullParserTest#testMissingEntitiesUsingNextToken",
+    "libcore.xml.ExpatPullParserTest#testMissingEntitiesUsingNextTokenWithRelaxed",
+    "libcore.xml.ExpatPullParserTest#testProcessingInstructionUsingNextToken",
+    "libcore.xml.ExpatPullParserTest#testRegularNumericEntities",
+    "libcore.xml.ExpatPullParserTest#testWhitespaceUsingNextToken",
+    "libcore.xml.ExpatPullParserTest#testXmlDeclaration",
+    "libcore.xml.ExpatPullParserTest#testXmlDeclarationExtraAttributes"
+  ],
+  bug: 3090550
+},
+{
+  description: "ExpatPullParser doesn't handle surrogates properly",
+  name: "libcore.xml.ExpatPullParserTest#testCharacterReferenceOfLastUtf16Surrogate",
+  bug: 3090550
+},
+{
   description: "RandomAccessFile missing finalizer",
   name: "libcore.java.io.RandomAccessFileTest#testRandomAccessFileHasCleanupFinalizer",
   bug: 3015023
diff --git a/luni/src/test/java/libcore/xml/PullParserDtdTest.java b/luni/src/test/java/libcore/xml/PullParserDtdTest.java
index 38b7d8a..0101d7b 100644
--- a/luni/src/test/java/libcore/xml/PullParserDtdTest.java
+++ b/luni/src/test/java/libcore/xml/PullParserDtdTest.java
@@ -182,6 +182,20 @@
         assertEquals(XmlPullParser.END_DOCUMENT, parser.next());
     }
 
+    /**
+     * Android's ExpatPullParser replaces missing entities with the empty string
+     * when an external DTD is declared.
+     */
+    public void testExternalDtdAndMissingEntity() throws Exception {
+        String xml = "<!DOCTYPE foo SYSTEM \"http://127.0.0.1:1/no-such-file.dtd\">"
+                + "<foo>&a;</foo>";
+        XmlPullParser parser = newPullParser(xml);
+        assertEquals(XmlPullParser.START_TAG, parser.next());
+        assertEquals(XmlPullParser.END_TAG, parser.next());
+        assertEquals(XmlPullParser.END_DOCUMENT, parser.next());
+    }
+
+
     public void testExternalIdIsCaseSensitive() throws Exception {
         // The spec requires 'SYSTEM' in upper case
         String xml = "<!DOCTYPE foo ["
@@ -448,6 +462,10 @@
         }
     }
 
+    /**
+     * In honeycomb, KxmlParser's DTD handling was improved but no longer
+     * supports returning the full DTD text. http://b/3241492
+     */
     public void testDoctypeWithNextToken() throws Exception {
         String xml = "<!DOCTYPE foo [<!ENTITY bb \"bar baz\">]><foo>a&bb;c</foo>";
         XmlPullParser parser = newPullParser(xml);
@@ -459,7 +477,7 @@
         assertEquals("a", parser.getText());
         assertEquals(XmlPullParser.ENTITY_REF, parser.nextToken());
         assertEquals("bb", parser.getName());
-        assertEquals("bar baz", parser.getText()); // TODO: this fails on gingerbread
+        assertEquals("bar baz", parser.getText());
         assertEquals(XmlPullParser.TEXT, parser.nextToken());
         assertEquals("c", parser.getText());
         assertEquals(XmlPullParser.END_TAG, parser.next());
diff --git a/luni/src/test/java/libcore/xml/PullParserTest.java b/luni/src/test/java/libcore/xml/PullParserTest.java
index 21342b2..2862de0 100644
--- a/luni/src/test/java/libcore/xml/PullParserTest.java
+++ b/luni/src/test/java/libcore/xml/PullParserTest.java
@@ -102,16 +102,14 @@
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader(
                 "<foo>&#2147483647; &#-2147483648;</foo>"));
-        assertEquals(XmlPullParser.START_TAG, parser.next());
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testNumericEntitiesLargerThanInt() throws Exception {
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader(
                 "<foo>&#2147483648;</foo>"));
-        assertEquals(XmlPullParser.START_TAG, parser.next());
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testCharacterReferenceOfHexUtf16Surrogates() throws Exception {
@@ -144,8 +142,7 @@
     public void testOmittedNumericEntities() throws Exception {
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader("<foo>&#;</foo>"));
-        assertEquals(XmlPullParser.START_TAG, parser.next());
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     /**
@@ -243,8 +240,7 @@
     public void testMissingEntities() throws Exception {
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader("<foo>&aaa;</foo>"));
-        assertEquals(XmlPullParser.START_TAG, parser.next());
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testMissingEntitiesWithRelaxed() throws Exception {
@@ -291,7 +287,7 @@
     public void testMissingEntitiesInAttributesUsingNext() throws Exception {
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader("<foo b='&aaa;'></foo>"));
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testMissingEntitiesInAttributesUsingNextWithRelaxed() throws Exception {
@@ -345,14 +341,13 @@
     public void testLessThanInText() throws Exception{
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader("<foo><</foo>"));
-        assertEquals(XmlPullParser.START_TAG, parser.next());
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testLessThanInAttribute() throws Exception{
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader("<foo a='<'></foo>"));
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testQuotesInAttribute() throws Exception{
@@ -381,36 +376,31 @@
     public void testCdataDelimiterInText() throws Exception{
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader("<foo>]]></foo>"));
-        assertEquals(XmlPullParser.START_TAG, parser.next());
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testUnexpectedEof() throws Exception {
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader("<foo><![C"));
-        assertEquals(XmlPullParser.START_TAG, parser.next());
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testUnexpectedSequence() throws Exception {
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader("<foo><![Cdata[bar]]></foo>"));
-        assertEquals(XmlPullParser.START_TAG, parser.next());
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testThreeDashCommentDelimiter() throws Exception {
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader("<foo><!--a---></foo>"));
-        assertEquals(XmlPullParser.START_TAG, parser.next());
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testTwoDashesInComment() throws Exception {
         XmlPullParser parser = newPullParser();
         parser.setInput(new StringReader("<foo><!-- -- --></foo>"));
-        assertEquals(XmlPullParser.START_TAG, parser.next());
-        assertNextFails(parser);
+        assertParseFailure(parser);
     }
 
     public void testEmptyComment() throws Exception {
@@ -719,9 +709,10 @@
         assertEquals("ns:default", parser.getNamespaceUri(0));
     }
 
-    private void assertNextFails(XmlPullParser parser) throws IOException {
+    private void assertParseFailure(XmlPullParser parser) throws IOException {
         try {
-            parser.next();
+            while (parser.next() != XmlPullParser.END_DOCUMENT) {
+            }
             fail();
         } catch (XmlPullParserException expected) {
         }
diff --git a/xml/src/main/java/org/kxml2/io/KXmlParser.java b/xml/src/main/java/org/kxml2/io/KXmlParser.java
index ba905a9..5cd2810 100644
--- a/xml/src/main/java/org/kxml2/io/KXmlParser.java
+++ b/xml/src/main/java/org/kxml2/io/KXmlParser.java
@@ -879,17 +879,17 @@
 
         skip();
         int quote = peekCharacter();
+        String entityValue;
         if (quote == '"' || quote == '\'') {
             position++;
-            String value = readValue((char) quote, true, false, ValueContext.ENTITY_DECLARATION);
+            entityValue = readValue((char) quote, true, false, ValueContext.ENTITY_DECLARATION);
             position++;
-            if (generalEntity && processDocDecl) {
-                if (documentEntities == null) {
-                    documentEntities = new HashMap<String, char[]>();
-                }
-                documentEntities.put(name, value.toCharArray());
-            }
         } else if (readExternalId(true, false)) {
+            /*
+             * Map external entities to the empty string. This is dishonest,
+             * but it's consistent with Android's Expat pull parser.
+             */
+            entityValue = "";
             skip();
             if (peekCharacter() == NDATA[0]) {
                 read(NDATA);
@@ -900,6 +900,13 @@
             throw new XmlPullParserException("Expected entity value or external ID", this, null);
         }
 
+        if (generalEntity && processDocDecl) {
+            if (documentEntities == null) {
+                documentEntities = new HashMap<String, char[]>();
+            }
+            documentEntities.put(name, entityValue.toCharArray());
+        }
+
         skip();
         read('>');
     }
@@ -1220,6 +1227,17 @@
             return;
         }
 
+        /*
+         * The parser skipped an external DTD, and now we've encountered an
+         * unknown entity that could have been declared there. Map it to the
+         * empty string. This is dishonest, but it's consistent with Android's
+         * old ExpatPullParser.
+         */
+        if (systemId != null) {
+            out.delete(start, out.length());
+            return;
+        }
+
         // keep the unresolved entity "&code;" in the text for relaxed clients
         unresolved = true;
         if (throwOnResolveFailure) {
@@ -1257,8 +1275,9 @@
          * If we're lucky (which we usually are), we'll return a single slice of
          * the buffer. This fast path avoids allocating a string builder.
          *
-         * There are 5 unlucky characters we could encounter:
+         * There are 6 unlucky characters we could encounter:
          *  - "&":  entities must be resolved.
+         *  - "%":  parameter entities are unsupported in entity values.
          *  - "<":  this isn't permitted in attributes unless relaxed.
          *  - "]":  this requires a lookahead to defend against the forbidden
          *          CDATA section delimiter "]]>".
@@ -1312,7 +1331,8 @@
                     && (c != '\n' || valueContext != ValueContext.ATTRIBUTE)
                     && c != '&'
                     && c != '<'
-                    && (c != ']' || valueContext != ValueContext.TEXT)) {
+                    && (c != ']' || valueContext != ValueContext.TEXT)
+                    && (c != '%' || valueContext != ValueContext.ENTITY_DECLARATION)) {
                 isWhitespace &= (c <= ' ');
                 position++;
                 continue;
@@ -1355,6 +1375,10 @@
                 }
                 isWhitespace = false;
 
+            } else if (c == '%') {
+                throw new XmlPullParserException("This parser doesn't support parameter entities",
+                        this, null);
+
             } else {
                 throw new AssertionError();
             }