[SkXMLParser] Initial text node support

Also disable entity processing.

R=bungeman@google.com,robertphillips@google.com
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2154853002

Review-Url: https://codereview.chromium.org/2154853002
diff --git a/src/xml/SkXMLParser.cpp b/src/xml/SkXMLParser.cpp
index 77903cb..23c4e67 100644
--- a/src/xml/SkXMLParser.cpp
+++ b/src/xml/SkXMLParser.cpp
@@ -8,6 +8,8 @@
 #include "expat.h"
 
 #include "SkStream.h"
+#include "SkString.h"
+#include "SkTypes.h"
 #include "SkXMLParser.h"
 
 static char const* const gErrorStrings[] = {
@@ -59,18 +61,68 @@
     sk_free
 };
 
-void XMLCALL start_element_handler(void *data, const char* tag, const char** attributes) {
-    SkXMLParser* parser = static_cast<SkXMLParser*>(data);
+struct ParsingContext {
+    ParsingContext(SkXMLParser* parser)
+        : fParser(parser)
+        , fXMLParser(XML_ParserCreate_MM(nullptr, &sk_XML_alloc, nullptr)) { }
 
-    parser->startElement(tag);
+    void flushText() {
+        if (!fBufferedText.isEmpty()) {
+            fParser->text(fBufferedText.c_str(), SkTo<int>(fBufferedText.size()));
+            fBufferedText.reset();
+        }
+    }
+
+    void appendText(const char* txt, size_t len) {
+        fBufferedText.append(txt, len);
+    }
+
+    SkXMLParser* fParser;
+    SkAutoTCallVProc<skstd::remove_pointer_t<XML_Parser>, XML_ParserFree> fXMLParser;
+
+private:
+    SkString fBufferedText;
+};
+
+#define HANDLER_CONTEXT(arg, name) ParsingContext* name = static_cast<ParsingContext*>(arg);
+
+void XMLCALL start_element_handler(void *data, const char* tag, const char** attributes) {
+    HANDLER_CONTEXT(data, ctx);
+    ctx->flushText();
+
+    ctx->fParser->startElement(tag);
 
     for (size_t i = 0; attributes[i]; i += 2) {
-        parser->addAttribute(attributes[i], attributes[i + 1]);
+        ctx->fParser->addAttribute(attributes[i], attributes[i + 1]);
     }
 }
 
 void XMLCALL end_element_handler(void* data, const char* tag) {
-    static_cast<SkXMLParser*>(data)->endElement(tag);
+    HANDLER_CONTEXT(data, ctx);
+    ctx->flushText();
+
+    ctx->fParser->endElement(tag);
+}
+
+void XMLCALL text_handler(void *data, const char* txt, int len) {
+    HANDLER_CONTEXT(data, ctx);
+
+    ctx->appendText(txt, SkTo<size_t>(len));
+}
+
+void XMLCALL entity_decl_handler(void *data,
+                                 const XML_Char *entityName,
+                                 int is_parameter_entity,
+                                 const XML_Char *value,
+                                 int value_length,
+                                 const XML_Char *base,
+                                 const XML_Char *systemId,
+                                 const XML_Char *publicId,
+                                 const XML_Char *notationName) {
+    HANDLER_CONTEXT(data, ctx);
+
+    SkDebugf("'%s' entity declaration found, stopping processing", entityName);
+    XML_StopParser(ctx->fXMLParser, XML_FALSE);
 }
 
 } // anonymous namespace
@@ -85,20 +137,23 @@
 
 bool SkXMLParser::parse(SkStream& docStream)
 {
-    SkAutoTCallVProc<skstd::remove_pointer_t<XML_Parser>, XML_ParserFree>
-    parser(XML_ParserCreate_MM(nullptr, &sk_XML_alloc, nullptr));
-    if (!parser) {
+    ParsingContext ctx(this);
+    if (!ctx.fXMLParser) {
         SkDebugf("could not create XML parser\n");
         return false;
     }
 
-    XML_SetUserData(parser, this);
-    XML_SetElementHandler(parser, start_element_handler, end_element_handler);
+    XML_SetUserData(ctx.fXMLParser, &ctx);
+    XML_SetElementHandler(ctx.fXMLParser, start_element_handler, end_element_handler);
+    XML_SetCharacterDataHandler(ctx.fXMLParser, text_handler);
+
+    // Disable entity processing, to inhibit internal entity expansion. See expat CVE-2013-0340.
+    XML_SetEntityDeclHandler(ctx.fXMLParser, entity_decl_handler);
 
     static const int kBufferSize = 512 SkDEBUGCODE( - 507);
     bool done = false;
     do {
-        void* buffer = XML_GetBuffer(parser, kBufferSize);
+        void* buffer = XML_GetBuffer(ctx.fXMLParser, kBufferSize);
         if (!buffer) {
             SkDebugf("could not buffer enough to continue\n");
             return false;
@@ -106,11 +161,11 @@
 
         size_t len = docStream.read(buffer, kBufferSize);
         done = docStream.isAtEnd();
-        XML_Status status = XML_ParseBuffer(parser, SkToS32(len), done);
+        XML_Status status = XML_ParseBuffer(ctx.fXMLParser, SkToS32(len), done);
         if (XML_STATUS_ERROR == status) {
-            XML_Error error = XML_GetErrorCode(parser);
-            int line = XML_GetCurrentLineNumber(parser);
-            int column = XML_GetCurrentColumnNumber(parser);
+            XML_Error error = XML_GetErrorCode(ctx.fXMLParser);
+            int line = XML_GetCurrentLineNumber(ctx.fXMLParser);
+            int column = XML_GetCurrentColumnNumber(ctx.fXMLParser);
             const XML_LChar* errorString = XML_ErrorString(error);
             SkDebugf("parse error @%d:%d: %d (%s).\n", line, column, error, errorString);
             return false;
diff --git a/tests/SkDOMTest.cpp b/tests/SkDOMTest.cpp
index a7fbda7..6ea4e78 100644
--- a/tests/SkDOMTest.cpp
+++ b/tests/SkDOMTest.cpp
@@ -12,14 +12,26 @@
 
 #include "SkDOM.h"
 
+static const SkDOM::Node* check_node(skiatest::Reporter* r, const SkDOM& dom,
+                                     const SkDOM::Node* node, const char* expectedName,
+                                     SkDOM::Type expectedType) {
+    REPORTER_ASSERT(r, node);
+    if (node) {
+        REPORTER_ASSERT(r, !strcmp(dom.getName(node), expectedName));
+        REPORTER_ASSERT(r, dom.getType(node) == expectedType);
+    }
+    return node;
+}
+
 DEF_TEST(SkDOM_test, r) {
     static const char gDoc[] =
         "<root a='1' b='2'>"
             "<elem1 c='3' />"
             "<elem2 d='4' />"
             "<elem3 e='5'>"
-                "<subelem1/>"
+                "<subelem1>Some text.</subelem1>"
                 "<subelem2 f='6' g='7'/>"
+                "<subelem3>Some more text.</subelem3>"
             "</elem3>"
             "<elem4 h='8'/>"
         "</root>"
@@ -42,6 +54,33 @@
 
     REPORTER_ASSERT(r, dom.getFirstChild(root, "elem1"));
     REPORTER_ASSERT(r, !dom.getFirstChild(root, "subelem1"));
+
+    {
+        const auto* elem1 = check_node(r, dom, dom.getFirstChild(root),
+                                       "elem1", SkDOM::kElement_Type);
+        const auto* elem2 = check_node(r, dom, dom.getNextSibling(elem1),
+                                       "elem2", SkDOM::kElement_Type);
+        const auto* elem3 = check_node(r, dom, dom.getNextSibling(elem2),
+                                       "elem3", SkDOM::kElement_Type);
+        {
+            const auto* subelem1 = check_node(r, dom, dom.getFirstChild(elem3),
+                                              "subelem1", SkDOM::kElement_Type);
+            {
+                check_node(r, dom, dom.getFirstChild(subelem1),
+                           "Some text.", SkDOM::kText_Type);
+            }
+            const auto* subelem2 = check_node(r, dom, dom.getNextSibling(subelem1),
+                                              "subelem2", SkDOM::kElement_Type);
+            const auto* subelem3 = check_node(r, dom, dom.getNextSibling(subelem2),
+                                              "subelem3", SkDOM::kElement_Type);
+            {
+                check_node(r, dom, dom.getFirstChild(subelem3),
+                           "Some more text.", SkDOM::kText_Type);
+            }
+        }
+        check_node(r, dom, dom.getNextSibling(elem3),
+                   "elem4", SkDOM::kElement_Type);
+    }
 }
 
 #endif // SK_XML