Use CFX_XML instead of CXML in CPDF_Metadata

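This CL converts CPDF_Metadata to use the CFX_XML classes instead of the
CXML classes. It also moves the CFX_XML classes, together with the
cfx_blockbuffer and cfx_seekablestreamproxy sources, out of the XFA-only
build so that they are compiled in every configuration.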

Change-Id: Idb784f8aaa0bc843d8a3415ba5262ccf4949308a
Reviewed-on: https://pdfium-review.googlesource.com/30650
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
Commit-Queue: dsinclair <dsinclair@chromium.org>
diff --git a/BUILD.gn b/BUILD.gn
index 45857e5..f28a879 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -821,6 +821,8 @@
     "core/fxcrt/cfx_binarybuf.h",
     "core/fxcrt/cfx_bitstream.cpp",
     "core/fxcrt/cfx_bitstream.h",
+    "core/fxcrt/cfx_blockbuffer.cpp",
+    "core/fxcrt/cfx_blockbuffer.h",
     "core/fxcrt/cfx_datetime.cpp",
     "core/fxcrt/cfx_datetime.h",
     "core/fxcrt/cfx_fileaccess_posix.cpp",
@@ -832,6 +834,8 @@
     "core/fxcrt/cfx_memorystream.h",
     "core/fxcrt/cfx_seekablemultistream.cpp",
     "core/fxcrt/cfx_seekablemultistream.h",
+    "core/fxcrt/cfx_seekablestreamproxy.cpp",
+    "core/fxcrt/cfx_seekablestreamproxy.h",
     "core/fxcrt/cfx_utf8decoder.cpp",
     "core/fxcrt/cfx_utf8decoder.h",
     "core/fxcrt/cfx_widetextbuf.cpp",
@@ -871,6 +875,20 @@
     "core/fxcrt/weak_ptr.h",
     "core/fxcrt/widestring.cpp",
     "core/fxcrt/widestring.h",
+    "core/fxcrt/xml/cfx_xmlattributenode.cpp",
+    "core/fxcrt/xml/cfx_xmlattributenode.h",
+    "core/fxcrt/xml/cfx_xmlchardata.cpp",
+    "core/fxcrt/xml/cfx_xmlchardata.h",
+    "core/fxcrt/xml/cfx_xmlelement.cpp",
+    "core/fxcrt/xml/cfx_xmlelement.h",
+    "core/fxcrt/xml/cfx_xmlinstruction.cpp",
+    "core/fxcrt/xml/cfx_xmlinstruction.h",
+    "core/fxcrt/xml/cfx_xmlnode.cpp",
+    "core/fxcrt/xml/cfx_xmlnode.h",
+    "core/fxcrt/xml/cfx_xmlparser.cpp",
+    "core/fxcrt/xml/cfx_xmlparser.h",
+    "core/fxcrt/xml/cfx_xmltext.cpp",
+    "core/fxcrt/xml/cfx_xmltext.h",
     "core/fxcrt/xml/cxml_attritem.cpp",
     "core/fxcrt/xml/cxml_attritem.h",
     "core/fxcrt/xml/cxml_content.cpp",
@@ -897,14 +915,10 @@
 
   if (pdf_enable_xfa) {
     sources += [
-      "core/fxcrt/cfx_blockbuffer.cpp",
-      "core/fxcrt/cfx_blockbuffer.h",
       "core/fxcrt/cfx_char.cpp",
       "core/fxcrt/cfx_char.h",
       "core/fxcrt/cfx_decimal.cpp",
       "core/fxcrt/cfx_decimal.h",
-      "core/fxcrt/cfx_seekablestreamproxy.cpp",
-      "core/fxcrt/cfx_seekablestreamproxy.h",
       "core/fxcrt/css/cfx_css.h",
       "core/fxcrt/css/cfx_csscolorvalue.cpp",
       "core/fxcrt/css/cfx_csscolorvalue.h",
@@ -949,20 +963,6 @@
       "core/fxcrt/fx_arabic.cpp",
       "core/fxcrt/fx_arabic.h",
       "core/fxcrt/locale_iface.h",
-      "core/fxcrt/xml/cfx_xmlattributenode.cpp",
-      "core/fxcrt/xml/cfx_xmlattributenode.h",
-      "core/fxcrt/xml/cfx_xmlchardata.cpp",
-      "core/fxcrt/xml/cfx_xmlchardata.h",
-      "core/fxcrt/xml/cfx_xmlelement.cpp",
-      "core/fxcrt/xml/cfx_xmlelement.h",
-      "core/fxcrt/xml/cfx_xmlinstruction.cpp",
-      "core/fxcrt/xml/cfx_xmlinstruction.h",
-      "core/fxcrt/xml/cfx_xmlnode.cpp",
-      "core/fxcrt/xml/cfx_xmlnode.h",
-      "core/fxcrt/xml/cfx_xmlparser.cpp",
-      "core/fxcrt/xml/cfx_xmlparser.h",
-      "core/fxcrt/xml/cfx_xmltext.cpp",
-      "core/fxcrt/xml/cfx_xmltext.h",
     ]
   }
 }
@@ -2897,6 +2897,7 @@
     "core/fxcrt/unowned_ptr_unittest.cpp",
     "core/fxcrt/weak_ptr_unittest.cpp",
     "core/fxcrt/widestring_unittest.cpp",
+    "core/fxcrt/xml/cfx_xmlparser_unittest.cpp",
     "core/fxge/dib/cfx_dibitmap_unittest.cpp",
     "core/fxge/dib/cstretchengine_unittest.cpp",
     "fpdfsdk/fpdf_catalog_unittest.cpp",
@@ -2918,7 +2919,6 @@
       "core/fxcrt/css/cfx_cssdeclaration_unittest.cpp",
       "core/fxcrt/css/cfx_cssstylesheet_unittest.cpp",
       "core/fxcrt/css/cfx_cssvaluelistparser_unittest.cpp",
-      "core/fxcrt/xml/cfx_xmlparser_unittest.cpp",
       "fxbarcode/oned/BC_OnedCodaBarWriter_unittest.cpp",
       "fxbarcode/oned/BC_OnedCode128Writer_unittest.cpp",
       "fxbarcode/oned/BC_OnedCode39Writer_unittest.cpp",
diff --git a/core/fpdfdoc/cpdf_metadata.cpp b/core/fpdfdoc/cpdf_metadata.cpp
index 11fde82..972569a 100644
--- a/core/fpdfdoc/cpdf_metadata.cpp
+++ b/core/fpdfdoc/cpdf_metadata.cpp
@@ -8,53 +8,54 @@
 
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
-#include "core/fxcrt/xml/cxml_content.h"
-#include "core/fxcrt/xml/cxml_element.h"
+#include "core/fxcrt/fx_codepage.h"
+#include "core/fxcrt/xml/cfx_xmlelement.h"
+#include "core/fxcrt/xml/cfx_xmlparser.h"
 
 namespace {
 
-void CheckForSharedFormInternal(CXML_Element* element,
+void CheckForSharedFormInternal(CFX_XMLElement* element,
                                 std::vector<UnsupportedFeature>* unsupported) {
-  size_t count = element->CountAttrs();
-  for (size_t i = 0; i < count; ++i) {
-    ByteString space;
-    ByteString name;
-    WideString value;
-    element->GetAttrByIndex(i, &space, &name, &value);
-    if (space != "xmlns" || name != "adhocwf" ||
-        value != L"http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/") {
+  for (const auto& pair : element->GetAttributes()) {
+    if (pair.first != L"xmlns:adhocwf" ||
+        pair.second != L"http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/") {
       continue;
     }
 
-    CXML_Element* pVersion = element->GetElement("adhocwf", "workflowType", 0);
-    if (!pVersion)
-      continue;
+    for (const auto* child = element->GetFirstChild(); child;
+         child = child->GetNextSibling()) {
+      if (child->GetType() != FX_XMLNODE_Element)
+        continue;
 
-    CXML_Content* pContent = ToContent(pVersion->GetChild(0));
-    if (!pContent)
-      continue;
+      const auto* child_elem = static_cast<const CFX_XMLElement*>(child);
+      if (child_elem->GetName() != L"adhocwf:workflowType")
+        continue;
 
-    switch (pContent->m_Content.GetInteger()) {
-      case 0:
-        unsupported->push_back(UnsupportedFeature::kDocumentSharedFormEmail);
-        break;
-      case 1:
-        unsupported->push_back(UnsupportedFeature::kDocumentSharedFormAcrobat);
-        break;
-      case 2:
-        unsupported->push_back(
-            UnsupportedFeature::kDocumentSharedFormFilesystem);
-        break;
+      switch (child_elem->GetTextData().GetInteger()) {
+        case 0:
+          unsupported->push_back(UnsupportedFeature::kDocumentSharedFormEmail);
+          break;
+        case 1:
+          unsupported->push_back(
+              UnsupportedFeature::kDocumentSharedFormAcrobat);
+          break;
+        case 2:
+          unsupported->push_back(
+              UnsupportedFeature::kDocumentSharedFormFilesystem);
+          break;
+      }
+      // We only care about the first one we find.
+      break;
     }
   }
 
-  count = element->CountChildren();
-  for (size_t i = 0; i < count; ++i) {
-    CXML_Element* child = ToElement(element->GetChild(i));
-    if (!child)
+  for (auto* child = element->GetFirstChild(); child;
+       child = child->GetNextSibling()) {
+    if (child->GetType() != FX_XMLNODE_Element)
       continue;
 
-    CheckForSharedFormInternal(child, unsupported);
+    CheckForSharedFormInternal(static_cast<CFX_XMLElement*>(child),
+                               unsupported);
   }
 }
 
@@ -70,12 +71,16 @@
   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(stream_.Get());
   pAcc->LoadAllDataFiltered();
 
-  std::unique_ptr<CXML_Element> xml_root =
-      CXML_Element::Parse(pAcc->GetData(), pAcc->GetSize());
-  if (!xml_root)
+  auto root = pdfium::MakeUnique<CFX_XMLElement>(L"root");
+  auto proxy = pdfium::MakeRetain<CFX_SeekableStreamProxy>(pAcc->GetData(),
+                                                           pAcc->GetSize());
+  proxy->SetCodePage(FX_CODEPAGE_UTF8);
+
+  CFX_XMLParser parser(root.get(), proxy);
+  if (!parser.Parse())
     return {};
 
   std::vector<UnsupportedFeature> unsupported;
-  CheckForSharedFormInternal(xml_root.get(), &unsupported);
+  CheckForSharedFormInternal(root.get(), &unsupported);
   return unsupported;
 }
diff --git a/core/fpdfdoc/cpdf_metadata_unittest.cpp b/core/fpdfdoc/cpdf_metadata_unittest.cpp
index 6e6d2f6..1a39948 100644
--- a/core/fpdfdoc/cpdf_metadata_unittest.cpp
+++ b/core/fpdfdoc/cpdf_metadata_unittest.cpp
@@ -10,7 +10,7 @@
 
 TEST(CPDF_MetadataTest, CheckSharedFormEmailAtTopLevel) {
   const char* data =
-      "<?xml charset=utf-8?>\n"
+      "<?xml charset=\"utf-8\"?>\n"
       "<node xmlns:adhocwf=\"http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/\">\n"
       "<adhocwf:workflowType>0</adhocwf:workflowType>\n"
       "<adhocwf:version>1.1</adhocwf:version>\n"
@@ -27,7 +27,7 @@
 
 TEST(CPDF_MetadataTest, CheckSharedFormAcrobatAtTopLevel) {
   const char* data =
-      "<?xml charset=utf-8?>\n"
+      "<?xml charset=\"utf-8\"?>\n"
       "<node xmlns:adhocwf=\"http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/\">\n"
       "<adhocwf:workflowType>1</adhocwf:workflowType>\n"
       "<adhocwf:version>1.1</adhocwf:version>\n"
@@ -44,7 +44,7 @@
 
 TEST(CPDF_MetadataTest, CheckSharedFormFilesystemAtTopLevel) {
   const char* data =
-      "<?xml charset=utf-8?>\n"
+      "<?xml charset=\"utf-8\"?>\n"
       "<node xmlns:adhocwf=\"http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/\">\n"
       "<adhocwf:workflowType>2</adhocwf:workflowType>\n"
       "<adhocwf:version>1.1</adhocwf:version>\n"
@@ -61,7 +61,7 @@
 
 TEST(CPDF_MetadataTest, CheckSharedFormWithoutWorkflow) {
   const char* data =
-      "<?xml charset=utf-8?>\n"
+      "<?xml charset=\"utf-8\"?>\n"
       "<node xmlns:adhocwf=\"http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/\">\n"
       "<adhocwf:state>2</adhocwf:state>\n"
       "<adhocwf:version>1.1</adhocwf:version>\n"
@@ -77,7 +77,7 @@
 
 TEST(CPDF_MetadataTest, CheckSharedFormAsChild) {
   const char* data =
-      "<?xml charset=utf-8?>\n"
+      "<?xml charset=\"utf-8\"?>\n"
       "<grandparent><parent>\n"
       "<node xmlns:adhocwf=\"http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/\">\n"
       "<adhocwf:workflowType>0</adhocwf:workflowType>\n"
@@ -96,7 +96,7 @@
 
 TEST(CPDF_MetadataTest, CheckSharedFormAsNoAdhoc) {
   const char* data =
-      "<?xml charset=utf-8?>\n"
+      "<?xml charset=\"utf-8\"?>\n"
       "<node></node>";
 
   CPDF_Stream stream;
@@ -109,7 +109,7 @@
 
 TEST(CPDF_MetadataTest, CheckSharedFormWrongNamespace) {
   const char* data =
-      "<?xml charset=utf-8?>\n"
+      "<?xml charset=\"utf-8\"?>\n"
       "<node xmlns:adhocwf=\"http://ns.adobe.com/AcrobatAdhocWorkflow/2.0/\">\n"
       "<adhocwf:workflowType>1</adhocwf:workflowType>\n"
       "<adhocwf:version>1.1</adhocwf:version>\n"
@@ -125,7 +125,7 @@
 
 TEST(CPDF_MetadataTest, CheckSharedFormMultipleErrors) {
   const char* data =
-      "<?xml charset=utf-8?>\n"
+      "<?xml charset=\"utf-8\"?>\n"
       "<grandparent>"
       "<parent>\n"
       "<node xmlns:adhocwf=\"http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/\">\n"
diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
index d22925f..39ddc32 100644
--- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
@@ -8,8 +8,10 @@
 
 #include "core/fxcrt/cfx_seekablestreamproxy.h"
 #include "core/fxcrt/fx_codepage.h"
+#include "core/fxcrt/xml/cfx_xmlnode.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "testing/test_support.h"
+#include "third_party/base/ptr_util.h"
 
 class CFX_XMLTestParser : public CFX_XMLParser {
  public:
diff --git a/testing/libfuzzer/BUILD.gn b/testing/libfuzzer/BUILD.gn
index 16c62bb..ba36894 100644
--- a/testing/libfuzzer/BUILD.gn
+++ b/testing/libfuzzer/BUILD.gn
@@ -34,6 +34,7 @@
     ":pdf_jpx_fuzzer",
     ":pdf_psengine_fuzzer",
     ":pdf_streamparser_fuzzer",
+    ":pdf_xml_fuzzer",
   ]
   if (pdf_enable_xfa) {
     deps += [
@@ -47,7 +48,6 @@
       ":pdf_fm2js_fuzzer",
       ":pdf_formcalc_fuzzer",
       ":pdf_lzw_fuzzer",
-      ":pdf_xml_fuzzer",
     ]
   }
 }
@@ -135,12 +135,6 @@
       "pdf_lzw_fuzzer.cc",
     ]
   }
-
-  pdfium_fuzzer("pdf_xml_fuzzer") {
-    sources = [
-      "pdf_xml_fuzzer.cc",
-    ]
-  }
 }
 
 pdfium_fuzzer("pdf_cmap_fuzzer") {
@@ -211,3 +205,9 @@
     "pdf_streamparser_fuzzer.cc",
   ]
 }
+
+pdfium_fuzzer("pdf_xml_fuzzer") {
+  sources = [
+    "pdf_xml_fuzzer.cc",
+  ]
+}