// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
| 6 | |
| 7 | #ifndef XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_ |
| 8 | #define XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_ |
| 9 | |
| 10 | #include <stack> |
dan sinclair | bf58fbb | 2017-04-06 10:55:39 -0400 | [diff] [blame] | 11 | #include <vector> |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 12 | |
| 13 | #include "core/fxcrt/cfx_blockbuffer.h" |
| 14 | #include "core/fxcrt/cfx_retain_ptr.h" |
| 15 | #include "core/fxcrt/fx_string.h" |
| 16 | #include "xfa/fde/xml/cfde_xmlnode.h" |
| 17 | #include "xfa/fgas/crt/ifgas_stream.h" |
| 18 | |
// Result codes returned by CFDE_XMLSyntaxParser::DoSyntaxParse().
//
// The enumerators carry no explicit values, so their numeric values follow
// declaration order (None == 0 ... EndOfString == 14). Keep the order stable
// if any caller compares or stores the underlying integers.
enum class FDE_XmlSyntaxResult {
  None,
  InstructionOpen,
  InstructionClose,
  ElementOpen,
  ElementBreak,
  ElementClose,
  TargetName,
  TagName,
  AttriName,
  AttriValue,
  Text,
  CData,
  TargetData,
  Error,
  EndOfString
};
| 36 | |
| 37 | class CFDE_XMLSyntaxParser { |
| 38 | public: |
Dan Sinclair | 480f62b | 2017-04-05 16:34:44 -0400 | [diff] [blame] | 39 | explicit CFDE_XMLSyntaxParser(const CFX_RetainPtr<IFGAS_Stream>& pStream); |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 40 | ~CFDE_XMLSyntaxParser(); |
| 41 | |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 42 | FDE_XmlSyntaxResult DoSyntaxParse(); |
| 43 | |
| 44 | int32_t GetStatus() const; |
Nicolas Pena | ad22ac4 | 2017-04-06 12:28:47 -0400 | [diff] [blame^] | 45 | FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; } |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 46 | FX_FILESIZE GetCurrentBinaryPos() const; |
| 47 | int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; } |
| 48 | int32_t GetLastNodeNumber() const { return m_iLastNodeNum; } |
| 49 | |
Dan Sinclair | 5f983bb | 2017-04-05 16:33:44 -0400 | [diff] [blame] | 50 | CFX_WideString GetTargetName() const { |
| 51 | return m_BlockBuffer.GetTextData(0, m_iTextDataLength); |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 52 | } |
Dan Sinclair | ee62ba2 | 2017-04-04 15:07:31 -0400 | [diff] [blame] | 53 | |
Dan Sinclair | 5f983bb | 2017-04-05 16:33:44 -0400 | [diff] [blame] | 54 | CFX_WideString GetTagName() const { |
| 55 | return m_BlockBuffer.GetTextData(0, m_iTextDataLength); |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 56 | } |
Dan Sinclair | ee62ba2 | 2017-04-04 15:07:31 -0400 | [diff] [blame] | 57 | |
Dan Sinclair | 5f983bb | 2017-04-05 16:33:44 -0400 | [diff] [blame] | 58 | CFX_WideString GetAttributeName() const { |
| 59 | return m_BlockBuffer.GetTextData(0, m_iTextDataLength); |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 60 | } |
Dan Sinclair | ee62ba2 | 2017-04-04 15:07:31 -0400 | [diff] [blame] | 61 | |
Dan Sinclair | 5f983bb | 2017-04-05 16:33:44 -0400 | [diff] [blame] | 62 | CFX_WideString GetAttributeValue() const { |
| 63 | return m_BlockBuffer.GetTextData(0, m_iTextDataLength); |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 64 | } |
Dan Sinclair | ee62ba2 | 2017-04-04 15:07:31 -0400 | [diff] [blame] | 65 | |
Dan Sinclair | 5f983bb | 2017-04-05 16:33:44 -0400 | [diff] [blame] | 66 | CFX_WideString GetTextData() const { |
| 67 | return m_BlockBuffer.GetTextData(0, m_iTextDataLength); |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 68 | } |
Dan Sinclair | ee62ba2 | 2017-04-04 15:07:31 -0400 | [diff] [blame] | 69 | |
Dan Sinclair | 5f983bb | 2017-04-05 16:33:44 -0400 | [diff] [blame] | 70 | CFX_WideString GetTargetData() const { |
| 71 | return m_BlockBuffer.GetTextData(0, m_iTextDataLength); |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 72 | } |
| 73 | |
| 74 | protected: |
| 75 | enum class FDE_XmlSyntaxState { |
| 76 | Text, |
| 77 | Node, |
| 78 | Target, |
| 79 | Tag, |
| 80 | AttriName, |
| 81 | AttriEqualSign, |
| 82 | AttriQuotation, |
| 83 | AttriValue, |
| 84 | Entity, |
| 85 | EntityDecimal, |
| 86 | EntityHex, |
| 87 | CloseInstruction, |
| 88 | BreakElement, |
| 89 | CloseElement, |
| 90 | SkipDeclNode, |
| 91 | DeclCharData, |
| 92 | SkipComment, |
| 93 | SkipCommentOrDecl, |
| 94 | SkipCData, |
| 95 | TargetData |
| 96 | }; |
| 97 | |
| 98 | void ParseTextChar(wchar_t ch); |
| 99 | |
| 100 | CFX_RetainPtr<IFGAS_Stream> m_pStream; |
| 101 | int32_t m_iXMLPlaneSize; |
| 102 | int32_t m_iCurrentPos; |
| 103 | int32_t m_iCurrentNodeNum; |
| 104 | int32_t m_iLastNodeNum; |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 105 | int32_t m_iParsedBytes; |
Nicolas Pena | ad22ac4 | 2017-04-06 12:28:47 -0400 | [diff] [blame^] | 106 | FX_FILESIZE m_ParsedChars; |
dan sinclair | bf58fbb | 2017-04-06 10:55:39 -0400 | [diff] [blame] | 107 | std::vector<wchar_t> m_Buffer; |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 108 | int32_t m_iBufferChars; |
| 109 | bool m_bEOS; |
Nicolas Pena | ad22ac4 | 2017-04-06 12:28:47 -0400 | [diff] [blame^] | 110 | FX_FILESIZE m_Start; // Start position in m_Buffer |
| 111 | FX_FILESIZE m_End; // End position in m_Buffer |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 112 | FDE_XMLNODE m_CurNode; |
| 113 | std::stack<FDE_XMLNODE> m_XMLNodeStack; |
| 114 | CFX_BlockBuffer m_BlockBuffer; |
| 115 | int32_t m_iAllocStep; |
| 116 | int32_t& m_iDataLength; |
dan sinclair | bf58fbb | 2017-04-06 10:55:39 -0400 | [diff] [blame] | 117 | wchar_t* m_pCurrentBlock; // Pointer into CFX_BlockBuffer |
Dan Sinclair | ac35589 | 2017-04-03 16:46:21 -0400 | [diff] [blame] | 118 | int32_t m_iIndexInBlock; |
| 119 | int32_t m_iTextDataLength; |
| 120 | FDE_XmlSyntaxResult m_syntaxParserResult; |
| 121 | FDE_XmlSyntaxState m_syntaxParserState; |
| 122 | wchar_t m_wQuotationMark; |
| 123 | int32_t m_iEntityStart; |
| 124 | std::stack<wchar_t> m_SkipStack; |
| 125 | wchar_t m_SkipChar; |
| 126 | }; |
| 127 | |
| 128 | #endif // XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_ |