blob: 6fa310476e4cfaefca5f19081296c39c8c0d7318 [file] [log] [blame]
// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#ifndef XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_
8#define XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_
9
10#include <stack>
dan sinclairbf58fbb2017-04-06 10:55:39 -040011#include <vector>
Dan Sinclairac355892017-04-03 16:46:21 -040012
13#include "core/fxcrt/cfx_blockbuffer.h"
14#include "core/fxcrt/cfx_retain_ptr.h"
15#include "core/fxcrt/fx_string.h"
16#include "xfa/fde/xml/cfde_xmlnode.h"
17#include "xfa/fgas/crt/ifgas_stream.h"
18
// Result codes produced by CFDE_XMLSyntaxParser::DoSyntaxParse().
//
// The enumerator order is kept exactly as declared so the implicit underlying
// values (None == 0 ... EndOfString == 14) do not change.
enum class FDE_XmlSyntaxResult {
  None,
  InstructionOpen,
  InstructionClose,
  ElementOpen,
  ElementBreak,
  ElementClose,
  TargetName,
  TagName,
  AttriName,
  AttriValue,
  Text,
  CData,
  TargetData,
  Error,
  EndOfString
};
36
37class CFDE_XMLSyntaxParser {
38 public:
Dan Sinclair480f62b2017-04-05 16:34:44 -040039 explicit CFDE_XMLSyntaxParser(const CFX_RetainPtr<IFGAS_Stream>& pStream);
Dan Sinclairac355892017-04-03 16:46:21 -040040 ~CFDE_XMLSyntaxParser();
41
Dan Sinclairac355892017-04-03 16:46:21 -040042 FDE_XmlSyntaxResult DoSyntaxParse();
43
44 int32_t GetStatus() const;
Nicolas Penaad22ac42017-04-06 12:28:47 -040045 FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; }
Dan Sinclairac355892017-04-03 16:46:21 -040046 FX_FILESIZE GetCurrentBinaryPos() const;
47 int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
48 int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
49
Dan Sinclair5f983bb2017-04-05 16:33:44 -040050 CFX_WideString GetTargetName() const {
51 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040052 }
Dan Sinclairee62ba22017-04-04 15:07:31 -040053
Dan Sinclair5f983bb2017-04-05 16:33:44 -040054 CFX_WideString GetTagName() const {
55 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040056 }
Dan Sinclairee62ba22017-04-04 15:07:31 -040057
Dan Sinclair5f983bb2017-04-05 16:33:44 -040058 CFX_WideString GetAttributeName() const {
59 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040060 }
Dan Sinclairee62ba22017-04-04 15:07:31 -040061
Dan Sinclair5f983bb2017-04-05 16:33:44 -040062 CFX_WideString GetAttributeValue() const {
63 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040064 }
Dan Sinclairee62ba22017-04-04 15:07:31 -040065
Dan Sinclair5f983bb2017-04-05 16:33:44 -040066 CFX_WideString GetTextData() const {
67 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040068 }
Dan Sinclairee62ba22017-04-04 15:07:31 -040069
Dan Sinclair5f983bb2017-04-05 16:33:44 -040070 CFX_WideString GetTargetData() const {
71 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040072 }
73
74 protected:
75 enum class FDE_XmlSyntaxState {
76 Text,
77 Node,
78 Target,
79 Tag,
80 AttriName,
81 AttriEqualSign,
82 AttriQuotation,
83 AttriValue,
84 Entity,
85 EntityDecimal,
86 EntityHex,
87 CloseInstruction,
88 BreakElement,
89 CloseElement,
90 SkipDeclNode,
91 DeclCharData,
92 SkipComment,
93 SkipCommentOrDecl,
94 SkipCData,
95 TargetData
96 };
97
98 void ParseTextChar(wchar_t ch);
99
100 CFX_RetainPtr<IFGAS_Stream> m_pStream;
101 int32_t m_iXMLPlaneSize;
102 int32_t m_iCurrentPos;
103 int32_t m_iCurrentNodeNum;
104 int32_t m_iLastNodeNum;
Dan Sinclairac355892017-04-03 16:46:21 -0400105 int32_t m_iParsedBytes;
Nicolas Penaad22ac42017-04-06 12:28:47 -0400106 FX_FILESIZE m_ParsedChars;
dan sinclairbf58fbb2017-04-06 10:55:39 -0400107 std::vector<wchar_t> m_Buffer;
Dan Sinclairac355892017-04-03 16:46:21 -0400108 int32_t m_iBufferChars;
109 bool m_bEOS;
Nicolas Penaad22ac42017-04-06 12:28:47 -0400110 FX_FILESIZE m_Start; // Start position in m_Buffer
111 FX_FILESIZE m_End; // End position in m_Buffer
Dan Sinclairac355892017-04-03 16:46:21 -0400112 FDE_XMLNODE m_CurNode;
113 std::stack<FDE_XMLNODE> m_XMLNodeStack;
114 CFX_BlockBuffer m_BlockBuffer;
115 int32_t m_iAllocStep;
116 int32_t& m_iDataLength;
dan sinclairbf58fbb2017-04-06 10:55:39 -0400117 wchar_t* m_pCurrentBlock; // Pointer into CFX_BlockBuffer
Dan Sinclairac355892017-04-03 16:46:21 -0400118 int32_t m_iIndexInBlock;
119 int32_t m_iTextDataLength;
120 FDE_XmlSyntaxResult m_syntaxParserResult;
121 FDE_XmlSyntaxState m_syntaxParserState;
122 wchar_t m_wQuotationMark;
123 int32_t m_iEntityStart;
124 std::stack<wchar_t> m_SkipStack;
125 wchar_t m_SkipChar;
126};
127
128#endif // XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_