// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

Dan Sinclair0d86ecb2017-04-19 09:19:57 -04007#ifndef CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_
8#define CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_
Dan Sinclairac355892017-04-03 16:46:21 -04009
10#include <stack>
dan sinclairbf58fbb2017-04-06 10:55:39 -040011#include <vector>
Dan Sinclairac355892017-04-03 16:46:21 -040012
13#include "core/fxcrt/cfx_blockbuffer.h"
Dan Sinclair3b71d262017-04-19 08:58:54 -040014#include "core/fxcrt/cfx_seekablestreamproxy.h"
Dan Sinclairac355892017-04-03 16:46:21 -040015#include "core/fxcrt/fx_string.h"
Dan Sinclair0b950422017-09-21 15:49:49 -040016#include "core/fxcrt/retain_ptr.h"
Dan Sinclair0d86ecb2017-04-19 09:19:57 -040017#include "core/fxcrt/xml/cfx_xmlnode.h"
Dan Sinclairac355892017-04-03 16:46:21 -040018
// Result codes returned by CFX_XMLSyntaxParser::DoSyntaxParse().
//
// The enumerators are kept in their original order so their underlying
// integer values (None == 0 ... EndOfString == 14) do not change.
// NOTE(review): the names suggest each value identifies the kind of XML
// construct the parser just recognized; confirm against the implementation.
enum class FX_XmlSyntaxResult {
  None,
  InstructionOpen,
  InstructionClose,
  ElementOpen,
  ElementBreak,
  ElementClose,
  TargetName,
  TagName,
  AttriName,
  AttriValue,
  Text,
  CData,
  TargetData,
  Error,
  EndOfString
};
36
Dan Sinclair0d86ecb2017-04-19 09:19:57 -040037class CFX_XMLSyntaxParser {
Dan Sinclairac355892017-04-03 16:46:21 -040038 public:
Tom Sepezaeee1872017-04-20 14:31:18 -070039 static bool IsXMLNameChar(wchar_t ch, bool bFirstChar);
40
Dan Sinclair0d86ecb2017-04-19 09:19:57 -040041 explicit CFX_XMLSyntaxParser(
Dan Sinclair0b950422017-09-21 15:49:49 -040042 const RetainPtr<CFX_SeekableStreamProxy>& pStream);
Dan Sinclair0d86ecb2017-04-19 09:19:57 -040043 ~CFX_XMLSyntaxParser();
Dan Sinclairac355892017-04-03 16:46:21 -040044
Dan Sinclair0d86ecb2017-04-19 09:19:57 -040045 FX_XmlSyntaxResult DoSyntaxParse();
Dan Sinclairac355892017-04-03 16:46:21 -040046
47 int32_t GetStatus() const;
Nicolas Penaad22ac42017-04-06 12:28:47 -040048 FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; }
Dan Sinclairac355892017-04-03 16:46:21 -040049 FX_FILESIZE GetCurrentBinaryPos() const;
50 int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
51 int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
52
Ryan Harrison275e2602017-09-18 14:23:18 -040053 WideString GetTargetName() const {
Dan Sinclair5f983bb2017-04-05 16:33:44 -040054 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040055 }
Dan Sinclairee62ba22017-04-04 15:07:31 -040056
Ryan Harrison275e2602017-09-18 14:23:18 -040057 WideString GetTagName() const {
Dan Sinclair5f983bb2017-04-05 16:33:44 -040058 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040059 }
Dan Sinclairee62ba22017-04-04 15:07:31 -040060
Ryan Harrison275e2602017-09-18 14:23:18 -040061 WideString GetAttributeName() const {
Dan Sinclair5f983bb2017-04-05 16:33:44 -040062 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040063 }
Dan Sinclairee62ba22017-04-04 15:07:31 -040064
Ryan Harrison275e2602017-09-18 14:23:18 -040065 WideString GetAttributeValue() const {
Dan Sinclair5f983bb2017-04-05 16:33:44 -040066 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040067 }
Dan Sinclairee62ba22017-04-04 15:07:31 -040068
Ryan Harrison275e2602017-09-18 14:23:18 -040069 WideString GetTextData() const {
Dan Sinclair5f983bb2017-04-05 16:33:44 -040070 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040071 }
Dan Sinclairee62ba22017-04-04 15:07:31 -040072
Ryan Harrison275e2602017-09-18 14:23:18 -040073 WideString GetTargetData() const {
Dan Sinclair5f983bb2017-04-05 16:33:44 -040074 return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
Dan Sinclairac355892017-04-03 16:46:21 -040075 }
76
77 protected:
78 enum class FDE_XmlSyntaxState {
79 Text,
80 Node,
81 Target,
82 Tag,
83 AttriName,
84 AttriEqualSign,
85 AttriQuotation,
86 AttriValue,
87 Entity,
88 EntityDecimal,
89 EntityHex,
90 CloseInstruction,
91 BreakElement,
92 CloseElement,
93 SkipDeclNode,
94 DeclCharData,
95 SkipComment,
96 SkipCommentOrDecl,
97 SkipCData,
98 TargetData
99 };
100
101 void ParseTextChar(wchar_t ch);
102
Dan Sinclair0b950422017-09-21 15:49:49 -0400103 RetainPtr<CFX_SeekableStreamProxy> m_pStream;
Ryan Harrison875e98c2017-09-27 10:53:11 -0400104 size_t m_iXMLPlaneSize;
Ryan Harrison980a3ea2017-08-30 10:22:55 -0400105 FX_FILESIZE m_iCurrentPos;
Dan Sinclairac355892017-04-03 16:46:21 -0400106 int32_t m_iCurrentNodeNum;
107 int32_t m_iLastNodeNum;
Dan Sinclairac355892017-04-03 16:46:21 -0400108 int32_t m_iParsedBytes;
Nicolas Penaad22ac42017-04-06 12:28:47 -0400109 FX_FILESIZE m_ParsedChars;
dan sinclairbf58fbb2017-04-06 10:55:39 -0400110 std::vector<wchar_t> m_Buffer;
Ryan Harrison875e98c2017-09-27 10:53:11 -0400111 size_t m_iBufferChars;
Dan Sinclairac355892017-04-03 16:46:21 -0400112 bool m_bEOS;
Nicolas Penaad22ac42017-04-06 12:28:47 -0400113 FX_FILESIZE m_Start; // Start position in m_Buffer
114 FX_FILESIZE m_End; // End position in m_Buffer
Dan Sinclair0d86ecb2017-04-19 09:19:57 -0400115 FX_XMLNODE m_CurNode;
116 std::stack<FX_XMLNODE> m_XMLNodeStack;
Dan Sinclairac355892017-04-03 16:46:21 -0400117 CFX_BlockBuffer m_BlockBuffer;
118 int32_t m_iAllocStep;
dan sinclairbf58fbb2017-04-06 10:55:39 -0400119 wchar_t* m_pCurrentBlock; // Pointer into CFX_BlockBuffer
Dan Sinclairac355892017-04-03 16:46:21 -0400120 int32_t m_iIndexInBlock;
121 int32_t m_iTextDataLength;
Dan Sinclair0d86ecb2017-04-19 09:19:57 -0400122 FX_XmlSyntaxResult m_syntaxParserResult;
Dan Sinclairac355892017-04-03 16:46:21 -0400123 FDE_XmlSyntaxState m_syntaxParserState;
124 wchar_t m_wQuotationMark;
125 int32_t m_iEntityStart;
126 std::stack<wchar_t> m_SkipStack;
127 wchar_t m_SkipChar;
128};
129
Dan Sinclair0d86ecb2017-04-19 09:19:57 -0400130#endif // CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_