blob: 372b2192e2b1b3647ccd226a44f744590d464ede [file] [log] [blame]
Jarkko Poyry3c827362014-09-02 11:48:52 +03001#ifndef _XEXMLPARSER_HPP
2#define _XEXMLPARSER_HPP
3/*-------------------------------------------------------------------------
4 * drawElements Quality Program Test Executor
5 * ------------------------------------------
6 *
7 * Copyright 2014 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief XML Parser.
24 *
25 * \todo [2012-06-07 pyry] Not supported / handled properly:
26 * - xml namespaces (<ns:Element>)
27 * - backslash escapes in strings
28 * - &quot; -style escapes
29 * - utf-8
30 *//*--------------------------------------------------------------------*/
31
32#include "xeDefs.hpp"
33#include "deRingBuffer.hpp"
34
35#include <string>
36#include <map>
37
38namespace xe
39{
40namespace xml
41{
42
//! Token types produced by the XML tokenizer.
enum Token
{
	//! Not enough data to determine token.
	TOKEN_INCOMPLETE = 0,
	//! End of document string.
	TOKEN_END_OF_STRING,
	//! Block of data (anything outside tags).
	TOKEN_DATA,
	//! <!-- comment -->
	TOKEN_COMMENT,
	//! Identifier (in tags).
	TOKEN_IDENTIFIER,
	//! String (in tags).
	TOKEN_STRING,
	//! <
	TOKEN_TAG_START,
	//! >
	TOKEN_TAG_END,
	//! </
	TOKEN_END_TAG_START,
	//! />
	TOKEN_EMPTY_ELEMENT_END,
	//! <?
	TOKEN_PROCESSING_INSTRUCTION_START,
	//! ?>
	TOKEN_PROCESSING_INSTRUCTION_END,
	//! =
	TOKEN_EQUAL,
	//! Entity reference, such as &amp;
	TOKEN_ENTITY,

	//! Number of token types; keep as the final enumerator.
	TOKEN_LAST
};
62
//! Element types reported by the XML parser.
enum Element
{
	//! Incomplete element.
	ELEMENT_INCOMPLETE = 0,
	//! Element start.
	ELEMENT_START,
	//! Element end.
	ELEMENT_END,
	//! Data element.
	ELEMENT_DATA,
	//! End of document string.
	ELEMENT_END_OF_STRING,

	//! Number of element types; keep as the final enumerator.
	ELEMENT_LAST
};
73
74const char* getTokenName (Token token);
75
76// \todo [2012-10-17 pyry] Add line number etc.
77class ParseError : public xe::ParseError
78{
79public:
80 ParseError (const std::string& message) : xe::ParseError(message) {}
81};
82
83class Tokenizer
84{
85public:
86 Tokenizer (void);
87 ~Tokenizer (void);
88
89 void clear (void); //!< Resets tokenizer to initial state.
90
91 void feed (const deUint8* bytes, int numBytes);
92 void advance (void);
93
94 Token getToken (void) const { return m_curToken; }
95 int getTokenLen (void) const { return m_curTokenLen; }
96 deUint8 getTokenByte (int offset) const { DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING); return m_buf.peekBack(offset); }
97 void getTokenStr (std::string& dst) const;
98 void appendTokenStr (std::string& dst) const;
99
100 void getString (std::string& dst) const;
101
102private:
103 Tokenizer (const Tokenizer& other);
104 Tokenizer& operator= (const Tokenizer& other);
105
106 int getChar (int offset) const;
107
108 void error (const std::string& what);
109
110 enum State
111 {
112 STATE_DATA = 0,
113 STATE_TAG,
114 STATE_IDENTIFIER,
115 STATE_VALUE,
116 STATE_COMMENT,
117 STATE_ENTITY,
118
119 STATE_LAST
120 };
121
122 enum
123 {
124 END_OF_STRING = 0, //!< End of string (0).
125 END_OF_BUFFER = 0xffffffff //!< End of current data buffer.
126 };
127
128 Token m_curToken; //!< Current token.
129 int m_curTokenLen; //!< Length of current token.
130
131 State m_state; //!< Tokenization state.
132
133 de::RingBuffer<deUint8> m_buf;
134};
135
136class Parser
137{
138public:
139 typedef std::map<std::string, std::string> AttributeMap;
140 typedef AttributeMap::const_iterator AttributeIter;
141
142 Parser (void);
143 ~Parser (void);
144
145 void clear (void); //!< Resets parser to initial state.
146
147 void feed (const deUint8* bytes, int numBytes);
148 void advance (void);
149
150 Element getElement (void) const { return m_element; }
151
152 // For ELEMENT_START / ELEMENT_END.
153 const char* getElementName (void) const { return m_elementName.c_str(); }
154
155 // For ELEMENT_START.
156 bool hasAttribute (const char* name) const { return m_attributes.find(name) != m_attributes.end(); }
157 const char* getAttribute (const char* name) const { return m_attributes.find(name)->second.c_str(); }
158 const AttributeMap& attributes (void) const { return m_attributes; }
159
160 // For ELEMENT_DATA.
161 int getDataSize (void) const;
162 deUint8 getDataByte (int offset) const;
163 void getDataStr (std::string& dst) const;
164 void appendDataStr (std::string& dst) const;
165
166private:
167 Parser (const Parser& other);
168 Parser& operator= (const Parser& other);
169
170 void parseEntityValue (void);
171
172 void error (const std::string& what);
173
174 enum State
175 {
176 STATE_DATA = 0, //!< Initial state - assuming data or tag open.
177 STATE_ENTITY, //!< Parsed entity is stored - overrides data.
178 STATE_IN_PROCESSING_INSTRUCTION, //!< In processing instruction.
179 STATE_START_TAG_OPEN, //!< Start tag open.
180 STATE_END_TAG_OPEN, //!< End tag open.
181 STATE_EXPECTING_END_TAG_CLOSE, //!< Expecting end tag close.
182 STATE_ATTRIBUTE_LIST, //!< Expecting attribute list.
183 STATE_EXPECTING_ATTRIBUTE_EQ, //!< Got attribute name, expecting =.
184 STATE_EXPECTING_ATTRIBUTE_VALUE, //!< Expecting attribute value.
185 STATE_YIELD_EMPTY_ELEMENT_END, //!< Empty element: start has been reported but not end.
186
187 STATE_LAST
188 };
189
190 Tokenizer m_tokenizer;
191
192 Element m_element;
193 std::string m_elementName;
194 AttributeMap m_attributes;
195
196 State m_state;
197 std::string m_attribName;
198 std::string m_entityValue; //!< Data override, such as entity value.
199};
200
201// Inline implementations
202
203inline void Tokenizer::getTokenStr (std::string& dst) const
204{
205 DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
206 dst.resize(m_curTokenLen);
207 for (int ndx = 0; ndx < m_curTokenLen; ndx++)
208 dst[ndx] = m_buf.peekBack(ndx);
209}
210
211inline void Tokenizer::appendTokenStr (std::string& dst) const
212{
213 DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
214
215 size_t oldLen = dst.size();
216 dst.resize(oldLen+m_curTokenLen);
217
218 for (int ndx = 0; ndx < m_curTokenLen; ndx++)
219 dst[oldLen+ndx] = m_buf.peekBack(ndx);
220}
221
222inline int Parser::getDataSize (void) const
223{
224 if (m_state != STATE_ENTITY)
225 return m_tokenizer.getTokenLen();
226 else
227 return (int)m_entityValue.size();
228}
229
230inline deUint8 Parser::getDataByte (int offset) const
231{
232 if (m_state != STATE_ENTITY)
233 return m_tokenizer.getTokenByte(offset);
234 else
235 return (deUint8)m_entityValue[offset];
236}
237
238inline void Parser::getDataStr (std::string& dst) const
239{
240 if (m_state != STATE_ENTITY)
241 return m_tokenizer.getTokenStr(dst);
242 else
243 dst = m_entityValue;
244}
245
246inline void Parser::appendDataStr (std::string& dst) const
247{
248 if (m_state != STATE_ENTITY)
249 return m_tokenizer.appendTokenStr(dst);
250 else
251 dst += m_entityValue;
252}
253
254} // xml
255} // xe
256
257#endif // _XEXMLPARSER_HPP