blob: 93328b841a168bba00f7ec6ec704ed623bb1b238 [file] [log] [blame]
/* This file is included!
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000-2017 Expat development team
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
32
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070033#ifdef XML_TOK_IMPL_C
34
/* Fallback definition, used only when IS_INVALID_CHAR has not been defined
   before this point: no multi-byte sequence is ever reported as invalid. */
#if ! defined(IS_INVALID_CHAR)
#  define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
38
/* Switch case for the lead byte of an n-byte sequence (BT_LEAD<n>).
   Uses the identifiers `enc` and `end` from the expansion site.
   - fewer than n bytes left: returns XML_TOK_PARTIAL_CHAR;
   - IS_INVALID_CHAR rejects the sequence: stores ptr in *nextTokPtr and
     returns XML_TOK_INVALID;
   - otherwise: advances ptr by n and leaves the switch. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr >= n) { \
       if (IS_INVALID_CHAR(enc, ptr, n)) { \
         *(nextTokPtr) = (ptr); \
         return XML_TOK_INVALID; \
       } \
       ptr += n; \
       break; \
     } \
     return XML_TOK_PARTIAL_CHAR;
49
/* Switch cases shared by scanners that only need to reject bad input:
   the 2-, 3- and 4-byte lead cases (see INVALID_LEAD_CASE), followed by the
   byte types BT_NONXML, BT_MALFORM and BT_TRAIL, which always store ptr in
   *nextTokPtr and return XML_TOK_INVALID. */
#define INVALID_CASES(ptr, nextTokPtr) \
   INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
   INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
   INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
   case BT_TRAIL: \
   case BT_MALFORM: \
   case BT_NONXML: \
     *(nextTokPtr) = (ptr); \
     return XML_TOK_INVALID;
59
/* Switch case for an n-byte character (BT_LEAD<n>) at a position where a
   name character is required.
   - fewer than n bytes left: returns XML_TOK_PARTIAL_CHAR;
   - the sequence is rejected by IS_INVALID_CHAR, or is not a name
     character: stores ptr in *nextTokPtr and returns XML_TOK_INVALID;
   - otherwise: advances ptr by n and leaves the switch.
   The IS_INVALID_CHAR test comes first so that a malformed multi-byte
   sequence is never classified (and accepted) as a name character. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NAME_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;
70
/* Switch cases accepting one name character at ptr.
   BT_NONASCII is accepted only if IS_NAME_CHAR_MINBPC agrees (otherwise
   ptr is stored in *nextTokPtr and XML_TOK_INVALID is returned); it then
   shares the single-unit advance with the byte types that are always name
   characters.  Multi-byte characters go through CHECK_NAME_CASE. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_MINUS: \
  case BT_NAME: \
  case BT_DIGIT: \
  case BT_HEX: \
  case BT_NMSTRT: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
87
/* Switch case for an n-byte character (BT_LEAD<n>) at a position where a
   name-start character is required.
   - fewer than n bytes left: returns XML_TOK_PARTIAL_CHAR;
   - the sequence is rejected by IS_INVALID_CHAR, or is not a name-start
     character: stores ptr in *nextTokPtr and returns XML_TOK_INVALID;
   - otherwise: advances ptr by n and leaves the switch.
   The IS_INVALID_CHAR test comes first so that a malformed multi-byte
   sequence is never classified (and accepted) as a name-start character. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;
98
/* Switch cases accepting one name-start character at ptr.
   BT_NONASCII is accepted only if IS_NMSTRT_CHAR_MINBPC agrees (otherwise
   ptr is stored in *nextTokPtr and XML_TOK_INVALID is returned); it then
   shares the single-unit advance with BT_NMSTRT and BT_HEX.  Multi-byte
   characters go through CHECK_NMSTRT_CASE. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_HEX: \
  case BT_NMSTRT: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
112
/* Name-decoration hook for the functions below; when PREFIX has not been
   defined before this point, identifiers are used undecorated. */
#if ! defined(PREFIX)
#  define PREFIX(ident) ident
#endif
116
Victor Stinner23ec4b52017-06-15 00:54:36 +0200117
/* Input-availability helpers.

   HAS_CHARS(enc, ptr, end, count) is true when at least `count` units of
   MINBPC(enc) bytes lie between ptr and end.  Every argument is
   parenthesized so that expression arguments (e.g. `ptr + k` or `a + b`)
   are evaluated as the caller wrote them.

   HAS_CHAR is the single-character form.

   REQUIRE_CHARS / REQUIRE_CHAR make the enclosing function return
   XML_TOK_PARTIAL when that many characters are not available.  They expand
   to a brace block and are written with a trailing ';' at the call site. */
#define HAS_CHARS(enc, ptr, end, count) \
    ((end) - (ptr) >= ((count) * MINBPC(enc)))

#define HAS_CHAR(enc, ptr, end) \
    HAS_CHARS(enc, ptr, end, 1)

#define REQUIRE_CHARS(enc, ptr, end, count) \
    { \
      if (! HAS_CHARS(enc, ptr, end, count)) { \
        return XML_TOK_PARTIAL; \
      } \
    }

#define REQUIRE_CHAR(enc, ptr, end) \
    REQUIRE_CHARS(enc, ptr, end, 1)
133
134
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000135/* ptr points to character following "<!-" */
136
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000137static int PTRCALL
138PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
139 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000140{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200141 if (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000142 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
143 *nextTokPtr = ptr;
144 return XML_TOK_INVALID;
145 }
146 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200147 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000148 switch (BYTE_TYPE(enc, ptr)) {
149 INVALID_CASES(ptr, nextTokPtr)
150 case BT_MINUS:
Victor Stinner23ec4b52017-06-15 00:54:36 +0200151 ptr += MINBPC(enc);
152 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000153 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +0200154 ptr += MINBPC(enc);
155 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000156 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
157 *nextTokPtr = ptr;
158 return XML_TOK_INVALID;
159 }
160 *nextTokPtr = ptr + MINBPC(enc);
161 return XML_TOK_COMMENT;
162 }
163 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000164 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000165 ptr += MINBPC(enc);
166 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000167 }
168 }
169 }
170 return XML_TOK_PARTIAL;
171}
172
173/* ptr points to character following "<!" */
174
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000175static int PTRCALL
176PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
177 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000178{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200179 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000180 switch (BYTE_TYPE(enc, ptr)) {
181 case BT_MINUS:
182 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
183 case BT_LSQB:
184 *nextTokPtr = ptr + MINBPC(enc);
185 return XML_TOK_COND_SECT_OPEN;
186 case BT_NMSTRT:
187 case BT_HEX:
188 ptr += MINBPC(enc);
189 break;
190 default:
191 *nextTokPtr = ptr;
192 return XML_TOK_INVALID;
193 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200194 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000195 switch (BYTE_TYPE(enc, ptr)) {
196 case BT_PERCNT:
Victor Stinner23ec4b52017-06-15 00:54:36 +0200197 REQUIRE_CHARS(enc, ptr, end, 2);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000198 /* don't allow <!ENTITY% foo "whatever"> */
199 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
200 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000201 *nextTokPtr = ptr;
202 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000203 }
204 /* fall through */
205 case BT_S: case BT_CR: case BT_LF:
206 *nextTokPtr = ptr;
207 return XML_TOK_DECL_OPEN;
208 case BT_NMSTRT:
209 case BT_HEX:
210 ptr += MINBPC(enc);
211 break;
212 default:
213 *nextTokPtr = ptr;
214 return XML_TOK_INVALID;
215 }
216 }
217 return XML_TOK_PARTIAL;
218}
219
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000220static int PTRCALL
Victor Stinner23ec4b52017-06-15 00:54:36 +0200221PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000222 const char *end, int *tokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000223{
224 int upper = 0;
225 *tokPtr = XML_TOK_PI;
226 if (end - ptr != MINBPC(enc)*3)
227 return 1;
228 switch (BYTE_TO_ASCII(enc, ptr)) {
229 case ASCII_x:
230 break;
231 case ASCII_X:
232 upper = 1;
233 break;
234 default:
235 return 1;
236 }
237 ptr += MINBPC(enc);
238 switch (BYTE_TO_ASCII(enc, ptr)) {
239 case ASCII_m:
240 break;
241 case ASCII_M:
242 upper = 1;
243 break;
244 default:
245 return 1;
246 }
247 ptr += MINBPC(enc);
248 switch (BYTE_TO_ASCII(enc, ptr)) {
249 case ASCII_l:
250 break;
251 case ASCII_L:
252 upper = 1;
253 break;
254 default:
255 return 1;
256 }
257 if (upper)
258 return 0;
259 *tokPtr = XML_TOK_XML_DECL;
260 return 1;
261}
262
263/* ptr points to character following "<?" */
264
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000265static int PTRCALL
266PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
267 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000268{
269 int tok;
270 const char *target = ptr;
Victor Stinner23ec4b52017-06-15 00:54:36 +0200271 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000272 switch (BYTE_TYPE(enc, ptr)) {
273 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
274 default:
275 *nextTokPtr = ptr;
276 return XML_TOK_INVALID;
277 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200278 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000279 switch (BYTE_TYPE(enc, ptr)) {
280 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
281 case BT_S: case BT_CR: case BT_LF:
282 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000283 *nextTokPtr = ptr;
284 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000285 }
286 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200287 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000288 switch (BYTE_TYPE(enc, ptr)) {
289 INVALID_CASES(ptr, nextTokPtr)
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000290 case BT_QUEST:
291 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200292 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000293 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
294 *nextTokPtr = ptr + MINBPC(enc);
295 return tok;
296 }
297 break;
298 default:
299 ptr += MINBPC(enc);
300 break;
301 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000302 }
303 return XML_TOK_PARTIAL;
304 case BT_QUEST:
305 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000306 *nextTokPtr = ptr;
307 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000308 }
309 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200310 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000311 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000312 *nextTokPtr = ptr + MINBPC(enc);
313 return tok;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000314 }
315 /* fall through */
316 default:
317 *nextTokPtr = ptr;
318 return XML_TOK_INVALID;
319 }
320 }
321 return XML_TOK_PARTIAL;
322}
323
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000324static int PTRCALL
Victor Stinner23ec4b52017-06-15 00:54:36 +0200325PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000326 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000327{
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000328 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
329 ASCII_T, ASCII_A, ASCII_LSQB };
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000330 int i;
331 /* CDATA[ */
Victor Stinner23ec4b52017-06-15 00:54:36 +0200332 REQUIRE_CHARS(enc, ptr, end, 6);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000333 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
334 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
335 *nextTokPtr = ptr;
336 return XML_TOK_INVALID;
337 }
338 }
339 *nextTokPtr = ptr;
340 return XML_TOK_CDATA_SECT_OPEN;
341}
342
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000343static int PTRCALL
344PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
345 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000346{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200347 if (ptr >= end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000348 return XML_TOK_NONE;
349 if (MINBPC(enc) > 1) {
350 size_t n = end - ptr;
351 if (n & (MINBPC(enc) - 1)) {
352 n &= ~(MINBPC(enc) - 1);
353 if (n == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000354 return XML_TOK_PARTIAL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000355 end = ptr + n;
356 }
357 }
358 switch (BYTE_TYPE(enc, ptr)) {
359 case BT_RSQB:
360 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200361 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000362 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
363 break;
364 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200365 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000366 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
367 ptr -= MINBPC(enc);
368 break;
369 }
370 *nextTokPtr = ptr + MINBPC(enc);
371 return XML_TOK_CDATA_SECT_CLOSE;
372 case BT_CR:
373 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200374 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000375 if (BYTE_TYPE(enc, ptr) == BT_LF)
376 ptr += MINBPC(enc);
377 *nextTokPtr = ptr;
378 return XML_TOK_DATA_NEWLINE;
379 case BT_LF:
380 *nextTokPtr = ptr + MINBPC(enc);
381 return XML_TOK_DATA_NEWLINE;
382 INVALID_CASES(ptr, nextTokPtr)
383 default:
384 ptr += MINBPC(enc);
385 break;
386 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200387 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000388 switch (BYTE_TYPE(enc, ptr)) {
389#define LEAD_CASE(n) \
390 case BT_LEAD ## n: \
391 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000392 *nextTokPtr = ptr; \
393 return XML_TOK_DATA_CHARS; \
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000394 } \
395 ptr += n; \
396 break;
397 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
398#undef LEAD_CASE
399 case BT_NONXML:
400 case BT_MALFORM:
401 case BT_TRAIL:
402 case BT_CR:
403 case BT_LF:
404 case BT_RSQB:
405 *nextTokPtr = ptr;
406 return XML_TOK_DATA_CHARS;
407 default:
408 ptr += MINBPC(enc);
409 break;
410 }
411 }
412 *nextTokPtr = ptr;
413 return XML_TOK_DATA_CHARS;
414}
415
416/* ptr points to character following "</" */
417
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000418static int PTRCALL
419PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
420 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000421{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200422 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000423 switch (BYTE_TYPE(enc, ptr)) {
424 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
425 default:
426 *nextTokPtr = ptr;
427 return XML_TOK_INVALID;
428 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200429 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000430 switch (BYTE_TYPE(enc, ptr)) {
431 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
432 case BT_S: case BT_CR: case BT_LF:
Victor Stinner23ec4b52017-06-15 00:54:36 +0200433 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000434 switch (BYTE_TYPE(enc, ptr)) {
435 case BT_S: case BT_CR: case BT_LF:
436 break;
437 case BT_GT:
438 *nextTokPtr = ptr + MINBPC(enc);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000439 return XML_TOK_END_TAG;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000440 default:
441 *nextTokPtr = ptr;
442 return XML_TOK_INVALID;
443 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000444 }
445 return XML_TOK_PARTIAL;
446#ifdef XML_NS
447 case BT_COLON:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000448 /* no need to check qname syntax here,
449 since end-tag must match exactly */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000450 ptr += MINBPC(enc);
451 break;
452#endif
453 case BT_GT:
454 *nextTokPtr = ptr + MINBPC(enc);
455 return XML_TOK_END_TAG;
456 default:
457 *nextTokPtr = ptr;
458 return XML_TOK_INVALID;
459 }
460 }
461 return XML_TOK_PARTIAL;
462}
463
464/* ptr points to character following "&#X" */
465
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000466static int PTRCALL
467PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
468 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000469{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200470 if (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000471 switch (BYTE_TYPE(enc, ptr)) {
472 case BT_DIGIT:
473 case BT_HEX:
474 break;
475 default:
476 *nextTokPtr = ptr;
477 return XML_TOK_INVALID;
478 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200479 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000480 switch (BYTE_TYPE(enc, ptr)) {
481 case BT_DIGIT:
482 case BT_HEX:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000483 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000484 case BT_SEMI:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000485 *nextTokPtr = ptr + MINBPC(enc);
486 return XML_TOK_CHAR_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000487 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000488 *nextTokPtr = ptr;
489 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000490 }
491 }
492 }
493 return XML_TOK_PARTIAL;
494}
495
496/* ptr points to character following "&#" */
497
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000498static int PTRCALL
499PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
500 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000501{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200502 if (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000503 if (CHAR_MATCHES(enc, ptr, ASCII_x))
504 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
505 switch (BYTE_TYPE(enc, ptr)) {
506 case BT_DIGIT:
507 break;
508 default:
509 *nextTokPtr = ptr;
510 return XML_TOK_INVALID;
511 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200512 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000513 switch (BYTE_TYPE(enc, ptr)) {
514 case BT_DIGIT:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000515 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000516 case BT_SEMI:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000517 *nextTokPtr = ptr + MINBPC(enc);
518 return XML_TOK_CHAR_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000519 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000520 *nextTokPtr = ptr;
521 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000522 }
523 }
524 }
525 return XML_TOK_PARTIAL;
526}
527
528/* ptr points to character following "&" */
529
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000530static int PTRCALL
531PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
532 const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000533{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200534 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000535 switch (BYTE_TYPE(enc, ptr)) {
536 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
537 case BT_NUM:
538 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
539 default:
540 *nextTokPtr = ptr;
541 return XML_TOK_INVALID;
542 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200543 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000544 switch (BYTE_TYPE(enc, ptr)) {
545 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
546 case BT_SEMI:
547 *nextTokPtr = ptr + MINBPC(enc);
548 return XML_TOK_ENTITY_REF;
549 default:
550 *nextTokPtr = ptr;
551 return XML_TOK_INVALID;
552 }
553 }
554 return XML_TOK_PARTIAL;
555}
556
557/* ptr points to character following first character of attribute name */
558
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000559static int PTRCALL
560PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
561 const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000562{
563#ifdef XML_NS
564 int hadColon = 0;
565#endif
Victor Stinner23ec4b52017-06-15 00:54:36 +0200566 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000567 switch (BYTE_TYPE(enc, ptr)) {
568 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
569#ifdef XML_NS
570 case BT_COLON:
571 if (hadColon) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000572 *nextTokPtr = ptr;
573 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000574 }
575 hadColon = 1;
576 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200577 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000578 switch (BYTE_TYPE(enc, ptr)) {
579 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
580 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000581 *nextTokPtr = ptr;
582 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000583 }
584 break;
585#endif
586 case BT_S: case BT_CR: case BT_LF:
587 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000588 int t;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000589
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000590 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200591 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000592 t = BYTE_TYPE(enc, ptr);
593 if (t == BT_EQUALS)
594 break;
595 switch (t) {
596 case BT_S:
597 case BT_LF:
598 case BT_CR:
599 break;
600 default:
601 *nextTokPtr = ptr;
602 return XML_TOK_INVALID;
603 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000604 }
605 /* fall through */
606 case BT_EQUALS:
607 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000608 int open;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000609#ifdef XML_NS
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000610 hadColon = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000611#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000612 for (;;) {
613 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200614 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000615 open = BYTE_TYPE(enc, ptr);
616 if (open == BT_QUOT || open == BT_APOS)
617 break;
618 switch (open) {
619 case BT_S:
620 case BT_LF:
621 case BT_CR:
622 break;
623 default:
624 *nextTokPtr = ptr;
625 return XML_TOK_INVALID;
626 }
627 }
628 ptr += MINBPC(enc);
629 /* in attribute value */
630 for (;;) {
631 int t;
Victor Stinner23ec4b52017-06-15 00:54:36 +0200632 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000633 t = BYTE_TYPE(enc, ptr);
634 if (t == open)
635 break;
636 switch (t) {
637 INVALID_CASES(ptr, nextTokPtr)
638 case BT_AMP:
639 {
640 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
641 if (tok <= 0) {
642 if (tok == XML_TOK_INVALID)
643 *nextTokPtr = ptr;
644 return tok;
645 }
646 break;
647 }
648 case BT_LT:
649 *nextTokPtr = ptr;
650 return XML_TOK_INVALID;
651 default:
652 ptr += MINBPC(enc);
653 break;
654 }
655 }
656 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200657 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000658 switch (BYTE_TYPE(enc, ptr)) {
659 case BT_S:
660 case BT_CR:
661 case BT_LF:
662 break;
663 case BT_SOL:
664 goto sol;
665 case BT_GT:
666 goto gt;
667 default:
668 *nextTokPtr = ptr;
669 return XML_TOK_INVALID;
670 }
671 /* ptr points to closing quote */
672 for (;;) {
673 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200674 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000675 switch (BYTE_TYPE(enc, ptr)) {
676 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
677 case BT_S: case BT_CR: case BT_LF:
678 continue;
679 case BT_GT:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000680 gt:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000681 *nextTokPtr = ptr + MINBPC(enc);
682 return XML_TOK_START_TAG_WITH_ATTS;
683 case BT_SOL:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000684 sol:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000685 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200686 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000687 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
688 *nextTokPtr = ptr;
689 return XML_TOK_INVALID;
690 }
691 *nextTokPtr = ptr + MINBPC(enc);
692 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
693 default:
694 *nextTokPtr = ptr;
695 return XML_TOK_INVALID;
696 }
697 break;
698 }
699 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000700 }
701 default:
702 *nextTokPtr = ptr;
703 return XML_TOK_INVALID;
704 }
705 }
706 return XML_TOK_PARTIAL;
707}
708
709/* ptr points to character following "<" */
710
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000711static int PTRCALL
712PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
713 const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000714{
715#ifdef XML_NS
716 int hadColon;
717#endif
Victor Stinner23ec4b52017-06-15 00:54:36 +0200718 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000719 switch (BYTE_TYPE(enc, ptr)) {
720 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
721 case BT_EXCL:
Victor Stinner23ec4b52017-06-15 00:54:36 +0200722 ptr += MINBPC(enc);
723 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000724 switch (BYTE_TYPE(enc, ptr)) {
725 case BT_MINUS:
726 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
727 case BT_LSQB:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000728 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
729 end, nextTokPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000730 }
731 *nextTokPtr = ptr;
732 return XML_TOK_INVALID;
733 case BT_QUEST:
734 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
735 case BT_SOL:
736 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
737 default:
738 *nextTokPtr = ptr;
739 return XML_TOK_INVALID;
740 }
741#ifdef XML_NS
742 hadColon = 0;
743#endif
744 /* we have a start-tag */
Victor Stinner23ec4b52017-06-15 00:54:36 +0200745 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000746 switch (BYTE_TYPE(enc, ptr)) {
747 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
748#ifdef XML_NS
749 case BT_COLON:
750 if (hadColon) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000751 *nextTokPtr = ptr;
752 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000753 }
754 hadColon = 1;
755 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200756 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000757 switch (BYTE_TYPE(enc, ptr)) {
758 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
759 default:
760 *nextTokPtr = ptr;
761 return XML_TOK_INVALID;
762 }
763 break;
764#endif
765 case BT_S: case BT_CR: case BT_LF:
766 {
767 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200768 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000769 switch (BYTE_TYPE(enc, ptr)) {
770 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
771 case BT_GT:
772 goto gt;
773 case BT_SOL:
774 goto sol;
775 case BT_S: case BT_CR: case BT_LF:
776 ptr += MINBPC(enc);
777 continue;
778 default:
779 *nextTokPtr = ptr;
780 return XML_TOK_INVALID;
781 }
782 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
783 }
784 return XML_TOK_PARTIAL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000785 }
786 case BT_GT:
787 gt:
788 *nextTokPtr = ptr + MINBPC(enc);
789 return XML_TOK_START_TAG_NO_ATTS;
790 case BT_SOL:
791 sol:
792 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200793 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000794 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000795 *nextTokPtr = ptr;
796 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000797 }
798 *nextTokPtr = ptr + MINBPC(enc);
799 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
800 default:
801 *nextTokPtr = ptr;
802 return XML_TOK_INVALID;
803 }
804 }
805 return XML_TOK_PARTIAL;
806}
807
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000808static int PTRCALL
809PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
810 const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000811{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200812 if (ptr >= end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000813 return XML_TOK_NONE;
814 if (MINBPC(enc) > 1) {
815 size_t n = end - ptr;
816 if (n & (MINBPC(enc) - 1)) {
817 n &= ~(MINBPC(enc) - 1);
818 if (n == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000819 return XML_TOK_PARTIAL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000820 end = ptr + n;
821 }
822 }
823 switch (BYTE_TYPE(enc, ptr)) {
824 case BT_LT:
825 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
826 case BT_AMP:
827 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
828 case BT_CR:
829 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200830 if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000831 return XML_TOK_TRAILING_CR;
832 if (BYTE_TYPE(enc, ptr) == BT_LF)
833 ptr += MINBPC(enc);
834 *nextTokPtr = ptr;
835 return XML_TOK_DATA_NEWLINE;
836 case BT_LF:
837 *nextTokPtr = ptr + MINBPC(enc);
838 return XML_TOK_DATA_NEWLINE;
839 case BT_RSQB:
840 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200841 if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000842 return XML_TOK_TRAILING_RSQB;
843 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
844 break;
845 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200846 if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000847 return XML_TOK_TRAILING_RSQB;
848 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
849 ptr -= MINBPC(enc);
850 break;
851 }
852 *nextTokPtr = ptr;
853 return XML_TOK_INVALID;
854 INVALID_CASES(ptr, nextTokPtr)
855 default:
856 ptr += MINBPC(enc);
857 break;
858 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200859 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000860 switch (BYTE_TYPE(enc, ptr)) {
861#define LEAD_CASE(n) \
862 case BT_LEAD ## n: \
863 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000864 *nextTokPtr = ptr; \
865 return XML_TOK_DATA_CHARS; \
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000866 } \
867 ptr += n; \
868 break;
869 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
870#undef LEAD_CASE
871 case BT_RSQB:
Victor Stinner23ec4b52017-06-15 00:54:36 +0200872 if (HAS_CHARS(enc, ptr, end, 2)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000873 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
874 ptr += MINBPC(enc);
875 break;
876 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200877 if (HAS_CHARS(enc, ptr, end, 3)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000878 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
879 ptr += MINBPC(enc);
880 break;
881 }
882 *nextTokPtr = ptr + 2*MINBPC(enc);
883 return XML_TOK_INVALID;
884 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000885 }
886 /* fall through */
887 case BT_AMP:
888 case BT_LT:
889 case BT_NONXML:
890 case BT_MALFORM:
891 case BT_TRAIL:
892 case BT_CR:
893 case BT_LF:
894 *nextTokPtr = ptr;
895 return XML_TOK_DATA_CHARS;
896 default:
897 ptr += MINBPC(enc);
898 break;
899 }
900 }
901 *nextTokPtr = ptr;
902 return XML_TOK_DATA_CHARS;
903}
904
905/* ptr points to character following "%" */
906
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000907static int PTRCALL
908PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
909 const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000910{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200911 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000912 switch (BYTE_TYPE(enc, ptr)) {
913 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
914 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
915 *nextTokPtr = ptr;
916 return XML_TOK_PERCENT;
917 default:
918 *nextTokPtr = ptr;
919 return XML_TOK_INVALID;
920 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200921 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000922 switch (BYTE_TYPE(enc, ptr)) {
923 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
924 case BT_SEMI:
925 *nextTokPtr = ptr + MINBPC(enc);
926 return XML_TOK_PARAM_ENTITY_REF;
927 default:
928 *nextTokPtr = ptr;
929 return XML_TOK_INVALID;
930 }
931 }
932 return XML_TOK_PARTIAL;
933}
934
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000935static int PTRCALL
936PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
937 const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000938{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200939 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000940 switch (BYTE_TYPE(enc, ptr)) {
941 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
942 default:
943 *nextTokPtr = ptr;
944 return XML_TOK_INVALID;
945 }
Victor Stinner23ec4b52017-06-15 00:54:36 +0200946 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000947 switch (BYTE_TYPE(enc, ptr)) {
948 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
949 case BT_CR: case BT_LF: case BT_S:
950 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
951 *nextTokPtr = ptr;
952 return XML_TOK_POUND_NAME;
953 default:
954 *nextTokPtr = ptr;
955 return XML_TOK_INVALID;
956 }
957 }
958 return -XML_TOK_POUND_NAME;
959}
960
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000961static int PTRCALL
962PREFIX(scanLit)(int open, const ENCODING *enc,
963 const char *ptr, const char *end,
964 const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000965{
Victor Stinner23ec4b52017-06-15 00:54:36 +0200966 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000967 int t = BYTE_TYPE(enc, ptr);
968 switch (t) {
969 INVALID_CASES(ptr, nextTokPtr)
970 case BT_QUOT:
971 case BT_APOS:
972 ptr += MINBPC(enc);
973 if (t != open)
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000974 break;
Victor Stinner23ec4b52017-06-15 00:54:36 +0200975 if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000976 return -XML_TOK_LITERAL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000977 *nextTokPtr = ptr;
978 switch (BYTE_TYPE(enc, ptr)) {
979 case BT_S: case BT_CR: case BT_LF:
980 case BT_GT: case BT_PERCNT: case BT_LSQB:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000981 return XML_TOK_LITERAL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000982 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000983 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000984 }
985 default:
986 ptr += MINBPC(enc);
987 break;
988 }
989 }
990 return XML_TOK_PARTIAL;
991}
992
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000993static int PTRCALL
994PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
995 const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000996{
997 int tok;
Victor Stinner23ec4b52017-06-15 00:54:36 +0200998 if (ptr >= end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000999 return XML_TOK_NONE;
1000 if (MINBPC(enc) > 1) {
1001 size_t n = end - ptr;
1002 if (n & (MINBPC(enc) - 1)) {
1003 n &= ~(MINBPC(enc) - 1);
1004 if (n == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001005 return XML_TOK_PARTIAL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001006 end = ptr + n;
1007 }
1008 }
1009 switch (BYTE_TYPE(enc, ptr)) {
1010 case BT_QUOT:
1011 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1012 case BT_APOS:
1013 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1014 case BT_LT:
1015 {
1016 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +02001017 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001018 switch (BYTE_TYPE(enc, ptr)) {
1019 case BT_EXCL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001020 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001021 case BT_QUEST:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001022 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001023 case BT_NMSTRT:
1024 case BT_HEX:
1025 case BT_NONASCII:
1026 case BT_LEAD2:
1027 case BT_LEAD3:
1028 case BT_LEAD4:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001029 *nextTokPtr = ptr - MINBPC(enc);
1030 return XML_TOK_INSTANCE_START;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001031 }
1032 *nextTokPtr = ptr;
1033 return XML_TOK_INVALID;
1034 }
1035 case BT_CR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001036 if (ptr + MINBPC(enc) == end) {
1037 *nextTokPtr = end;
1038 /* indicate that this might be part of a CR/LF pair */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001039 return -XML_TOK_PROLOG_S;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001040 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001041 /* fall through */
1042 case BT_S: case BT_LF:
1043 for (;;) {
1044 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +02001045 if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001046 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001047 switch (BYTE_TYPE(enc, ptr)) {
1048 case BT_S: case BT_LF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001049 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001050 case BT_CR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001051 /* don't split CR/LF pair */
1052 if (ptr + MINBPC(enc) != end)
1053 break;
1054 /* fall through */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001055 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001056 *nextTokPtr = ptr;
1057 return XML_TOK_PROLOG_S;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001058 }
1059 }
1060 *nextTokPtr = ptr;
1061 return XML_TOK_PROLOG_S;
1062 case BT_PERCNT:
1063 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1064 case BT_COMMA:
1065 *nextTokPtr = ptr + MINBPC(enc);
1066 return XML_TOK_COMMA;
1067 case BT_LSQB:
1068 *nextTokPtr = ptr + MINBPC(enc);
1069 return XML_TOK_OPEN_BRACKET;
1070 case BT_RSQB:
1071 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +02001072 if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001073 return -XML_TOK_CLOSE_BRACKET;
1074 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02001075 REQUIRE_CHARS(enc, ptr, end, 2);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001076 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001077 *nextTokPtr = ptr + 2*MINBPC(enc);
1078 return XML_TOK_COND_SECT_CLOSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001079 }
1080 }
1081 *nextTokPtr = ptr;
1082 return XML_TOK_CLOSE_BRACKET;
1083 case BT_LPAR:
1084 *nextTokPtr = ptr + MINBPC(enc);
1085 return XML_TOK_OPEN_PAREN;
1086 case BT_RPAR:
1087 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +02001088 if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001089 return -XML_TOK_CLOSE_PAREN;
1090 switch (BYTE_TYPE(enc, ptr)) {
1091 case BT_AST:
1092 *nextTokPtr = ptr + MINBPC(enc);
1093 return XML_TOK_CLOSE_PAREN_ASTERISK;
1094 case BT_QUEST:
1095 *nextTokPtr = ptr + MINBPC(enc);
1096 return XML_TOK_CLOSE_PAREN_QUESTION;
1097 case BT_PLUS:
1098 *nextTokPtr = ptr + MINBPC(enc);
1099 return XML_TOK_CLOSE_PAREN_PLUS;
1100 case BT_CR: case BT_LF: case BT_S:
1101 case BT_GT: case BT_COMMA: case BT_VERBAR:
1102 case BT_RPAR:
1103 *nextTokPtr = ptr;
1104 return XML_TOK_CLOSE_PAREN;
1105 }
1106 *nextTokPtr = ptr;
1107 return XML_TOK_INVALID;
1108 case BT_VERBAR:
1109 *nextTokPtr = ptr + MINBPC(enc);
1110 return XML_TOK_OR;
1111 case BT_GT:
1112 *nextTokPtr = ptr + MINBPC(enc);
1113 return XML_TOK_DECL_CLOSE;
1114 case BT_NUM:
1115 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1116#define LEAD_CASE(n) \
1117 case BT_LEAD ## n: \
1118 if (end - ptr < n) \
1119 return XML_TOK_PARTIAL_CHAR; \
1120 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1121 ptr += n; \
1122 tok = XML_TOK_NAME; \
1123 break; \
1124 } \
1125 if (IS_NAME_CHAR(enc, ptr, n)) { \
1126 ptr += n; \
1127 tok = XML_TOK_NMTOKEN; \
1128 break; \
1129 } \
1130 *nextTokPtr = ptr; \
1131 return XML_TOK_INVALID;
1132 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1133#undef LEAD_CASE
1134 case BT_NMSTRT:
1135 case BT_HEX:
1136 tok = XML_TOK_NAME;
1137 ptr += MINBPC(enc);
1138 break;
1139 case BT_DIGIT:
1140 case BT_NAME:
1141 case BT_MINUS:
1142#ifdef XML_NS
1143 case BT_COLON:
1144#endif
1145 tok = XML_TOK_NMTOKEN;
1146 ptr += MINBPC(enc);
1147 break;
1148 case BT_NONASCII:
1149 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1150 ptr += MINBPC(enc);
1151 tok = XML_TOK_NAME;
1152 break;
1153 }
1154 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1155 ptr += MINBPC(enc);
1156 tok = XML_TOK_NMTOKEN;
1157 break;
1158 }
1159 /* fall through */
1160 default:
1161 *nextTokPtr = ptr;
1162 return XML_TOK_INVALID;
1163 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02001164 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001165 switch (BYTE_TYPE(enc, ptr)) {
1166 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1167 case BT_GT: case BT_RPAR: case BT_COMMA:
1168 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1169 case BT_S: case BT_CR: case BT_LF:
1170 *nextTokPtr = ptr;
1171 return tok;
1172#ifdef XML_NS
1173 case BT_COLON:
1174 ptr += MINBPC(enc);
1175 switch (tok) {
1176 case XML_TOK_NAME:
Victor Stinner23ec4b52017-06-15 00:54:36 +02001177 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001178 tok = XML_TOK_PREFIXED_NAME;
1179 switch (BYTE_TYPE(enc, ptr)) {
1180 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1181 default:
1182 tok = XML_TOK_NMTOKEN;
1183 break;
1184 }
1185 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001186 case XML_TOK_PREFIXED_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001187 tok = XML_TOK_NMTOKEN;
1188 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001189 }
1190 break;
1191#endif
1192 case BT_PLUS:
1193 if (tok == XML_TOK_NMTOKEN) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001194 *nextTokPtr = ptr;
1195 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001196 }
1197 *nextTokPtr = ptr + MINBPC(enc);
1198 return XML_TOK_NAME_PLUS;
1199 case BT_AST:
1200 if (tok == XML_TOK_NMTOKEN) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001201 *nextTokPtr = ptr;
1202 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001203 }
1204 *nextTokPtr = ptr + MINBPC(enc);
1205 return XML_TOK_NAME_ASTERISK;
1206 case BT_QUEST:
1207 if (tok == XML_TOK_NMTOKEN) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001208 *nextTokPtr = ptr;
1209 return XML_TOK_INVALID;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001210 }
1211 *nextTokPtr = ptr + MINBPC(enc);
1212 return XML_TOK_NAME_QUESTION;
1213 default:
1214 *nextTokPtr = ptr;
1215 return XML_TOK_INVALID;
1216 }
1217 }
1218 return -tok;
1219}
1220
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001221static int PTRCALL
1222PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
1223 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001224{
1225 const char *start;
Victor Stinner23ec4b52017-06-15 00:54:36 +02001226 if (ptr >= end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001227 return XML_TOK_NONE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001228 else if (! HAS_CHAR(enc, ptr, end)) {
1229 /* This line cannot be executed. The incoming data has already
1230 * been tokenized once, so incomplete characters like this have
1231 * already been eliminated from the input. Retaining the paranoia
1232 * check is still valuable, however.
1233 */
1234 return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1235 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001236 start = ptr;
Victor Stinner23ec4b52017-06-15 00:54:36 +02001237 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001238 switch (BYTE_TYPE(enc, ptr)) {
1239#define LEAD_CASE(n) \
1240 case BT_LEAD ## n: ptr += n; break;
1241 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1242#undef LEAD_CASE
1243 case BT_AMP:
1244 if (ptr == start)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001245 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001246 *nextTokPtr = ptr;
1247 return XML_TOK_DATA_CHARS;
1248 case BT_LT:
1249 /* this is for inside entity references */
1250 *nextTokPtr = ptr;
1251 return XML_TOK_INVALID;
1252 case BT_LF:
1253 if (ptr == start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001254 *nextTokPtr = ptr + MINBPC(enc);
1255 return XML_TOK_DATA_NEWLINE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001256 }
1257 *nextTokPtr = ptr;
1258 return XML_TOK_DATA_CHARS;
1259 case BT_CR:
1260 if (ptr == start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001261 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +02001262 if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001263 return XML_TOK_TRAILING_CR;
1264 if (BYTE_TYPE(enc, ptr) == BT_LF)
1265 ptr += MINBPC(enc);
1266 *nextTokPtr = ptr;
1267 return XML_TOK_DATA_NEWLINE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001268 }
1269 *nextTokPtr = ptr;
1270 return XML_TOK_DATA_CHARS;
1271 case BT_S:
1272 if (ptr == start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001273 *nextTokPtr = ptr + MINBPC(enc);
1274 return XML_TOK_ATTRIBUTE_VALUE_S;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001275 }
1276 *nextTokPtr = ptr;
1277 return XML_TOK_DATA_CHARS;
1278 default:
1279 ptr += MINBPC(enc);
1280 break;
1281 }
1282 }
1283 *nextTokPtr = ptr;
1284 return XML_TOK_DATA_CHARS;
1285}
1286
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001287static int PTRCALL
1288PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
1289 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001290{
1291 const char *start;
Victor Stinner23ec4b52017-06-15 00:54:36 +02001292 if (ptr >= end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001293 return XML_TOK_NONE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001294 else if (! HAS_CHAR(enc, ptr, end)) {
1295 /* This line cannot be executed. The incoming data has already
1296 * been tokenized once, so incomplete characters like this have
1297 * already been eliminated from the input. Retaining the paranoia
1298 * check is still valuable, however.
1299 */
1300 return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1301 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001302 start = ptr;
Victor Stinner23ec4b52017-06-15 00:54:36 +02001303 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001304 switch (BYTE_TYPE(enc, ptr)) {
1305#define LEAD_CASE(n) \
1306 case BT_LEAD ## n: ptr += n; break;
1307 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1308#undef LEAD_CASE
1309 case BT_AMP:
1310 if (ptr == start)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001311 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001312 *nextTokPtr = ptr;
1313 return XML_TOK_DATA_CHARS;
1314 case BT_PERCNT:
1315 if (ptr == start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001316 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1317 end, nextTokPtr);
1318 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001319 }
1320 *nextTokPtr = ptr;
1321 return XML_TOK_DATA_CHARS;
1322 case BT_LF:
1323 if (ptr == start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001324 *nextTokPtr = ptr + MINBPC(enc);
1325 return XML_TOK_DATA_NEWLINE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001326 }
1327 *nextTokPtr = ptr;
1328 return XML_TOK_DATA_CHARS;
1329 case BT_CR:
1330 if (ptr == start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001331 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +02001332 if (! HAS_CHAR(enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001333 return XML_TOK_TRAILING_CR;
1334 if (BYTE_TYPE(enc, ptr) == BT_LF)
1335 ptr += MINBPC(enc);
1336 *nextTokPtr = ptr;
1337 return XML_TOK_DATA_NEWLINE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001338 }
1339 *nextTokPtr = ptr;
1340 return XML_TOK_DATA_CHARS;
1341 default:
1342 ptr += MINBPC(enc);
1343 break;
1344 }
1345 }
1346 *nextTokPtr = ptr;
1347 return XML_TOK_DATA_CHARS;
1348}
1349
1350#ifdef XML_DTD
1351
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001352static int PTRCALL
1353PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
1354 const char *end, const char **nextTokPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001355{
1356 int level = 0;
1357 if (MINBPC(enc) > 1) {
1358 size_t n = end - ptr;
1359 if (n & (MINBPC(enc) - 1)) {
1360 n &= ~(MINBPC(enc) - 1);
1361 end = ptr + n;
1362 }
1363 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02001364 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001365 switch (BYTE_TYPE(enc, ptr)) {
1366 INVALID_CASES(ptr, nextTokPtr)
1367 case BT_LT:
Victor Stinner23ec4b52017-06-15 00:54:36 +02001368 ptr += MINBPC(enc);
1369 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001370 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02001371 ptr += MINBPC(enc);
1372 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001373 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1374 ++level;
1375 ptr += MINBPC(enc);
1376 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001377 }
1378 break;
1379 case BT_RSQB:
Victor Stinner23ec4b52017-06-15 00:54:36 +02001380 ptr += MINBPC(enc);
1381 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001382 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02001383 ptr += MINBPC(enc);
1384 REQUIRE_CHAR(enc, ptr, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001385 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1386 ptr += MINBPC(enc);
1387 if (level == 0) {
1388 *nextTokPtr = ptr;
1389 return XML_TOK_IGNORE_SECT;
1390 }
1391 --level;
1392 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001393 }
1394 break;
1395 default:
1396 ptr += MINBPC(enc);
1397 break;
1398 }
1399 }
1400 return XML_TOK_PARTIAL;
1401}
1402
1403#endif /* XML_DTD */
1404
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001405static int PTRCALL
1406PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1407 const char **badPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001408{
1409 ptr += MINBPC(enc);
1410 end -= MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +02001411 for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001412 switch (BYTE_TYPE(enc, ptr)) {
1413 case BT_DIGIT:
1414 case BT_HEX:
1415 case BT_MINUS:
1416 case BT_APOS:
1417 case BT_LPAR:
1418 case BT_RPAR:
1419 case BT_PLUS:
1420 case BT_COMMA:
1421 case BT_SOL:
1422 case BT_EQUALS:
1423 case BT_QUEST:
1424 case BT_CR:
1425 case BT_LF:
1426 case BT_SEMI:
1427 case BT_EXCL:
1428 case BT_AST:
1429 case BT_PERCNT:
1430 case BT_NUM:
1431#ifdef XML_NS
1432 case BT_COLON:
1433#endif
1434 break;
1435 case BT_S:
1436 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001437 *badPtr = ptr;
1438 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001439 }
1440 break;
1441 case BT_NAME:
1442 case BT_NMSTRT:
1443 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001444 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001445 default:
1446 switch (BYTE_TO_ASCII(enc, ptr)) {
1447 case 0x24: /* $ */
1448 case 0x40: /* @ */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001449 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001450 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001451 *badPtr = ptr;
1452 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001453 }
1454 break;
1455 }
1456 }
1457 return 1;
1458}
1459
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001460/* This must only be called for a well-formed start-tag or empty
1461 element tag. Returns the number of attributes. Pointers to the
1462 first attsMax attributes are stored in atts.
1463*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001464
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001465static int PTRCALL
1466PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1467 int attsMax, ATTRIBUTE *atts)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001468{
1469 enum { other, inName, inValue } state = inName;
1470 int nAtts = 0;
1471 int open = 0; /* defined when state == inValue;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001472 initialization just to shut up compilers */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001473
1474 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1475 switch (BYTE_TYPE(enc, ptr)) {
1476#define START_NAME \
1477 if (state == other) { \
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001478 if (nAtts < attsMax) { \
1479 atts[nAtts].name = ptr; \
1480 atts[nAtts].normalized = 1; \
1481 } \
1482 state = inName; \
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001483 }
1484#define LEAD_CASE(n) \
1485 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1486 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1487#undef LEAD_CASE
1488 case BT_NONASCII:
1489 case BT_NMSTRT:
1490 case BT_HEX:
1491 START_NAME
1492 break;
1493#undef START_NAME
1494 case BT_QUOT:
1495 if (state != inValue) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001496 if (nAtts < attsMax)
1497 atts[nAtts].valuePtr = ptr + MINBPC(enc);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001498 state = inValue;
1499 open = BT_QUOT;
1500 }
1501 else if (open == BT_QUOT) {
1502 state = other;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001503 if (nAtts < attsMax)
1504 atts[nAtts].valueEnd = ptr;
1505 nAtts++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001506 }
1507 break;
1508 case BT_APOS:
1509 if (state != inValue) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001510 if (nAtts < attsMax)
1511 atts[nAtts].valuePtr = ptr + MINBPC(enc);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001512 state = inValue;
1513 open = BT_APOS;
1514 }
1515 else if (open == BT_APOS) {
1516 state = other;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001517 if (nAtts < attsMax)
1518 atts[nAtts].valueEnd = ptr;
1519 nAtts++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001520 }
1521 break;
1522 case BT_AMP:
1523 if (nAtts < attsMax)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001524 atts[nAtts].normalized = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001525 break;
1526 case BT_S:
1527 if (state == inName)
1528 state = other;
1529 else if (state == inValue
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001530 && nAtts < attsMax
1531 && atts[nAtts].normalized
1532 && (ptr == atts[nAtts].valuePtr
1533 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1534 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1535 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1536 atts[nAtts].normalized = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001537 break;
1538 case BT_CR: case BT_LF:
1539 /* This case ensures that the first attribute name is counted
1540 Apart from that we could just change state on the quote. */
1541 if (state == inName)
1542 state = other;
1543 else if (state == inValue && nAtts < attsMax)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001544 atts[nAtts].normalized = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001545 break;
1546 case BT_GT:
1547 case BT_SOL:
1548 if (state != inValue)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001549 return nAtts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001550 break;
1551 default:
1552 break;
1553 }
1554 }
1555 /* not reached */
1556}
1557
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001558static int PTRFASTCALL
Victor Stinner23ec4b52017-06-15 00:54:36 +02001559PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001560{
1561 int result = 0;
1562 /* skip &# */
1563 ptr += 2*MINBPC(enc);
1564 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001565 for (ptr += MINBPC(enc);
1566 !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1567 ptr += MINBPC(enc)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001568 int c = BYTE_TO_ASCII(enc, ptr);
1569 switch (c) {
1570 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1571 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001572 result <<= 4;
1573 result |= (c - ASCII_0);
1574 break;
1575 case ASCII_A: case ASCII_B: case ASCII_C:
1576 case ASCII_D: case ASCII_E: case ASCII_F:
1577 result <<= 4;
1578 result += 10 + (c - ASCII_A);
1579 break;
1580 case ASCII_a: case ASCII_b: case ASCII_c:
1581 case ASCII_d: case ASCII_e: case ASCII_f:
1582 result <<= 4;
1583 result += 10 + (c - ASCII_a);
1584 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001585 }
1586 if (result >= 0x110000)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001587 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001588 }
1589 }
1590 else {
1591 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1592 int c = BYTE_TO_ASCII(enc, ptr);
1593 result *= 10;
1594 result += (c - ASCII_0);
1595 if (result >= 0x110000)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001596 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001597 }
1598 }
1599 return checkCharRefNumber(result);
1600}
1601
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001602static int PTRCALL
Victor Stinner23ec4b52017-06-15 00:54:36 +02001603PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001604 const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001605{
1606 switch ((end - ptr)/MINBPC(enc)) {
1607 case 2:
1608 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1609 switch (BYTE_TO_ASCII(enc, ptr)) {
1610 case ASCII_l:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001611 return ASCII_LT;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001612 case ASCII_g:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001613 return ASCII_GT;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001614 }
1615 }
1616 break;
1617 case 3:
1618 if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1619 ptr += MINBPC(enc);
1620 if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001621 ptr += MINBPC(enc);
1622 if (CHAR_MATCHES(enc, ptr, ASCII_p))
1623 return ASCII_AMP;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001624 }
1625 }
1626 break;
1627 case 4:
1628 switch (BYTE_TO_ASCII(enc, ptr)) {
1629 case ASCII_q:
1630 ptr += MINBPC(enc);
1631 if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001632 ptr += MINBPC(enc);
1633 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1634 ptr += MINBPC(enc);
1635 if (CHAR_MATCHES(enc, ptr, ASCII_t))
1636 return ASCII_QUOT;
1637 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001638 }
1639 break;
1640 case ASCII_a:
1641 ptr += MINBPC(enc);
1642 if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001643 ptr += MINBPC(enc);
1644 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1645 ptr += MINBPC(enc);
1646 if (CHAR_MATCHES(enc, ptr, ASCII_s))
1647 return ASCII_APOS;
1648 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001649 }
1650 break;
1651 }
1652 }
1653 return 0;
1654}
1655
Victor Stinner93d0cb52017-08-18 23:43:54 +02001656/* This function does not appear to be called from anywhere within the
1657 * library code. It is used via the macro XmlSameName(), which is
1658 * defined but never used. Since it appears in the encoding function
1659 * table, removing it is not a thing to be undertaken lightly. For
1660 * the moment, we simply exclude it from coverage tests.
1661 *
1662 * LCOV_EXCL_START
1663 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001664static int PTRCALL
1665PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001666{
1667 for (;;) {
1668 switch (BYTE_TYPE(enc, ptr1)) {
1669#define LEAD_CASE(n) \
1670 case BT_LEAD ## n: \
1671 if (*ptr1++ != *ptr2++) \
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001672 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001673 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1674#undef LEAD_CASE
1675 /* fall through */
1676 if (*ptr1++ != *ptr2++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001677 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001678 break;
1679 case BT_NONASCII:
1680 case BT_NMSTRT:
1681#ifdef XML_NS
1682 case BT_COLON:
1683#endif
1684 case BT_HEX:
1685 case BT_DIGIT:
1686 case BT_NAME:
1687 case BT_MINUS:
1688 if (*ptr2++ != *ptr1++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001689 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001690 if (MINBPC(enc) > 1) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001691 if (*ptr2++ != *ptr1++)
1692 return 0;
1693 if (MINBPC(enc) > 2) {
1694 if (*ptr2++ != *ptr1++)
1695 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001696 if (MINBPC(enc) > 3) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001697 if (*ptr2++ != *ptr1++)
1698 return 0;
1699 }
1700 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001701 }
1702 break;
1703 default:
1704 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001705 return 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001706 switch (BYTE_TYPE(enc, ptr2)) {
1707 case BT_LEAD2:
1708 case BT_LEAD3:
1709 case BT_LEAD4:
1710 case BT_NONASCII:
1711 case BT_NMSTRT:
1712#ifdef XML_NS
1713 case BT_COLON:
1714#endif
1715 case BT_HEX:
1716 case BT_DIGIT:
1717 case BT_NAME:
1718 case BT_MINUS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001719 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001720 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001721 return 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001722 }
1723 }
1724 }
1725 /* not reached */
1726}
Victor Stinner93d0cb52017-08-18 23:43:54 +02001727/* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001728
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001729static int PTRCALL
Victor Stinner23ec4b52017-06-15 00:54:36 +02001730PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001731 const char *end1, const char *ptr2)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001732{
1733 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001734 if (end1 - ptr1 < MINBPC(enc)) {
1735 /* This line cannot be executed. THe incoming data has already
1736 * been tokenized once, so imcomplete characters like this have
1737 * already been eliminated from the input. Retaining the
1738 * paranoia check is still valuable, however.
1739 */
1740 return 0; /* LCOV_EXCL_LINE */
1741 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001742 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1743 return 0;
1744 }
1745 return ptr1 == end1;
1746}
1747
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001748static int PTRFASTCALL
1749PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001750{
1751 const char *start = ptr;
1752 for (;;) {
1753 switch (BYTE_TYPE(enc, ptr)) {
1754#define LEAD_CASE(n) \
1755 case BT_LEAD ## n: ptr += n; break;
1756 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1757#undef LEAD_CASE
1758 case BT_NONASCII:
1759 case BT_NMSTRT:
1760#ifdef XML_NS
1761 case BT_COLON:
1762#endif
1763 case BT_HEX:
1764 case BT_DIGIT:
1765 case BT_NAME:
1766 case BT_MINUS:
1767 ptr += MINBPC(enc);
1768 break;
1769 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001770 return (int)(ptr - start);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001771 }
1772 }
1773}
1774
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001775static const char * PTRFASTCALL
1776PREFIX(skipS)(const ENCODING *enc, const char *ptr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001777{
1778 for (;;) {
1779 switch (BYTE_TYPE(enc, ptr)) {
1780 case BT_LF:
1781 case BT_CR:
1782 case BT_S:
1783 ptr += MINBPC(enc);
1784 break;
1785 default:
1786 return ptr;
1787 }
1788 }
1789}
1790
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001791static void PTRCALL
1792PREFIX(updatePosition)(const ENCODING *enc,
1793 const char *ptr,
1794 const char *end,
1795 POSITION *pos)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001796{
Victor Stinner23ec4b52017-06-15 00:54:36 +02001797 while (HAS_CHAR(enc, ptr, end)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001798 switch (BYTE_TYPE(enc, ptr)) {
1799#define LEAD_CASE(n) \
1800 case BT_LEAD ## n: \
1801 ptr += n; \
1802 break;
1803 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1804#undef LEAD_CASE
1805 case BT_LF:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001806 pos->columnNumber = (XML_Size)-1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001807 pos->lineNumber++;
1808 ptr += MINBPC(enc);
1809 break;
1810 case BT_CR:
1811 pos->lineNumber++;
1812 ptr += MINBPC(enc);
Victor Stinner23ec4b52017-06-15 00:54:36 +02001813 if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001814 ptr += MINBPC(enc);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001815 pos->columnNumber = (XML_Size)-1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001816 break;
1817 default:
1818 ptr += MINBPC(enc);
1819 break;
1820 }
1821 pos->columnNumber++;
1822 }
1823}
1824
1825#undef DO_LEAD_CASE
1826#undef MULTIBYTE_CASES
1827#undef INVALID_CASES
1828#undef CHECK_NAME_CASE
1829#undef CHECK_NAME_CASES
1830#undef CHECK_NMSTRT_CASE
1831#undef CHECK_NMSTRT_CASES
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001832
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001833#endif /* XML_TOK_IMPL_C */