blob: bdb50c63176905ec0016014498a0e729d1e7fa22 [file] [log] [blame]
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
5
/* Version-control identification string.  "$Header$" looks like an RCS/CVS
   keyword placeholder -- presumably expanded on checkout (TODO confirm). */
static char RCSId[] = "$Header$";
8
9#ifdef COMPILED_FROM_DSP
10# include "winconfig.h"
11#else
Martin v. Löwis481f68a2002-02-11 23:16:32 +000012#ifdef HAVE_CONFIG_H
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +000013# include <config.h>
Martin v. Löwis481f68a2002-02-11 23:16:32 +000014#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +000015#endif /* ndef COMPILED_FROM_DSP */
16
17#include "xmlrole.h"
18#include "ascii.h"
19
/* The state machine in this file doesn't check:

 - that ',' and '|' are not mixed within a model group
 - the content of literals

*/
26
27static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' };
28static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
29static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
30static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
31static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
32static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
33static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
34static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
35static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
36static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' };
37static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
38static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
39static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
40static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
41static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
42static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
43static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
44static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
45static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' };
46static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
47static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
48static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' };
49static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
50
/* Fallback definition, used only when MIN_BYTES_PER_CHAR has not already
   been defined: read the value from the encoding's minBytesPerChar field. */
#if !defined(MIN_BYTES_PER_CHAR)
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
54
/* setTopLevel(state): install the handler used between declarations.
   Without XML_DTD this is always internalSubset.  With XML_DTD it is
   internalSubset when state->documentEntity is non-zero and externalSubset1
   otherwise.  The macro is an assignment expression. */
#ifndef XML_DTD
#define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#define setTopLevel(state) \
  ((state)->handler = (!(state)->documentEntity \
                       ? externalSubset1 \
                       : internalSubset))
#endif /* XML_DTD */
63
64typedef int PROLOG_HANDLER(PROLOG_STATE *state,
65 int tok,
66 const char *ptr,
67 const char *end,
68 const ENCODING *enc);
69
70static PROLOG_HANDLER
71 prolog0, prolog1, prolog2,
72 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
73 internalSubset,
74 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
75 entity7, entity8, entity9,
76 notation0, notation1, notation2, notation3, notation4,
77 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
78 attlist7, attlist8, attlist9,
79 element0, element1, element2, element3, element4, element5, element6,
80 element7,
81#ifdef XML_DTD
82 externalSubset0, externalSubset1,
83 condSect0, condSect1, condSect2,
84#endif /* XML_DTD */
85 declClose,
86 error;
87
88static
89int common(PROLOG_STATE *state, int tok);
90
91static
92int prolog0(PROLOG_STATE *state,
93 int tok,
94 const char *ptr,
95 const char *end,
96 const ENCODING *enc)
97{
98 switch (tok) {
99 case XML_TOK_PROLOG_S:
100 state->handler = prolog1;
101 return XML_ROLE_NONE;
102 case XML_TOK_XML_DECL:
103 state->handler = prolog1;
104 return XML_ROLE_XML_DECL;
105 case XML_TOK_PI:
106 state->handler = prolog1;
107 return XML_ROLE_NONE;
108 case XML_TOK_COMMENT:
109 state->handler = prolog1;
110 case XML_TOK_BOM:
111 return XML_ROLE_NONE;
112 case XML_TOK_DECL_OPEN:
113 if (!XmlNameMatchesAscii(enc,
114 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
115 end,
116 KW_DOCTYPE))
117 break;
118 state->handler = doctype0;
119 return XML_ROLE_NONE;
120 case XML_TOK_INSTANCE_START:
121 state->handler = error;
122 return XML_ROLE_INSTANCE_START;
123 }
124 return common(state, tok);
125}
126
127static
128int prolog1(PROLOG_STATE *state,
129 int tok,
130 const char *ptr,
131 const char *end,
132 const ENCODING *enc)
133{
134 switch (tok) {
135 case XML_TOK_PROLOG_S:
136 return XML_ROLE_NONE;
137 case XML_TOK_PI:
138 case XML_TOK_COMMENT:
139 case XML_TOK_BOM:
140 return XML_ROLE_NONE;
141 case XML_TOK_DECL_OPEN:
142 if (!XmlNameMatchesAscii(enc,
143 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
144 end,
145 KW_DOCTYPE))
146 break;
147 state->handler = doctype0;
148 return XML_ROLE_NONE;
149 case XML_TOK_INSTANCE_START:
150 state->handler = error;
151 return XML_ROLE_INSTANCE_START;
152 }
153 return common(state, tok);
154}
155
156static
157int prolog2(PROLOG_STATE *state,
158 int tok,
159 const char *ptr,
160 const char *end,
161 const ENCODING *enc)
162{
163 switch (tok) {
164 case XML_TOK_PROLOG_S:
165 return XML_ROLE_NONE;
166 case XML_TOK_PI:
167 case XML_TOK_COMMENT:
168 return XML_ROLE_NONE;
169 case XML_TOK_INSTANCE_START:
170 state->handler = error;
171 return XML_ROLE_INSTANCE_START;
172 }
173 return common(state, tok);
174}
175
176static
177int doctype0(PROLOG_STATE *state,
178 int tok,
179 const char *ptr,
180 const char *end,
181 const ENCODING *enc)
182{
183 switch (tok) {
184 case XML_TOK_PROLOG_S:
185 return XML_ROLE_NONE;
186 case XML_TOK_NAME:
187 case XML_TOK_PREFIXED_NAME:
188 state->handler = doctype1;
189 return XML_ROLE_DOCTYPE_NAME;
190 }
191 return common(state, tok);
192}
193
194static
195int doctype1(PROLOG_STATE *state,
196 int tok,
197 const char *ptr,
198 const char *end,
199 const ENCODING *enc)
200{
201 switch (tok) {
202 case XML_TOK_PROLOG_S:
203 return XML_ROLE_NONE;
204 case XML_TOK_OPEN_BRACKET:
205 state->handler = internalSubset;
206 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
207 case XML_TOK_DECL_CLOSE:
208 state->handler = prolog2;
209 return XML_ROLE_DOCTYPE_CLOSE;
210 case XML_TOK_NAME:
211 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
212 state->handler = doctype3;
213 return XML_ROLE_NONE;
214 }
215 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
216 state->handler = doctype2;
217 return XML_ROLE_NONE;
218 }
219 break;
220 }
221 return common(state, tok);
222}
223
224static
225int doctype2(PROLOG_STATE *state,
226 int tok,
227 const char *ptr,
228 const char *end,
229 const ENCODING *enc)
230{
231 switch (tok) {
232 case XML_TOK_PROLOG_S:
233 return XML_ROLE_NONE;
234 case XML_TOK_LITERAL:
235 state->handler = doctype3;
236 return XML_ROLE_DOCTYPE_PUBLIC_ID;
237 }
238 return common(state, tok);
239}
240
241static
242int doctype3(PROLOG_STATE *state,
243 int tok,
244 const char *ptr,
245 const char *end,
246 const ENCODING *enc)
247{
248 switch (tok) {
249 case XML_TOK_PROLOG_S:
250 return XML_ROLE_NONE;
251 case XML_TOK_LITERAL:
252 state->handler = doctype4;
253 return XML_ROLE_DOCTYPE_SYSTEM_ID;
254 }
255 return common(state, tok);
256}
257
258static
259int doctype4(PROLOG_STATE *state,
260 int tok,
261 const char *ptr,
262 const char *end,
263 const ENCODING *enc)
264{
265 switch (tok) {
266 case XML_TOK_PROLOG_S:
267 return XML_ROLE_NONE;
268 case XML_TOK_OPEN_BRACKET:
269 state->handler = internalSubset;
270 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
271 case XML_TOK_DECL_CLOSE:
272 state->handler = prolog2;
273 return XML_ROLE_DOCTYPE_CLOSE;
274 }
275 return common(state, tok);
276}
277
278static
279int doctype5(PROLOG_STATE *state,
280 int tok,
281 const char *ptr,
282 const char *end,
283 const ENCODING *enc)
284{
285 switch (tok) {
286 case XML_TOK_PROLOG_S:
287 return XML_ROLE_NONE;
288 case XML_TOK_DECL_CLOSE:
289 state->handler = prolog2;
290 return XML_ROLE_DOCTYPE_CLOSE;
291 }
292 return common(state, tok);
293}
294
295static
296int internalSubset(PROLOG_STATE *state,
297 int tok,
298 const char *ptr,
299 const char *end,
300 const ENCODING *enc)
301{
302 switch (tok) {
303 case XML_TOK_PROLOG_S:
304 return XML_ROLE_NONE;
305 case XML_TOK_DECL_OPEN:
306 if (XmlNameMatchesAscii(enc,
307 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
308 end,
309 KW_ENTITY)) {
310 state->handler = entity0;
311 return XML_ROLE_NONE;
312 }
313 if (XmlNameMatchesAscii(enc,
314 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
315 end,
316 KW_ATTLIST)) {
317 state->handler = attlist0;
318 return XML_ROLE_NONE;
319 }
320 if (XmlNameMatchesAscii(enc,
321 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
322 end,
323 KW_ELEMENT)) {
324 state->handler = element0;
325 return XML_ROLE_NONE;
326 }
327 if (XmlNameMatchesAscii(enc,
328 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
329 end,
330 KW_NOTATION)) {
331 state->handler = notation0;
332 return XML_ROLE_NONE;
333 }
334 break;
335 case XML_TOK_PI:
336 case XML_TOK_COMMENT:
337 return XML_ROLE_NONE;
338 case XML_TOK_PARAM_ENTITY_REF:
339 return XML_ROLE_PARAM_ENTITY_REF;
340 case XML_TOK_CLOSE_BRACKET:
341 state->handler = doctype5;
342 return XML_ROLE_NONE;
343 }
344 return common(state, tok);
345}
346
347#ifdef XML_DTD
348
349static
350int externalSubset0(PROLOG_STATE *state,
351 int tok,
352 const char *ptr,
353 const char *end,
354 const ENCODING *enc)
355{
356 state->handler = externalSubset1;
357 if (tok == XML_TOK_XML_DECL)
358 return XML_ROLE_TEXT_DECL;
359 return externalSubset1(state, tok, ptr, end, enc);
360}
361
362static
363int externalSubset1(PROLOG_STATE *state,
364 int tok,
365 const char *ptr,
366 const char *end,
367 const ENCODING *enc)
368{
369 switch (tok) {
370 case XML_TOK_COND_SECT_OPEN:
371 state->handler = condSect0;
372 return XML_ROLE_NONE;
373 case XML_TOK_COND_SECT_CLOSE:
374 if (state->includeLevel == 0)
375 break;
376 state->includeLevel -= 1;
377 return XML_ROLE_NONE;
378 case XML_TOK_PROLOG_S:
379 return XML_ROLE_NONE;
380 case XML_TOK_CLOSE_BRACKET:
381 break;
382 case XML_TOK_NONE:
383 if (state->includeLevel)
384 break;
385 return XML_ROLE_NONE;
386 default:
387 return internalSubset(state, tok, ptr, end, enc);
388 }
389 return common(state, tok);
390}
391
392#endif /* XML_DTD */
393
394static
395int entity0(PROLOG_STATE *state,
396 int tok,
397 const char *ptr,
398 const char *end,
399 const ENCODING *enc)
400{
401 switch (tok) {
402 case XML_TOK_PROLOG_S:
403 return XML_ROLE_NONE;
404 case XML_TOK_PERCENT:
405 state->handler = entity1;
406 return XML_ROLE_NONE;
407 case XML_TOK_NAME:
408 state->handler = entity2;
409 return XML_ROLE_GENERAL_ENTITY_NAME;
410 }
411 return common(state, tok);
412}
413
414static
415int entity1(PROLOG_STATE *state,
416 int tok,
417 const char *ptr,
418 const char *end,
419 const ENCODING *enc)
420{
421 switch (tok) {
422 case XML_TOK_PROLOG_S:
423 return XML_ROLE_NONE;
424 case XML_TOK_NAME:
425 state->handler = entity7;
426 return XML_ROLE_PARAM_ENTITY_NAME;
427 }
428 return common(state, tok);
429}
430
431static
432int entity2(PROLOG_STATE *state,
433 int tok,
434 const char *ptr,
435 const char *end,
436 const ENCODING *enc)
437{
438 switch (tok) {
439 case XML_TOK_PROLOG_S:
440 return XML_ROLE_NONE;
441 case XML_TOK_NAME:
442 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
443 state->handler = entity4;
444 return XML_ROLE_NONE;
445 }
446 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
447 state->handler = entity3;
448 return XML_ROLE_NONE;
449 }
450 break;
451 case XML_TOK_LITERAL:
452 state->handler = declClose;
453 return XML_ROLE_ENTITY_VALUE;
454 }
455 return common(state, tok);
456}
457
458static
459int entity3(PROLOG_STATE *state,
460 int tok,
461 const char *ptr,
462 const char *end,
463 const ENCODING *enc)
464{
465 switch (tok) {
466 case XML_TOK_PROLOG_S:
467 return XML_ROLE_NONE;
468 case XML_TOK_LITERAL:
469 state->handler = entity4;
470 return XML_ROLE_ENTITY_PUBLIC_ID;
471 }
472 return common(state, tok);
473}
474
475
476static
477int entity4(PROLOG_STATE *state,
478 int tok,
479 const char *ptr,
480 const char *end,
481 const ENCODING *enc)
482{
483 switch (tok) {
484 case XML_TOK_PROLOG_S:
485 return XML_ROLE_NONE;
486 case XML_TOK_LITERAL:
487 state->handler = entity5;
488 return XML_ROLE_ENTITY_SYSTEM_ID;
489 }
490 return common(state, tok);
491}
492
493static
494int entity5(PROLOG_STATE *state,
495 int tok,
496 const char *ptr,
497 const char *end,
498 const ENCODING *enc)
499{
500 switch (tok) {
501 case XML_TOK_PROLOG_S:
502 return XML_ROLE_NONE;
503 case XML_TOK_DECL_CLOSE:
504 setTopLevel(state);
505 return XML_ROLE_ENTITY_COMPLETE;
506 case XML_TOK_NAME:
507 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
508 state->handler = entity6;
509 return XML_ROLE_NONE;
510 }
511 break;
512 }
513 return common(state, tok);
514}
515
516static
517int entity6(PROLOG_STATE *state,
518 int tok,
519 const char *ptr,
520 const char *end,
521 const ENCODING *enc)
522{
523 switch (tok) {
524 case XML_TOK_PROLOG_S:
525 return XML_ROLE_NONE;
526 case XML_TOK_NAME:
527 state->handler = declClose;
528 return XML_ROLE_ENTITY_NOTATION_NAME;
529 }
530 return common(state, tok);
531}
532
533static
534int entity7(PROLOG_STATE *state,
535 int tok,
536 const char *ptr,
537 const char *end,
538 const ENCODING *enc)
539{
540 switch (tok) {
541 case XML_TOK_PROLOG_S:
542 return XML_ROLE_NONE;
543 case XML_TOK_NAME:
544 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
545 state->handler = entity9;
546 return XML_ROLE_NONE;
547 }
548 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
549 state->handler = entity8;
550 return XML_ROLE_NONE;
551 }
552 break;
553 case XML_TOK_LITERAL:
554 state->handler = declClose;
555 return XML_ROLE_ENTITY_VALUE;
556 }
557 return common(state, tok);
558}
559
560static
561int entity8(PROLOG_STATE *state,
562 int tok,
563 const char *ptr,
564 const char *end,
565 const ENCODING *enc)
566{
567 switch (tok) {
568 case XML_TOK_PROLOG_S:
569 return XML_ROLE_NONE;
570 case XML_TOK_LITERAL:
571 state->handler = entity9;
572 return XML_ROLE_ENTITY_PUBLIC_ID;
573 }
574 return common(state, tok);
575}
576
577static
578int entity9(PROLOG_STATE *state,
579 int tok,
580 const char *ptr,
581 const char *end,
582 const ENCODING *enc)
583{
584 switch (tok) {
585 case XML_TOK_PROLOG_S:
586 return XML_ROLE_NONE;
587 case XML_TOK_LITERAL:
588 state->handler = declClose;
589 return XML_ROLE_ENTITY_SYSTEM_ID;
590 }
591 return common(state, tok);
592}
593
594static
595int notation0(PROLOG_STATE *state,
596 int tok,
597 const char *ptr,
598 const char *end,
599 const ENCODING *enc)
600{
601 switch (tok) {
602 case XML_TOK_PROLOG_S:
603 return XML_ROLE_NONE;
604 case XML_TOK_NAME:
605 state->handler = notation1;
606 return XML_ROLE_NOTATION_NAME;
607 }
608 return common(state, tok);
609}
610
611static
612int notation1(PROLOG_STATE *state,
613 int tok,
614 const char *ptr,
615 const char *end,
616 const ENCODING *enc)
617{
618 switch (tok) {
619 case XML_TOK_PROLOG_S:
620 return XML_ROLE_NONE;
621 case XML_TOK_NAME:
622 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
623 state->handler = notation3;
624 return XML_ROLE_NONE;
625 }
626 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
627 state->handler = notation2;
628 return XML_ROLE_NONE;
629 }
630 break;
631 }
632 return common(state, tok);
633}
634
635static
636int notation2(PROLOG_STATE *state,
637 int tok,
638 const char *ptr,
639 const char *end,
640 const ENCODING *enc)
641{
642 switch (tok) {
643 case XML_TOK_PROLOG_S:
644 return XML_ROLE_NONE;
645 case XML_TOK_LITERAL:
646 state->handler = notation4;
647 return XML_ROLE_NOTATION_PUBLIC_ID;
648 }
649 return common(state, tok);
650}
651
652static
653int notation3(PROLOG_STATE *state,
654 int tok,
655 const char *ptr,
656 const char *end,
657 const ENCODING *enc)
658{
659 switch (tok) {
660 case XML_TOK_PROLOG_S:
661 return XML_ROLE_NONE;
662 case XML_TOK_LITERAL:
663 state->handler = declClose;
664 return XML_ROLE_NOTATION_SYSTEM_ID;
665 }
666 return common(state, tok);
667}
668
669static
670int notation4(PROLOG_STATE *state,
671 int tok,
672 const char *ptr,
673 const char *end,
674 const ENCODING *enc)
675{
676 switch (tok) {
677 case XML_TOK_PROLOG_S:
678 return XML_ROLE_NONE;
679 case XML_TOK_LITERAL:
680 state->handler = declClose;
681 return XML_ROLE_NOTATION_SYSTEM_ID;
682 case XML_TOK_DECL_CLOSE:
683 setTopLevel(state);
684 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
685 }
686 return common(state, tok);
687}
688
689static
690int attlist0(PROLOG_STATE *state,
691 int tok,
692 const char *ptr,
693 const char *end,
694 const ENCODING *enc)
695{
696 switch (tok) {
697 case XML_TOK_PROLOG_S:
698 return XML_ROLE_NONE;
699 case XML_TOK_NAME:
700 case XML_TOK_PREFIXED_NAME:
701 state->handler = attlist1;
702 return XML_ROLE_ATTLIST_ELEMENT_NAME;
703 }
704 return common(state, tok);
705}
706
707static
708int attlist1(PROLOG_STATE *state,
709 int tok,
710 const char *ptr,
711 const char *end,
712 const ENCODING *enc)
713{
714 switch (tok) {
715 case XML_TOK_PROLOG_S:
716 return XML_ROLE_NONE;
717 case XML_TOK_DECL_CLOSE:
718 setTopLevel(state);
719 return XML_ROLE_NONE;
720 case XML_TOK_NAME:
721 case XML_TOK_PREFIXED_NAME:
722 state->handler = attlist2;
723 return XML_ROLE_ATTRIBUTE_NAME;
724 }
725 return common(state, tok);
726}
727
728static
729int attlist2(PROLOG_STATE *state,
730 int tok,
731 const char *ptr,
732 const char *end,
733 const ENCODING *enc)
734{
735 switch (tok) {
736 case XML_TOK_PROLOG_S:
737 return XML_ROLE_NONE;
738 case XML_TOK_NAME:
739 {
740 static const char *types[] = {
741 KW_CDATA,
742 KW_ID,
743 KW_IDREF,
744 KW_IDREFS,
745 KW_ENTITY,
746 KW_ENTITIES,
747 KW_NMTOKEN,
748 KW_NMTOKENS,
749 };
750 int i;
751 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
752 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
753 state->handler = attlist8;
754 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
755 }
756 }
757 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
758 state->handler = attlist5;
759 return XML_ROLE_NONE;
760 }
761 break;
762 case XML_TOK_OPEN_PAREN:
763 state->handler = attlist3;
764 return XML_ROLE_NONE;
765 }
766 return common(state, tok);
767}
768
769static
770int attlist3(PROLOG_STATE *state,
771 int tok,
772 const char *ptr,
773 const char *end,
774 const ENCODING *enc)
775{
776 switch (tok) {
777 case XML_TOK_PROLOG_S:
778 return XML_ROLE_NONE;
779 case XML_TOK_NMTOKEN:
780 case XML_TOK_NAME:
781 case XML_TOK_PREFIXED_NAME:
782 state->handler = attlist4;
783 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
784 }
785 return common(state, tok);
786}
787
788static
789int attlist4(PROLOG_STATE *state,
790 int tok,
791 const char *ptr,
792 const char *end,
793 const ENCODING *enc)
794{
795 switch (tok) {
796 case XML_TOK_PROLOG_S:
797 return XML_ROLE_NONE;
798 case XML_TOK_CLOSE_PAREN:
799 state->handler = attlist8;
800 return XML_ROLE_NONE;
801 case XML_TOK_OR:
802 state->handler = attlist3;
803 return XML_ROLE_NONE;
804 }
805 return common(state, tok);
806}
807
808static
809int attlist5(PROLOG_STATE *state,
810 int tok,
811 const char *ptr,
812 const char *end,
813 const ENCODING *enc)
814{
815 switch (tok) {
816 case XML_TOK_PROLOG_S:
817 return XML_ROLE_NONE;
818 case XML_TOK_OPEN_PAREN:
819 state->handler = attlist6;
820 return XML_ROLE_NONE;
821 }
822 return common(state, tok);
823}
824
825
826static
827int attlist6(PROLOG_STATE *state,
828 int tok,
829 const char *ptr,
830 const char *end,
831 const ENCODING *enc)
832{
833 switch (tok) {
834 case XML_TOK_PROLOG_S:
835 return XML_ROLE_NONE;
836 case XML_TOK_NAME:
837 state->handler = attlist7;
838 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
839 }
840 return common(state, tok);
841}
842
843static
844int attlist7(PROLOG_STATE *state,
845 int tok,
846 const char *ptr,
847 const char *end,
848 const ENCODING *enc)
849{
850 switch (tok) {
851 case XML_TOK_PROLOG_S:
852 return XML_ROLE_NONE;
853 case XML_TOK_CLOSE_PAREN:
854 state->handler = attlist8;
855 return XML_ROLE_NONE;
856 case XML_TOK_OR:
857 state->handler = attlist6;
858 return XML_ROLE_NONE;
859 }
860 return common(state, tok);
861}
862
863/* default value */
864static
865int attlist8(PROLOG_STATE *state,
866 int tok,
867 const char *ptr,
868 const char *end,
869 const ENCODING *enc)
870{
871 switch (tok) {
872 case XML_TOK_PROLOG_S:
873 return XML_ROLE_NONE;
874 case XML_TOK_POUND_NAME:
875 if (XmlNameMatchesAscii(enc,
876 ptr + MIN_BYTES_PER_CHAR(enc),
877 end,
878 KW_IMPLIED)) {
879 state->handler = attlist1;
880 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
881 }
882 if (XmlNameMatchesAscii(enc,
883 ptr + MIN_BYTES_PER_CHAR(enc),
884 end,
885 KW_REQUIRED)) {
886 state->handler = attlist1;
887 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
888 }
889 if (XmlNameMatchesAscii(enc,
890 ptr + MIN_BYTES_PER_CHAR(enc),
891 end,
892 KW_FIXED)) {
893 state->handler = attlist9;
894 return XML_ROLE_NONE;
895 }
896 break;
897 case XML_TOK_LITERAL:
898 state->handler = attlist1;
899 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
900 }
901 return common(state, tok);
902}
903
904static
905int attlist9(PROLOG_STATE *state,
906 int tok,
907 const char *ptr,
908 const char *end,
909 const ENCODING *enc)
910{
911 switch (tok) {
912 case XML_TOK_PROLOG_S:
913 return XML_ROLE_NONE;
914 case XML_TOK_LITERAL:
915 state->handler = attlist1;
916 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
917 }
918 return common(state, tok);
919}
920
921static
922int element0(PROLOG_STATE *state,
923 int tok,
924 const char *ptr,
925 const char *end,
926 const ENCODING *enc)
927{
928 switch (tok) {
929 case XML_TOK_PROLOG_S:
930 return XML_ROLE_NONE;
931 case XML_TOK_NAME:
932 case XML_TOK_PREFIXED_NAME:
933 state->handler = element1;
934 return XML_ROLE_ELEMENT_NAME;
935 }
936 return common(state, tok);
937}
938
939static
940int element1(PROLOG_STATE *state,
941 int tok,
942 const char *ptr,
943 const char *end,
944 const ENCODING *enc)
945{
946 switch (tok) {
947 case XML_TOK_PROLOG_S:
948 return XML_ROLE_NONE;
949 case XML_TOK_NAME:
950 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
951 state->handler = declClose;
952 return XML_ROLE_CONTENT_EMPTY;
953 }
954 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
955 state->handler = declClose;
956 return XML_ROLE_CONTENT_ANY;
957 }
958 break;
959 case XML_TOK_OPEN_PAREN:
960 state->handler = element2;
961 state->level = 1;
962 return XML_ROLE_GROUP_OPEN;
963 }
964 return common(state, tok);
965}
966
967static
968int element2(PROLOG_STATE *state,
969 int tok,
970 const char *ptr,
971 const char *end,
972 const ENCODING *enc)
973{
974 switch (tok) {
975 case XML_TOK_PROLOG_S:
976 return XML_ROLE_NONE;
977 case XML_TOK_POUND_NAME:
978 if (XmlNameMatchesAscii(enc,
979 ptr + MIN_BYTES_PER_CHAR(enc),
980 end,
981 KW_PCDATA)) {
982 state->handler = element3;
983 return XML_ROLE_CONTENT_PCDATA;
984 }
985 break;
986 case XML_TOK_OPEN_PAREN:
987 state->level = 2;
988 state->handler = element6;
989 return XML_ROLE_GROUP_OPEN;
990 case XML_TOK_NAME:
991 case XML_TOK_PREFIXED_NAME:
992 state->handler = element7;
993 return XML_ROLE_CONTENT_ELEMENT;
994 case XML_TOK_NAME_QUESTION:
995 state->handler = element7;
996 return XML_ROLE_CONTENT_ELEMENT_OPT;
997 case XML_TOK_NAME_ASTERISK:
998 state->handler = element7;
999 return XML_ROLE_CONTENT_ELEMENT_REP;
1000 case XML_TOK_NAME_PLUS:
1001 state->handler = element7;
1002 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1003 }
1004 return common(state, tok);
1005}
1006
1007static
1008int element3(PROLOG_STATE *state,
1009 int tok,
1010 const char *ptr,
1011 const char *end,
1012 const ENCODING *enc)
1013{
1014 switch (tok) {
1015 case XML_TOK_PROLOG_S:
1016 return XML_ROLE_NONE;
1017 case XML_TOK_CLOSE_PAREN:
1018 state->handler = declClose;
1019 return XML_ROLE_GROUP_CLOSE;
1020 case XML_TOK_CLOSE_PAREN_ASTERISK:
1021 state->handler = declClose;
1022 return XML_ROLE_GROUP_CLOSE_REP;
1023 case XML_TOK_OR:
1024 state->handler = element4;
1025 return XML_ROLE_NONE;
1026 }
1027 return common(state, tok);
1028}
1029
1030static
1031int element4(PROLOG_STATE *state,
1032 int tok,
1033 const char *ptr,
1034 const char *end,
1035 const ENCODING *enc)
1036{
1037 switch (tok) {
1038 case XML_TOK_PROLOG_S:
1039 return XML_ROLE_NONE;
1040 case XML_TOK_NAME:
1041 case XML_TOK_PREFIXED_NAME:
1042 state->handler = element5;
1043 return XML_ROLE_CONTENT_ELEMENT;
1044 }
1045 return common(state, tok);
1046}
1047
1048static
1049int element5(PROLOG_STATE *state,
1050 int tok,
1051 const char *ptr,
1052 const char *end,
1053 const ENCODING *enc)
1054{
1055 switch (tok) {
1056 case XML_TOK_PROLOG_S:
1057 return XML_ROLE_NONE;
1058 case XML_TOK_CLOSE_PAREN_ASTERISK:
1059 state->handler = declClose;
1060 return XML_ROLE_GROUP_CLOSE_REP;
1061 case XML_TOK_OR:
1062 state->handler = element4;
1063 return XML_ROLE_NONE;
1064 }
1065 return common(state, tok);
1066}
1067
1068static
1069int element6(PROLOG_STATE *state,
1070 int tok,
1071 const char *ptr,
1072 const char *end,
1073 const ENCODING *enc)
1074{
1075 switch (tok) {
1076 case XML_TOK_PROLOG_S:
1077 return XML_ROLE_NONE;
1078 case XML_TOK_OPEN_PAREN:
1079 state->level += 1;
1080 return XML_ROLE_GROUP_OPEN;
1081 case XML_TOK_NAME:
1082 case XML_TOK_PREFIXED_NAME:
1083 state->handler = element7;
1084 return XML_ROLE_CONTENT_ELEMENT;
1085 case XML_TOK_NAME_QUESTION:
1086 state->handler = element7;
1087 return XML_ROLE_CONTENT_ELEMENT_OPT;
1088 case XML_TOK_NAME_ASTERISK:
1089 state->handler = element7;
1090 return XML_ROLE_CONTENT_ELEMENT_REP;
1091 case XML_TOK_NAME_PLUS:
1092 state->handler = element7;
1093 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1094 }
1095 return common(state, tok);
1096}
1097
1098static
1099int element7(PROLOG_STATE *state,
1100 int tok,
1101 const char *ptr,
1102 const char *end,
1103 const ENCODING *enc)
1104{
1105 switch (tok) {
1106 case XML_TOK_PROLOG_S:
1107 return XML_ROLE_NONE;
1108 case XML_TOK_CLOSE_PAREN:
1109 state->level -= 1;
1110 if (state->level == 0)
1111 state->handler = declClose;
1112 return XML_ROLE_GROUP_CLOSE;
1113 case XML_TOK_CLOSE_PAREN_ASTERISK:
1114 state->level -= 1;
1115 if (state->level == 0)
1116 state->handler = declClose;
1117 return XML_ROLE_GROUP_CLOSE_REP;
1118 case XML_TOK_CLOSE_PAREN_QUESTION:
1119 state->level -= 1;
1120 if (state->level == 0)
1121 state->handler = declClose;
1122 return XML_ROLE_GROUP_CLOSE_OPT;
1123 case XML_TOK_CLOSE_PAREN_PLUS:
1124 state->level -= 1;
1125 if (state->level == 0)
1126 state->handler = declClose;
1127 return XML_ROLE_GROUP_CLOSE_PLUS;
1128 case XML_TOK_COMMA:
1129 state->handler = element6;
1130 return XML_ROLE_GROUP_SEQUENCE;
1131 case XML_TOK_OR:
1132 state->handler = element6;
1133 return XML_ROLE_GROUP_CHOICE;
1134 }
1135 return common(state, tok);
1136}
1137
1138#ifdef XML_DTD
1139
1140static
1141int condSect0(PROLOG_STATE *state,
1142 int tok,
1143 const char *ptr,
1144 const char *end,
1145 const ENCODING *enc)
1146{
1147 switch (tok) {
1148 case XML_TOK_PROLOG_S:
1149 return XML_ROLE_NONE;
1150 case XML_TOK_NAME:
1151 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1152 state->handler = condSect1;
1153 return XML_ROLE_NONE;
1154 }
1155 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1156 state->handler = condSect2;
1157 return XML_ROLE_NONE;
1158 }
1159 break;
1160 }
1161 return common(state, tok);
1162}
1163
1164static
1165int condSect1(PROLOG_STATE *state,
1166 int tok,
1167 const char *ptr,
1168 const char *end,
1169 const ENCODING *enc)
1170{
1171 switch (tok) {
1172 case XML_TOK_PROLOG_S:
1173 return XML_ROLE_NONE;
1174 case XML_TOK_OPEN_BRACKET:
1175 state->handler = externalSubset1;
1176 state->includeLevel += 1;
1177 return XML_ROLE_NONE;
1178 }
1179 return common(state, tok);
1180}
1181
1182static
1183int condSect2(PROLOG_STATE *state,
1184 int tok,
1185 const char *ptr,
1186 const char *end,
1187 const ENCODING *enc)
1188{
1189 switch (tok) {
1190 case XML_TOK_PROLOG_S:
1191 return XML_ROLE_NONE;
1192 case XML_TOK_OPEN_BRACKET:
1193 state->handler = externalSubset1;
1194 return XML_ROLE_IGNORE_SECT;
1195 }
1196 return common(state, tok);
1197}
1198
1199#endif /* XML_DTD */
1200
1201static
1202int declClose(PROLOG_STATE *state,
1203 int tok,
1204 const char *ptr,
1205 const char *end,
1206 const ENCODING *enc)
1207{
1208 switch (tok) {
1209 case XML_TOK_PROLOG_S:
1210 return XML_ROLE_NONE;
1211 case XML_TOK_DECL_CLOSE:
1212 setTopLevel(state);
1213 return XML_ROLE_NONE;
1214 }
1215 return common(state, tok);
1216}
1217
1218#if 0
1219
1220static
1221int ignore(PROLOG_STATE *state,
1222 int tok,
1223 const char *ptr,
1224 const char *end,
1225 const ENCODING *enc)
1226{
1227 switch (tok) {
1228 case XML_TOK_DECL_CLOSE:
1229 state->handler = internalSubset;
1230 return 0;
1231 default:
1232 return XML_ROLE_NONE;
1233 }
1234 return common(state, tok);
1235}
1236#endif
1237
1238static
1239int error(PROLOG_STATE *state,
1240 int tok,
1241 const char *ptr,
1242 const char *end,
1243 const ENCODING *enc)
1244{
1245 return XML_ROLE_NONE;
1246}
1247
1248static
1249int common(PROLOG_STATE *state, int tok)
1250{
1251#ifdef XML_DTD
1252 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1253 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1254#endif
1255 state->handler = error;
1256 return XML_ROLE_ERROR;
1257}
1258
1259void XmlPrologStateInit(PROLOG_STATE *state)
1260{
1261 state->handler = prolog0;
1262#ifdef XML_DTD
1263 state->documentEntity = 1;
1264 state->includeLevel = 0;
1265#endif /* XML_DTD */
1266}
1267
1268#ifdef XML_DTD
1269
1270void XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1271{
1272 state->handler = externalSubset0;
1273 state->documentEntity = 0;
1274 state->includeLevel = 0;
1275}
1276
1277#endif /* XML_DTD */