blob: 20a14441550e193acb9f188b2fd43e25167655c9 [file] [log] [blame]
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
5
/* Revision-control identification string.  "$Header$" is presumably a
   keyword expanded by the revision control system -- confirm. */
static char RCSId[] = "$Header$";
8
9#ifdef COMPILED_FROM_DSP
10# include "winconfig.h"
11#else
12# include <config.h>
13#endif /* ndef COMPILED_FROM_DSP */
14
15#include "xmlrole.h"
16#include "ascii.h"
17
/* This module does not check:

   - that ',' and '|' are not mixed within a single model group
   - the content of literals

*/
24
25static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' };
26static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
27static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
28static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
29static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
30static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
31static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
32static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
33static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
34static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' };
35static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
36static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
37static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
38static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
39static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
40static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
41static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
42static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
43static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' };
44static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
45static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
46static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' };
47static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
48
/* Reads the encoding's minBytesPerChar field unless the build has
   already supplied its own definition of this macro. */
#if !defined(MIN_BYTES_PER_CHAR)
# define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
52
/* Installs the handler used between declarations.  With XML_DTD the
   choice depends on state->documentEntity: internalSubset when it is
   non-zero, externalSubset1 otherwise.  Without XML_DTD it is always
   internalSubset. */
#if defined(XML_DTD)
# define setTopLevel(state) \
  ((state)->handler = \
     ((state)->documentEntity ? internalSubset : externalSubset1))
#else /* not XML_DTD */
# define setTopLevel(state) \
  ((state)->handler = internalSubset)
#endif /* not XML_DTD */
61
62typedef int PROLOG_HANDLER(PROLOG_STATE *state,
63 int tok,
64 const char *ptr,
65 const char *end,
66 const ENCODING *enc);
67
68static PROLOG_HANDLER
69 prolog0, prolog1, prolog2,
70 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
71 internalSubset,
72 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
73 entity7, entity8, entity9,
74 notation0, notation1, notation2, notation3, notation4,
75 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
76 attlist7, attlist8, attlist9,
77 element0, element1, element2, element3, element4, element5, element6,
78 element7,
79#ifdef XML_DTD
80 externalSubset0, externalSubset1,
81 condSect0, condSect1, condSect2,
82#endif /* XML_DTD */
83 declClose,
84 error;
85
86static
87int common(PROLOG_STATE *state, int tok);
88
89static
90int prolog0(PROLOG_STATE *state,
91 int tok,
92 const char *ptr,
93 const char *end,
94 const ENCODING *enc)
95{
96 switch (tok) {
97 case XML_TOK_PROLOG_S:
98 state->handler = prolog1;
99 return XML_ROLE_NONE;
100 case XML_TOK_XML_DECL:
101 state->handler = prolog1;
102 return XML_ROLE_XML_DECL;
103 case XML_TOK_PI:
104 state->handler = prolog1;
105 return XML_ROLE_NONE;
106 case XML_TOK_COMMENT:
107 state->handler = prolog1;
108 case XML_TOK_BOM:
109 return XML_ROLE_NONE;
110 case XML_TOK_DECL_OPEN:
111 if (!XmlNameMatchesAscii(enc,
112 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
113 end,
114 KW_DOCTYPE))
115 break;
116 state->handler = doctype0;
117 return XML_ROLE_NONE;
118 case XML_TOK_INSTANCE_START:
119 state->handler = error;
120 return XML_ROLE_INSTANCE_START;
121 }
122 return common(state, tok);
123}
124
125static
126int prolog1(PROLOG_STATE *state,
127 int tok,
128 const char *ptr,
129 const char *end,
130 const ENCODING *enc)
131{
132 switch (tok) {
133 case XML_TOK_PROLOG_S:
134 return XML_ROLE_NONE;
135 case XML_TOK_PI:
136 case XML_TOK_COMMENT:
137 case XML_TOK_BOM:
138 return XML_ROLE_NONE;
139 case XML_TOK_DECL_OPEN:
140 if (!XmlNameMatchesAscii(enc,
141 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
142 end,
143 KW_DOCTYPE))
144 break;
145 state->handler = doctype0;
146 return XML_ROLE_NONE;
147 case XML_TOK_INSTANCE_START:
148 state->handler = error;
149 return XML_ROLE_INSTANCE_START;
150 }
151 return common(state, tok);
152}
153
154static
155int prolog2(PROLOG_STATE *state,
156 int tok,
157 const char *ptr,
158 const char *end,
159 const ENCODING *enc)
160{
161 switch (tok) {
162 case XML_TOK_PROLOG_S:
163 return XML_ROLE_NONE;
164 case XML_TOK_PI:
165 case XML_TOK_COMMENT:
166 return XML_ROLE_NONE;
167 case XML_TOK_INSTANCE_START:
168 state->handler = error;
169 return XML_ROLE_INSTANCE_START;
170 }
171 return common(state, tok);
172}
173
174static
175int doctype0(PROLOG_STATE *state,
176 int tok,
177 const char *ptr,
178 const char *end,
179 const ENCODING *enc)
180{
181 switch (tok) {
182 case XML_TOK_PROLOG_S:
183 return XML_ROLE_NONE;
184 case XML_TOK_NAME:
185 case XML_TOK_PREFIXED_NAME:
186 state->handler = doctype1;
187 return XML_ROLE_DOCTYPE_NAME;
188 }
189 return common(state, tok);
190}
191
192static
193int doctype1(PROLOG_STATE *state,
194 int tok,
195 const char *ptr,
196 const char *end,
197 const ENCODING *enc)
198{
199 switch (tok) {
200 case XML_TOK_PROLOG_S:
201 return XML_ROLE_NONE;
202 case XML_TOK_OPEN_BRACKET:
203 state->handler = internalSubset;
204 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
205 case XML_TOK_DECL_CLOSE:
206 state->handler = prolog2;
207 return XML_ROLE_DOCTYPE_CLOSE;
208 case XML_TOK_NAME:
209 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
210 state->handler = doctype3;
211 return XML_ROLE_NONE;
212 }
213 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
214 state->handler = doctype2;
215 return XML_ROLE_NONE;
216 }
217 break;
218 }
219 return common(state, tok);
220}
221
222static
223int doctype2(PROLOG_STATE *state,
224 int tok,
225 const char *ptr,
226 const char *end,
227 const ENCODING *enc)
228{
229 switch (tok) {
230 case XML_TOK_PROLOG_S:
231 return XML_ROLE_NONE;
232 case XML_TOK_LITERAL:
233 state->handler = doctype3;
234 return XML_ROLE_DOCTYPE_PUBLIC_ID;
235 }
236 return common(state, tok);
237}
238
239static
240int doctype3(PROLOG_STATE *state,
241 int tok,
242 const char *ptr,
243 const char *end,
244 const ENCODING *enc)
245{
246 switch (tok) {
247 case XML_TOK_PROLOG_S:
248 return XML_ROLE_NONE;
249 case XML_TOK_LITERAL:
250 state->handler = doctype4;
251 return XML_ROLE_DOCTYPE_SYSTEM_ID;
252 }
253 return common(state, tok);
254}
255
256static
257int doctype4(PROLOG_STATE *state,
258 int tok,
259 const char *ptr,
260 const char *end,
261 const ENCODING *enc)
262{
263 switch (tok) {
264 case XML_TOK_PROLOG_S:
265 return XML_ROLE_NONE;
266 case XML_TOK_OPEN_BRACKET:
267 state->handler = internalSubset;
268 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
269 case XML_TOK_DECL_CLOSE:
270 state->handler = prolog2;
271 return XML_ROLE_DOCTYPE_CLOSE;
272 }
273 return common(state, tok);
274}
275
276static
277int doctype5(PROLOG_STATE *state,
278 int tok,
279 const char *ptr,
280 const char *end,
281 const ENCODING *enc)
282{
283 switch (tok) {
284 case XML_TOK_PROLOG_S:
285 return XML_ROLE_NONE;
286 case XML_TOK_DECL_CLOSE:
287 state->handler = prolog2;
288 return XML_ROLE_DOCTYPE_CLOSE;
289 }
290 return common(state, tok);
291}
292
293static
294int internalSubset(PROLOG_STATE *state,
295 int tok,
296 const char *ptr,
297 const char *end,
298 const ENCODING *enc)
299{
300 switch (tok) {
301 case XML_TOK_PROLOG_S:
302 return XML_ROLE_NONE;
303 case XML_TOK_DECL_OPEN:
304 if (XmlNameMatchesAscii(enc,
305 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
306 end,
307 KW_ENTITY)) {
308 state->handler = entity0;
309 return XML_ROLE_NONE;
310 }
311 if (XmlNameMatchesAscii(enc,
312 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
313 end,
314 KW_ATTLIST)) {
315 state->handler = attlist0;
316 return XML_ROLE_NONE;
317 }
318 if (XmlNameMatchesAscii(enc,
319 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
320 end,
321 KW_ELEMENT)) {
322 state->handler = element0;
323 return XML_ROLE_NONE;
324 }
325 if (XmlNameMatchesAscii(enc,
326 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
327 end,
328 KW_NOTATION)) {
329 state->handler = notation0;
330 return XML_ROLE_NONE;
331 }
332 break;
333 case XML_TOK_PI:
334 case XML_TOK_COMMENT:
335 return XML_ROLE_NONE;
336 case XML_TOK_PARAM_ENTITY_REF:
337 return XML_ROLE_PARAM_ENTITY_REF;
338 case XML_TOK_CLOSE_BRACKET:
339 state->handler = doctype5;
340 return XML_ROLE_NONE;
341 }
342 return common(state, tok);
343}
344
345#ifdef XML_DTD
346
347static
348int externalSubset0(PROLOG_STATE *state,
349 int tok,
350 const char *ptr,
351 const char *end,
352 const ENCODING *enc)
353{
354 state->handler = externalSubset1;
355 if (tok == XML_TOK_XML_DECL)
356 return XML_ROLE_TEXT_DECL;
357 return externalSubset1(state, tok, ptr, end, enc);
358}
359
360static
361int externalSubset1(PROLOG_STATE *state,
362 int tok,
363 const char *ptr,
364 const char *end,
365 const ENCODING *enc)
366{
367 switch (tok) {
368 case XML_TOK_COND_SECT_OPEN:
369 state->handler = condSect0;
370 return XML_ROLE_NONE;
371 case XML_TOK_COND_SECT_CLOSE:
372 if (state->includeLevel == 0)
373 break;
374 state->includeLevel -= 1;
375 return XML_ROLE_NONE;
376 case XML_TOK_PROLOG_S:
377 return XML_ROLE_NONE;
378 case XML_TOK_CLOSE_BRACKET:
379 break;
380 case XML_TOK_NONE:
381 if (state->includeLevel)
382 break;
383 return XML_ROLE_NONE;
384 default:
385 return internalSubset(state, tok, ptr, end, enc);
386 }
387 return common(state, tok);
388}
389
390#endif /* XML_DTD */
391
392static
393int entity0(PROLOG_STATE *state,
394 int tok,
395 const char *ptr,
396 const char *end,
397 const ENCODING *enc)
398{
399 switch (tok) {
400 case XML_TOK_PROLOG_S:
401 return XML_ROLE_NONE;
402 case XML_TOK_PERCENT:
403 state->handler = entity1;
404 return XML_ROLE_NONE;
405 case XML_TOK_NAME:
406 state->handler = entity2;
407 return XML_ROLE_GENERAL_ENTITY_NAME;
408 }
409 return common(state, tok);
410}
411
412static
413int entity1(PROLOG_STATE *state,
414 int tok,
415 const char *ptr,
416 const char *end,
417 const ENCODING *enc)
418{
419 switch (tok) {
420 case XML_TOK_PROLOG_S:
421 return XML_ROLE_NONE;
422 case XML_TOK_NAME:
423 state->handler = entity7;
424 return XML_ROLE_PARAM_ENTITY_NAME;
425 }
426 return common(state, tok);
427}
428
429static
430int entity2(PROLOG_STATE *state,
431 int tok,
432 const char *ptr,
433 const char *end,
434 const ENCODING *enc)
435{
436 switch (tok) {
437 case XML_TOK_PROLOG_S:
438 return XML_ROLE_NONE;
439 case XML_TOK_NAME:
440 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
441 state->handler = entity4;
442 return XML_ROLE_NONE;
443 }
444 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
445 state->handler = entity3;
446 return XML_ROLE_NONE;
447 }
448 break;
449 case XML_TOK_LITERAL:
450 state->handler = declClose;
451 return XML_ROLE_ENTITY_VALUE;
452 }
453 return common(state, tok);
454}
455
456static
457int entity3(PROLOG_STATE *state,
458 int tok,
459 const char *ptr,
460 const char *end,
461 const ENCODING *enc)
462{
463 switch (tok) {
464 case XML_TOK_PROLOG_S:
465 return XML_ROLE_NONE;
466 case XML_TOK_LITERAL:
467 state->handler = entity4;
468 return XML_ROLE_ENTITY_PUBLIC_ID;
469 }
470 return common(state, tok);
471}
472
473
474static
475int entity4(PROLOG_STATE *state,
476 int tok,
477 const char *ptr,
478 const char *end,
479 const ENCODING *enc)
480{
481 switch (tok) {
482 case XML_TOK_PROLOG_S:
483 return XML_ROLE_NONE;
484 case XML_TOK_LITERAL:
485 state->handler = entity5;
486 return XML_ROLE_ENTITY_SYSTEM_ID;
487 }
488 return common(state, tok);
489}
490
491static
492int entity5(PROLOG_STATE *state,
493 int tok,
494 const char *ptr,
495 const char *end,
496 const ENCODING *enc)
497{
498 switch (tok) {
499 case XML_TOK_PROLOG_S:
500 return XML_ROLE_NONE;
501 case XML_TOK_DECL_CLOSE:
502 setTopLevel(state);
503 return XML_ROLE_ENTITY_COMPLETE;
504 case XML_TOK_NAME:
505 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
506 state->handler = entity6;
507 return XML_ROLE_NONE;
508 }
509 break;
510 }
511 return common(state, tok);
512}
513
514static
515int entity6(PROLOG_STATE *state,
516 int tok,
517 const char *ptr,
518 const char *end,
519 const ENCODING *enc)
520{
521 switch (tok) {
522 case XML_TOK_PROLOG_S:
523 return XML_ROLE_NONE;
524 case XML_TOK_NAME:
525 state->handler = declClose;
526 return XML_ROLE_ENTITY_NOTATION_NAME;
527 }
528 return common(state, tok);
529}
530
531static
532int entity7(PROLOG_STATE *state,
533 int tok,
534 const char *ptr,
535 const char *end,
536 const ENCODING *enc)
537{
538 switch (tok) {
539 case XML_TOK_PROLOG_S:
540 return XML_ROLE_NONE;
541 case XML_TOK_NAME:
542 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
543 state->handler = entity9;
544 return XML_ROLE_NONE;
545 }
546 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
547 state->handler = entity8;
548 return XML_ROLE_NONE;
549 }
550 break;
551 case XML_TOK_LITERAL:
552 state->handler = declClose;
553 return XML_ROLE_ENTITY_VALUE;
554 }
555 return common(state, tok);
556}
557
558static
559int entity8(PROLOG_STATE *state,
560 int tok,
561 const char *ptr,
562 const char *end,
563 const ENCODING *enc)
564{
565 switch (tok) {
566 case XML_TOK_PROLOG_S:
567 return XML_ROLE_NONE;
568 case XML_TOK_LITERAL:
569 state->handler = entity9;
570 return XML_ROLE_ENTITY_PUBLIC_ID;
571 }
572 return common(state, tok);
573}
574
575static
576int entity9(PROLOG_STATE *state,
577 int tok,
578 const char *ptr,
579 const char *end,
580 const ENCODING *enc)
581{
582 switch (tok) {
583 case XML_TOK_PROLOG_S:
584 return XML_ROLE_NONE;
585 case XML_TOK_LITERAL:
586 state->handler = declClose;
587 return XML_ROLE_ENTITY_SYSTEM_ID;
588 }
589 return common(state, tok);
590}
591
592static
593int notation0(PROLOG_STATE *state,
594 int tok,
595 const char *ptr,
596 const char *end,
597 const ENCODING *enc)
598{
599 switch (tok) {
600 case XML_TOK_PROLOG_S:
601 return XML_ROLE_NONE;
602 case XML_TOK_NAME:
603 state->handler = notation1;
604 return XML_ROLE_NOTATION_NAME;
605 }
606 return common(state, tok);
607}
608
609static
610int notation1(PROLOG_STATE *state,
611 int tok,
612 const char *ptr,
613 const char *end,
614 const ENCODING *enc)
615{
616 switch (tok) {
617 case XML_TOK_PROLOG_S:
618 return XML_ROLE_NONE;
619 case XML_TOK_NAME:
620 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
621 state->handler = notation3;
622 return XML_ROLE_NONE;
623 }
624 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
625 state->handler = notation2;
626 return XML_ROLE_NONE;
627 }
628 break;
629 }
630 return common(state, tok);
631}
632
633static
634int notation2(PROLOG_STATE *state,
635 int tok,
636 const char *ptr,
637 const char *end,
638 const ENCODING *enc)
639{
640 switch (tok) {
641 case XML_TOK_PROLOG_S:
642 return XML_ROLE_NONE;
643 case XML_TOK_LITERAL:
644 state->handler = notation4;
645 return XML_ROLE_NOTATION_PUBLIC_ID;
646 }
647 return common(state, tok);
648}
649
650static
651int notation3(PROLOG_STATE *state,
652 int tok,
653 const char *ptr,
654 const char *end,
655 const ENCODING *enc)
656{
657 switch (tok) {
658 case XML_TOK_PROLOG_S:
659 return XML_ROLE_NONE;
660 case XML_TOK_LITERAL:
661 state->handler = declClose;
662 return XML_ROLE_NOTATION_SYSTEM_ID;
663 }
664 return common(state, tok);
665}
666
667static
668int notation4(PROLOG_STATE *state,
669 int tok,
670 const char *ptr,
671 const char *end,
672 const ENCODING *enc)
673{
674 switch (tok) {
675 case XML_TOK_PROLOG_S:
676 return XML_ROLE_NONE;
677 case XML_TOK_LITERAL:
678 state->handler = declClose;
679 return XML_ROLE_NOTATION_SYSTEM_ID;
680 case XML_TOK_DECL_CLOSE:
681 setTopLevel(state);
682 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
683 }
684 return common(state, tok);
685}
686
687static
688int attlist0(PROLOG_STATE *state,
689 int tok,
690 const char *ptr,
691 const char *end,
692 const ENCODING *enc)
693{
694 switch (tok) {
695 case XML_TOK_PROLOG_S:
696 return XML_ROLE_NONE;
697 case XML_TOK_NAME:
698 case XML_TOK_PREFIXED_NAME:
699 state->handler = attlist1;
700 return XML_ROLE_ATTLIST_ELEMENT_NAME;
701 }
702 return common(state, tok);
703}
704
705static
706int attlist1(PROLOG_STATE *state,
707 int tok,
708 const char *ptr,
709 const char *end,
710 const ENCODING *enc)
711{
712 switch (tok) {
713 case XML_TOK_PROLOG_S:
714 return XML_ROLE_NONE;
715 case XML_TOK_DECL_CLOSE:
716 setTopLevel(state);
717 return XML_ROLE_NONE;
718 case XML_TOK_NAME:
719 case XML_TOK_PREFIXED_NAME:
720 state->handler = attlist2;
721 return XML_ROLE_ATTRIBUTE_NAME;
722 }
723 return common(state, tok);
724}
725
726static
727int attlist2(PROLOG_STATE *state,
728 int tok,
729 const char *ptr,
730 const char *end,
731 const ENCODING *enc)
732{
733 switch (tok) {
734 case XML_TOK_PROLOG_S:
735 return XML_ROLE_NONE;
736 case XML_TOK_NAME:
737 {
738 static const char *types[] = {
739 KW_CDATA,
740 KW_ID,
741 KW_IDREF,
742 KW_IDREFS,
743 KW_ENTITY,
744 KW_ENTITIES,
745 KW_NMTOKEN,
746 KW_NMTOKENS,
747 };
748 int i;
749 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
750 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
751 state->handler = attlist8;
752 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
753 }
754 }
755 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
756 state->handler = attlist5;
757 return XML_ROLE_NONE;
758 }
759 break;
760 case XML_TOK_OPEN_PAREN:
761 state->handler = attlist3;
762 return XML_ROLE_NONE;
763 }
764 return common(state, tok);
765}
766
767static
768int attlist3(PROLOG_STATE *state,
769 int tok,
770 const char *ptr,
771 const char *end,
772 const ENCODING *enc)
773{
774 switch (tok) {
775 case XML_TOK_PROLOG_S:
776 return XML_ROLE_NONE;
777 case XML_TOK_NMTOKEN:
778 case XML_TOK_NAME:
779 case XML_TOK_PREFIXED_NAME:
780 state->handler = attlist4;
781 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
782 }
783 return common(state, tok);
784}
785
786static
787int attlist4(PROLOG_STATE *state,
788 int tok,
789 const char *ptr,
790 const char *end,
791 const ENCODING *enc)
792{
793 switch (tok) {
794 case XML_TOK_PROLOG_S:
795 return XML_ROLE_NONE;
796 case XML_TOK_CLOSE_PAREN:
797 state->handler = attlist8;
798 return XML_ROLE_NONE;
799 case XML_TOK_OR:
800 state->handler = attlist3;
801 return XML_ROLE_NONE;
802 }
803 return common(state, tok);
804}
805
806static
807int attlist5(PROLOG_STATE *state,
808 int tok,
809 const char *ptr,
810 const char *end,
811 const ENCODING *enc)
812{
813 switch (tok) {
814 case XML_TOK_PROLOG_S:
815 return XML_ROLE_NONE;
816 case XML_TOK_OPEN_PAREN:
817 state->handler = attlist6;
818 return XML_ROLE_NONE;
819 }
820 return common(state, tok);
821}
822
823
824static
825int attlist6(PROLOG_STATE *state,
826 int tok,
827 const char *ptr,
828 const char *end,
829 const ENCODING *enc)
830{
831 switch (tok) {
832 case XML_TOK_PROLOG_S:
833 return XML_ROLE_NONE;
834 case XML_TOK_NAME:
835 state->handler = attlist7;
836 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
837 }
838 return common(state, tok);
839}
840
841static
842int attlist7(PROLOG_STATE *state,
843 int tok,
844 const char *ptr,
845 const char *end,
846 const ENCODING *enc)
847{
848 switch (tok) {
849 case XML_TOK_PROLOG_S:
850 return XML_ROLE_NONE;
851 case XML_TOK_CLOSE_PAREN:
852 state->handler = attlist8;
853 return XML_ROLE_NONE;
854 case XML_TOK_OR:
855 state->handler = attlist6;
856 return XML_ROLE_NONE;
857 }
858 return common(state, tok);
859}
860
861/* default value */
862static
863int attlist8(PROLOG_STATE *state,
864 int tok,
865 const char *ptr,
866 const char *end,
867 const ENCODING *enc)
868{
869 switch (tok) {
870 case XML_TOK_PROLOG_S:
871 return XML_ROLE_NONE;
872 case XML_TOK_POUND_NAME:
873 if (XmlNameMatchesAscii(enc,
874 ptr + MIN_BYTES_PER_CHAR(enc),
875 end,
876 KW_IMPLIED)) {
877 state->handler = attlist1;
878 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
879 }
880 if (XmlNameMatchesAscii(enc,
881 ptr + MIN_BYTES_PER_CHAR(enc),
882 end,
883 KW_REQUIRED)) {
884 state->handler = attlist1;
885 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
886 }
887 if (XmlNameMatchesAscii(enc,
888 ptr + MIN_BYTES_PER_CHAR(enc),
889 end,
890 KW_FIXED)) {
891 state->handler = attlist9;
892 return XML_ROLE_NONE;
893 }
894 break;
895 case XML_TOK_LITERAL:
896 state->handler = attlist1;
897 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
898 }
899 return common(state, tok);
900}
901
902static
903int attlist9(PROLOG_STATE *state,
904 int tok,
905 const char *ptr,
906 const char *end,
907 const ENCODING *enc)
908{
909 switch (tok) {
910 case XML_TOK_PROLOG_S:
911 return XML_ROLE_NONE;
912 case XML_TOK_LITERAL:
913 state->handler = attlist1;
914 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
915 }
916 return common(state, tok);
917}
918
919static
920int element0(PROLOG_STATE *state,
921 int tok,
922 const char *ptr,
923 const char *end,
924 const ENCODING *enc)
925{
926 switch (tok) {
927 case XML_TOK_PROLOG_S:
928 return XML_ROLE_NONE;
929 case XML_TOK_NAME:
930 case XML_TOK_PREFIXED_NAME:
931 state->handler = element1;
932 return XML_ROLE_ELEMENT_NAME;
933 }
934 return common(state, tok);
935}
936
937static
938int element1(PROLOG_STATE *state,
939 int tok,
940 const char *ptr,
941 const char *end,
942 const ENCODING *enc)
943{
944 switch (tok) {
945 case XML_TOK_PROLOG_S:
946 return XML_ROLE_NONE;
947 case XML_TOK_NAME:
948 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
949 state->handler = declClose;
950 return XML_ROLE_CONTENT_EMPTY;
951 }
952 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
953 state->handler = declClose;
954 return XML_ROLE_CONTENT_ANY;
955 }
956 break;
957 case XML_TOK_OPEN_PAREN:
958 state->handler = element2;
959 state->level = 1;
960 return XML_ROLE_GROUP_OPEN;
961 }
962 return common(state, tok);
963}
964
965static
966int element2(PROLOG_STATE *state,
967 int tok,
968 const char *ptr,
969 const char *end,
970 const ENCODING *enc)
971{
972 switch (tok) {
973 case XML_TOK_PROLOG_S:
974 return XML_ROLE_NONE;
975 case XML_TOK_POUND_NAME:
976 if (XmlNameMatchesAscii(enc,
977 ptr + MIN_BYTES_PER_CHAR(enc),
978 end,
979 KW_PCDATA)) {
980 state->handler = element3;
981 return XML_ROLE_CONTENT_PCDATA;
982 }
983 break;
984 case XML_TOK_OPEN_PAREN:
985 state->level = 2;
986 state->handler = element6;
987 return XML_ROLE_GROUP_OPEN;
988 case XML_TOK_NAME:
989 case XML_TOK_PREFIXED_NAME:
990 state->handler = element7;
991 return XML_ROLE_CONTENT_ELEMENT;
992 case XML_TOK_NAME_QUESTION:
993 state->handler = element7;
994 return XML_ROLE_CONTENT_ELEMENT_OPT;
995 case XML_TOK_NAME_ASTERISK:
996 state->handler = element7;
997 return XML_ROLE_CONTENT_ELEMENT_REP;
998 case XML_TOK_NAME_PLUS:
999 state->handler = element7;
1000 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1001 }
1002 return common(state, tok);
1003}
1004
1005static
1006int element3(PROLOG_STATE *state,
1007 int tok,
1008 const char *ptr,
1009 const char *end,
1010 const ENCODING *enc)
1011{
1012 switch (tok) {
1013 case XML_TOK_PROLOG_S:
1014 return XML_ROLE_NONE;
1015 case XML_TOK_CLOSE_PAREN:
1016 state->handler = declClose;
1017 return XML_ROLE_GROUP_CLOSE;
1018 case XML_TOK_CLOSE_PAREN_ASTERISK:
1019 state->handler = declClose;
1020 return XML_ROLE_GROUP_CLOSE_REP;
1021 case XML_TOK_OR:
1022 state->handler = element4;
1023 return XML_ROLE_NONE;
1024 }
1025 return common(state, tok);
1026}
1027
1028static
1029int element4(PROLOG_STATE *state,
1030 int tok,
1031 const char *ptr,
1032 const char *end,
1033 const ENCODING *enc)
1034{
1035 switch (tok) {
1036 case XML_TOK_PROLOG_S:
1037 return XML_ROLE_NONE;
1038 case XML_TOK_NAME:
1039 case XML_TOK_PREFIXED_NAME:
1040 state->handler = element5;
1041 return XML_ROLE_CONTENT_ELEMENT;
1042 }
1043 return common(state, tok);
1044}
1045
1046static
1047int element5(PROLOG_STATE *state,
1048 int tok,
1049 const char *ptr,
1050 const char *end,
1051 const ENCODING *enc)
1052{
1053 switch (tok) {
1054 case XML_TOK_PROLOG_S:
1055 return XML_ROLE_NONE;
1056 case XML_TOK_CLOSE_PAREN_ASTERISK:
1057 state->handler = declClose;
1058 return XML_ROLE_GROUP_CLOSE_REP;
1059 case XML_TOK_OR:
1060 state->handler = element4;
1061 return XML_ROLE_NONE;
1062 }
1063 return common(state, tok);
1064}
1065
1066static
1067int element6(PROLOG_STATE *state,
1068 int tok,
1069 const char *ptr,
1070 const char *end,
1071 const ENCODING *enc)
1072{
1073 switch (tok) {
1074 case XML_TOK_PROLOG_S:
1075 return XML_ROLE_NONE;
1076 case XML_TOK_OPEN_PAREN:
1077 state->level += 1;
1078 return XML_ROLE_GROUP_OPEN;
1079 case XML_TOK_NAME:
1080 case XML_TOK_PREFIXED_NAME:
1081 state->handler = element7;
1082 return XML_ROLE_CONTENT_ELEMENT;
1083 case XML_TOK_NAME_QUESTION:
1084 state->handler = element7;
1085 return XML_ROLE_CONTENT_ELEMENT_OPT;
1086 case XML_TOK_NAME_ASTERISK:
1087 state->handler = element7;
1088 return XML_ROLE_CONTENT_ELEMENT_REP;
1089 case XML_TOK_NAME_PLUS:
1090 state->handler = element7;
1091 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1092 }
1093 return common(state, tok);
1094}
1095
1096static
1097int element7(PROLOG_STATE *state,
1098 int tok,
1099 const char *ptr,
1100 const char *end,
1101 const ENCODING *enc)
1102{
1103 switch (tok) {
1104 case XML_TOK_PROLOG_S:
1105 return XML_ROLE_NONE;
1106 case XML_TOK_CLOSE_PAREN:
1107 state->level -= 1;
1108 if (state->level == 0)
1109 state->handler = declClose;
1110 return XML_ROLE_GROUP_CLOSE;
1111 case XML_TOK_CLOSE_PAREN_ASTERISK:
1112 state->level -= 1;
1113 if (state->level == 0)
1114 state->handler = declClose;
1115 return XML_ROLE_GROUP_CLOSE_REP;
1116 case XML_TOK_CLOSE_PAREN_QUESTION:
1117 state->level -= 1;
1118 if (state->level == 0)
1119 state->handler = declClose;
1120 return XML_ROLE_GROUP_CLOSE_OPT;
1121 case XML_TOK_CLOSE_PAREN_PLUS:
1122 state->level -= 1;
1123 if (state->level == 0)
1124 state->handler = declClose;
1125 return XML_ROLE_GROUP_CLOSE_PLUS;
1126 case XML_TOK_COMMA:
1127 state->handler = element6;
1128 return XML_ROLE_GROUP_SEQUENCE;
1129 case XML_TOK_OR:
1130 state->handler = element6;
1131 return XML_ROLE_GROUP_CHOICE;
1132 }
1133 return common(state, tok);
1134}
1135
1136#ifdef XML_DTD
1137
1138static
1139int condSect0(PROLOG_STATE *state,
1140 int tok,
1141 const char *ptr,
1142 const char *end,
1143 const ENCODING *enc)
1144{
1145 switch (tok) {
1146 case XML_TOK_PROLOG_S:
1147 return XML_ROLE_NONE;
1148 case XML_TOK_NAME:
1149 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1150 state->handler = condSect1;
1151 return XML_ROLE_NONE;
1152 }
1153 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1154 state->handler = condSect2;
1155 return XML_ROLE_NONE;
1156 }
1157 break;
1158 }
1159 return common(state, tok);
1160}
1161
1162static
1163int condSect1(PROLOG_STATE *state,
1164 int tok,
1165 const char *ptr,
1166 const char *end,
1167 const ENCODING *enc)
1168{
1169 switch (tok) {
1170 case XML_TOK_PROLOG_S:
1171 return XML_ROLE_NONE;
1172 case XML_TOK_OPEN_BRACKET:
1173 state->handler = externalSubset1;
1174 state->includeLevel += 1;
1175 return XML_ROLE_NONE;
1176 }
1177 return common(state, tok);
1178}
1179
1180static
1181int condSect2(PROLOG_STATE *state,
1182 int tok,
1183 const char *ptr,
1184 const char *end,
1185 const ENCODING *enc)
1186{
1187 switch (tok) {
1188 case XML_TOK_PROLOG_S:
1189 return XML_ROLE_NONE;
1190 case XML_TOK_OPEN_BRACKET:
1191 state->handler = externalSubset1;
1192 return XML_ROLE_IGNORE_SECT;
1193 }
1194 return common(state, tok);
1195}
1196
1197#endif /* XML_DTD */
1198
1199static
1200int declClose(PROLOG_STATE *state,
1201 int tok,
1202 const char *ptr,
1203 const char *end,
1204 const ENCODING *enc)
1205{
1206 switch (tok) {
1207 case XML_TOK_PROLOG_S:
1208 return XML_ROLE_NONE;
1209 case XML_TOK_DECL_CLOSE:
1210 setTopLevel(state);
1211 return XML_ROLE_NONE;
1212 }
1213 return common(state, tok);
1214}
1215
#if 0

/* Disabled handler (compiled out by the surrounding #if 0): skips every
   token until a declaration close, then returns to internalSubset.

   The unreachable trailing call to common() was removed, since every
   path through the switch, including the default, already returns.
   NOTE(review): the DECL_CLOSE branch returns a literal 0 rather than a
   named XML_ROLE_* constant; presumably this equals XML_ROLE_NONE --
   confirm against xmlrole.h before re-enabling. */
static
int ignore(PROLOG_STATE *state,
           int tok,
           const char *ptr,
           const char *end,
           const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_DECL_CLOSE:
    state->handler = internalSubset;
    return 0;
  default:
    return XML_ROLE_NONE;
  }
}
#endif
1235
1236static
1237int error(PROLOG_STATE *state,
1238 int tok,
1239 const char *ptr,
1240 const char *end,
1241 const ENCODING *enc)
1242{
1243 return XML_ROLE_NONE;
1244}
1245
1246static
1247int common(PROLOG_STATE *state, int tok)
1248{
1249#ifdef XML_DTD
1250 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1251 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1252#endif
1253 state->handler = error;
1254 return XML_ROLE_ERROR;
1255}
1256
1257void XmlPrologStateInit(PROLOG_STATE *state)
1258{
1259 state->handler = prolog0;
1260#ifdef XML_DTD
1261 state->documentEntity = 1;
1262 state->includeLevel = 0;
1263#endif /* XML_DTD */
1264}
1265
#ifdef XML_DTD

/* Prepares *state for an external entity: the first handler is
   externalSubset0, documentEntity is cleared and no conditional section
   is open. */
void
XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
{
  state->includeLevel = 0;
  state->documentEntity = 0;
  state->handler = externalSubset0;
}

#endif /* XML_DTD */