Daniel Veillard | 1e346af | 1999-02-22 10:33:01 +0000 | [diff] [blame] | 1 | /* |
| 2 | * parserInternals.h : internals routines exported by the parser. |
| 3 | * |
| 4 | * See Copyright for the status of this software. |
| 5 | * |
| 6 | * Daniel.Veillard@w3.org |
| 7 | */ |
| 8 | |
| 9 | #ifndef __XML_PARSER_INTERNALS_H__ |
| 10 | #define __XML_PARSER_INTERNALS_H__ |
| 11 | |
Daniel Veillard | 361d845 | 2000-04-03 19:48:13 +0000 | [diff] [blame] | 12 | #include <libxml/parser.h> |
Daniel Veillard | 1e346af | 1999-02-22 10:33:01 +0000 | [diff] [blame] | 13 | |
| 14 | #ifdef __cplusplus |
| 15 | extern "C" { |
| 16 | #endif |
| 17 | |
/*
 * XML_MAX_NAMELEN: numeric limit related to name lengths.
 * NOTE(review): presumably the size used for fixed name buffers in the
 * parser implementation -- confirm against its users before relying on it.
 */
#define XML_MAX_NAMELEN 100
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 19 | |
Daniel Veillard | 64068b3 | 1999-03-24 20:42:16 +0000 | [diff] [blame] | 20 | /************************************************************************ |
| 21 | * * |
| 22 | * UNICODE version of the macros. * |
| 23 | * * |
| 24 | ************************************************************************/ |
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 *
 * IS_CHAR(c) is non-zero when the code point c is a legal XML character:
 * TAB, LF, CR, or any value in one of the three ranges above.  The
 * surrogate blocks, #xFFFE and #xFFFF are therefore rejected.
 *
 * The argument is evaluated several times; do not pass an expression
 * with side effects.
 */
#define IS_CHAR(c)                                                      \
    ((c) == 0x09 || (c) == 0x0A || (c) == 0x0D ||                       \
     ((c) >= 0x20 && (c) <= 0xD7FF) ||                                  \
     ((c) >= 0xE000 && (c) <= 0xFFFD) ||                                \
     ((c) >= 0x10000 && (c) <= 0x10FFFF))
Daniel Veillard | 64068b3 | 1999-03-24 20:42:16 +0000 | [diff] [blame] | 35 | |
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK(c) is non-zero when c is one of the four white space
 * characters of the production above: space, TAB, LF or CR.
 *
 * The argument is evaluated several times; do not pass an expression
 * with side effects.
 */
#define IS_BLANK(c)                                                     \
    ((c) == 0x20 || (c) == 0x09 || (c) == 0x0A || (c) == 0x0D)
| 41 | |
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * IS_BASECHAR(c) forwards the test to the xmlIsBaseChar() function
 * declared further down in this header.
 */
#define IS_BASECHAR(c) (xmlIsBaseChar(c))
Daniel Veillard | 64068b3 | 1999-03-24 20:42:16 +0000 | [diff] [blame] | 46 | |
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * IS_DIGIT(c) forwards the test to the xmlIsDigit() function declared
 * further down in this header.
 */
#define IS_DIGIT(c) (xmlIsDigit(c))
Daniel Veillard | 64068b3 | 1999-03-24 20:42:16 +0000 | [diff] [blame] | 51 | |
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * IS_COMBINING(c) forwards the test to the xmlIsCombining() function
 * declared further down in this header.
 */
#define IS_COMBINING(c) (xmlIsCombining(c))
Daniel Veillard | 64068b3 | 1999-03-24 20:42:16 +0000 | [diff] [blame] | 56 | |
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * IS_EXTENDER(c) forwards the test to the xmlIsExtender() function
 * declared further down in this header.
 */
#define IS_EXTENDER(c) (xmlIsExtender(c))
Daniel Veillard | 64068b3 | 1999-03-24 20:42:16 +0000 | [diff] [blame] | 63 | |
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * IS_IDEOGRAPHIC(c) forwards the test to the xmlIsIdeographic() function
 * declared further down in this header.
 */
#define IS_IDEOGRAPHIC(c) (xmlIsIdeographic(c))
Daniel Veillard | 64068b3 | 1999-03-24 20:42:16 +0000 | [diff] [blame] | 68 | |
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * IS_LETTER(c) is 1 when either IS_BASECHAR(c) or IS_IDEOGRAPHIC(c) is
 * non-zero and 0 otherwise.  IS_BASECHAR is tried first and
 * IS_IDEOGRAPHIC only when it fails, so c may be evaluated twice.
 */
#define IS_LETTER(c)                                                    \
    (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
| 73 | |
Daniel Veillard | 64068b3 | 1999-03-24 20:42:16 +0000 | [diff] [blame] | 74 | |
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * IS_PUBIDCHAR(c) forwards the test to the xmlIsPubidChar() function
 * declared further down in this header.
 */
#define IS_PUBIDCHAR(c) (xmlIsPubidChar(c))
Daniel Veillard | 64068b3 | 1999-03-24 20:42:16 +0000 | [diff] [blame] | 79 | |
/*
 * SKIP_EOL(p): advance the pointer p past ONE end-of-line sequence if it
 * currently points at one.  The recognised sequences are CR, LF, CR LF
 * and LF CR; anything else leaves p untouched.
 *
 * Fixes relative to the previous definition:
 *  - the line terminators are CR (0x0D, decimal 13) and LF (0x0A,
 *    decimal 10); the old code compared against 0x13 and 0x10, which are
 *    the DC3 and DLE control characters, so real line ends never matched;
 *  - the two branches are now exclusive, so "CR LF LF" consumes only the
 *    first line end instead of two;
 *  - the body is wrapped in do { } while (0) so the macro behaves as a
 *    single statement (safe inside if/else), and p is parenthesised.
 *
 * p must be a modifiable pointer lvalue; it is evaluated several times.
 * Use as a statement followed by a semicolon: SKIP_EOL(ptr);
 */
#define SKIP_EOL(p)                                                     \
    do {                                                                \
        if (*(p) == 0x0D) {                                             \
            (p)++;                                                      \
            if (*(p) == 0x0A)                                           \
                (p)++;                                                  \
        } else if (*(p) == 0x0A) {                                      \
            (p)++;                                                      \
            if (*(p) == 0x0D)                                           \
                (p)++;                                                  \
        }                                                               \
    } while (0)
| 83 | |
/*
 * MOVETO_ENDTAG(p): advance the pointer p until it points at a '>'
 * character or at the terminating NUL, whichever comes first.
 *
 * p must be a modifiable pointer lvalue; it is evaluated several times.
 * The expansion is a while statement and expects a trailing semicolon.
 */
#define MOVETO_ENDTAG(p)                                                \
    while ((*(p) != 0) && (*(p) != '>')) (p)++
Daniel Veillard | 64068b3 | 1999-03-24 20:42:16 +0000 | [diff] [blame] | 86 | |
/*
 * MOVETO_STARTTAG(p): advance the pointer p until it points at a '<'
 * character or at the terminating NUL, whichever comes first.
 *
 * p must be a modifiable pointer lvalue; it is evaluated several times.
 * The expansion is a while statement and expects a trailing semicolon.
 */
#define MOVETO_STARTTAG(p)                                              \
    while ((*(p) != 0) && (*(p) != '<')) (p)++
Daniel Veillard | 011b63c | 1999-06-02 17:44:04 +0000 | [diff] [blame] | 89 | |
/*
 * Functions that finish the work of the character-class macros where
 * needed.  Each one takes a character code as an int and returns an int.
 * NOTE(review): the result is presumably non-zero when c belongs to the
 * class named by the function -- confirm against the implementation.
 *
 * xmlIsCombining() used to be declared twice in this list; the redundant
 * second declaration has been dropped.
 */
int xmlIsBaseChar(int c);
int xmlIsBlank(int c);
int xmlIsPubidChar(int c);
int xmlIsLetter(int c);
int xmlIsDigit(int c);
int xmlIsIdeographic(int c);
int xmlIsCombining(int c);
int xmlIsExtender(int c);
int xmlIsChar(int c);
| 103 | |
/**
 * Not for the faint of heart.
 *
 * Global int that is only declared here; its definition lives in another
 * translation unit.
 * NOTE(review): the name suggests a switch enabling debug output for
 * entity handling -- confirm against the parser sources.
 */
extern int xmlParserDebugEntities;
| 109 | |
| 110 | /** |
Daniel Veillard | d692aa4 | 1999-02-28 21:54:31 +0000 | [diff] [blame] | 111 | * Parser context |
| 112 | */ |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 113 | xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 114 | xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename); |
| 115 | xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer, |
| 116 | int size); |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 117 | xmlParserCtxtPtr xmlNewParserCtxt (void); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 118 | xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL, |
| 119 | const xmlChar *ID, |
| 120 | const xmlChar *base); |
Daniel Veillard | 496a1cf | 2000-05-03 14:20:55 +0000 | [diff] [blame] | 121 | int xmlSwitchEncoding (xmlParserCtxtPtr ctxt, |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 122 | xmlCharEncoding enc); |
Daniel Veillard | 496a1cf | 2000-05-03 14:20:55 +0000 | [diff] [blame] | 123 | int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt, |
| 124 | xmlCharEncodingHandlerPtr handler); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 125 | void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); |
Daniel Veillard | d692aa4 | 1999-02-28 21:54:31 +0000 | [diff] [blame] | 126 | |
Daniel Veillard | bc50b59 | 1999-03-01 12:28:53 +0000 | [diff] [blame] | 127 | /** |
Daniel Veillard | 1e346af | 1999-02-22 10:33:01 +0000 | [diff] [blame] | 128 | * Entities |
| 129 | */ |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 130 | void xmlHandleEntity (xmlParserCtxtPtr ctxt, |
| 131 | xmlEntityPtr entity); |
Daniel Veillard | 1e346af | 1999-02-22 10:33:01 +0000 | [diff] [blame] | 132 | |
Daniel Veillard | bc50b59 | 1999-03-01 12:28:53 +0000 | [diff] [blame] | 133 | /** |
| 134 | * Input Streams |
| 135 | */ |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 136 | xmlParserInputPtr xmlNewEntityInputStream (xmlParserCtxtPtr ctxt, |
| 137 | xmlEntityPtr entity); |
| 138 | void xmlPushInput (xmlParserCtxtPtr ctxt, |
| 139 | xmlParserInputPtr input); |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 140 | xmlChar xmlPopInput (xmlParserCtxtPtr ctxt); |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 141 | void xmlFreeInputStream (xmlParserInputPtr input); |
| 142 | xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt, |
| 143 | const char *filename); |
Daniel Veillard | bc50b59 | 1999-03-01 12:28:53 +0000 | [diff] [blame] | 144 | |
| 145 | /** |
Daniel Veillard | 1e346af | 1999-02-22 10:33:01 +0000 | [diff] [blame] | 146 | * Namespaces. |
| 147 | */ |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 148 | xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt, |
| 149 | const xmlChar *name, |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 150 | xmlChar **prefix); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 151 | xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt); |
| 152 | xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt, |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 153 | xmlChar **prefix); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 154 | xmlChar * xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt); |
| 155 | xmlChar * xmlParseQuotedString (xmlParserCtxtPtr ctxt); |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 156 | void xmlParseNamespace (xmlParserCtxtPtr ctxt); |
Daniel Veillard | 1e346af | 1999-02-22 10:33:01 +0000 | [diff] [blame] | 157 | |
Daniel Veillard | bc50b59 | 1999-03-01 12:28:53 +0000 | [diff] [blame] | 158 | /** |
Daniel Veillard | 1e346af | 1999-02-22 10:33:01 +0000 | [diff] [blame] | 159 | * Generic production rules |
| 160 | */ |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 161 | xmlChar * xmlScanName (xmlParserCtxtPtr ctxt); |
| 162 | xmlChar * xmlParseName (xmlParserCtxtPtr ctxt); |
| 163 | xmlChar * xmlParseNmtoken (xmlParserCtxtPtr ctxt); |
| 164 | xmlChar * xmlParseEntityValue (xmlParserCtxtPtr ctxt, |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 165 | xmlChar **orig); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 166 | xmlChar * xmlParseAttValue (xmlParserCtxtPtr ctxt); |
| 167 | xmlChar * xmlParseSystemLiteral (xmlParserCtxtPtr ctxt); |
| 168 | xmlChar * xmlParsePubidLiteral (xmlParserCtxtPtr ctxt); |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 169 | void xmlParseCharData (xmlParserCtxtPtr ctxt, |
| 170 | int cdata); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 171 | xmlChar * xmlParseExternalID (xmlParserCtxtPtr ctxt, |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 172 | xmlChar **publicID, |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 173 | int strict); |
| 174 | void xmlParseComment (xmlParserCtxtPtr ctxt); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 175 | xmlChar * xmlParsePITarget (xmlParserCtxtPtr ctxt); |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 176 | void xmlParsePI (xmlParserCtxtPtr ctxt); |
| 177 | void xmlParseNotationDecl (xmlParserCtxtPtr ctxt); |
| 178 | void xmlParseEntityDecl (xmlParserCtxtPtr ctxt); |
| 179 | int xmlParseDefaultDecl (xmlParserCtxtPtr ctxt, |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 180 | xmlChar **value); |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 181 | xmlEnumerationPtr xmlParseNotationType (xmlParserCtxtPtr ctxt); |
| 182 | xmlEnumerationPtr xmlParseEnumerationType (xmlParserCtxtPtr ctxt); |
| 183 | int xmlParseEnumeratedType (xmlParserCtxtPtr ctxt, |
| 184 | xmlEnumerationPtr *tree); |
| 185 | int xmlParseAttributeType (xmlParserCtxtPtr ctxt, |
| 186 | xmlEnumerationPtr *tree); |
| 187 | void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt); |
| 188 | xmlElementContentPtr xmlParseElementMixedContentDecl |
| 189 | (xmlParserCtxtPtr ctxt); |
| 190 | xmlElementContentPtr xmlParseElementChildrenContentDecl |
| 191 | (xmlParserCtxtPtr ctxt); |
| 192 | int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 193 | xmlChar *name, |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 194 | xmlElementContentPtr *result); |
| 195 | int xmlParseElementDecl (xmlParserCtxtPtr ctxt); |
| 196 | void xmlParseMarkupDecl (xmlParserCtxtPtr ctxt); |
| 197 | int xmlParseCharRef (xmlParserCtxtPtr ctxt); |
| 198 | xmlEntityPtr xmlParseEntityRef (xmlParserCtxtPtr ctxt); |
| 199 | void xmlParseReference (xmlParserCtxtPtr ctxt); |
| 200 | void xmlParsePEReference (xmlParserCtxtPtr ctxt); |
| 201 | void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 202 | xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt, |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 203 | xmlChar **value); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 204 | xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt); |
Daniel Veillard | dbfd641 | 1999-12-28 16:35:14 +0000 | [diff] [blame] | 205 | void xmlParseEndTag (xmlParserCtxtPtr ctxt); |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 206 | void xmlParseCDSect (xmlParserCtxtPtr ctxt); |
| 207 | void xmlParseContent (xmlParserCtxtPtr ctxt); |
| 208 | void xmlParseElement (xmlParserCtxtPtr ctxt); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 209 | xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt); |
| 210 | xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt); |
| 211 | xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt); |
| 212 | xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 213 | int xmlParseSDDecl (xmlParserCtxtPtr ctxt); |
| 214 | void xmlParseXMLDecl (xmlParserCtxtPtr ctxt); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 215 | void xmlParseTextDecl (xmlParserCtxtPtr ctxt); |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 216 | void xmlParseMisc (xmlParserCtxtPtr ctxt); |
| 217 | void xmlParseExternalSubset (xmlParserCtxtPtr ctxt, |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 218 | const xmlChar *ExternalID, |
| 219 | const xmlChar *SystemID); |
/*
 * Entities substitution.
 *
 * Selector values for entity substitution.  XML_SUBSTITUTE_BOTH has the
 * same value as XML_SUBSTITUTE_REF | XML_SUBSTITUTE_PEREF.
 * NOTE(review): presumably these are the values accepted by the "what"
 * argument of the entity decoding routines -- confirm against them.
 */
#define XML_SUBSTITUTE_NONE  0
#define XML_SUBSTITUTE_REF   1
#define XML_SUBSTITUTE_PEREF 2
#define XML_SUBSTITUTE_BOTH  3
| 227 | |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 228 | xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt, |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 229 | int len, |
| 230 | int what, |
Daniel Veillard | dd6b367 | 1999-09-23 22:19:22 +0000 | [diff] [blame] | 231 | xmlChar end, |
| 232 | xmlChar end2, |
| 233 | xmlChar end3); |
Daniel Veillard | cf46199 | 2000-03-14 18:30:20 +0000 | [diff] [blame] | 234 | xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt, |
| 235 | const xmlChar *str, |
| 236 | int what, |
| 237 | xmlChar end, |
| 238 | xmlChar end2, |
| 239 | xmlChar end3); |
Daniel Veillard | 011b63c | 1999-06-02 17:44:04 +0000 | [diff] [blame] | 240 | |
| 241 | /* |
Daniel Veillard | 517752b | 1999-04-05 12:20:10 +0000 | [diff] [blame] | 242 | * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP |
| 243 | */ |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 244 | int nodePush (xmlParserCtxtPtr ctxt, |
| 245 | xmlNodePtr value); |
| 246 | xmlNodePtr nodePop (xmlParserCtxtPtr ctxt); |
| 247 | int inputPush (xmlParserCtxtPtr ctxt, |
| 248 | xmlParserInputPtr value); |
| 249 | xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt); |
Daniel Veillard | 1e346af | 1999-02-22 10:33:01 +0000 | [diff] [blame] | 250 | |
Daniel Veillard | 32bc74e | 2000-07-14 14:49:25 +0000 | [diff] [blame] | 251 | /* |
| 252 | * Really core function shared with HTML parser |
| 253 | */ |
| 254 | int xmlCurrentChar (xmlParserCtxtPtr ctxt, |
| 255 | int *len); |
| 256 | int xmlCopyChar (int len, |
| 257 | xmlChar *out, |
| 258 | int val); |
| 259 | void xmlNextChar (xmlParserCtxtPtr ctxt); |
| 260 | void xmlParserInputShrink (xmlParserInputPtr in); |
Daniel Veillard | c08a2c6 | 1999-09-08 21:35:25 +0000 | [diff] [blame] | 261 | #ifdef __cplusplus |
| 262 | } |
| 263 | #endif |
Daniel Veillard | 1e346af | 1999-02-22 10:33:01 +0000 | [diff] [blame] | 264 | #endif /* __XML_PARSER_INTERNALS_H__ */ |