Victor Stinner | 759e30e | 2017-09-05 01:58:08 +0200 | [diff] [blame] | 1 | /* This file is included! |
| 2 | __ __ _ |
| 3 | ___\ \/ /_ __ __ _| |_ |
| 4 | / _ \\ /| '_ \ / _` | __| |
| 5 | | __// \| |_) | (_| | |_ |
| 6 | \___/_/\_\ .__/ \__,_|\__| |
| 7 | |_| XML parser |
| 8 | |
| 9 | Copyright (c) 1997-2000 Thai Open Source Software Center Ltd |
| 10 | Copyright (c) 2000-2017 Expat development team |
| 11 | Licensed under the MIT license: |
| 12 | |
| 13 | Permission is hereby granted, free of charge, to any person obtaining |
| 14 | a copy of this software and associated documentation files (the |
| 15 | "Software"), to deal in the Software without restriction, including |
| 16 | without limitation the rights to use, copy, modify, merge, publish, |
| 17 | distribute, sublicense, and/or sell copies of the Software, and to permit |
| 18 | persons to whom the Software is furnished to do so, subject to the |
| 19 | following conditions: |
| 20 | |
| 21 | The above copyright notice and this permission notice shall be included |
| 22 | in all copies or substantial portions of the Software. |
| 23 | |
| 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 25 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 26 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN |
| 27 | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
| 28 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| 29 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| 30 | USE OR OTHER DEALINGS IN THE SOFTWARE. |
Gregory P. Smith | 7c6309c | 2012-07-14 14:12:35 -0700 | [diff] [blame] | 31 | */ |
| 32 | |
Gregory P. Smith | 7c6309c | 2012-07-14 14:12:35 -0700 | [diff] [blame] | 33 | #ifdef XML_TOK_NS_C |
| 34 | |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 35 | const ENCODING * |
| 36 | NS(XmlGetUtf8InternalEncoding)(void) |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 37 | { |
| 38 | return &ns(internal_utf8_encoding).enc; |
| 39 | } |
| 40 | |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 41 | const ENCODING * |
| 42 | NS(XmlGetUtf16InternalEncoding)(void) |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 43 | { |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 44 | #if BYTEORDER == 1234 |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 45 | return &ns(internal_little2_encoding).enc; |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 46 | #elif BYTEORDER == 4321 |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 47 | return &ns(internal_big2_encoding).enc; |
| 48 | #else |
| 49 | const short n = 1; |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 50 | return (*(const char *)&n |
| 51 | ? &ns(internal_little2_encoding).enc |
| 52 | : &ns(internal_big2_encoding).enc); |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 53 | #endif |
| 54 | } |
| 55 | |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 56 | static const ENCODING * const NS(encodings)[] = { |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 57 | &ns(latin1_encoding).enc, |
| 58 | &ns(ascii_encoding).enc, |
| 59 | &ns(utf8_encoding).enc, |
| 60 | &ns(big2_encoding).enc, |
| 61 | &ns(big2_encoding).enc, |
| 62 | &ns(little2_encoding).enc, |
| 63 | &ns(utf8_encoding).enc /* NO_ENC */ |
| 64 | }; |
| 65 | |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 66 | static int PTRCALL |
| 67 | NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, |
| 68 | const char **nextTokPtr) |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 69 | { |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 70 | return initScan(NS(encodings), (const INIT_ENCODING *)enc, |
| 71 | XML_PROLOG_STATE, ptr, end, nextTokPtr); |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 72 | } |
| 73 | |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 74 | static int PTRCALL |
| 75 | NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, |
| 76 | const char **nextTokPtr) |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 77 | { |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 78 | return initScan(NS(encodings), (const INIT_ENCODING *)enc, |
| 79 | XML_CONTENT_STATE, ptr, end, nextTokPtr); |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 80 | } |
| 81 | |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 82 | int |
| 83 | NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, |
| 84 | const char *name) |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 85 | { |
| 86 | int i = getEncodingIndex(name); |
| 87 | if (i == UNKNOWN_ENC) |
| 88 | return 0; |
| 89 | SET_INIT_ENC_INDEX(p, i); |
| 90 | p->initEnc.scanners[XML_PROLOG_STATE] = NS(initScanProlog); |
| 91 | p->initEnc.scanners[XML_CONTENT_STATE] = NS(initScanContent); |
| 92 | p->initEnc.updatePosition = initUpdatePosition; |
| 93 | p->encPtr = encPtr; |
| 94 | *encPtr = &(p->initEnc); |
| 95 | return 1; |
| 96 | } |
| 97 | |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 98 | static const ENCODING * |
| 99 | NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 100 | { |
| 101 | #define ENCODING_MAX 128 |
| 102 | char buf[ENCODING_MAX]; |
| 103 | char *p = buf; |
| 104 | int i; |
| 105 | XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); |
| 106 | if (ptr != end) |
| 107 | return 0; |
| 108 | *p = 0; |
| 109 | if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) |
| 110 | return enc; |
| 111 | i = getEncodingIndex(buf); |
| 112 | if (i == UNKNOWN_ENC) |
| 113 | return 0; |
| 114 | return NS(encodings)[i]; |
| 115 | } |
| 116 | |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 117 | int |
| 118 | NS(XmlParseXmlDecl)(int isGeneralTextEntity, |
| 119 | const ENCODING *enc, |
| 120 | const char *ptr, |
| 121 | const char *end, |
| 122 | const char **badPtr, |
| 123 | const char **versionPtr, |
| 124 | const char **versionEndPtr, |
| 125 | const char **encodingName, |
| 126 | const ENCODING **encoding, |
| 127 | int *standalone) |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 128 | { |
| 129 | return doParseXmlDecl(NS(findEncoding), |
Martin v. Löwis | fc03a94 | 2003-01-25 22:41:29 +0000 | [diff] [blame] | 130 | isGeneralTextEntity, |
| 131 | enc, |
| 132 | ptr, |
| 133 | end, |
| 134 | badPtr, |
| 135 | versionPtr, |
| 136 | versionEndPtr, |
| 137 | encodingName, |
| 138 | encoding, |
| 139 | standalone); |
Martin v. Löwis | 1dbb1ca | 2002-02-11 23:13:04 +0000 | [diff] [blame] | 140 | } |
Gregory P. Smith | 7c6309c | 2012-07-14 14:12:35 -0700 | [diff] [blame] | 141 | |
| 142 | #endif /* XML_TOK_NS_C */ |