blob: 58dce9091ccdfd397d47bf08aa8a123057913b07 [file] [log] [blame]
Elliott Hughes72472942018-01-10 08:36:10 -08001/*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
The Android Open Source Projectb80e2872009-03-03 19:29:30 -080031*/
32
33#include <stddef.h>
Haibo Huang40a71912019-10-11 11:13:39 -070034#include <string.h> /* memcpy */
Elliott Hughesf648a292021-04-01 15:10:13 -070035#include <stdbool.h>
Elliott Hughes72472942018-01-10 08:36:10 -080036
Elliott Hughes72472942018-01-10 08:36:10 -080037#ifdef _WIN32
Haibo Huang40a71912019-10-11 11:13:39 -070038# include "winconfig.h"
The Android Open Source Projectb80e2872009-03-03 19:29:30 -080039#else
Haibo Huang40a71912019-10-11 11:13:39 -070040# ifdef HAVE_EXPAT_CONFIG_H
41# include <expat_config.h>
42# endif
Elliott Hughes72472942018-01-10 08:36:10 -080043#endif /* ndef _WIN32 */
The Android Open Source Projectb80e2872009-03-03 19:29:30 -080044
45#include "expat_external.h"
46#include "internal.h"
47#include "xmltok.h"
48#include "nametab.h"
49
50#ifdef XML_DTD
Haibo Huang40a71912019-10-11 11:13:39 -070051# define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -080052#else
Haibo Huang40a71912019-10-11 11:13:39 -070053# define IGNORE_SECTION_TOK_VTABLE /* as nothing */
The Android Open Source Projectb80e2872009-03-03 19:29:30 -080054#endif
55
Haibo Huang40a71912019-10-11 11:13:39 -070056#define VTABLE1 \
57 {PREFIX(prologTok), PREFIX(contentTok), \
58 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE}, \
59 {PREFIX(attributeValueTok), PREFIX(entityValueTok)}, \
60 PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS), \
61 PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName), \
62 PREFIX(updatePosition), PREFIX(isPublicId)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -080063
64#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
65
/* Tests one bit of namingBitmap for the 16-bit code unit (hi << 8 | lo).
   pages[hi] selects a group of namingBitmap words (scaled by 8), the bits
   of lo above the low five select the word inside that group, and the low
   five bits of lo select the bit inside the word.  Evaluates to nonzero
   when the bit is set.  Every parameter is parenthesized so that arbitrary
   expressions can be passed safely. */
#define UCS2_GET_NAMING(pages, hi, lo)                                         \
  (namingBitmap[((pages)[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F)))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -080068
69/* A 2 byte UTF-8 representation splits the characters 11 bits between
70 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
71 pages, 3 bits to add to that index and 5 bits to generate the mask.
72*/
Haibo Huang40a71912019-10-11 11:13:39 -070073#define UTF8_GET_NAMING2(pages, byte) \
74 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
75 + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)] \
76 & (1u << (((byte)[1]) & 0x1F)))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -080077
78/* A 3 byte UTF-8 representation splits the characters 16 bits between
79 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
80 into pages, 3 bits to add to that index and 5 bits to generate the
81 mask.
82*/
Haibo Huang40a71912019-10-11 11:13:39 -070083#define UTF8_GET_NAMING3(pages, byte) \
84 (namingBitmap \
85 [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)] \
86 << 3) \
87 + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
88 & (1u << (((byte)[2]) & 0x1F)))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -080089
Haibo Huang40a71912019-10-11 11:13:39 -070090#define UTF8_GET_NAMING(pages, p, n) \
91 ((n) == 2 \
92 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
93 : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -080094
95/* Detection of invalid UTF-8 sequences is based on Table 3.1B
96 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
97 with the additional restriction of not allowing the Unicode
98 code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
99 Implementation details:
100 (A & 0x80) == 0 means A < 0x80
101 and
102 (A & 0xC0) == 0xC0 means A > 0xBF
103*/
104
/* Nonzero when the two bytes at p (a pointer to unsigned char) are not a
   well-formed 2-byte UTF-8 sequence: the lead byte is below 0xC2, or the
   second byte lies outside 0x80..0xBF.  The parameter is parenthesized at
   every use so pointer expressions such as buf + 1 work. */
#define UTF8_INVALID2(p)                                                       \
  ((p)[0] < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
107
/* Nonzero when the three bytes at p (a pointer to unsigned char) are not
   an acceptable 3-byte UTF-8 sequence.  Checks, in order:
     - the third byte must be in 0x80..0xBF, and after EF BF it must not
       exceed 0xBD (rejects U+FFFE and U+FFFF);
     - after lead byte E0 the second byte must be in 0xA0..0xBF (rejects
       overlong forms);
     - after lead byte ED the second byte must be in 0x80..0x9F (rejects
       surrogates);
     - otherwise the second byte must be in 0x80..0xBF.
   The parameter is parenthesized at every use so pointer expressions such
   as buf + 1 work. */
#define UTF8_INVALID3(p)                                                       \
  (((p)[2] & 0x80) == 0                                                        \
   || ((p)[0] == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD                        \
                                        : ((p)[2] & 0xC0) == 0xC0)             \
   || ((p)[0] == 0xE0                                                          \
           ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((p)[0] == 0xED ? (p)[1] > 0x9F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800116
/* Nonzero when the four bytes at p (a pointer to unsigned char) are not a
   well-formed 4-byte UTF-8 sequence.  The third and fourth bytes must be
   in 0x80..0xBF.  The second byte must be in 0x90..0xBF after lead byte
   F0 (rejects overlong forms), in 0x80..0x8F after lead byte F4 (rejects
   values above U+10FFFF), and in 0x80..0xBF otherwise.  The parameter is
   parenthesized at every use so pointer expressions such as buf + 1
   work. */
#define UTF8_INVALID4(p)                                                       \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0     \
   || ((p)[2] & 0xC0) == 0xC0                                                  \
   || ((p)[0] == 0xF0                                                          \
           ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((p)[0] == 0xF4 ? (p)[1] > 0x8F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800124
125static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700126isNever(const ENCODING *enc, const char *p) {
127 UNUSED_P(enc);
128 UNUSED_P(p);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800129 return 0;
130}
131
132static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700133utf8_isName2(const ENCODING *enc, const char *p) {
134 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800135 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
136}
137
138static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700139utf8_isName3(const ENCODING *enc, const char *p) {
140 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800141 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
142}
143
144#define utf8_isName4 isNever
145
146static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700147utf8_isNmstrt2(const ENCODING *enc, const char *p) {
148 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800149 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
150}
151
152static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700153utf8_isNmstrt3(const ENCODING *enc, const char *p) {
154 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800155 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
156}
157
158#define utf8_isNmstrt4 isNever
159
160static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700161utf8_isInvalid2(const ENCODING *enc, const char *p) {
162 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800163 return UTF8_INVALID2((const unsigned char *)p);
164}
165
166static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700167utf8_isInvalid3(const ENCODING *enc, const char *p) {
168 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800169 return UTF8_INVALID3((const unsigned char *)p);
170}
171
172static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700173utf8_isInvalid4(const ENCODING *enc, const char *p) {
174 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800175 return UTF8_INVALID4((const unsigned char *)p);
176}
177
178struct normal_encoding {
179 ENCODING enc;
180 unsigned char type[256];
181#ifdef XML_MIN_SIZE
Haibo Huang40a71912019-10-11 11:13:39 -0700182 int(PTRFASTCALL *byteType)(const ENCODING *, const char *);
183 int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
184 int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
185 int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
186 int(PTRCALL *charMatches)(const ENCODING *, const char *, int);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800187#endif /* XML_MIN_SIZE */
Haibo Huang40a71912019-10-11 11:13:39 -0700188 int(PTRFASTCALL *isName2)(const ENCODING *, const char *);
189 int(PTRFASTCALL *isName3)(const ENCODING *, const char *);
190 int(PTRFASTCALL *isName4)(const ENCODING *, const char *);
191 int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
192 int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
193 int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
194 int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
195 int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
196 int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800197};
198
Haibo Huang40a71912019-10-11 11:13:39 -0700199#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800200
201#ifdef XML_MIN_SIZE
202
Haibo Huang40a71912019-10-11 11:13:39 -0700203# define STANDARD_VTABLE(E) \
204 E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches,
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800205
206#else
207
Haibo Huang40a71912019-10-11 11:13:39 -0700208# define STANDARD_VTABLE(E) /* as nothing */
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800209
210#endif
211
Haibo Huang40a71912019-10-11 11:13:39 -0700212#define NORMAL_VTABLE(E) \
213 E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3, \
214 E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800215
Haibo Huang40a71912019-10-11 11:13:39 -0700216#define NULL_VTABLE \
217 /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL, \
218 /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \
219 /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL
Paul Duffinba34a0c2017-02-27 14:40:16 +0000220
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800221static int FASTCALL checkCharRefNumber(int);
222
223#include "xmltok_impl.h"
224#include "ascii.h"
225
226#ifdef XML_MIN_SIZE
Haibo Huang40a71912019-10-11 11:13:39 -0700227# define sb_isNameMin isNever
228# define sb_isNmstrtMin isNever
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800229#endif
230
231#ifdef XML_MIN_SIZE
Haibo Huang40a71912019-10-11 11:13:39 -0700232# define MINBPC(enc) ((enc)->minBytesPerChar)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800233#else
234/* minimum bytes per character */
Haibo Huang40a71912019-10-11 11:13:39 -0700235# define MINBPC(enc) 1
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800236#endif
237
/* Byte type of the single byte at p, read from the 256-entry type table of
   the normal_encoding that enc points to.  The byte is converted to
   unsigned char before indexing so values above 0x7F index correctly.  The
   cast keeps the const qualifier of enc, as AS_NORMAL_ENCODING does. */
#define SB_BYTE_TYPE(enc, p)                                                   \
  (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
240
241#ifdef XML_MIN_SIZE
242static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700243sb_byteType(const ENCODING *enc, const char *p) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800244 return SB_BYTE_TYPE(enc, p);
245}
Haibo Huang40a71912019-10-11 11:13:39 -0700246# define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800247#else
Haibo Huang40a71912019-10-11 11:13:39 -0700248# define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800249#endif
250
251#ifdef XML_MIN_SIZE
Haibo Huang40a71912019-10-11 11:13:39 -0700252# define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800253static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700254sb_byteToAscii(const ENCODING *enc, const char *p) {
255 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800256 return *p;
257}
258#else
Haibo Huang40a71912019-10-11 11:13:39 -0700259# define BYTE_TO_ASCII(enc, p) (*(p))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800260#endif
261
Haibo Huang40a71912019-10-11 11:13:39 -0700262#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
263#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))
264#define IS_INVALID_CHAR(enc, p, n) \
265 (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800266
267#ifdef XML_MIN_SIZE
Haibo Huang40a71912019-10-11 11:13:39 -0700268# define IS_NAME_CHAR_MINBPC(enc, p) \
269 (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
270# define IS_NMSTRT_CHAR_MINBPC(enc, p) \
271 (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800272#else
Haibo Huang40a71912019-10-11 11:13:39 -0700273# define IS_NAME_CHAR_MINBPC(enc, p) (0)
274# define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800275#endif
276
277#ifdef XML_MIN_SIZE
Haibo Huang40a71912019-10-11 11:13:39 -0700278# define CHAR_MATCHES(enc, p, c) \
279 (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800280static int PTRCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700281sb_charMatches(const ENCODING *enc, const char *p, int c) {
282 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800283 return *p == c;
284}
285#else
286/* c is an ASCII character */
Haibo Huang40a71912019-10-11 11:13:39 -0700287# define CHAR_MATCHES(enc, p, c) (*(p) == c)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800288#endif
289
Haibo Huang40a71912019-10-11 11:13:39 -0700290#define PREFIX(ident) normal_##ident
Elliott Hughesd07d5a72009-09-25 16:04:37 -0700291#define XML_TOK_IMPL_C
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800292#include "xmltok_impl.c"
Elliott Hughesd07d5a72009-09-25 16:04:37 -0700293#undef XML_TOK_IMPL_C
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800294
295#undef MINBPC
296#undef BYTE_TYPE
297#undef BYTE_TO_ASCII
298#undef CHAR_MATCHES
299#undef IS_NAME_CHAR
300#undef IS_NAME_CHAR_MINBPC
301#undef IS_NMSTRT_CHAR
302#undef IS_NMSTRT_CHAR_MINBPC
303#undef IS_INVALID_CHAR
304
/* UTF8_cvalN is the value of the masked first byte of an N-byte UTF-8
   sequence. */
enum {
  UTF8_cval1 = 0x00, /* 0xxxxxxx */
  UTF8_cval2 = 0xC0, /* 110xxxxx */
  UTF8_cval3 = 0xE0, /* 1110xxxx */
  UTF8_cval4 = 0xF0  /* 11110xxx */
};
311
/* Classifies a byte by the UTF-8 bit pattern of a sequence's first byte.
   Returns 1 for 0b0xxxxxxx (single-byte character), 2 for 0b110xxxxx,
   3 for 0b1110xxxx, 4 for 0b11110xxx, and 0 for any other byte (for
   example a continuation byte 0b10xxxxxx). */
static size_t
utf8_sequence_length_from_lead(unsigned char b) {
  if ((b & 0x80u) == 0x00u)
    return 1;
  if ((b & 0xe0u) == 0xc0u)
    return 2;
  if ((b & 0xf0u) == 0xe0u)
    return 3;
  if ((b & 0xf8u) == 0xf0u)
    return 4;
  return 0;
}

/* Moves *fromLimRef backwards so that the range [from, *fromLimRef) does
   not end in the middle of a multi-byte UTF-8 character.

   from        start of the buffer; the scan never goes below it.
   fromLimRef  in: one past the last available byte;
               out: one past the last complete character found.

   The buffer is scanned backwards from the end, counting bytes in
   `walked`.  On reaching a lead byte, the bytes walked so far (plus the
   lead byte itself) are compared with the length that lead byte
   announces: if enough bytes are present the limit is placed right after
   that character, otherwise the partial character is dropped and the scan
   continues.  A single-byte character stops the scan at once. */
void
_INTERNAL_trim_to_complete_utf8_characters(const char *from,
                                           const char **fromLimRef) {
  const char *fromLim = *fromLimRef;
  size_t walked = 0;
  for (; fromLim > from; fromLim--, walked++) {
    const unsigned char prev = (unsigned char)fromLim[-1];
    const size_t seqLen = utf8_sequence_length_from_lead(prev);
    if (seqLen == 1) {
      break; /* single-byte character: nothing to trim at this point */
    }
    if (seqLen == 0) {
      continue; /* not a lead byte: keep walking back */
    }
    if (walked + 1 >= seqLen) {
      /* fromLim is just past the lead byte; keep the whole character */
      fromLim += seqLen - 1;
      break;
    }
    /* incomplete character: drop it and restart the count */
    walked = 0;
  }
  *fromLimRef = fromLim;
}
350
Paul Duffin7b64b722016-05-13 12:35:25 +0100351static enum XML_Convert_Result PTRCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700352utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
353 char **toP, const char *toLim) {
Elliott Hughes72472942018-01-10 08:36:10 -0800354 bool input_incomplete = false;
355 bool output_exhausted = false;
Paul Duffin7b64b722016-05-13 12:35:25 +0100356
Elliott Hughes72472942018-01-10 08:36:10 -0800357 /* Avoid copying partial characters (due to limited space). */
358 const ptrdiff_t bytesAvailable = fromLim - *fromP;
359 const ptrdiff_t bytesStorable = toLim - *toP;
Haibo Huang40a71912019-10-11 11:13:39 -0700360 UNUSED_P(enc);
Elliott Hughes72472942018-01-10 08:36:10 -0800361 if (bytesAvailable > bytesStorable) {
362 fromLim = *fromP + bytesStorable;
363 output_exhausted = true;
364 }
365
366 /* Avoid copying partial characters (from incomplete input). */
367 {
Haibo Huang40a71912019-10-11 11:13:39 -0700368 const char *const fromLimBefore = fromLim;
Elliott Hughes72472942018-01-10 08:36:10 -0800369 _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
370 if (fromLim < fromLimBefore) {
371 input_incomplete = true;
372 }
373 }
374
375 {
376 const ptrdiff_t bytesToCopy = fromLim - *fromP;
377 memcpy(*toP, *fromP, bytesToCopy);
378 *fromP += bytesToCopy;
379 *toP += bytesToCopy;
380 }
381
Haibo Huang40a71912019-10-11 11:13:39 -0700382 if (output_exhausted) /* needs to go first */
Paul Duffin7b64b722016-05-13 12:35:25 +0100383 return XML_CONVERT_OUTPUT_EXHAUSTED;
Elliott Hughes72472942018-01-10 08:36:10 -0800384 else if (input_incomplete)
385 return XML_CONVERT_INPUT_INCOMPLETE;
Paul Duffin7b64b722016-05-13 12:35:25 +0100386 else
Elliott Hughes72472942018-01-10 08:36:10 -0800387 return XML_CONVERT_COMPLETED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800388}
389
Paul Duffin7b64b722016-05-13 12:35:25 +0100390static enum XML_Convert_Result PTRCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700391utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
392 unsigned short **toP, const unsigned short *toLim) {
Paul Duffin7b64b722016-05-13 12:35:25 +0100393 enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800394 unsigned short *to = *toP;
395 const char *from = *fromP;
Paul Duffin7b64b722016-05-13 12:35:25 +0100396 while (from < fromLim && to < toLim) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800397 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
398 case BT_LEAD2:
Paul Duffin7b64b722016-05-13 12:35:25 +0100399 if (fromLim - from < 2) {
400 res = XML_CONVERT_INPUT_INCOMPLETE;
Elliott Hughes72472942018-01-10 08:36:10 -0800401 goto after;
Paul Duffin7b64b722016-05-13 12:35:25 +0100402 }
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800403 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
404 from += 2;
405 break;
406 case BT_LEAD3:
Paul Duffin7b64b722016-05-13 12:35:25 +0100407 if (fromLim - from < 3) {
408 res = XML_CONVERT_INPUT_INCOMPLETE;
Elliott Hughes72472942018-01-10 08:36:10 -0800409 goto after;
Paul Duffin7b64b722016-05-13 12:35:25 +0100410 }
Haibo Huang40a71912019-10-11 11:13:39 -0700411 *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6)
412 | (from[2] & 0x3f));
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800413 from += 3;
414 break;
Haibo Huang40a71912019-10-11 11:13:39 -0700415 case BT_LEAD4: {
416 unsigned long n;
417 if (toLim - to < 2) {
418 res = XML_CONVERT_OUTPUT_EXHAUSTED;
419 goto after;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800420 }
Haibo Huang40a71912019-10-11 11:13:39 -0700421 if (fromLim - from < 4) {
422 res = XML_CONVERT_INPUT_INCOMPLETE;
423 goto after;
424 }
425 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
426 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
427 n -= 0x10000;
428 to[0] = (unsigned short)((n >> 10) | 0xD800);
429 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
430 to += 2;
431 from += 4;
432 } break;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800433 default:
434 *to++ = *from++;
435 break;
436 }
437 }
Elliott Hughes72472942018-01-10 08:36:10 -0800438 if (from < fromLim)
439 res = XML_CONVERT_OUTPUT_EXHAUSTED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800440after:
441 *fromP = from;
442 *toP = to;
Paul Duffin7b64b722016-05-13 12:35:25 +0100443 return res;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800444}
445
446#ifdef XML_NS
Haibo Huang40a71912019-10-11 11:13:39 -0700447static const struct normal_encoding utf8_encoding_ns
448 = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
449 {
450# include "asciitab.h"
451# include "utf8tab.h"
452 },
453 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800454#endif
455
Haibo Huang40a71912019-10-11 11:13:39 -0700456static const struct normal_encoding utf8_encoding
457 = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
458 {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800459#define BT_COLON BT_NMSTRT
460#include "asciitab.h"
461#undef BT_COLON
462#include "utf8tab.h"
Haibo Huang40a71912019-10-11 11:13:39 -0700463 },
464 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800465
466#ifdef XML_NS
467
Haibo Huang40a71912019-10-11 11:13:39 -0700468static const struct normal_encoding internal_utf8_encoding_ns
469 = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
470 {
471# include "iasciitab.h"
472# include "utf8tab.h"
473 },
474 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800475
476#endif
477
Haibo Huang40a71912019-10-11 11:13:39 -0700478static const struct normal_encoding internal_utf8_encoding
479 = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
480 {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800481#define BT_COLON BT_NMSTRT
482#include "iasciitab.h"
483#undef BT_COLON
484#include "utf8tab.h"
Haibo Huang40a71912019-10-11 11:13:39 -0700485 },
486 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800487
Paul Duffin7b64b722016-05-13 12:35:25 +0100488static enum XML_Convert_Result PTRCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700489latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
490 char **toP, const char *toLim) {
491 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800492 for (;;) {
493 unsigned char c;
494 if (*fromP == fromLim)
Paul Duffin7b64b722016-05-13 12:35:25 +0100495 return XML_CONVERT_COMPLETED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800496 c = (unsigned char)**fromP;
497 if (c & 0x80) {
498 if (toLim - *toP < 2)
Paul Duffin7b64b722016-05-13 12:35:25 +0100499 return XML_CONVERT_OUTPUT_EXHAUSTED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800500 *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
501 *(*toP)++ = (char)((c & 0x3f) | 0x80);
502 (*fromP)++;
Haibo Huang40a71912019-10-11 11:13:39 -0700503 } else {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800504 if (*toP == toLim)
Paul Duffin7b64b722016-05-13 12:35:25 +0100505 return XML_CONVERT_OUTPUT_EXHAUSTED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800506 *(*toP)++ = *(*fromP)++;
507 }
508 }
509}
510
Paul Duffin7b64b722016-05-13 12:35:25 +0100511static enum XML_Convert_Result PTRCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700512latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
513 unsigned short **toP, const unsigned short *toLim) {
514 UNUSED_P(enc);
Paul Duffin7b64b722016-05-13 12:35:25 +0100515 while (*fromP < fromLim && *toP < toLim)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800516 *(*toP)++ = (unsigned char)*(*fromP)++;
Paul Duffin7b64b722016-05-13 12:35:25 +0100517
518 if ((*toP == toLim) && (*fromP < fromLim))
519 return XML_CONVERT_OUTPUT_EXHAUSTED;
520 else
521 return XML_CONVERT_COMPLETED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800522}
523
524#ifdef XML_NS
525
Haibo Huang40a71912019-10-11 11:13:39 -0700526static const struct normal_encoding latin1_encoding_ns
527 = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
528 {
529# include "asciitab.h"
530# include "latin1tab.h"
531 },
532 STANDARD_VTABLE(sb_) NULL_VTABLE};
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800533
534#endif
535
Haibo Huang40a71912019-10-11 11:13:39 -0700536static const struct normal_encoding latin1_encoding
537 = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
538 {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800539#define BT_COLON BT_NMSTRT
540#include "asciitab.h"
541#undef BT_COLON
542#include "latin1tab.h"
Haibo Huang40a71912019-10-11 11:13:39 -0700543 },
544 STANDARD_VTABLE(sb_) NULL_VTABLE};
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800545
Paul Duffin7b64b722016-05-13 12:35:25 +0100546static enum XML_Convert_Result PTRCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700547ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
548 char **toP, const char *toLim) {
549 UNUSED_P(enc);
Paul Duffin7b64b722016-05-13 12:35:25 +0100550 while (*fromP < fromLim && *toP < toLim)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800551 *(*toP)++ = *(*fromP)++;
Paul Duffin7b64b722016-05-13 12:35:25 +0100552
553 if ((*toP == toLim) && (*fromP < fromLim))
554 return XML_CONVERT_OUTPUT_EXHAUSTED;
555 else
556 return XML_CONVERT_COMPLETED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800557}
558
559#ifdef XML_NS
560
Haibo Huang40a71912019-10-11 11:13:39 -0700561static const struct normal_encoding ascii_encoding_ns
562 = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
563 {
564# include "asciitab.h"
565 /* BT_NONXML == 0 */
566 },
567 STANDARD_VTABLE(sb_) NULL_VTABLE};
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800568
569#endif
570
Haibo Huang40a71912019-10-11 11:13:39 -0700571static const struct normal_encoding ascii_encoding
572 = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
573 {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800574#define BT_COLON BT_NMSTRT
575#include "asciitab.h"
576#undef BT_COLON
Haibo Huang40a71912019-10-11 11:13:39 -0700577 /* BT_NONXML == 0 */
578 },
579 STANDARD_VTABLE(sb_) NULL_VTABLE};
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800580
581static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700582unicode_byte_type(char hi, char lo) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800583 switch ((unsigned char)hi) {
Haibo Huangd1a324a2020-10-28 22:19:36 -0700584 /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */
Haibo Huang40a71912019-10-11 11:13:39 -0700585 case 0xD8:
586 case 0xD9:
587 case 0xDA:
588 case 0xDB:
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800589 return BT_LEAD4;
Haibo Huangd1a324a2020-10-28 22:19:36 -0700590 /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */
Haibo Huang40a71912019-10-11 11:13:39 -0700591 case 0xDC:
592 case 0xDD:
593 case 0xDE:
594 case 0xDF:
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800595 return BT_TRAIL;
596 case 0xFF:
597 switch ((unsigned char)lo) {
Haibo Huang40a71912019-10-11 11:13:39 -0700598 case 0xFF: /* noncharacter-FFFF */
599 case 0xFE: /* noncharacter-FFFE */
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800600 return BT_NONXML;
601 }
602 break;
603 }
604 return BT_NONASCII;
605}
606
/* Defines the function E##toUtf8, which converts UTF-16 input to UTF-8.
   GET_LO and GET_HI must be defined at the point of expansion; they pick
   the low and high byte of the 2-byte unit at a pointer and thereby fix
   the byte order.

   The generated function consumes whole 2-byte units only (a trailing odd
   byte is ignored) and writes 1, 2, 3 or 4 output bytes per character.
   *toP is advanced as bytes are written and *fromP is stored on every
   exit.  It returns XML_CONVERT_OUTPUT_EXHAUSTED when the next character
   does not fit before toLim, XML_CONVERT_INPUT_INCOMPLETE when a unit in
   0xD800-0xDBFF is not followed by another unit, and XML_CONVERT_COMPLETED
   otherwise. */
#define DEFINE_UTF16_TO_UTF8(E)                                                \
  static enum XML_Convert_Result PTRCALL E##toUtf8(                            \
      const ENCODING *enc, const char **fromP, const char *fromLim,            \
      char **toP, const char *toLim) {                                         \
    const char *from = *fromP;                                                 \
    UNUSED_P(enc);                                                             \
    fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */      \
    for (; from < fromLim; from += 2) {                                        \
      const unsigned char lo = GET_LO(from);                                   \
      const unsigned char hi = GET_HI(from);                                   \
      if (hi == 0 && lo < 0x80) {                                              \
        /* below 0x80: a single output byte */                                 \
        if (*toP == toLim) {                                                   \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        *(*toP)++ = lo;                                                        \
      } else if (hi <= 0x7) {                                                  \
        /* 0x0080-0x07FF: two output bytes */                                  \
        if (toLim - *toP < 2) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2);                      \
        *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \
      } else if (hi >= 0xD8 && hi <= 0xDB) {                                   \
        /* first unit of a surrogate pair: four output bytes */                \
        int plane;                                                             \
        unsigned char lo2;                                                     \
        if (toLim - *toP < 4) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        if (fromLim - from < 4) {                                              \
          *fromP = from;                                                       \
          return XML_CONVERT_INPUT_INCOMPLETE;                                 \
        }                                                                      \
        plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1;                   \
        *(*toP)++ = (char)((plane >> 2) | UTF8_cval4);                         \
        *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80);         \
        from += 2; /* step onto the second unit of the pair */                 \
        lo2 = GET_LO(from);                                                    \
        *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2)           \
                     | (lo2 >> 6) | 0x80);                                     \
        *(*toP)++ = ((lo2 & 0x3f) | 0x80);                                     \
      } else {                                                                 \
        /* everything else: three output bytes */                              \
        if (toLim - *toP < 3) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        /* 16 bits divided 4, 6, 6 amongst 3 bytes */                          \
        *(*toP)++ = ((hi >> 4) | UTF8_cval3);                                  \
        *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80);                    \
        *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \
      }                                                                        \
    }                                                                          \
    *fromP = from;                                                             \
    return (from < fromLim) ? XML_CONVERT_INPUT_INCOMPLETE                     \
                            : XML_CONVERT_COMPLETED;                           \
  }
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800683
/* Defines the function E##toUtf16, which copies UTF-16 input given as
   bytes into an array of unsigned short code units.  GET_LO and GET_HI
   must be defined at the point of expansion and fix the input byte order.

   The generated function consumes whole 2-byte units only (a trailing odd
   byte is ignored).  When the input is longer than the output can hold
   and its last unit has a high byte in 0xD8-0xDF, that unit is held back
   so that the first half of a surrogate pair is not copied alone.
   *fromP and *toP are advanced as units are copied.  It returns
   XML_CONVERT_OUTPUT_EXHAUSTED when the output is full and input is left
   over, XML_CONVERT_INPUT_INCOMPLETE when a unit was held back, and
   XML_CONVERT_COMPLETED otherwise. */
#define DEFINE_UTF16_TO_UTF16(E)                                               \
  static enum XML_Convert_Result PTRCALL E##toUtf16(                           \
      const ENCODING *enc, const char **fromP, const char *fromLim,            \
      unsigned short **toP, const unsigned short *toLim) {                     \
    enum XML_Convert_Result res = XML_CONVERT_COMPLETED;                       \
    UNUSED_P(enc);                                                             \
    fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */  \
    /* Avoid copying first half only of surrogate */                           \
    if (fromLim - *fromP > ((toLim - *toP) << 1)                               \
        && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) {                             \
      fromLim -= 2;                                                            \
      res = XML_CONVERT_INPUT_INCOMPLETE;                                      \
    }                                                                          \
    while (*fromP < fromLim && *toP < toLim) {                                 \
      *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP);                      \
      *fromP += 2;                                                             \
    }                                                                          \
    return ((*toP == toLim) && (*fromP < fromLim))                             \
               ? XML_CONVERT_OUTPUT_EXHAUSTED                                  \
               : res;                                                          \
  }
The Android Open Source Projectb80e2872009-03-03 19:29:30 -0800704
/* Little-endian 2-byte units: byte 0 is the low-order byte, byte 1 the
   high-order byte.  The accessors exist only while the little2_ converters
   are instantiated and are removed again immediately afterwards.
   DEFINE_UTF16_TO_UTF16(little2_) yields little2_toUtf16;
   DEFINE_UTF16_TO_UTF8(little2_) presumably yields little2_toUtf8 (that
   name is what the little2 VTABLE refers to). */
#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])

DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)

#undef SET2
#undef GET_LO
#undef GET_HI

/* Big-endian 2-byte units: byte 0 is the high-order byte, byte 1 the
   low-order byte.  Same scheme as above, producing the big2_ converters. */
#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF)))
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
#define GET_HI(ptr) ((unsigned char)(ptr)[0])

DEFINE_UTF16_TO_UTF8(big2_)
DEFINE_UTF16_TO_UTF16(big2_)

#undef SET2
#undef GET_LO
#undef GET_HI
726
/* Accessors for one character of a little-endian 2-byte encoding, where
   (p)[0] is the low-order byte and (p)[1] the high-order byte.

   LITTLE2_BYTE_TYPE       byte type: looked up in the encoding's type table
                           when the high byte is 0, otherwise obtained from
                           unicode_byte_type(high, low).
   LITTLE2_BYTE_TO_ASCII   the low byte when the high byte is 0, else -1.
   LITTLE2_CHAR_MATCHES    non-zero when the character equals c (high byte 0
                           and low byte equal to c).
   LITTLE2_IS_NAME_CHAR_MINBPC / LITTLE2_IS_NMSTRT_CHAR_MINBPC
                           lookups in the namePages / nmstrtPages tables via
                           UCS2_GET_NAMING(pages, high, low).

   Every use of a macro parameter is parenthesized so that expression
   arguments (for example `ptr + 2` or `flag ? a : b`) are treated as a
   single operand.  Note that p is still evaluated more than once. */
#define LITTLE2_BYTE_TYPE(enc, p)                                              \
  ((p)[1] == 0                                                                 \
       ? ((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)]    \
       : unicode_byte_type((p)[1], (p)[0]))
#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)
#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c))
#define LITTLE2_IS_NAME_CHAR_MINBPC(p)                                         \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)                                       \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
736
#ifdef XML_MIN_SIZE

/* XML_MIN_SIZE build: xmltok_impl.c is not re-included for little2_ in this
   branch; instead the LITTLE2_* accessors are wrapped in small functions and
   VTABLE is redefined to name the little2_ converters. */

/* Byte type of the 2-byte character at p (see LITTLE2_BYTE_TYPE). */
static int PTRFASTCALL
little2_byteType(const ENCODING *enc, const char *p) {
  return LITTLE2_BYTE_TYPE(enc, p);
}

/* Low byte of the character at p when its high byte is 0, otherwise -1. */
static int PTRFASTCALL
little2_byteToAscii(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return LITTLE2_BYTE_TO_ASCII(p);
}

/* Non-zero when the character at p has high byte 0 and low byte c. */
static int PTRCALL
little2_charMatches(const ENCODING *enc, const char *p, int c) {
  UNUSED_P(enc);
  return LITTLE2_CHAR_MATCHES(p, c);
}

/* namePages lookup for the character at p. */
static int PTRFASTCALL
little2_isNameMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return LITTLE2_IS_NAME_CHAR_MINBPC(p);
}

/* nmstrtPages lookup for the character at p. */
static int PTRFASTCALL
little2_isNmstrtMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p);
}

/* NOTE(review): VTABLE1 is defined outside this section; presumably it
   supplies the shared scanner entries, leaving only the two converters
   encoding-specific -- confirm against its definition. */
#  undef VTABLE
#  define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16

#else /* not XML_MIN_SIZE */

/* Regular build: map the generic accessor macros onto the LITTLE2_* ones,
   then include xmltok_impl.c with PREFIX producing little2_-prefixed names
   (presumably that is how the little2_ scanner functions are generated). */
#  undef PREFIX
#  define PREFIX(ident) little2_##ident
#  define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#  define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#  define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p)
#  define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c)
/* The n-byte name checks are constant 0 for this encoding; only the
   MINBPC variants consult the naming tables. */
#  define IS_NAME_CHAR(enc, p, n) 0
#  define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p)
#  define IS_NMSTRT_CHAR(enc, p, n) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)

#  define XML_TOK_IMPL_C
#  include "xmltok_impl.c"
#  undef XML_TOK_IMPL_C

/* Remove the per-encoding mappings so the next encoding can define its
   own.  PREFIX is deliberately left defined here. */
#  undef MINBPC
#  undef BYTE_TYPE
#  undef BYTE_TO_ASCII
#  undef CHAR_MATCHES
#  undef IS_NAME_CHAR
#  undef IS_NAME_CHAR_MINBPC
#  undef IS_NMSTRT_CHAR
#  undef IS_NMSTRT_CHAR_MINBPC
#  undef IS_INVALID_CHAR

#endif /* not XML_MIN_SIZE */
800
/* Encoding descriptors for little-endian 2-byte input.  Each one consists of
   the VTABLE-based header, a 256-entry byte-type table assembled from the
   included ASCII and Latin-1 table fragments, and the little2_ standard
   vtable entries.  The last header initializer is 1 only when the encoding's
   byte order equals the host BYTEORDER (NOTE(review): presumably the isUtf16
   flag, after the 2 for bytes per character -- confirm against ENCODING). */
#ifdef XML_NS

/* Namespace-aware variant: asciitab.h is included with BT_COLON left as
   defined elsewhere. */
static const struct normal_encoding little2_encoding_ns
    = {{VTABLE, 2, 0,
#  if BYTEORDER == 1234
        1
#  else
        0
#  endif
       },
       {
#  include "asciitab.h"
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};

#endif

/* Non-namespace variant: BT_COLON is mapped to BT_NMSTRT while asciitab.h is
   included (presumably so that ':' is classified as a name-start
   character). */
static const struct normal_encoding little2_encoding
    = {{VTABLE, 2, 0,
#if BYTEORDER == 1234
        1
#else
        0
#endif
       },
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};

/* "internal_" variants are built unless the host is known to be big-endian.
   They use iasciitab.h instead of asciitab.h and always set the last header
   initializer to 1. */
#if BYTEORDER != 4321

#  ifdef XML_NS

static const struct normal_encoding internal_little2_encoding_ns
    = {{VTABLE, 2, 0, 1},
       {
#    include "iasciitab.h"
#    include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};

#  endif

static const struct normal_encoding internal_little2_encoding
    = {{VTABLE, 2, 0, 1},
       {
#  define BT_COLON BT_NMSTRT
#  include "iasciitab.h"
#  undef BT_COLON
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};

#endif
860
/* Accessors for one character of a big-endian 2-byte encoding, where
   (p)[0] is the high-order byte and (p)[1] the low-order byte.

   BIG2_BYTE_TYPE       byte type: looked up in the encoding's type table
                        when the high byte is 0, otherwise obtained from
                        unicode_byte_type(high, low).
   BIG2_BYTE_TO_ASCII   the low byte when the high byte is 0, else -1.
   BIG2_CHAR_MATCHES    non-zero when the character equals c (high byte 0
                        and low byte equal to c).
   BIG2_IS_NAME_CHAR_MINBPC / BIG2_IS_NMSTRT_CHAR_MINBPC
                        lookups in the namePages / nmstrtPages tables via
                        UCS2_GET_NAMING(pages, high, low).

   Every use of a macro parameter is parenthesized so that expression
   arguments (for example `ptr + 2` or `flag ? a : b`) are treated as a
   single operand.  Note that p is still evaluated more than once. */
#define BIG2_BYTE_TYPE(enc, p)                                                 \
  ((p)[0] == 0                                                                 \
       ? ((const struct normal_encoding *)(enc))                               \
             ->type[(unsigned char)(p)[1]]                                     \
       : unicode_byte_type((p)[0], (p)[1]))
#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c))
#define BIG2_IS_NAME_CHAR_MINBPC(p)                                            \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p)                                          \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
871
#ifdef XML_MIN_SIZE

/* XML_MIN_SIZE build: xmltok_impl.c is not re-included for big2_ in this
   branch; instead the BIG2_* accessors are wrapped in small functions and
   VTABLE is redefined to name the big2_ converters. */

/* Byte type of the 2-byte character at p (see BIG2_BYTE_TYPE). */
static int PTRFASTCALL
big2_byteType(const ENCODING *enc, const char *p) {
  return BIG2_BYTE_TYPE(enc, p);
}

/* Low byte of the character at p when its high byte is 0, otherwise -1. */
static int PTRFASTCALL
big2_byteToAscii(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return BIG2_BYTE_TO_ASCII(p);
}

/* Non-zero when the character at p has high byte 0 and low byte c. */
static int PTRCALL
big2_charMatches(const ENCODING *enc, const char *p, int c) {
  UNUSED_P(enc);
  return BIG2_CHAR_MATCHES(p, c);
}

/* namePages lookup for the character at p. */
static int PTRFASTCALL
big2_isNameMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return BIG2_IS_NAME_CHAR_MINBPC(p);
}

/* nmstrtPages lookup for the character at p. */
static int PTRFASTCALL
big2_isNmstrtMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return BIG2_IS_NMSTRT_CHAR_MINBPC(p);
}

/* NOTE(review): VTABLE1 is defined outside this section; presumably it
   supplies the shared scanner entries, leaving only the two converters
   encoding-specific -- confirm against its definition. */
#  undef VTABLE
#  define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16

#else /* not XML_MIN_SIZE */

/* Regular build: map the generic accessor macros onto the BIG2_* ones, then
   include xmltok_impl.c with PREFIX producing big2_-prefixed names
   (presumably that is how the big2_ scanner functions are generated). */
#  undef PREFIX
#  define PREFIX(ident) big2_##ident
#  define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#  define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#  define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p)
#  define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c)
/* The n-byte name checks are constant 0 for this encoding; only the
   MINBPC variants consult the naming tables. */
#  define IS_NAME_CHAR(enc, p, n) 0
#  define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p)
#  define IS_NMSTRT_CHAR(enc, p, n) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p)

#  define XML_TOK_IMPL_C
#  include "xmltok_impl.c"
#  undef XML_TOK_IMPL_C

/* Remove the per-encoding mappings again.  PREFIX is left defined here. */
#  undef MINBPC
#  undef BYTE_TYPE
#  undef BYTE_TO_ASCII
#  undef CHAR_MATCHES
#  undef IS_NAME_CHAR
#  undef IS_NAME_CHAR_MINBPC
#  undef IS_NMSTRT_CHAR
#  undef IS_NMSTRT_CHAR_MINBPC
#  undef IS_INVALID_CHAR

#endif /* not XML_MIN_SIZE */
935
/* Encoding descriptors for big-endian 2-byte input.  Each one consists of
   the VTABLE-based header, a 256-entry byte-type table assembled from the
   included ASCII and Latin-1 table fragments, and the big2_ standard vtable
   entries.  The last header initializer is 1 only when the encoding's byte
   order equals the host BYTEORDER (NOTE(review): presumably the isUtf16
   flag, after the 2 for bytes per character -- confirm against ENCODING). */
#ifdef XML_NS

/* Namespace-aware variant: asciitab.h is included with BT_COLON left as
   defined elsewhere. */
static const struct normal_encoding big2_encoding_ns
    = {{VTABLE, 2, 0,
#  if BYTEORDER == 4321
        1
#  else
        0
#  endif
       },
       {
#  include "asciitab.h"
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};

#endif

/* Non-namespace variant: BT_COLON is mapped to BT_NMSTRT while asciitab.h is
   included (presumably so that ':' is classified as a name-start
   character). */
static const struct normal_encoding big2_encoding
    = {{VTABLE, 2, 0,
#if BYTEORDER == 4321
        1
#else
        0
#endif
       },
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};

/* "internal_" variants are built unless the host is known to be
   little-endian.  They use iasciitab.h instead of asciitab.h and always set
   the last header initializer to 1. */
#if BYTEORDER != 1234

#  ifdef XML_NS

static const struct normal_encoding internal_big2_encoding_ns
    = {{VTABLE, 2, 0, 1},
       {
#    include "iasciitab.h"
#    include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};

#  endif

static const struct normal_encoding internal_big2_encoding
    = {{VTABLE, 2, 0, 1},
       {
#  define BT_COLON BT_NMSTRT
#  include "iasciitab.h"
#  undef BT_COLON
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};

#endif
995
996#undef PREFIX
997
998static int FASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -0700999streqci(const char *s1, const char *s2) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001000 for (;;) {
1001 char c1 = *s1++;
1002 char c2 = *s2++;
1003 if (ASCII_a <= c1 && c1 <= ASCII_z)
1004 c1 += ASCII_A - ASCII_a;
1005 if (ASCII_a <= c2 && c2 <= ASCII_z)
Elliott Hughes72472942018-01-10 08:36:10 -08001006 /* The following line will never get executed. streqci() is
1007 * only called from two places, both of which guarantee to put
1008 * upper-case strings into s2.
1009 */
1010 c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001011 if (c1 != c2)
1012 return 0;
Haibo Huang40a71912019-10-11 11:13:39 -07001013 if (! c1)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001014 break;
1015 }
1016 return 1;
1017}
1018
1019static void PTRCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001020initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end,
1021 POSITION *pos) {
1022 UNUSED_P(enc);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001023 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
1024}
1025
1026static int
Haibo Huang40a71912019-10-11 11:13:39 -07001027toAscii(const ENCODING *enc, const char *ptr, const char *end) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001028 char buf[1];
1029 char *p = buf;
1030 XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
1031 if (p == buf)
1032 return -1;
1033 else
1034 return buf[0];
1035}
1036
1037static int FASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001038isSpace(int c) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001039 switch (c) {
1040 case 0x20:
1041 case 0xD:
1042 case 0xA:
1043 case 0x9:
1044 return 1;
1045 }
1046 return 0;
1047}
1048
1049/* Return 1 if there's just optional white space or there's an S
1050 followed by name=val.
1051*/
1052static int
Haibo Huang40a71912019-10-11 11:13:39 -07001053parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end,
1054 const char **namePtr, const char **nameEndPtr,
1055 const char **valPtr, const char **nextTokPtr) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001056 int c;
1057 char open;
1058 if (ptr == end) {
1059 *namePtr = NULL;
1060 return 1;
1061 }
Haibo Huang40a71912019-10-11 11:13:39 -07001062 if (! isSpace(toAscii(enc, ptr, end))) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001063 *nextTokPtr = ptr;
1064 return 0;
1065 }
1066 do {
1067 ptr += enc->minBytesPerChar;
1068 } while (isSpace(toAscii(enc, ptr, end)));
1069 if (ptr == end) {
1070 *namePtr = NULL;
1071 return 1;
1072 }
1073 *namePtr = ptr;
1074 for (;;) {
1075 c = toAscii(enc, ptr, end);
1076 if (c == -1) {
1077 *nextTokPtr = ptr;
1078 return 0;
1079 }
1080 if (c == ASCII_EQUALS) {
1081 *nameEndPtr = ptr;
1082 break;
1083 }
1084 if (isSpace(c)) {
1085 *nameEndPtr = ptr;
1086 do {
1087 ptr += enc->minBytesPerChar;
1088 } while (isSpace(c = toAscii(enc, ptr, end)));
1089 if (c != ASCII_EQUALS) {
1090 *nextTokPtr = ptr;
1091 return 0;
1092 }
1093 break;
1094 }
1095 ptr += enc->minBytesPerChar;
1096 }
1097 if (ptr == *namePtr) {
1098 *nextTokPtr = ptr;
1099 return 0;
1100 }
1101 ptr += enc->minBytesPerChar;
1102 c = toAscii(enc, ptr, end);
1103 while (isSpace(c)) {
1104 ptr += enc->minBytesPerChar;
1105 c = toAscii(enc, ptr, end);
1106 }
1107 if (c != ASCII_QUOT && c != ASCII_APOS) {
1108 *nextTokPtr = ptr;
1109 return 0;
1110 }
1111 open = (char)c;
1112 ptr += enc->minBytesPerChar;
1113 *valPtr = ptr;
1114 for (;; ptr += enc->minBytesPerChar) {
1115 c = toAscii(enc, ptr, end);
1116 if (c == open)
1117 break;
Haibo Huang40a71912019-10-11 11:13:39 -07001118 if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)
1119 && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD
1120 && c != ASCII_MINUS && c != ASCII_UNDERSCORE) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001121 *nextTokPtr = ptr;
1122 return 0;
1123 }
1124 }
1125 *nextTokPtr = ptr + enc->minBytesPerChar;
1126 return 1;
1127}
1128
/* Keywords recognised by doParseXmlDecl(), spelled with the ASCII_*
   character constants rather than string literals (NOTE(review): presumably
   so the bytes are ASCII regardless of the compiler's execution character
   set -- confirm against ascii.h). */

/* Pseudo-attribute name "version". */
static const char KW_version[]
    = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'};

/* Pseudo-attribute name "encoding". */
static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d,
                                   ASCII_i, ASCII_n, ASCII_g, '\0'};

/* Pseudo-attribute name "standalone". */
static const char KW_standalone[]
    = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a,
       ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'};

/* Accepted values of the standalone pseudo-attribute. */
static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'};

static const char KW_no[] = {ASCII_n, ASCII_o, '\0'};
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001142
1143static int
Haibo Huang40a71912019-10-11 11:13:39 -07001144doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *,
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001145 const char *),
Haibo Huang40a71912019-10-11 11:13:39 -07001146 int isGeneralTextEntity, const ENCODING *enc, const char *ptr,
1147 const char *end, const char **badPtr, const char **versionPtr,
1148 const char **versionEndPtr, const char **encodingName,
1149 const ENCODING **encoding, int *standalone) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001150 const char *val = NULL;
1151 const char *name = NULL;
1152 const char *nameEnd = NULL;
1153 ptr += 5 * enc->minBytesPerChar;
1154 end -= 2 * enc->minBytesPerChar;
Haibo Huang40a71912019-10-11 11:13:39 -07001155 if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
1156 || ! name) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001157 *badPtr = ptr;
1158 return 0;
1159 }
Haibo Huang40a71912019-10-11 11:13:39 -07001160 if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
1161 if (! isGeneralTextEntity) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001162 *badPtr = name;
1163 return 0;
1164 }
Haibo Huang40a71912019-10-11 11:13:39 -07001165 } else {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001166 if (versionPtr)
1167 *versionPtr = val;
1168 if (versionEndPtr)
1169 *versionEndPtr = ptr;
Haibo Huang40a71912019-10-11 11:13:39 -07001170 if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001171 *badPtr = ptr;
1172 return 0;
1173 }
Haibo Huang40a71912019-10-11 11:13:39 -07001174 if (! name) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001175 if (isGeneralTextEntity) {
1176 /* a TextDecl must have an EncodingDecl */
1177 *badPtr = ptr;
1178 return 0;
1179 }
1180 return 1;
1181 }
1182 }
1183 if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
1184 int c = toAscii(enc, val, end);
Haibo Huang40a71912019-10-11 11:13:39 -07001185 if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001186 *badPtr = val;
1187 return 0;
1188 }
1189 if (encodingName)
1190 *encodingName = val;
1191 if (encoding)
1192 *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
Haibo Huang40a71912019-10-11 11:13:39 -07001193 if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001194 *badPtr = ptr;
1195 return 0;
1196 }
Haibo Huang40a71912019-10-11 11:13:39 -07001197 if (! name)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001198 return 1;
1199 }
Haibo Huang40a71912019-10-11 11:13:39 -07001200 if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001201 || isGeneralTextEntity) {
1202 *badPtr = name;
1203 return 0;
1204 }
1205 if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
1206 if (standalone)
1207 *standalone = 1;
Haibo Huang40a71912019-10-11 11:13:39 -07001208 } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001209 if (standalone)
1210 *standalone = 0;
Haibo Huang40a71912019-10-11 11:13:39 -07001211 } else {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001212 *badPtr = val;
1213 return 0;
1214 }
1215 while (isSpace(toAscii(enc, ptr, end)))
1216 ptr += enc->minBytesPerChar;
1217 if (ptr != end) {
1218 *badPtr = ptr;
1219 return 0;
1220 }
1221 return 1;
1222}
1223
1224static int FASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001225checkCharRefNumber(int result) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001226 switch (result >> 8) {
Haibo Huang40a71912019-10-11 11:13:39 -07001227 case 0xD8:
1228 case 0xD9:
1229 case 0xDA:
1230 case 0xDB:
1231 case 0xDC:
1232 case 0xDD:
1233 case 0xDE:
1234 case 0xDF:
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001235 return -1;
1236 case 0:
1237 if (latin1_encoding.type[result] == BT_NONXML)
1238 return -1;
1239 break;
1240 case 0xFF:
1241 if (result == 0xFFFE || result == 0xFFFF)
1242 return -1;
1243 break;
1244 }
1245 return result;
1246}
1247
1248int FASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001249XmlUtf8Encode(int c, char *buf) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001250 enum {
1251 /* minN is minimum legal resulting value for N byte sequence */
1252 min2 = 0x80,
1253 min3 = 0x800,
1254 min4 = 0x10000
1255 };
1256
1257 if (c < 0)
Elliott Hughes72472942018-01-10 08:36:10 -08001258 return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001259 if (c < min2) {
1260 buf[0] = (char)(c | UTF8_cval1);
1261 return 1;
1262 }
1263 if (c < min3) {
1264 buf[0] = (char)((c >> 6) | UTF8_cval2);
1265 buf[1] = (char)((c & 0x3f) | 0x80);
1266 return 2;
1267 }
1268 if (c < min4) {
1269 buf[0] = (char)((c >> 12) | UTF8_cval3);
1270 buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
1271 buf[2] = (char)((c & 0x3f) | 0x80);
1272 return 3;
1273 }
1274 if (c < 0x110000) {
1275 buf[0] = (char)((c >> 18) | UTF8_cval4);
1276 buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
1277 buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
1278 buf[3] = (char)((c & 0x3f) | 0x80);
1279 return 4;
1280 }
Elliott Hughes72472942018-01-10 08:36:10 -08001281 return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001282}
1283
1284int FASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001285XmlUtf16Encode(int charNum, unsigned short *buf) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001286 if (charNum < 0)
1287 return 0;
1288 if (charNum < 0x10000) {
1289 buf[0] = (unsigned short)charNum;
1290 return 1;
1291 }
1292 if (charNum < 0x110000) {
1293 charNum -= 0x10000;
1294 buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
1295 buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
1296 return 2;
1297 }
1298 return 0;
1299}
1300
/* State for an application-described encoding, filled in by
   XmlInitUnknownEncoding().  `normal` comes first: the initializer copies a
   struct normal_encoding over the start of the memory and hands out
   &normal.enc, and AS_UNKNOWN_ENCODING() casts an ENCODING pointer back to
   this struct. */
struct unknown_encoding {
  struct normal_encoding normal;
  /* user callback for multi-byte sequences (may be NULL) and its argument */
  CONVERTER convert;
  void *userData;
  /* per lead byte: UTF-16 unit to emit; 0 means "ask convert" */
  unsigned short utf16[256];
  /* per lead byte: [0] is the UTF-8 length (0 means "ask convert"),
     followed by that many UTF-8 bytes */
  char utf8[256][4];
};

#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc))
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001310
1311int
Haibo Huang40a71912019-10-11 11:13:39 -07001312XmlSizeOfUnknownEncoding(void) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001313 return sizeof(struct unknown_encoding);
1314}
1315
1316static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001317unknown_isName(const ENCODING *enc, const char *p) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001318 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1319 int c = uenc->convert(uenc->userData, p);
1320 if (c & ~0xFFFF)
1321 return 0;
1322 return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
1323}
1324
1325static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001326unknown_isNmstrt(const ENCODING *enc, const char *p) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001327 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1328 int c = uenc->convert(uenc->userData, p);
1329 if (c & ~0xFFFF)
1330 return 0;
1331 return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
1332}
1333
1334static int PTRFASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001335unknown_isInvalid(const ENCODING *enc, const char *p) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001336 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1337 int c = uenc->convert(uenc->userData, p);
1338 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1339}
1340
Paul Duffin7b64b722016-05-13 12:35:25 +01001341static enum XML_Convert_Result PTRCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001342unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
1343 char **toP, const char *toLim) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001344 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1345 char buf[XML_UTF8_ENCODE_MAX];
1346 for (;;) {
1347 const char *utf8;
1348 int n;
1349 if (*fromP == fromLim)
Paul Duffin7b64b722016-05-13 12:35:25 +01001350 return XML_CONVERT_COMPLETED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001351 utf8 = uenc->utf8[(unsigned char)**fromP];
1352 n = *utf8++;
1353 if (n == 0) {
1354 int c = uenc->convert(uenc->userData, *fromP);
1355 n = XmlUtf8Encode(c, buf);
1356 if (n > toLim - *toP)
Paul Duffin7b64b722016-05-13 12:35:25 +01001357 return XML_CONVERT_OUTPUT_EXHAUSTED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001358 utf8 = buf;
1359 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1360 - (BT_LEAD2 - 2));
Haibo Huang40a71912019-10-11 11:13:39 -07001361 } else {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001362 if (n > toLim - *toP)
Paul Duffin7b64b722016-05-13 12:35:25 +01001363 return XML_CONVERT_OUTPUT_EXHAUSTED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001364 (*fromP)++;
1365 }
Elliott Hughes72472942018-01-10 08:36:10 -08001366 memcpy(*toP, utf8, n);
1367 *toP += n;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001368 }
1369}
1370
Paul Duffin7b64b722016-05-13 12:35:25 +01001371static enum XML_Convert_Result PTRCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001372unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
1373 unsigned short **toP, const unsigned short *toLim) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001374 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
Paul Duffin7b64b722016-05-13 12:35:25 +01001375 while (*fromP < fromLim && *toP < toLim) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001376 unsigned short c = uenc->utf16[(unsigned char)**fromP];
1377 if (c == 0) {
Haibo Huang40a71912019-10-11 11:13:39 -07001378 c = (unsigned short)uenc->convert(uenc->userData, *fromP);
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001379 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1380 - (BT_LEAD2 - 2));
Haibo Huang40a71912019-10-11 11:13:39 -07001381 } else
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001382 (*fromP)++;
1383 *(*toP)++ = c;
1384 }
Paul Duffin7b64b722016-05-13 12:35:25 +01001385
1386 if ((*toP == toLim) && (*fromP < fromLim))
1387 return XML_CONVERT_OUTPUT_EXHAUSTED;
1388 else
1389 return XML_CONVERT_COMPLETED;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001390}
1391
1392ENCODING *
Haibo Huang40a71912019-10-11 11:13:39 -07001393XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert,
1394 void *userData) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001395 int i;
1396 struct unknown_encoding *e = (struct unknown_encoding *)mem;
Haibo Huang40a71912019-10-11 11:13:39 -07001397 memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding));
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001398 for (i = 0; i < 128; i++)
1399 if (latin1_encoding.type[i] != BT_OTHER
Haibo Huang40a71912019-10-11 11:13:39 -07001400 && latin1_encoding.type[i] != BT_NONXML && table[i] != i)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001401 return 0;
1402 for (i = 0; i < 256; i++) {
1403 int c = table[i];
1404 if (c == -1) {
1405 e->normal.type[i] = BT_MALFORM;
1406 /* This shouldn't really get used. */
1407 e->utf16[i] = 0xFFFF;
1408 e->utf8[i][0] = 1;
1409 e->utf8[i][1] = 0;
Haibo Huang40a71912019-10-11 11:13:39 -07001410 } else if (c < 0) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001411 if (c < -4)
1412 return 0;
Elliott Hughes72472942018-01-10 08:36:10 -08001413 /* Multi-byte sequences need a converter function */
Haibo Huang40a71912019-10-11 11:13:39 -07001414 if (! convert)
Elliott Hughes72472942018-01-10 08:36:10 -08001415 return 0;
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001416 e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
1417 e->utf8[i][0] = 0;
1418 e->utf16[i] = 0;
Haibo Huang40a71912019-10-11 11:13:39 -07001419 } else if (c < 0x80) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001420 if (latin1_encoding.type[c] != BT_OTHER
Haibo Huang40a71912019-10-11 11:13:39 -07001421 && latin1_encoding.type[c] != BT_NONXML && c != i)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001422 return 0;
1423 e->normal.type[i] = latin1_encoding.type[c];
1424 e->utf8[i][0] = 1;
1425 e->utf8[i][1] = (char)c;
1426 e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
Haibo Huang40a71912019-10-11 11:13:39 -07001427 } else if (checkCharRefNumber(c) < 0) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001428 e->normal.type[i] = BT_NONXML;
1429 /* This shouldn't really get used. */
1430 e->utf16[i] = 0xFFFF;
1431 e->utf8[i][0] = 1;
1432 e->utf8[i][1] = 0;
Haibo Huang40a71912019-10-11 11:13:39 -07001433 } else {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001434 if (c > 0xFFFF)
1435 return 0;
1436 if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
1437 e->normal.type[i] = BT_NMSTRT;
1438 else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
1439 e->normal.type[i] = BT_NAME;
1440 else
1441 e->normal.type[i] = BT_OTHER;
1442 e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
1443 e->utf16[i] = (unsigned short)c;
1444 }
1445 }
1446 e->userData = userData;
1447 e->convert = convert;
1448 if (convert) {
1449 e->normal.isName2 = unknown_isName;
1450 e->normal.isName3 = unknown_isName;
1451 e->normal.isName4 = unknown_isName;
1452 e->normal.isNmstrt2 = unknown_isNmstrt;
1453 e->normal.isNmstrt3 = unknown_isNmstrt;
1454 e->normal.isNmstrt4 = unknown_isNmstrt;
1455 e->normal.isInvalid2 = unknown_isInvalid;
1456 e->normal.isInvalid3 = unknown_isInvalid;
1457 e->normal.isInvalid4 = unknown_isInvalid;
1458 }
1459 e->normal.enc.utf8Convert = unknown_toUtf8;
1460 e->normal.enc.utf16Convert = unknown_toUtf16;
1461 return &(e->normal.enc);
1462}
1463
/* If this enumeration is changed, getEncodingIndex and encodings
must also be changed. */
/* Indices of the built-in encodings.  ISO_8859_1_ENC..UTF_16LE_ENC are in
   the same order as the encodingNames array in getEncodingIndex(), which
   returns the array position as the index.  UNKNOWN_ENC is returned for a
   name that matches none of them and NO_ENC for a NULL name. */
enum {
  UNKNOWN_ENC = -1,
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC,
  UTF_8_ENC,
  UTF_16_ENC,
  UTF_16BE_ENC,
  UTF_16LE_ENC,
  /* must match encodingNames up to here */
  NO_ENC
};

/* Upper-case names of the built-in encodings, spelled with the ASCII_*
   character constants: "ISO-8859-1", "US-ASCII", "UTF-8", "UTF-16",
   "UTF-16BE" and "UTF-16LE". */
static const char KW_ISO_8859_1[]
    = {ASCII_I, ASCII_S, ASCII_O,     ASCII_MINUS, ASCII_8, ASCII_8,
       ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1,     '\0'};
static const char KW_US_ASCII[]
    = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S,
       ASCII_C, ASCII_I, ASCII_I,     '\0'};
static const char KW_UTF_8[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'};
static const char KW_UTF_16[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'};
static const char KW_UTF_16BE[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
       ASCII_6, ASCII_B, ASCII_E, '\0'};
static const char KW_UTF_16LE[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
       ASCII_6, ASCII_L, ASCII_E, '\0'};
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001494
1495static int FASTCALL
Haibo Huang40a71912019-10-11 11:13:39 -07001496getEncodingIndex(const char *name) {
1497 static const char *const encodingNames[] = {
1498 KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE,
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001499 };
1500 int i;
1501 if (name == NULL)
1502 return NO_ENC;
Haibo Huang40a71912019-10-11 11:13:39 -07001503 for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001504 if (streqci(name, encodingNames[i]))
1505 return i;
1506 return UNKNOWN_ENC;
1507}
1508
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.

   INIT_ENC_INDEX(enc) reads that index back as an int.
   SET_INIT_ENC_INDEX(enc, i) stores i, narrowed to char.  The argument
   is parenthesised so that the cast applies to the whole expression
   passed as i, not just to its first operand.
*/

#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
1515
1516/* This is what detects the encoding. encodingTable maps from
1517 encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
1518 the external (protocol) specified encoding; state is
1519 XML_CONTENT_STATE if we're parsing an external text entity, and
1520 XML_PROLOG_STATE otherwise.
1521*/
1522
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001523static int
Haibo Huang40a71912019-10-11 11:13:39 -07001524initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc,
1525 int state, const char *ptr, const char *end, const char **nextTokPtr) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001526 const ENCODING **encPtr;
1527
Paul Duffin7b64b722016-05-13 12:35:25 +01001528 if (ptr >= end)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001529 return XML_TOK_NONE;
1530 encPtr = enc->encPtr;
1531 if (ptr + 1 == end) {
1532 /* only a single byte available for auto-detection */
1533#ifndef XML_DTD /* FIXME */
1534 /* a well-formed document entity must have more than one byte */
1535 if (state != XML_CONTENT_STATE)
1536 return XML_TOK_PARTIAL;
1537#endif
1538 /* so we're parsing an external text entity... */
1539 /* if UTF-16 was externally specified, then we need at least 2 bytes */
1540 switch (INIT_ENC_INDEX(enc)) {
1541 case UTF_16_ENC:
1542 case UTF_16LE_ENC:
1543 case UTF_16BE_ENC:
1544 return XML_TOK_PARTIAL;
1545 }
1546 switch ((unsigned char)*ptr) {
1547 case 0xFE:
1548 case 0xFF:
1549 case 0xEF: /* possibly first byte of UTF-8 BOM */
Haibo Huang40a71912019-10-11 11:13:39 -07001550 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001551 break;
1552 /* fall through */
1553 case 0x00:
1554 case 0x3C:
1555 return XML_TOK_PARTIAL;
1556 }
Haibo Huang40a71912019-10-11 11:13:39 -07001557 } else {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001558 switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
1559 case 0xFEFF:
Haibo Huang40a71912019-10-11 11:13:39 -07001560 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001561 break;
1562 *nextTokPtr = ptr + 2;
1563 *encPtr = encodingTable[UTF_16BE_ENC];
1564 return XML_TOK_BOM;
1565 /* 00 3C is handled in the default case */
1566 case 0x3C00:
1567 if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
1568 || INIT_ENC_INDEX(enc) == UTF_16_ENC)
1569 && state == XML_CONTENT_STATE)
1570 break;
1571 *encPtr = encodingTable[UTF_16LE_ENC];
1572 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1573 case 0xFFFE:
Haibo Huang40a71912019-10-11 11:13:39 -07001574 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001575 break;
1576 *nextTokPtr = ptr + 2;
1577 *encPtr = encodingTable[UTF_16LE_ENC];
1578 return XML_TOK_BOM;
1579 case 0xEFBB:
1580 /* Maybe a UTF-8 BOM (EF BB BF) */
1581 /* If there's an explicitly specified (external) encoding
1582 of ISO-8859-1 or some flavour of UTF-16
1583 and this is an external text entity,
1584 don't look for the BOM,
1585 because it might be a legal data.
1586 */
1587 if (state == XML_CONTENT_STATE) {
1588 int e = INIT_ENC_INDEX(enc);
Haibo Huang40a71912019-10-11 11:13:39 -07001589 if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC
1590 || e == UTF_16_ENC)
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001591 break;
1592 }
1593 if (ptr + 2 == end)
1594 return XML_TOK_PARTIAL;
1595 if ((unsigned char)ptr[2] == 0xBF) {
1596 *nextTokPtr = ptr + 3;
1597 *encPtr = encodingTable[UTF_8_ENC];
1598 return XML_TOK_BOM;
1599 }
1600 break;
1601 default:
1602 if (ptr[0] == '\0') {
1603 /* 0 isn't a legal data character. Furthermore a document
1604 entity can only start with ASCII characters. So the only
1605 way this can fail to be big-endian UTF-16 if it it's an
1606 external parsed general entity that's labelled as
1607 UTF-16LE.
1608 */
1609 if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
1610 break;
1611 *encPtr = encodingTable[UTF_16BE_ENC];
1612 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
Haibo Huang40a71912019-10-11 11:13:39 -07001613 } else if (ptr[1] == '\0') {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001614 /* We could recover here in the case:
1615 - parsing an external entity
1616 - second byte is 0
1617 - no externally specified encoding
1618 - no encoding declaration
1619 by assuming UTF-16LE. But we don't, because this would mean when
1620 presented just with a single byte, we couldn't reliably determine
1621 whether we needed further bytes.
1622 */
1623 if (state == XML_CONTENT_STATE)
1624 break;
1625 *encPtr = encodingTable[UTF_16LE_ENC];
1626 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1627 }
1628 break;
1629 }
1630 }
1631 *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
1632 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1633}
1634
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001635#define NS(x) x
1636#define ns(x) x
Elliott Hughesd07d5a72009-09-25 16:04:37 -07001637#define XML_TOK_NS_C
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001638#include "xmltok_ns.c"
Elliott Hughesd07d5a72009-09-25 16:04:37 -07001639#undef XML_TOK_NS_C
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001640#undef NS
1641#undef ns
1642
1643#ifdef XML_NS
1644
Haibo Huang40a71912019-10-11 11:13:39 -07001645# define NS(x) x##NS
1646# define ns(x) x##_ns
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001647
Haibo Huang40a71912019-10-11 11:13:39 -07001648# define XML_TOK_NS_C
1649# include "xmltok_ns.c"
1650# undef XML_TOK_NS_C
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001651
Haibo Huang40a71912019-10-11 11:13:39 -07001652# undef NS
1653# undef ns
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001654
1655ENCODING *
Haibo Huang40a71912019-10-11 11:13:39 -07001656XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert,
1657 void *userData) {
The Android Open Source Projectb80e2872009-03-03 19:29:30 -08001658 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
1659 if (enc)
1660 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
1661 return enc;
1662}
1663
1664#endif /* XML_NS */