Blame - DOCBparser.c - platform/external/libxml2

blob: c199b4c384333f80f58af5213d2232f2a5b484ac [file] [log] [blame]

Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	1	/*
				2	* DOCBparser.c : an attempt to parse SGML Docbook documents
				3	*
				4	* See Copyright for the status of this software.
				5	*
				6	* Daniel.Veillard@w3.org
				7	*/
				8
				9	#include "libxml.h"
				10	#ifdef LIBXML_DOCB_ENABLED
				11
				12	#include <string.h>
				13	#ifdef HAVE_CTYPE_H
				14	#include <ctype.h>
				15	#endif
				16	#ifdef HAVE_STDLIB_H
				17	#include <stdlib.h>
				18	#endif
				19	#ifdef HAVE_SYS_STAT_H
				20	#include <sys/stat.h>
				21	#endif
				22	#ifdef HAVE_FCNTL_H
				23	#include <fcntl.h>
				24	#endif
				25	#ifdef HAVE_UNISTD_H
				26	#include <unistd.h>
				27	#endif
				28	#ifdef HAVE_ZLIB_H
				29	#include <zlib.h>
				30	#endif
				31
				32	#include <libxml/xmlmemory.h>
				33	#include <libxml/tree.h>
				34	#include <libxml/SAX.h>
				35	#include <libxml/parser.h>
				36	#include <libxml/parserInternals.h>
				37	#include <libxml/xmlerror.h>
				38	#include <libxml/DOCBparser.h>
				39	#include <libxml/entities.h>
				40	#include <libxml/encoding.h>
				41	#include <libxml/valid.h>
				42	#include <libxml/xmlIO.h>
				43	#include <libxml/uri.h>
				44
				/*
				 * Internal description of an SGML entity.
				 *
				 * Each descriptor pairs a character value with two C strings: the
				 * entity name and a human readable description.
				 */
				typedef struct _docbEntityDesc docbEntityDesc;
				typedef docbEntityDesc *docbEntityDescPtr;
				struct _docbEntityDesc {
				    int value;		/* the UNICODE value for the character */
				    const char *name;	/* The entity name */
				    const char *desc;	/* the description */
				};
				55
				/*
				 * Prototypes that are currently compiled out.
				 * NOTE(review): docbElemDescPtr is only typedef'ed further down in this
				 * file, so these declarations would need to move below that typedef
				 * before the block could be re-enabled.
				 */
				#if 0
				docbElemDescPtr   docbTagLookup(const xmlChar *tag);
				docbEntityDescPtr docbEntityLookup(const xmlChar *name);
				docbEntityDescPtr docbEntityValueLookup(int value);

				int docbIsAutoClosed(docbDocPtr doc,
				                     docbNodePtr elem);
				int docbAutoCloseTag(docbDocPtr doc,
				                     const xmlChar *name,
				                     docbNodePtr elem);

				#endif
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	68	static int docbParseCharRef(docbParserCtxtPtr ctxt);
				69	static xmlEntityPtr docbParseEntityRef(docbParserCtxtPtr ctxt,
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	70	xmlChar **str);
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	71	static void docbParseElement(docbParserCtxtPtr ctxt);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	72
				/*
				 * Internal description of an SGML element.
				 *
				 * One record per known tag; docbookElementTable[] is an array of these,
				 * initialised positionally in the field order below, with string
				 * literals for name and desc.
				 */
				typedef struct _docbElemDesc docbElemDesc;
				typedef docbElemDesc *docbElemDescPtr;
				struct _docbElemDesc {
				    const char *name;	/* The tag name */
				    int startTag;	/* Whether the start tag can be implied */
				    int endTag;		/* Whether the end tag can be implied */
				    int empty;		/* Is this an empty element ? */
				    int depr;		/* Is this a deprecated element ? */
				    int dtd;		/* 1: only in Loose DTD, 2: only Frameset one */
				    const char *desc;	/* the description */
				};
				87
				88
				/*
				 * Parser size constants.
				 * NOTE(review): judging by the names these bound name lengths and the
				 * sizes of scratch buffers; their uses are outside this part of the file.
				 */
				#define DOCB_MAX_NAMELEN 1000
				#define DOCB_PARSER_BIG_BUFFER_SIZE 1000
				#define DOCB_PARSER_BUFFER_SIZE 100
				92
				93	/* #define DEBUG */
				94	/* #define DEBUG_PUSH */
				95
				96	/************************************************************************
				97	* *
				98	* Parser stacks related functions and macros *
				99	* *
				100	************************************************************************/
				101
				102	/*
				103	* Generic function for accessing stacks in the Parser Context
				104	*/
				105
				106	#define PUSH_AND_POP(scope, type, name) \
				107	scope int docb##name##Push(docbParserCtxtPtr ctxt, type value) { \
				108	if (ctxt->name##Nr >= ctxt->name##Max) { \
				109	ctxt->name##Max *= 2; \
				110	ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
				111	ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
				112	if (ctxt->name##Tab == NULL) { \
				113	xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); \
				114	return(0); \
				115	} \
				116	} \
				117	ctxt->name##Tab[ctxt->name##Nr] = value; \
				118	ctxt->name = value; \
				119	return(ctxt->name##Nr++); \
				120	} \
				121	scope type docb##name##Pop(docbParserCtxtPtr ctxt) { \
				122	type ret; \
				123	if (ctxt->name##Nr < 0) return(0); \
				124	ctxt->name##Nr--; \
				125	if (ctxt->name##Nr < 0) return(0); \
				126	if (ctxt->name##Nr > 0) \
				127	ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
				128	else \
				129	ctxt->name = NULL; \
				130	ret = ctxt->name##Tab[ctxt->name##Nr]; \
				131	ctxt->name##Tab[ctxt->name##Nr] = 0; \
				132	return(ret); \
				133	} \
				134
				135	/* PUSH_AND_POP(static, xmlNodePtr, node) */
				136	PUSH_AND_POP(static, xmlChar*, name)
				137
				138	/*
				139	* Macros for accessing the content. Those should be used only by the parser,
				140	* and not exported.
				141	*
				142	* Dirty macros, i.e. one need to make assumption on the context to use them
				143	*
				144	* CUR_PTR return the current pointer to the xmlChar to be parsed.
				145	* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
				146	* in ISO-Latin or UTF-8, and the current 16 bit value if compiled
				147	* in UNICODE mode. This should be used internally by the parser
				148	* only to compare to ASCII values otherwise it would break when
				149	* running with UTF-8 encoding.
				150	* NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
				151	* to compare on ASCII based substring.
				152	* UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR
				153	* it should be used only to compare on ASCII based substring.
				154	* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
				155	* strings within the parser.
				156	*
				157	* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
				158	*
				159	* CURRENT Returns the current char value, with the full decoding of
				160	* UTF-8 if we are using this mode. It returns an int.
				161	* NEXT Skip to the next character, this does the proper decoding
				162	* in UTF-8 mode. It also pop-up unfinished entities on the fly.
				163	* COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
				164	*/
				165
				/*
				 * All macros below expect a parser context pointer named "ctxt" to be in
				 * scope at the point of use.
				 *
				 * Expression macros that combine several operations (SKIP, NEXT) are
				 * fully parenthesized, and the statement-like macros (NEXTL, COPY_BUF)
				 * are wrapped in do { } while (0), so that each behaves as a single
				 * expression/statement, e.g. as the body of an unbraced if/else.
				 */

				/* Current byte, converted to upper case. */
				#define UPPER (toupper(*ctxt->input->cur))

				/* Advance the input cursor and the character counter by val. */
				#define SKIP(val) (ctxt->nbChars += (val), ctxt->input->cur += (val))

				/* The val'th byte after the cursor (no bounds checking). */
				#define NXT(val) ctxt->input->cur[(val)]

				/* Same as NXT(val), converted to upper case. */
				#define UPP(val) (toupper(ctxt->input->cur[(val)]))

				/* The input cursor itself. */
				#define CUR_PTR ctxt->input->cur

				#define SHRINK xmlParserInputShrink(ctxt->input)

				#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

				/* Current byte as an int. */
				#define CURRENT ((int) (*ctxt->input->cur))

				#define SKIP_BLANKS docbSkipBlankChars(ctxt)

				/* Imported from XML */

				/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
				#define CUR ((int) (*ctxt->input->cur))
				#define NEXT (xmlNextChar(ctxt), ctxt->nbChars++)

				/* Current byte, or -1 while ctxt->token is non-zero. */
				#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))

				/*
				 * NXT and CUR_PTR were defined a second time here with identical
				 * bodies; the single definitions above are sufficient.
				 */

				/*
				 * Advance by l bytes, counted as one character: updates line/col from
				 * the byte under the cursor *before* moving, and clears ctxt->token.
				 */
				#define NEXTL(l) do {							\
				    if (*(ctxt->input->cur) == '\n') {					\
				        ctxt->input->line++; ctxt->input->col = 1;			\
				    } else ctxt->input->col++;						\
				    ctxt->token = 0; ctxt->input->cur += (l); ctxt->nbChars++;		\
				  } while (0)

				/************
				    \
				    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
				    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
				 ************/

				/* l must be an int lvalue: it receives the byte length of the char. */
				#define CUR_CHAR(l) docbCurrentChar(ctxt, &(l))
				#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

				/*
				 * Append char v, whose encoded length is l, to buffer b at index i and
				 * advance i: single byte store when l == 1, xmlCopyChar() otherwise.
				 */
				#define COPY_BUF(l,b,i,v) do {						\
				    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
				    else (i) += xmlCopyChar((l), &(b)[(i)], (v));			\
				  } while (0)
				214
				215	/**
				216	* docbCurrentChar:
				217	* @ctxt: the DocBook SGML parser context
				218	* @len: pointer to the length of the char read
				219	*
				220	* The current char value, if using UTF-8 this may actaully span multiple
				221	* bytes in the input buffer. Implement the end of line normalization:
				222	* 2.11 End-of-Line Handling
				223	* If the encoding is unspecified, in the case we find an ISO-Latin-1
				224	* char, then the encoding converter is plugged in automatically.
				225	*
				226	* Returns the current char value and its lenght
				227	*/
				228
				229	static int
				230	docbCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
				231	if (ctxt->instate == XML_PARSER_EOF)
				232	return(0);
				233
				234	if (ctxt->token != 0) {
				235	*len = 0;
				236	return(ctxt->token);
				237	}
				238	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
				239	/*
				240	* We are supposed to handle UTF8, check it's valid
				241	* From rfc2044: encoding of the Unicode values on UTF-8:
				242	*
				243	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
				244	* 0000 0000-0000 007F 0xxxxxxx
				245	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
				246	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
				247	*
				248	* Check for the 0x110000 limit too
				249	*/
				250	const unsigned char *cur = ctxt->input->cur;
				251	unsigned char c;
				252	unsigned int val;
				253
				254	c = *cur;
				255	if (c & 0x80) {
				256	if (cur[1] == 0)
				257	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				258	if ((cur[1] & 0xc0) != 0x80)
				259	goto encoding_error;
				260	if ((c & 0xe0) == 0xe0) {
				261
				262	if (cur[2] == 0)
				263	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				264	if ((cur[2] & 0xc0) != 0x80)
				265	goto encoding_error;
				266	if ((c & 0xf0) == 0xf0) {
				267	if (cur[3] == 0)
				268	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				269	if (((c & 0xf8) != 0xf0) \|\|
				270	((cur[3] & 0xc0) != 0x80))
				271	goto encoding_error;
				272	/* 4-byte code */
				273	*len = 4;
				274	val = (cur[0] & 0x7) << 18;
				275	val \|= (cur[1] & 0x3f) << 12;
				276	val \|= (cur[2] & 0x3f) << 6;
				277	val \|= cur[3] & 0x3f;
				278	} else {
				279	/* 3-byte code */
				280	*len = 3;
				281	val = (cur[0] & 0xf) << 12;
				282	val \|= (cur[1] & 0x3f) << 6;
				283	val \|= cur[2] & 0x3f;
				284	}
				285	} else {
				286	/* 2-byte code */
				287	*len = 2;
				288	val = (cur[0] & 0x1f) << 6;
				289	val \|= cur[1] & 0x3f;
				290	}
				291	if (!IS_CHAR(val)) {
				292	ctxt->errNo = XML_ERR_INVALID_ENCODING;
				293	if ((ctxt->sax != NULL) &&
				294	(ctxt->sax->error != NULL))
				295	ctxt->sax->error(ctxt->userData,
				296	"Char 0x%X out of allowed range\n", val);
				297	ctxt->wellFormed = 0;
				298	ctxt->disableSAX = 1;
				299	}
				300	return(val);
				301	} else {
				302	/* 1-byte code */
				303	*len = 1;
				304	return((int) *ctxt->input->cur);
				305	}
				306	}
				307	/*
				308	* Assume it's a fixed lenght encoding (1) with
				309	* a compatibke encoding for the ASCII set, since
				310	* XML constructs only use < 128 chars
				311	*/
				312	*len = 1;
				313	if ((int) *ctxt->input->cur < 0x80)
				314	return((int) *ctxt->input->cur);
				315
				316	/*
				317	* Humm this is bad, do an automatic flow conversion
				318	*/
				319	xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
				320	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				321	return(xmlCurrentChar(ctxt, len));
				322
				323	encoding_error:
				324	/*
				325	* If we detect an UTF8 error that probably mean that the
				326	* input encoding didn't get properly advertized in the
				327	* declaration header. Report the error and switch the encoding
				328	* to ISO-Latin-1 (if you don't like this policy, just declare the
				329	* encoding !)
				330	*/
				331	ctxt->errNo = XML_ERR_INVALID_ENCODING;
				332	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
				333	ctxt->sax->error(ctxt->userData,
				334	"Input is not proper UTF-8, indicate encoding !\n");
				335	ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				336	ctxt->input->cur[0], ctxt->input->cur[1],
				337	ctxt->input->cur[2], ctxt->input->cur[3]);
				338	}
				339
				340	ctxt->charset = XML_CHAR_ENCODING_8859_1;
				341	*len = 1;
				342	return((int) *ctxt->input->cur);
				343	}
				344
				#if 0
				/**
				 * sgmlNextChar:
				 * @ctxt:  the DocBook SGML parser context
				 *
				 * Skip to the next input char (currently compiled out).
				 *
				 * Does nothing once the parser is at EOF. When the cursor sits on a 0
				 * byte and no more input can be read, the current input is popped;
				 * otherwise the cursor advances by one byte, line/col and nbChars are
				 * updated, and more input is requested if the new byte is 0.
				 */

				static void
				sgmlNextChar(docbParserCtxtPtr ctxt) {
				    if (ctxt->instate == XML_PARSER_EOF)
				        return;
				    if ((*ctxt->input->cur == 0) &&
				        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
				        xmlPopInput(ctxt);
				    } else {
				        if (*(ctxt->input->cur) == '\n') {
				            ctxt->input->line++; ctxt->input->col = 1;
				        } else ctxt->input->col++;
				        ctxt->input->cur++;
				        ctxt->nbChars++;
				        if (*ctxt->input->cur == 0)
				            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				    }
				}
				#endif
				371
				372	/**
				373	* docbSkipBlankChars:
				374	* @ctxt: the DocBook SGML parser context
				375	*
				376	* skip all blanks character found at that point in the input streams.
				377	*
				378	* Returns the number of space chars skipped
				379	*/
				380
				381	static int
				382	docbSkipBlankChars(xmlParserCtxtPtr ctxt) {
				383	int res = 0;
				384
				385	while (IS_BLANK(*(ctxt->input->cur))) {
				386	if ((*ctxt->input->cur == 0) &&
				387	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
				388	xmlPopInput(ctxt);
				389	} else {
				390	if (*(ctxt->input->cur) == '\n') {
				391	ctxt->input->line++; ctxt->input->col = 1;
				392	} else ctxt->input->col++;
				393	ctxt->input->cur++;
				394	ctxt->nbChars++;
				395	if (*ctxt->input->cur == 0)
				396	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				397	}
				398	res++;
				399	}
				400	return(res);
				401	}
				402
				403
				404
				405	/************************************************************************
				406	* *
				407	* The list of SGML elements and their properties *
				408	* *
				409	************************************************************************/
				410
				411	/*
				412	* Start Tag: 1 means the start tag can be omitted
				413	* End Tag: 1 means the end tag can be omitted
				414	* 2 means it's forbidden (empty elements)
				415	* Depr: this element is deprecated
				416	* DTD: 1 means that this element is valid only in the Loose DTD
				417	* 2 means that this element is valid only in the Frameset DTD
				418	*
				419	* Name,Start Tag,End Tag, Empty, Depr., DTD, Description
				420	*/
				421	static docbElemDesc
				422	docbookElementTable[] = {
				423	{ "abbrev", 0, 0, 0, 3, 0, "" }, /* word */
				424	{ "abstract", 0, 0, 0, 9, 0, "" }, /* title */
				425	{ "accel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				426	{ "ackno", 0, 0, 0, 4, 0, "" }, /* docinfo */
				427	{ "acronym", 0, 0, 0, 3, 0, "" }, /* word */
				428	{ "action", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				429	{ "address", 0, 0, 0, 1, 0, "" },
				430	{ "affiliation",0, 0, 0, 9, 0, "" }, /* shortaffil */
				431	{ "alt", 0, 0, 0, 1, 0, "" },
				432	{ "anchor", 0, 2, 1, 0, 0, "" },
				433	{ "answer", 0, 0, 0, 9, 0, "" }, /* label */
				434	{ "appendix", 0, 0, 0, 9, 0, "" }, /* appendixinfo */
				435	{ "appendixinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				436	{ "application",0, 0, 0, 2, 0, "" }, /* para */
				437	{ "area", 0, 2, 1, 0, 0, "" },
				438	{ "areaset", 0, 0, 0, 9, 0, "" }, /* area */
				439	{ "areaspec", 0, 0, 0, 9, 0, "" }, /* area */
				440	{ "arg", 0, 0, 0, 1, 0, "" },
Daniel Veillard	4ec0b0f	2001-04-25 15:53:40 +0000	[diff] [blame^]	441	{ "artheader", 0, 0, 0, 9, 0, "" },
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	442	{ "article", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				443	{ "articleinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				444	{ "artpagenums",0, 0, 0, 4, 0, "" }, /* docinfo */
				445	{ "attribution",0, 0, 0, 2, 0, "" }, /* para */
				446	{ "audiodata", 0, 2, 1, 0, 0, "" },
				447	{ "audioobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				448	{ "authorblurb",0, 0, 0, 9, 0, "" }, /* title */
				449	{ "authorgroup",0, 0, 0, 9, 0, "" }, /* author */
				450	{ "authorinitials",0, 0, 0, 4, 0, "" }, /* docinfo */
				451	{ "author", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */
				452	{ "beginpage", 0, 2, 1, 0, 0, "" },
				453	{ "bibliodiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				454	{ "biblioentry",0, 0, 0, 9, 0, "" }, /* articleinfo */
				455	{ "bibliography",0, 0, 0, 9, 0, "" }, /* bibliographyinfo */
				456	{ "bibliographyinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				457	{ "bibliomisc", 0, 0, 0, 2, 0, "" }, /* para */
				458	{ "bibliomixed",0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix, bibliomset) */
				459	{ "bibliomset", 0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix; \| bibliomset) */
				460	{ "biblioset", 0, 0, 0, 9, 0, "" }, /* bibliocomponent.mix */
				461	{ "blockquote", 0, 0, 0, 9, 0, "" }, /* title */
				462	{ "book", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				463	{ "bookinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				464	{ "bridgehead", 0, 0, 0, 8, 0, "" }, /* title */
				465	{ "callout", 0, 0, 0, 9, 0, "" }, /* component.mix */
				466	{ "calloutlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				467	{ "caption", 0, 0, 0, 9, 0, "" }, /* textobject.mix */
				468	{ "caution", 0, 0, 0, 9, 0, "" }, /* title */
				469	{ "chapter", 0, 0, 0, 9, 0, "" }, /* chapterinfo */
				470	{ "chapterinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				471	{ "citation", 0, 0, 0, 2, 0, "" }, /* para */
				472	{ "citerefentry",0, 0, 0, 9, 0, "" }, /* refentrytitle */
				473	{ "citetitle", 0, 0, 0, 2, 0, "" }, /* para */
				474	{ "city", 0, 0, 0, 4, 0, "" }, /* docinfo */
				475	{ "classname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				476	{ "classsynopsisinfo",0,0, 0, 9, 0, "" }, /* cptr */
				477	{ "classsynopsis",0, 0, 0, 9, 0, "" }, /* ooclass */
				478	{ "cmdsynopsis",0, 0, 0, 9, 0, "" }, /* command */
				479	{ "co", 0, 2, 1, 0, 0, "" },
				480	{ "collab", 0, 0, 0, 9, 0, "" }, /* collabname */
				481	{ "collabname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				482	{ "colophon", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				483	{ "colspec", 0, 2, 1, 0, 0, "" },
				484	{ "colspec", 0, 2, 1, 0, 0, "" },
				485	{ "command", 0, 0, 0, 9, 0, "" }, /* cptr */
				486	{ "computeroutput",0, 0, 0, 9, 0, "" }, /* cptr */
				487	{ "confdates", 0, 0, 0, 4, 0, "" }, /* docinfo */
				488	{ "confgroup", 0, 0, 0, 9, 0, "" }, /* confdates */
				489	{ "confnum", 0, 0, 0, 4, 0, "" }, /* docinfo */
				490	{ "confsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */
				491	{ "conftitle", 0, 0, 0, 4, 0, "" }, /* docinfo */
				492	{ "constant", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				493	{ "constructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */
				494	{ "contractnum",0, 0, 0, 4, 0, "" }, /* docinfo */
				495	{ "contractsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */
				496	{ "contrib", 0, 0, 0, 4, 0, "" }, /* docinfo */
				497	{ "copyright", 0, 0, 0, 9, 0, "" }, /* year */
				498	{ "corpauthor", 0, 0, 0, 4, 0, "" }, /* docinfo */
				499	{ "corpname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				500	{ "country", 0, 0, 0, 4, 0, "" }, /* docinfo */
				501	{ "database", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				502	{ "date", 0, 0, 0, 4, 0, "" }, /* docinfo */
				503	{ "dedication", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				504	{ "destructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */
				505	{ "edition", 0, 0, 0, 4, 0, "" }, /* docinfo */
				506	{ "editor", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */
				507	{ "email", 0, 0, 0, 4, 0, "" }, /* docinfo */
				508	{ "emphasis", 0, 0, 0, 2, 0, "" }, /* para */
				509	{ "entry", 0, 0, 0, 9, 0, "" }, /* tbl.entry.mdl */
				510	{ "entrytbl", 0, 0, 0, 9, 0, "" }, /* tbl.entrytbl.mdl */
				511	{ "envar", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				512	{ "epigraph", 0, 0, 0, 9, 0, "" }, /* attribution */
				513	{ "equation", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				514	{ "errorcode", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				515	{ "errorname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				516	{ "errortype", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				517	{ "example", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				518	{ "exceptionname",0, 0, 0, 7, 0, "" }, /* smallcptr */
				519	{ "fax", 0, 0, 0, 4, 0, "" }, /* docinfo */
				520	{ "fieldsynopsis", 0, 0, 0, 9, 0, "" }, /* modifier */
				521	{ "figure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				522	{ "filename", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				523	{ "firstname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				524	{ "firstterm", 0, 0, 0, 3, 0, "" }, /* word */
				525	{ "footnote", 0, 0, 0, 9, 0, "" }, /* footnote.mix */
				526	{ "footnoteref",0, 2, 1, 0, 0, "" },
				527	{ "foreignphrase",0, 0, 0, 2, 0, "" }, /* para */
				528	{ "formalpara", 0, 0, 0, 9, 0, "" }, /* title */
				529	{ "funcdef", 0, 0, 0, 1, 0, "" },
				530	{ "funcparams", 0, 0, 0, 9, 0, "" }, /* cptr */
				531	{ "funcprototype",0, 0, 0, 9, 0, "" }, /* funcdef */
				532	{ "funcsynopsis",0, 0, 0, 9, 0, "" }, /* funcsynopsisinfo */
				533	{ "funcsynopsisinfo", 0, 0, 0, 9, 0, "" }, /* cptr */
				534	{ "function", 0, 0, 0, 9, 0, "" }, /* cptr */
				535	{ "glossary", 0, 0, 0, 9, 0, "" }, /* glossaryinfo */
				536	{ "glossaryinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				537	{ "glossdef", 0, 0, 0, 9, 0, "" }, /* glossdef.mix */
				538	{ "glossdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				539	{ "glossentry", 0, 0, 0, 9, 0, "" }, /* glossterm */
				540	{ "glosslist", 0, 0, 0, 9, 0, "" }, /* glossentry */
				541	{ "glossseealso",0, 0, 0, 2, 0, "" }, /* para */
				542	{ "glosssee", 0, 0, 0, 2, 0, "" }, /* para */
				543	{ "glossterm", 0, 0, 0, 2, 0, "" }, /* para */
Daniel Veillard	4ec0b0f	2001-04-25 15:53:40 +0000	[diff] [blame^]	544	{ "graphic", 0, 0, 0, 9, 0, "" },
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	545	{ "graphicco", 0, 0, 0, 9, 0, "" }, /* areaspec */
				546	{ "group", 0, 0, 0, 9, 0, "" }, /* arg */
				547	{ "guibutton", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				548	{ "guiicon", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				549	{ "guilabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				550	{ "guimenuitem",0, 0, 0, 7, 0, "" }, /* smallcptr */
				551	{ "guimenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				552	{ "guisubmenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				553	{ "hardware", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				554	{ "highlights", 0, 0, 0, 9, 0, "" }, /* highlights.mix */
				555	{ "holder", 0, 0, 0, 4, 0, "" }, /* docinfo */
				556	{ "honorific", 0, 0, 0, 4, 0, "" }, /* docinfo */
				557	{ "imagedata", 0, 2, 1, 0, 0, "" },
				558	{ "imageobjectco",0, 0, 0, 9, 0, "" }, /* areaspec */
				559	{ "imageobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				560	{ "important", 0, 0, 0, 9, 0, "" }, /* title */
				561	{ "indexdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				562	{ "indexentry", 0, 0, 0, 9, 0, "" }, /* primaryie */
				563	{ "index", 0, 0, 0, 9, 0, "" }, /* indexinfo */
				564	{ "indexinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				565	{ "indexterm", 0, 0, 0, 9, 0, "" }, /* primary */
				566	{ "informalequation",0, 0, 0, 9, 0, "" }, /* equation.content */
				567	{ "informalexample",0, 0, 0, 9, 0, "" }, /* example.mix */
				568	{ "informalfigure",0, 0, 0, 9, 0, "" }, /* figure.mix */
				569	{ "informaltable",0, 0, 0, 9, 0, "" }, /* graphic */
				570	{ "initializer",0, 0, 0, 7, 0, "" }, /* smallcptr */
				571	{ "inlineequation",0, 0, 0, 9, 0, "" }, /* inlineequation.content */
				572	{ "inlinegraphic",0, 2, 1, 0, 0, "" },
				573	{ "inlinemediaobject",0,0, 0, 9, 0, "" }, /* objectinfo */
				574	{ "interfacename",0, 0, 0, 7, 0, "" }, /* smallcptr */
				575	{ "interface", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				576	{ "invpartnumber",0, 0, 0, 4, 0, "" }, /* docinfo */
				577	{ "isbn", 0, 0, 0, 4, 0, "" }, /* docinfo */
				578	{ "issn", 0, 0, 0, 4, 0, "" }, /* docinfo */
				579	{ "issuenum", 0, 0, 0, 4, 0, "" }, /* docinfo */
				580	{ "itemizedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				581	{ "itermset", 0, 0, 0, 9, 0, "" }, /* indexterm */
				582	{ "jobtitle", 0, 0, 0, 4, 0, "" }, /* docinfo */
				583	{ "keycap", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				584	{ "keycode", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				585	{ "keycombo", 0, 0, 0, 9, 0, "" }, /* keycap */
				586	{ "keysym", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				587	{ "keyword", 0, 0, 0, 1, 0, "" },
				588	{ "keywordset", 0, 0, 0, 9, 0, "" }, /* keyword */
				589	{ "label", 0, 0, 0, 3, 0, "" }, /* word */
				590	{ "legalnotice",0, 0, 0, 9, 0, "" }, /* title */
				591	{ "lineage", 0, 0, 0, 4, 0, "" }, /* docinfo */
				592	{ "lineannotation",0, 0, 0, 2, 0, "" }, /* para */
				593	{ "link", 0, 0, 0, 2, 0, "" }, /* para */
				594	{ "listitem", 0, 0, 0, 9, 0, "" }, /* component.mix */
				595	{ "literal", 0, 0, 0, 9, 0, "" }, /* cptr */
				596	{ "literallayout",0, 0, 0, 2, 0, "" }, /* para */
				597	{ "lot", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */
				598	{ "lotentry", 0, 0, 0, 2, 0, "" }, /* para */
				599	{ "manvolnum", 0, 0, 0, 3, 0, "" }, /* word */
				600	{ "markup", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				601	{ "medialabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				602	{ "mediaobjectco",0, 0, 0, 9, 0, "" }, /* objectinfo */
				603	{ "mediaobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				604	{ "member", 0, 0, 0, 2, 0, "" }, /* para */
				605	{ "menuchoice", 0, 0, 0, 9, 0, "" }, /* shortcut */
				606	{ "methodname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				607	{ "methodparam",0, 0, 0, 9, 0, "" }, /* modifier */
				608	{ "methodsynopsis",0, 0, 0, 9, 0, "" }, /* modifier */
				609	{ "modespec", 0, 0, 0, 4, 0, "" }, /* docinfo */
				610	{ "modifier", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				611	{ "mousebutton",0, 0, 0, 7, 0, "" }, /* smallcptr */
				612	{ "msgaud", 0, 0, 0, 2, 0, "" }, /* para */
				613	{ "msgentry", 0, 0, 0, 9, 0, "" }, /* msg */
				614	{ "msgexplan", 0, 0, 0, 9, 0, "" }, /* title */
				615	{ "msginfo", 0, 0, 0, 9, 0, "" }, /* msglevel */
				616	{ "msglevel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				617	{ "msgmain", 0, 0, 0, 9, 0, "" }, /* title */
				618	{ "msgorig", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				619	{ "msgrel", 0, 0, 0, 9, 0, "" }, /* title */
				620	{ "msgset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				621	{ "msgsub", 0, 0, 0, 9, 0, "" }, /* title */
				622	{ "msgtext", 0, 0, 0, 9, 0, "" }, /* component.mix */
				623	{ "msg", 0, 0, 0, 9, 0, "" }, /* title */
				624	{ "note", 0, 0, 0, 9, 0, "" }, /* title */
				625	{ "objectinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				626	{ "olink", 0, 0, 0, 2, 0, "" }, /* para */
				627	{ "ooclass", 0, 0, 0, 9, 0, "" }, /* modifier */
				628	{ "ooexception",0, 0, 0, 9, 0, "" }, /* modifier */
				629	{ "oointerface",0, 0, 0, 9, 0, "" }, /* modifier */
				630	{ "optional", 0, 0, 0, 9, 0, "" }, /* cptr */
				631	{ "option", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				632	{ "orderedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				633	{ "orgdiv", 0, 0, 0, 4, 0, "" }, /* docinfo */
				634	{ "orgname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				635	{ "otheraddr", 0, 0, 0, 4, 0, "" }, /* docinfo */
				636	{ "othercredit",0, 0, 0, 9, 0, "" }, /* person.ident.mix */
				637	{ "othername", 0, 0, 0, 4, 0, "" }, /* docinfo */
				638	{ "pagenums", 0, 0, 0, 4, 0, "" }, /* docinfo */
				639	{ "paramdef", 0, 0, 0, 1, 0, "" },
				640	{ "parameter", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				641	{ "para", 0, 0, 0, 2, 0, "" }, /* para */
				642	{ "partinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				643	{ "partintro", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				644	{ "part", 0, 0, 0, 9, 0, "" }, /* partinfo */
				645	{ "phone", 0, 0, 0, 4, 0, "" }, /* docinfo */
				646	{ "phrase", 0, 0, 0, 2, 0, "" }, /* para */
				647	{ "pob", 0, 0, 0, 4, 0, "" }, /* docinfo */
				648	{ "postcode", 0, 0, 0, 4, 0, "" }, /* docinfo */
				649	{ "prefaceinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				650	{ "preface", 0, 0, 0, 9, 0, "" }, /* prefaceinfo */
				651	{ "primaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				652	{ "primary ", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				653	{ "printhistory",0, 0, 0, 9, 0, "" }, /* para.class */
				654	{ "procedure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				655	{ "productname",0, 0, 0, 2, 0, "" }, /* para */
				656	{ "productnumber",0, 0, 0, 4, 0, "" }, /* docinfo */
				657	{ "programlistingco",0, 0, 0, 9, 0, "" }, /* areaspec */
				658	{ "programlisting",0, 0, 0, 2, 0, "" }, /* para */
				659	{ "prompt", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				660	{ "property", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				661	{ "pubdate", 0, 0, 0, 4, 0, "" }, /* docinfo */
				662	{ "publishername",0, 0, 0, 4, 0, "" }, /* docinfo */
				663	{ "publisher", 0, 0, 0, 9, 0, "" }, /* publishername */
				664	{ "pubsnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */
				665	{ "qandadiv", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				666	{ "qandaentry", 0, 0, 0, 9, 0, "" }, /* revhistory */
				667	{ "qandaset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				668	{ "question", 0, 0, 0, 9, 0, "" }, /* label */
				669	{ "quote", 0, 0, 0, 2, 0, "" }, /* para */
				670	{ "refclass", 0, 0, 0, 9, 0, "" }, /* refclass.char.mix */
				671	{ "refdescriptor",0, 0, 0, 9, 0, "" }, /* refname.char.mix */
				672	{ "refentryinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				673	{ "refentry", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */
				674	{ "refentrytitle",0, 0, 0, 2, 0, "" }, /* para */
				675	{ "referenceinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				676	{ "reference", 0, 0, 0, 9, 0, "" }, /* referenceinfo */
				677	{ "refmeta", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */
				678	{ "refmiscinfo",0, 0, 0, 4, 0, "" }, /* docinfo */
				679	{ "refnamediv", 0, 0, 0, 9, 0, "" }, /* refdescriptor */
				680	{ "refname", 0, 0, 0, 9, 0, "" }, /* refname.char.mix */
				681	{ "refpurpose", 0, 0, 0, 9, 0, "" }, /* refinline.char.mix */
				682	{ "refsect1info",0, 0, 0, 9, 0, "" }, /* graphic */
				683	{ "refsect1", 0, 0, 0, 9, 0, "" }, /* refsect */
				684	{ "refsect2info",0, 0, 0, 9, 0, "" }, /* graphic */
				685	{ "refsect2", 0, 0, 0, 9, 0, "" }, /* refsect */
				686	{ "refsect3info",0, 0, 0, 9, 0, "" }, /* graphic */
				687	{ "refsect3", 0, 0, 0, 9, 0, "" }, /* refsect */
				688	{ "refsynopsisdivinfo",0,0, 0, 9, 0, "" }, /* graphic */
				689	{ "refsynopsisdiv",0, 0, 0, 9, 0, "" }, /* refsynopsisdivinfo */
				690	{ "releaseinfo",0, 0, 0, 4, 0, "" }, /* docinfo */
				691	{ "remark", 0, 0, 0, 2, 0, "" }, /* para */
				692	{ "replaceable",0, 0, 0, 1, 0, "" },
				693	{ "returnvalue",0, 0, 0, 7, 0, "" }, /* smallcptr */
				694	{ "revdescription",0, 0, 0, 9, 0, "" }, /* revdescription.mix */
				695	{ "revhistory", 0, 0, 0, 9, 0, "" }, /* revision */
				696	{ "revision", 0, 0, 0, 9, 0, "" }, /* revnumber */
				697	{ "revnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */
				698	{ "revremark", 0, 0, 0, 4, 0, "" }, /* docinfo */
				699	{ "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */
				700	{ "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */
				701	{ "sbr", 0, 2, 1, 0, 0, "" },
				702	{ "screenco", 0, 0, 0, 9, 0, "" }, /* areaspec */
				703	{ "screeninfo", 0, 0, 0, 2, 0, "" }, /* para */
				704	{ "screen", 0, 0, 0, 2, 0, "" }, /* para */
				705	{ "screenshot", 0, 0, 0, 9, 0, "" }, /* screeninfo */
				706	{ "secondaryie",0, 0, 0, 4, 0, "" }, /* ndxterm */
				707	{ "secondary", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				708	{ "sect1info", 0, 0, 0, 9, 0, "" }, /* graphic */
				709	{ "sect1", 0, 0, 0, 9, 0, "" }, /* sect */
				710	{ "sect2info", 0, 0, 0, 9, 0, "" }, /* graphic */
				711	{ "sect2", 0, 0, 0, 9, 0, "" }, /* sect */
				712	{ "sect3info", 0, 0, 0, 9, 0, "" }, /* graphic */
				713	{ "sect3", 0, 0, 0, 9, 0, "" }, /* sect */
				714	{ "sect4info", 0, 0, 0, 9, 0, "" }, /* graphic */
				715	{ "sect4", 0, 0, 0, 9, 0, "" }, /* sect */
				716	{ "sect5info", 0, 0, 0, 9, 0, "" }, /* graphic */
				717	{ "sect5", 0, 0, 0, 9, 0, "" }, /* sect */
				718	{ "sectioninfo",0, 0, 0, 9, 0, "" }, /* graphic */
				719	{ "section", 0, 0, 0, 9, 0, "" }, /* sectioninfo */
				720	{ "seealsoie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				721	{ "seealso", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				722	{ "seeie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				723	{ "see", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				724	{ "seglistitem",0, 0, 0, 9, 0, "" }, /* seg */
				725	{ "segmentedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				726	{ "seg", 0, 0, 0, 2, 0, "" }, /* para */
				727	{ "segtitle", 0, 0, 0, 8, 0, "" }, /* title */
				728	{ "seriesvolnums", 0, 0, 0, 4, 0, "" }, /* docinfo */
				729	{ "set", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				730	{ "setindexinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				731	{ "setindex", 0, 0, 0, 9, 0, "" }, /* setindexinfo */
				732	{ "setinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				733	{ "sgmltag", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				734	{ "shortaffil", 0, 0, 0, 4, 0, "" }, /* docinfo */
				735	{ "shortcut", 0, 0, 0, 9, 0, "" }, /* keycap */
				736	{ "sidebarinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				737	{ "sidebar", 0, 0, 0, 9, 0, "" }, /* sidebarinfo */
				738	{ "simpara", 0, 0, 0, 2, 0, "" }, /* para */
				739	{ "simplelist", 0, 0, 0, 9, 0, "" }, /* member */
				740	{ "simplemsgentry", 0, 0, 0, 9, 0, "" }, /* msgtext */
				741	{ "simplesect", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				742	{ "spanspec", 0, 2, 1, 0, 0, "" },
				743	{ "state", 0, 0, 0, 4, 0, "" }, /* docinfo */
				744	{ "step", 0, 0, 0, 9, 0, "" }, /* title */
				745	{ "street", 0, 0, 0, 4, 0, "" }, /* docinfo */
				746	{ "structfield",0, 0, 0, 7, 0, "" }, /* smallcptr */
				747	{ "structname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				748	{ "subjectset", 0, 0, 0, 9, 0, "" }, /* subject */
				749	{ "subject", 0, 0, 0, 9, 0, "" }, /* subjectterm */
				750	{ "subjectterm",0, 0, 0, 1, 0, "" },
				751	{ "subscript", 0, 0, 0, 1, 0, "" },
				752	{ "substeps", 0, 0, 0, 9, 0, "" }, /* step */
				753	{ "subtitle", 0, 0, 0, 8, 0, "" }, /* title */
				754	{ "superscript", 0, 0, 0, 1, 0, "" },
				755	{ "surname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				756	{ "symbol", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				757	{ "synopfragment", 0, 0, 0, 9, 0, "" }, /* arg */
				758	{ "synopfragmentref", 0, 0, 0, 1, 0, "" },
				759	{ "synopsis", 0, 0, 0, 2, 0, "" }, /* para */
				760	{ "systemitem", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				761	{ "table", 0, 0, 0, 9, 0, "" }, /* tbl.table.mdl */
				762	/* { "%tbl.table.name;", 0, 0, 0, 9, 0, "" }, */ /* tbl.table.mdl */
				763	{ "tbody", 0, 0, 0, 9, 0, "" }, /* row */
				764	{ "tbody", 0, 0, 0, 9, 0, "" }, /* row */
				765	{ "term", 0, 0, 0, 2, 0, "" }, /* para */
				766	{ "tertiaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				767	{ "tertiary ", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				768	{ "textobject", 0, 0, 0, 9, 0, "" }, /* objectinfo */
				769	{ "tfoot", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */
				770	{ "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */
				771	{ "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */
				772	{ "thead", 0, 0, 0, 9, 0, "" }, /* row */
				773	{ "thead", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */
				774	{ "tip", 0, 0, 0, 9, 0, "" }, /* title */
				775	{ "titleabbrev",0, 0, 0, 8, 0, "" }, /* title */
				776	{ "title", 0, 0, 0, 8, 0, "" }, /* title */
				777	{ "tocback", 0, 0, 0, 2, 0, "" }, /* para */
				778	{ "toc", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */
				779	{ "tocchap", 0, 0, 0, 9, 0, "" }, /* tocentry */
				780	{ "tocentry", 0, 0, 0, 2, 0, "" }, /* para */
				781	{ "tocfront", 0, 0, 0, 2, 0, "" }, /* para */
				782	{ "toclevel1", 0, 0, 0, 9, 0, "" }, /* tocentry */
				783	{ "toclevel2", 0, 0, 0, 9, 0, "" }, /* tocentry */
				784	{ "toclevel3", 0, 0, 0, 9, 0, "" }, /* tocentry */
				785	{ "toclevel4", 0, 0, 0, 9, 0, "" }, /* tocentry */
				786	{ "toclevel5", 0, 0, 0, 9, 0, "" }, /* tocentry */
				787	{ "tocpart", 0, 0, 0, 9, 0, "" }, /* tocentry */
				788	{ "token", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				789	{ "trademark", 0, 0, 0, 1, 0, "" },
				790	{ "type", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				791	{ "ulink", 0, 0, 0, 2, 0, "" }, /* para */
				792	{ "userinput", 0, 0, 0, 9, 0, "" }, /* cptr */
				793	{ "varargs", 0, 2, 1, 0, 0, "" },
				794	{ "variablelist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				795	{ "varlistentry",0, 0, 0, 9, 0, "" }, /* term */
				796	{ "varname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				797	{ "videodata", 0, 2, 1, 0, 0, "" },
				798	{ "videoobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				799	{ "void", 0, 2, 1, 0, 0, "" },
				800	{ "volumenum", 0, 0, 0, 4, 0, "" }, /* docinfo */
				801	{ "warning", 0, 0, 0, 9, 0, "" }, /* title */
				802	{ "wordasword", 0, 0, 0, 3, 0, "" }, /* word */
				803	{ "xref", 0, 2, 1, 0, 0, "" },
				804	{ "year", 0, 0, 0, 4, 0, "" }, /* docinfo */
				805	};
				806
				807	#if 0
				808	/*
				809	* start tags that imply the end of a current element
				810	* any tag of each line implies the end of the current element if the type of
				811	* that element is in the same line
				812	*/
				813	static const char *docbEquEnd[] = {
				814	"dt", "dd", "li", "option", NULL,
				815	"h1", "h2", "h3", "h4", "h5", "h6", NULL,
				816	"ol", "menu", "dir", "address", "pre", "listing", "xmp", NULL,
				817	NULL
				818	};
				819	#endif
				820
				821	/*
				822	* According to the SGML DTD, HR should be added to the 2nd line above, as it
				823	* is not allowed within a H1, H2, H3, etc. But we should tolerate that case
				824	* because many documents contain rules in headings...
				825	*/
				826
				827	/*
				828	* start tags that imply the end of current element
				829	*/
				830	static const char *docbStartClose[] = {
				831	NULL
				832	};
				833
				834	/*
				835	* The list of SGML elements which are supposed not to have
				836	* CDATA content and where a p element will be implied
				837	*
				838	* TODO: extend that list by reading the SGML SGML DtD on
				839	* implied paragraph
				840	*/
				841	static char *docbNoContentElements[] = {
				842	NULL
				843	};
				844
				845
				846	static const char** docbStartCloseIndex[100];
				847	static int docbStartCloseIndexinitialized = 0;
				848
				849	/************************************************************************
				850	* *
				851	* functions to handle SGML specific data *
				852	* *
				853	************************************************************************/
				854
				855	/**
				856	* docbInitAutoClose:
				857	*
				858	* Initialize the docbStartCloseIndex for fast lookup of closing tags names.
				859	*
				860	*/
				861	static void
				862	docbInitAutoClose(void) {
				863	int indx, i = 0;
				864
				865	if (docbStartCloseIndexinitialized) return;
				866
				867	for (indx = 0;indx < 100;indx ++) docbStartCloseIndex[indx] = NULL;
				868	indx = 0;
				869	while ((docbStartClose[i] != NULL) && (indx < 100 - 1)) {
				870	docbStartCloseIndex[indx++] = &docbStartClose[i];
				871	while (docbStartClose[i] != NULL) i++;
				872	i++;
				873	}
				874	}
				875
				876	/**
				877	* docbTagLookup:
				878	* @tag: The tag name
				879	*
				880	* Lookup the SGML tag in the ElementTable
				881	*
				882	* Returns the related docbElemDescPtr or NULL if not found.
				883	*/
				884	static docbElemDescPtr
				885	docbTagLookup(const xmlChar *tag) {
				886	unsigned int i;
				887
				888	for (i = 0; i < (sizeof(docbookElementTable) /
				889	sizeof(docbookElementTable[0]));i++) {
				890	if (xmlStrEqual(tag, BAD_CAST docbookElementTable[i].name))
				891	return(&docbookElementTable[i]);
				892	}
				893	return(NULL);
				894	}
				895
				896	/**
				897	* docbCheckAutoClose:
				898	* @newtag: The new tag name
				899	* @oldtag: The old tag name
				900	*
				901	* Checks wether the new tag is one of the registered valid tags for closing old.
				902	* Initialize the docbStartCloseIndex for fast lookup of closing tags names.
				903	*
				904	* Returns 0 if no, 1 if yes.
				905	*/
				906	static int
				907	docbCheckAutoClose(const xmlChar newtag, const xmlChar oldtag) {
				908	int i, indx;
				909	const char **closed = NULL;
				910
				911	if (docbStartCloseIndexinitialized == 0) docbInitAutoClose();
				912
				913	/* inefficient, but not a big deal */
				914	for (indx = 0; indx < 100;indx++) {
				915	closed = docbStartCloseIndex[indx];
				916	if (closed == NULL) return(0);
				917	if (xmlStrEqual(BAD_CAST *closed, newtag)) break;
				918	}
				919
				920	i = closed - docbStartClose;
				921	i++;
				922	while (docbStartClose[i] != NULL) {
				923	if (xmlStrEqual(BAD_CAST docbStartClose[i], oldtag)) {
				924	return(1);
				925	}
				926	i++;
				927	}
				928	return(0);
				929	}
				930
				931	/**
				932	* docbAutoCloseOnClose:
				933	* @ctxt: an SGML parser context
				934	* @newtag: The new tag name
				935	*
				936	* The HTmL DtD allows an ending tag to implicitely close other tags.
				937	*/
				938	static void
				939	docbAutoCloseOnClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) {
				940	docbElemDescPtr info;
				941	xmlChar *oldname;
				942	int i;
				943
				944	if ((newtag[0] == '/') && (newtag[1] == 0))
				945	return;
				946
				947	#ifdef DEBUG
				948	xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
				949	for (i = 0;i < ctxt->nameNr;i++)
				950	xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]);
				951	#endif
				952
				953	for (i = (ctxt->nameNr - 1);i >= 0;i--) {
				954	if (xmlStrEqual(newtag, ctxt->nameTab[i])) break;
				955	}
				956	if (i < 0) return;
				957
				958	while (!xmlStrEqual(newtag, ctxt->name)) {
				959	info = docbTagLookup(ctxt->name);
				960	if ((info == NULL) \|\| (info->endTag == 1)) {
				961	#ifdef DEBUG
				962	xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
				963	#endif
				964	} else {
				965	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				966	ctxt->sax->error(ctxt->userData,
				967	"Opening and ending tag mismatch: %s and %s\n",
				968	newtag, ctxt->name);
				969	ctxt->wellFormed = 0;
				970	}
				971	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				972	ctxt->sax->endElement(ctxt->userData, ctxt->name);
				973	oldname = docbnamePop(ctxt);
				974	if (oldname != NULL) {
				975	#ifdef DEBUG
				976	xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: popped %s\n", oldname);
				977	#endif
				978	xmlFree(oldname);
				979	}
				980	}
				981	}
				982
				983	/**
				984	* docbAutoClose:
				985	* @ctxt: an SGML parser context
				986	* @newtag: The new tag name or NULL
				987	*
				988	* The HTmL DtD allows a tag to implicitely close other tags.
				989	* The list is kept in docbStartClose array. This function is
				990	* called when a new tag has been detected and generates the
				991	* appropriates closes if possible/needed.
				992	* If newtag is NULL this mean we are at the end of the resource
				993	* and we should check
				994	*/
				995	static void
				996	docbAutoClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) {
				997	xmlChar *oldname;
				998	while ((newtag != NULL) && (ctxt->name != NULL) &&
				999	(docbCheckAutoClose(newtag, ctxt->name))) {
				1000	#ifdef DEBUG
				1001	xmlGenericError(xmlGenericErrorContext,"docbAutoClose: %s closes %s\n", newtag, ctxt->name);
				1002	#endif
				1003	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				1004	ctxt->sax->endElement(ctxt->userData, ctxt->name);
				1005	oldname = docbnamePop(ctxt);
				1006	if (oldname != NULL) {
				1007	#ifdef DEBUG
				1008	xmlGenericError(xmlGenericErrorContext,"docbAutoClose: popped %s\n", oldname);
				1009	#endif
				1010	xmlFree(oldname);
				1011	}
				1012	}
				1013	}
				1014
				1015	/**
				1016	* docbAutoCloseTag:
				1017	* @doc: the SGML document
				1018	* @name: The tag name
				1019	* @elem: the SGML element
				1020	*
				1021	* The HTmL DtD allows a tag to implicitely close other tags.
				1022	* The list is kept in docbStartClose array. This function checks
				1023	* if the element or one of it's children would autoclose the
				1024	* given tag.
				1025	*
				1026	* Returns 1 if autoclose, 0 otherwise
				1027	*/
				1028	static int
				1029	docbAutoCloseTag(docbDocPtr doc, const xmlChar *name, docbNodePtr elem) {
				1030	docbNodePtr child;
				1031
				1032	if (elem == NULL) return(1);
				1033	if (xmlStrEqual(name, elem->name)) return(0);
				1034	if (docbCheckAutoClose(elem->name, name)) return(1);
				1035	child = elem->children;
				1036	while (child != NULL) {
				1037	if (docbAutoCloseTag(doc, name, child)) return(1);
				1038	child = child->next;
				1039	}
				1040	return(0);
				1041	}
				1042
				1043	#if 0
				1044	/**
				1045	* docbIsAutoClosed:
				1046	* @doc: the SGML document
				1047	* @elem: the SGML element
				1048	*
				1049	* The list is kept in docbStartClose array. This function checks
				1050	* if a tag is autoclosed by one of it's child
				1051	*
				1052	* Returns 1 if autoclosed, 0 otherwise
				1053	*/
				1054	static int
				1055	docbIsAutoClosed(docbDocPtr doc, docbNodePtr elem) {
				1056	docbNodePtr child;
				1057
				1058	if (elem == NULL) return(1);
				1059	child = elem->children;
				1060	while (child != NULL) {
				1061	if (docbAutoCloseTag(doc, elem->name, child)) return(1);
				1062	child = child->next;
				1063	}
				1064	return(0);
				1065	}
				1066	#endif
				1067
				1068	/**
				1069	* docbCheckParagraph
				1070	* @ctxt: an SGML parser context
				1071	*
				1072	* Check whether a p element need to be implied before inserting
				1073	* characters in the current element.
				1074	*
				1075	* Returns 1 if a paragraph has been inserted, 0 if not and -1
				1076	* in case of error.
				1077	*/
				1078
				1079	static int
				1080	docbCheckParagraph(docbParserCtxtPtr ctxt) {
				1081	const xmlChar *tag;
				1082	int i;
				1083
				1084	if (ctxt == NULL)
				1085	return(-1);
				1086	tag = ctxt->name;
				1087	if (tag == NULL) {
				1088	docbAutoClose(ctxt, BAD_CAST"p");
				1089	docbnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
				1090	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
				1091	ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
				1092	return(1);
				1093	}
				1094	for (i = 0; docbNoContentElements[i] != NULL; i++) {
				1095	if (xmlStrEqual(tag, BAD_CAST docbNoContentElements[i])) {
				1096	#ifdef DEBUG
				1097	xmlGenericError(xmlGenericErrorContext,"Implied element paragraph\n");
				1098	#endif
				1099	docbAutoClose(ctxt, BAD_CAST"p");
				1100	docbnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
				1101	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
				1102	ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
				1103	return(1);
				1104	}
				1105	}
				1106	return(0);
				1107	}
				1108
				1109	/************************************************************************
				1110	* *
				1111	* The list of SGML predefined entities *
				1112	* *
				1113	************************************************************************/
				1114
				1115
				1116	static docbEntityDesc
				1117	docbookEntitiesTable[] = {
				1118	/*
				1119	* the 4 absolute ones, plus apostrophe.
				1120	*/
				1121	{ 0x0026, "amp", "AMPERSAND" },
				1122	{ 0x003C, "lt", "LESS-THAN SIGN" },
				1123
				1124	/*
				1125	* Converted with VI macros from docbook ent files
				1126	*/
				1127	{ 0x0021, "excl", "EXCLAMATION MARK" },
				1128	{ 0x0022, "quot", "QUOTATION MARK" },
				1129	{ 0x0023, "num", "NUMBER SIGN" },
				1130	{ 0x0024, "dollar", "DOLLAR SIGN" },
				1131	{ 0x0025, "percnt", "PERCENT SIGN" },
				1132	{ 0x0027, "apos", "APOSTROPHE" },
				1133	{ 0x0028, "lpar", "LEFT PARENTHESIS" },
				1134	{ 0x0029, "rpar", "RIGHT PARENTHESIS" },
				1135	{ 0x002A, "ast", "ASTERISK OPERATOR" },
				1136	{ 0x002B, "plus", "PLUS SIGN" },
				1137	{ 0x002C, "comma", "COMMA" },
				1138	{ 0x002D, "hyphen", "HYPHEN-MINUS" },
				1139	{ 0x002E, "period", "FULL STOP" },
				1140	{ 0x002F, "sol", "SOLIDUS" },
				1141	{ 0x003A, "colon", "COLON" },
				1142	{ 0x003B, "semi", "SEMICOLON" },
				1143	{ 0x003D, "equals", "EQUALS SIGN" },
				1144	{ 0x003E, "gt", "GREATER-THAN SIGN" },
				1145	{ 0x003F, "quest", "QUESTION MARK" },
				1146	{ 0x0040, "commat", "COMMERCIAL AT" },
				1147	{ 0x005B, "lsqb", "LEFT SQUARE BRACKET" },
				1148	{ 0x005C, "bsol", "REVERSE SOLIDUS" },
				1149	{ 0x005D, "rsqb", "RIGHT SQUARE BRACKET" },
				1150	{ 0x005E, "circ", "RING OPERATOR" },
				1151	{ 0x005F, "lowbar", "LOW LINE" },
				1152	{ 0x0060, "grave", "GRAVE ACCENT" },
				1153	{ 0x007B, "lcub", "LEFT CURLY BRACKET" },
				1154	{ 0x007C, "verbar", "VERTICAL LINE" },
				1155	{ 0x007D, "rcub", "RIGHT CURLY BRACKET" },
				1156	{ 0x00A0, "nbsp", "NO-BREAK SPACE" },
				1157	{ 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" },
				1158	{ 0x00A2, "cent", "CENT SIGN" },
				1159	{ 0x00A3, "pound", "POUND SIGN" },
				1160	{ 0x00A4, "curren", "CURRENCY SIGN" },
				1161	{ 0x00A5, "yen", "YEN SIGN" },
				1162	{ 0x00A6, "brvbar", "BROKEN BAR" },
				1163	{ 0x00A7, "sect", "SECTION SIGN" },
				1164	{ 0x00A8, "die", "" },
				1165	{ 0x00A8, "Dot", "" },
				1166	{ 0x00A8, "uml", "" },
				1167	{ 0x00A9, "copy", "COPYRIGHT SIGN" },
				1168	{ 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" },
				1169	{ 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" },
				1170	{ 0x00AC, "not", "NOT SIGN" },
				1171	{ 0x00AD, "shy", "SOFT HYPHEN" },
				1172	{ 0x00AE, "reg", "REG TRADE MARK SIGN" },
				1173	{ 0x00AF, "macr", "MACRON" },
				1174	{ 0x00B0, "deg", "DEGREE SIGN" },
				1175	{ 0x00B1, "plusmn", "PLUS-MINUS SIGN" },
				1176	{ 0x00B2, "sup2", "SUPERSCRIPT TWO" },
				1177	{ 0x00B3, "sup3", "SUPERSCRIPT THREE" },
				1178	{ 0x00B4, "acute", "ACUTE ACCENT" },
				1179	{ 0x00B5, "micro", "MICRO SIGN" },
				1180	{ 0x00B6, "para", "PILCROW SIGN" },
				1181	{ 0x00B7, "middot", "MIDDLE DOT" },
				1182	{ 0x00B8, "cedil", "CEDILLA" },
				1183	{ 0x00B9, "sup1", "SUPERSCRIPT ONE" },
				1184	{ 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" },
				1185	{ 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" },
				1186	{ 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" },
				1187	{ 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" },
				1188	{ 0x00BD, "half", "VULGAR FRACTION ONE HALF" },
				1189	{ 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" },
				1190	{ 0x00BF, "iquest", "INVERTED QUESTION MARK" },
				1191	{ 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" },
				1192	{ 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" },
				1193	{ 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" },
				1194	{ 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" },
				1195	{ 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" },
				1196	{ 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" },
				1197	{ 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" },
				1198	{ 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" },
				1199	{ 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" },
				1200	{ 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" },
				1201	{ 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" },
				1202	{ 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" },
				1203	{ 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" },
				1204	{ 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" },
				1205	{ 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" },
				1206	{ 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" },
				1207	{ 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" },
				1208	{ 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" },
				1209	{ 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" },
				1210	{ 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" },
				1211	{ 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" },
				1212	{ 0x00D5, "Otilde", "LATIN CAPITAL LETTER O WITH TILDE" },
				1213	{ 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" },
				1214	{ 0x00D7, "times", "MULTIPLICATION SIGN" },
				1215	{ 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" },
				1216	{ 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" },
				1217	{ 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" },
				1218	{ 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" },
				1219	{ 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" },
				1220	{ 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" },
				1221	{ 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" },
				1222	{ 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" },
				1223	{ 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" },
				1224	{ 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" },
				1225	{ 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" },
				1226	{ 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" },
				1227	{ 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" },
				1228	{ 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" },
				1229	{ 0x00E6, "aelig", "LATIN SMALL LETTER AE" },
				1230	{ 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" },
				1231	{ 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" },
				1232	{ 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" },
				1233	{ 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" },
				1234	{ 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" },
				1235	{ 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" },
				1236	{ 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" },
				1237	{ 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" },
				1238	{ 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" },
				1239	{ 0x00F0, "eth", "LATIN SMALL LETTER ETH" },
				1240	{ 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" },
				1241	{ 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" },
				1242	{ 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" },
				1243	{ 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" },
				1244	{ 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" },
				1245	{ 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" },
				1246	{ 0x00F7, "divide", "DIVISION SIGN" },
				1247	{ 0x00F8, "oslash", "CIRCLED DIVISION SLASH" },
				1248	{ 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" },
				1249	{ 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" },
				1250	{ 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" },
				1251	{ 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" },
				1252	{ 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" },
				1253	{ 0x00FE, "thorn", "LATIN SMALL LETTER THORN" },
				1254	{ 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" },
				1255	{ 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" },
				1256	{ 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" },
				1257	{ 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" },
				1258	{ 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" },
				1259	{ 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" },
				1260	{ 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" },
				1261	{ 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" },
				1262	{ 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" },
				1263	{ 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" },
				1264	{ 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" },
				1265	{ 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" },
				1266	{ 0x010B, "cdot", "DOT OPERATOR" },
				1267	{ 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" },
				1268	{ 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" },
				1269	{ 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" },
				1270	{ 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" },
				1271	{ 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" },
				1272	{ 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" },
				1273	{ 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" },
				1274	{ 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" },
				1275	{ 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" },
				1276	{ 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" },
				1277	{ 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" },
				1278	{ 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" },
				1279	{ 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" },
				1280	{ 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" },
				1281	{ 0x011C, "Gcirc", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX" },
				1282	{ 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" },
				1283	{ 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" },
				1284	{ 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" },
				1285	{ 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" },
				1286	{ 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" },
				1287	{ 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" },
				1288	{ 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" },
				1289	{ 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" },
				1290	{ 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" },
				1291	{ 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" },
				1292	{ 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" },
				1293	{ 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" },
				1294	{ 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" },
				1295	{ 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" },
				1296	{ 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" },
				1297	{ 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" },
				1298	{ 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" },
				1299	{ 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
				1300	{ 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
				1301	{ 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" },
				1302	{ 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" },
				1303	{ 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" },
				1304	{ 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" },
				1305	{ 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" },
				1306	{ 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" },
				1307	{ 0x0138, "kgreen", "LATIN SMALL LETTER KRA" },
				1308	{ 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" },
				1309	{ 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" },
				1310	{ 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" },
				1311	{ 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" },
				1312	{ 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" },
				1313	{ 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" },
				1314	{ 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" },
				1315	{ 0x0140, "lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" },
				1316	{ 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" },
				1317	{ 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" },
				1318	{ 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" },
				1319	{ 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" },
				1320	{ 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" },
				1321	{ 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" },
				1322	{ 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" },
				1323	{ 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" },
				1324	{ 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" },
				1325	{ 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" },
				1326	{ 0x014B, "eng", "LATIN SMALL LETTER ENG" },
				1327	{ 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" },
				1328	{ 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" },
				1329	{ 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" },
				1330	{ 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" },
				1331	{ 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" },
				1332	{ 0x0153, "oelig", "LATIN SMALL LIGATURE OE" },
				1333	{ 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" },
				1334	{ 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" },
				1335	{ 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" },
				1336	{ 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" },
				1337	{ 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" },
				1338	{ 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" },
				1339	{ 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" },
				1340	{ 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" },
				1341	{ 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" },
				1342	{ 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" },
				1343	{ 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" },
				1344	{ 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" },
				1345	{ 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" },
				1346	{ 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" },
				1347	{ 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" },
				1348	{ 0x0163, "tcedil", "LATIN SMALL LETTER T WITH CEDILLA" },
				1349	{ 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" },
				1350	{ 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" },
				1351	{ 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" },
				1352	{ 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" },
				1353	{ 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" },
				1354	{ 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" },
				1355	{ 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" },
				1356	{ 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" },
				1357	{ 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" },
				1358	{ 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" },
				1359	{ 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" },
				1360	{ 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" },
				1361	{ 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" },
				1362	{ 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" },
				1363	{ 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" },
				1364	{ 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" },
				1365	{ 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" },
				1366	{ 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" },
				1367	{ 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" },
				1368	{ 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" },
				1369	{ 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" },
				1370	{ 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" },
				1371	{ 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" },
				1372	{ 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" },
				1373	{ 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" },
				1374	{ 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" },
				1375	{ 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" },
				1376	{ 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" },
				1377	{ 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" },
				1378	{ 0x02C7, "caron", "CARON" },
				1379	{ 0x02D8, "breve", "BREVE" },
				1380	{ 0x02D9, "dot", "DOT ABOVE" },
				1381	{ 0x02DA, "ring", "RING ABOVE" },
				1382	{ 0x02DB, "ogon", "OGONEK" },
				1383	{ 0x02DC, "tilde", "TILDE" },
				1384	{ 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" },
				1385	{ 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" },
				1386	{ 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" },
				1387	{ 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" },
				1388	{ 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" },
				1389	{ 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" },
				1390	{ 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" },
				1391	{ 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" },
				1392	{ 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" },
				1393	{ 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" },
				1394	{ 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" },
				1395	{ 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" },
				1396	{ 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" },
				1397	{ 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" },
				1398	{ 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" },
				1399	{ 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" },
				1400	{ 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" },
				1401	{ 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" },
				1402	{ 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" },
				1403	{ 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" },
				1404	{ 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" },
				1405	{ 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" },
				1406	{ 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" },
				1407	{ 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" },
				1408	{ 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" },
				1409	{ 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" },
				1410	{ 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" },
				1411	{ 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" },
				1412	{ 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" },
				1413	{ 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" },
				1414	{ 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" },
				1415	{ 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" },
				1416	{ 0x039E, "Xi", "GREEK CAPITAL LETTER XI" },
				1417	{ 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" },
				1418	{ 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" },
				1419	{ 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" },
				1420	{ 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" },
				1421	{ 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" },
				1422	{ 0x03A3, "b.Sigma", "GREEK CAPITAL LETTER SIGMA" },
				1423	{ 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" },
				1424	{ 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" },
				1425	{ 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" },
				1426	{ 0x03A5, "Ugr", "" },
				1427	{ 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" },
				1428	{ 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" },
				1429	{ 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" },
				1430	{ 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" },
				1431	{ 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" },
				1432	{ 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" },
				1433	{ 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" },
				1434	{ 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" },
				1435	{ 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" },
				1436	{ 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" },
				1437	{ 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" },
				1438	{ 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" },
				1439	{ 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" },
				1440	{ 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" },
				1441	{ 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" },
				1442	{ 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" },
				1443	{ 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" },
				1444	{ 0x03B1, "agr", "" },
				1445	{ 0x03B1, "alpha", "" },
				1446	{ 0x03B1, "b.alpha", "" },
				1447	{ 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" },
				1448	{ 0x03B2, "beta", "GREEK SMALL LETTER BETA" },
				1449	{ 0x03B2, "bgr", "GREEK SMALL LETTER BETA" },
				1450	{ 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" },
				1451	{ 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" },
				1452	{ 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" },
				1453	{ 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" },
				1454	{ 0x03B4, "delta", "GREEK SMALL LETTER DELTA" },
				1455	{ 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" },
				1456	{ 0x03B5, "b.epsi", "" },
				1457	{ 0x03B5, "b.epsis", "" },
				1458	{ 0x03B5, "b.epsiv", "" },
				1459	{ 0x03B5, "egr", "" },
				1460	{ 0x03B5, "epsiv", "" },
				1461	{ 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" },
				1462	{ 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" },
				1463	{ 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" },
				1464	{ 0x03B7, "b.eta", "GREEK SMALL LETTER ETA" },
				1465	{ 0x03B7, "eegr", "GREEK SMALL LETTER ETA" },
				1466	{ 0x03B7, "eta", "GREEK SMALL LETTER ETA" },
				1467	{ 0x03B8, "b.thetas", "" },
				1468	{ 0x03B8, "thetas", "" },
				1469	{ 0x03B8, "thgr", "" },
				1470	{ 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" },
				1471	{ 0x03B9, "igr", "GREEK SMALL LETTER IOTA" },
				1472	{ 0x03B9, "iota", "GREEK SMALL LETTER IOTA" },
				1473	{ 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" },
				1474	{ 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" },
				1475	{ 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" },
				1476	{ 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" },
				1477	{ 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" },
				1478	{ 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" },
				1479	{ 0x03BC, "b.mu", "GREEK SMALL LETTER MU" },
				1480	{ 0x03BC, "mgr", "GREEK SMALL LETTER MU" },
				1481	{ 0x03BC, "mu", "GREEK SMALL LETTER MU" },
				1482	{ 0x03BD, "b.nu", "GREEK SMALL LETTER NU" },
				1483	{ 0x03BD, "ngr", "GREEK SMALL LETTER NU" },
				1484	{ 0x03BD, "nu", "GREEK SMALL LETTER NU" },
				1485	{ 0x03BE, "b.xi", "GREEK SMALL LETTER XI" },
				1486	{ 0x03BE, "xgr", "GREEK SMALL LETTER XI" },
				1487	{ 0x03BE, "xi", "GREEK SMALL LETTER XI" },
				1488	{ 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" },
				1489	{ 0x03C0, "b.pi", "GREEK SMALL LETTER PI" },
				1490	{ 0x03C0, "pgr", "GREEK SMALL LETTER PI" },
				1491	{ 0x03C0, "pi", "GREEK SMALL LETTER PI" },
				1492	{ 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" },
				1493	{ 0x03C1, "rgr", "GREEK SMALL LETTER RHO" },
				1494	{ 0x03C1, "rho", "GREEK SMALL LETTER RHO" },
				1495	{ 0x03C2, "b.sigmav", "" },
				1496	{ 0x03C2, "sfgr", "" },
				1497	{ 0x03C2, "sigmav", "" },
				1498	{ 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" },
				1499	{ 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" },
				1500	{ 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" },
				1501	{ 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" },
				1502	{ 0x03C4, "tau", "GREEK SMALL LETTER TAU" },
				1503	{ 0x03C4, "tgr", "GREEK SMALL LETTER TAU" },
				1504	{ 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" },
				1505	{ 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" },
				1506	{ 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" },
				1507	{ 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" },
				1508	{ 0x03C6, "phgr", "GREEK SMALL LETTER PHI" },
				1509	{ 0x03C6, "phis", "GREEK SMALL LETTER PHI" },
				1510	{ 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" },
				1511	{ 0x03C7, "chi", "GREEK SMALL LETTER CHI" },
				1512	{ 0x03C7, "khgr", "GREEK SMALL LETTER CHI" },
				1513	{ 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" },
				1514	{ 0x03C8, "psgr", "GREEK SMALL LETTER PSI" },
				1515	{ 0x03C8, "psi", "GREEK SMALL LETTER PSI" },
				1516	{ 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" },
				1517	{ 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" },
				1518	{ 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" },
				1519	{ 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" },
				1520	{ 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" },
				1521	{ 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" },
				1522	{ 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" },
				1523	{ 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" },
				1524	{ 0x03D1, "b.thetav", "" },
				1525	{ 0x03D1, "thetav", "" },
				1526	{ 0x03D2, "b.Upsi", "" },
				1527	{ 0x03D2, "Upsi", "" },
				1528	{ 0x03D5, "b.phiv", "GREEK PHI SYMBOL" },
				1529	{ 0x03D5, "phiv", "GREEK PHI SYMBOL" },
				1530	{ 0x03D6, "b.piv", "GREEK PI SYMBOL" },
				1531	{ 0x03D6, "piv", "GREEK PI SYMBOL" },
				1532	{ 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" },
				1533	{ 0x03DC, "gammad", "GREEK LETTER DIGAMMA" },
				1534	{ 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" },
				1535	{ 0x03F0, "kappav", "GREEK KAPPA SYMBOL" },
				1536	{ 0x03F1, "b.rhov", "GREEK RHO SYMBOL" },
				1537	{ 0x03F1, "rhov", "GREEK RHO SYMBOL" },
				1538	{ 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" },
				1539	{ 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" },
				1540	{ 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" },
				1541	{ 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" },
				1542	{ 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" },
				1543	{ 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" },
				1544	{ 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" },
				1545	{ 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" },
				1546	{ 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" },
				1547	{ 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" },
				1548	{ 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" },
				1549	{ 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" },
				1550	{ 0x040E, "Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" },
				1551	{ 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" },
				1552	{ 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" },
				1553	{ 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" },
				1554	{ 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" },
				1555	{ 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" },
				1556	{ 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" },
				1557	{ 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" },
				1558	{ 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" },
				1559	{ 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" },
				1560	{ 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" },
				1561	{ 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" },
				1562	{ 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" },
				1563	{ 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" },
				1564	{ 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" },
				1565	{ 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" },
				1566	{ 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" },
				1567	{ 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" },
				1568	{ 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" },
				1569	{ 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" },
				1570	{ 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" },
				1571	{ 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" },
				1572	{ 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" },
				1573	{ 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" },
				1574	{ 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" },
				1575	{ 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" },
				1576	{ 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" },
				1577	{ 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" },
				1578	{ 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" },
				1579	{ 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" },
				1580	{ 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" },
				1581	{ 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" },
				1582	{ 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" },
				1583	{ 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" },
				1584	{ 0x0430, "acy", "CYRILLIC SMALL LETTER A" },
				1585	{ 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" },
				1586	{ 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" },
				1587	{ 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" },
				1588	{ 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" },
				1589	{ 0x0435, "iecy", "CYRILLIC SMALL LETTER IE" },
				1590	{ 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" },
				1591	{ 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" },
				1592	{ 0x0438, "icy", "CYRILLIC SMALL LETTER I" },
				1593	{ 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" },
				1594	{ 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" },
				1595	{ 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" },
				1596	{ 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" },
				1597	{ 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" },
				1598	{ 0x043E, "ocy", "CYRILLIC SMALL LETTER O" },
				1599	{ 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" },
				1600	{ 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" },
				1601	{ 0x0441, "scy", "CYRILLIC SMALL LETTER ES" },
				1602	{ 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" },
				1603	{ 0x0443, "ucy", "CYRILLIC SMALL LETTER U" },
				1604	{ 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" },
				1605	{ 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" },
				1606	{ 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" },
				1607	{ 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" },
				1608	{ 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" },
				1609	{ 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" },
				1610	{ 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" },
				1611	{ 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" },
				1612	{ 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" },
				1613	{ 0x044D, "ecy", "CYRILLIC SMALL LETTER E" },
				1614	{ 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" },
				1615	{ 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" },
				1616	{ 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" },
				1617	{ 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" },
				1618	{ 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" },
				1619	{ 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" },
				1620	{ 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" },
				1621	{ 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" },
				1622	{ 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" },
				1623	{ 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" },
				1624	{ 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" },
				1625	{ 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" },
				1626	{ 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" },
				1627	{ 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" },
				1628	{ 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT U" },
				1629	{ 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" },
				1630	{ 0x2002, "ensp", "EN SPACE" },
				1631	{ 0x2003, "emsp", "EM SPACE" },
				1632	{ 0x2004, "emsp13", "THREE-PER-EM SPACE" },
				1633	{ 0x2005, "emsp14", "FOUR-PER-EM SPACE" },
				1634	{ 0x2007, "numsp", "FIGURE SPACE" },
				1635	{ 0x2008, "puncsp", "PUNCTUATION SPACE" },
				1636	{ 0x2009, "thinsp", "THIN SPACE" },
				1637	{ 0x200A, "hairsp", "HAIR SPACE" },
				1638	{ 0x2010, "dash", "HYPHEN" },
				1639	{ 0x2013, "ndash", "EN DASH" },
				1640	{ 0x2014, "mdash", "EM DASH" },
				1641	{ 0x2015, "horbar", "HORIZONTAL BAR" },
				1642	{ 0x2016, "Verbar", "DOUBLE VERTICAL LINE" },
				1643	{ 0x2018, "lsquo", "" },
				1644	{ 0x2018, "rsquor", "" },
				1645	{ 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" },
				1646	{ 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" },
				1647	{ 0x201C, "ldquo", "" },
				1648	{ 0x201C, "rdquor", "" },
				1649	{ 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" },
				1650	{ 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" },
				1651	{ 0x2020, "dagger", "DAGGER" },
				1652	{ 0x2021, "Dagger", "DOUBLE DAGGER" },
				1653	{ 0x2022, "bull", "BULLET" },
				1654	{ 0x2025, "nldr", "TWO DOT LEADER" },
				1655	{ 0x2026, "hellip", "HORIZONTAL ELLIPSIS" },
				1656	{ 0x2026, "mldr", "HORIZONTAL ELLIPSIS" },
				1657	{ 0x2030, "permil", "PER MILLE SIGN" },
				1658	{ 0x2032, "prime", "PRIME" },
				1659	{ 0x2032, "vprime", "PRIME" },
				1660	{ 0x2033, "Prime", "DOUBLE PRIME" },
				1661	{ 0x2034, "tprime", "TRIPLE PRIME" },
				1662	{ 0x2035, "bprime", "REVERSED PRIME" },
				1663	{ 0x2041, "caret", "CARET" },
				1664	{ 0x2043, "hybull", "HYPHEN BULLET" },
				1665	{ 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" },
				1666	{ 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" },
				1667	{ 0x2105, "incare", "CARE OF" },
				1668	{ 0x210B, "hamilt", "SCRIPT CAPITAL H" },
				1669	{ 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" },
				1670	{ 0x2111, "image", "BLACK-LETTER CAPITAL I" },
				1671	{ 0x2112, "lagran", "SCRIPT CAPITAL L" },
				1672	{ 0x2113, "ell", "SCRIPT SMALL L" },
				1673	{ 0x2116, "numero", "NUMERO SIGN" },
				1674	{ 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" },
				1675	{ 0x2118, "weierp", "SCRIPT CAPITAL P" },
				1676	{ 0x211C, "real", "BLACK-LETTER CAPITAL R" },
				1677	{ 0x211E, "rx", "PRESCRIPTION TAKE" },
				1678	{ 0x2122, "trade", "TRADE MARK SIGN" },
				1679	{ 0x2126, "ohm", "OHM SIGN" },
				1680	{ 0x212B, "angst", "ANGSTROM SIGN" },
				1681	{ 0x212C, "bernou", "SCRIPT CAPITAL B" },
				1682	{ 0x2133, "phmmat", "SCRIPT CAPITAL M" },
				1683	{ 0x2134, "order", "SCRIPT SMALL O" },
				1684	{ 0x2135, "aleph", "ALEF SYMBOL" },
				1685	{ 0x2136, "beth", "BET SYMBOL" },
				1686	{ 0x2137, "gimel", "GIMEL SYMBOL" },
				1687	{ 0x2138, "daleth", "DALET SYMBOL" },
				1688	{ 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" },
				1689	{ 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" },
				1690	{ 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" },
				1691	{ 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" },
				1692	{ 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" },
				1693	{ 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" },
				1694	{ 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" },
				1695	{ 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" },
				1696	{ 0x215B, "frac18", "" },
				1697	{ 0x215C, "frac38", "" },
				1698	{ 0x215D, "frac58", "" },
				1699	{ 0x215E, "frac78", "" },
				1700	{ 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" },
				1701	{ 0x2191, "uarr", "UPWARDS ARROW" },
				1702	{ 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" },
				1703	{ 0x2193, "darr", "DOWNWARDS ARROW" },
				1704	{ 0x2194, "harr", "LEFT RIGHT ARROW" },
				1705	{ 0x2194, "xhArr", "LEFT RIGHT ARROW" },
				1706	{ 0x2194, "xharr", "LEFT RIGHT ARROW" },
				1707	{ 0x2195, "varr", "UP DOWN ARROW" },
				1708	{ 0x2196, "nwarr", "NORTH WEST ARROW" },
				1709	{ 0x2197, "nearr", "NORTH EAST ARROW" },
				1710	{ 0x2198, "drarr", "SOUTH EAST ARROW" },
				1711	{ 0x2199, "dlarr", "SOUTH WEST ARROW" },
				1712	{ 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" },
				1713	{ 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" },
				1714	{ 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" },
				1715	{ 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" },
				1716	{ 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" },
				1717	{ 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" },
				1718	{ 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" },
				1719	{ 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" },
				1720	{ 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" },
				1721	{ 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" },
				1722	{ 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" },
				1723	{ 0x21AC, "rarrlp", "RIGHTWARDS ARROW WITH LOOP" },
				1724	{ 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" },
				1725	{ 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" },
				1726	{ 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" },
				1727	{ 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" },
				1728	{ 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" },
				1729	{ 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" },
				1730	{ 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" },
				1731	{ 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" },
				1732	{ 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" },
				1733	{ 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" },
				1734	{ 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" },
				1735	{ 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" },
				1736	{ 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" },
				1737	{ 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" },
				1738	{ 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" },
				1739	{ 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" },
				1740	{ 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" },
				1741	{ 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" },
				1742	{ 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" },
				1743	{ 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" },
				1744	{ 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" },
				1745	{ 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" },
				1746	{ 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" },
				1747	{ 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" },
				1748	{ 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" },
				1749	{ 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" },
				1750	{ 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" },
				1751	{ 0x21D0, "lArr", "LEFTWARDS ARROW" },
				1752	{ 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" },
				1753	{ 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" },
				1754	{ 0x21D2, "rArr", "RIGHTWARDS ARROW" },
				1755	{ 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" },
				1756	{ 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" },
				1757	{ 0x21D4, "hArr", "" },
				1758	{ 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" },
				1759	{ 0x21D5, "vArr", "UP DOWN DOUBLE ARROW" },
				1760	{ 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" },
				1761	{ 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" },
				1762	{ 0x2200, "forall", "" },
				1763	{ 0x2201, "comp", "COMPLEMENT" },
				1764	{ 0x2202, "part", "" },
				1765	{ 0x2203, "exist", "" },
				1766	{ 0x2204, "nexist", "THERE DOES NOT EXIST" },
				1767	{ 0x2205, "empty", "" },
				1768	{ 0x2207, "nabla", "NABLA" },
				1769	{ 0x2209, "notin", "" },
				1770	{ 0x220A, "epsi", "" },
				1771	{ 0x220A, "epsis", "" },
				1772	{ 0x220A, "isin", "" },
				1773	{ 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" },
				1774	{ 0x220D, "ni", "" },
				1775	{ 0x220F, "prod", "N-ARY PRODUCT" },
				1776	{ 0x2210, "amalg", "N-ARY COPRODUCT" },
				1777	{ 0x2210, "coprod", "N-ARY COPRODUCT" },
				1778	{ 0x2210, "samalg", "" },
				1779	{ 0x2211, "sum", "N-ARY SUMMATION" },
				1780	{ 0x2212, "minus", "MINUS SIGN" },
				1781	{ 0x2213, "mnplus", "" },
				1782	{ 0x2214, "plusdo", "DOT PLUS" },
				1783	{ 0x2216, "setmn", "SET MINUS" },
				1784	{ 0x2216, "ssetmn", "SET MINUS" },
				1785	{ 0x2217, "lowast", "ASTERISK OPERATOR" },
				1786	{ 0x2218, "compfn", "RING OPERATOR" },
				1787	{ 0x221A, "radic", "" },
				1788	{ 0x221D, "prop", "" },
				1789	{ 0x221D, "vprop", "" },
				1790	{ 0x221E, "infin", "" },
				1791	{ 0x221F, "ang90", "RIGHT ANGLE" },
				1792	{ 0x2220, "ang", "ANGLE" },
				1793	{ 0x2221, "angmsd", "MEASURED ANGLE" },
				1794	{ 0x2222, "angsph", "" },
				1795	{ 0x2223, "mid", "" },
				1796	{ 0x2224, "nmid", "DOES NOT DIVIDE" },
				1797	{ 0x2225, "par", "PARALLEL TO" },
				1798	{ 0x2225, "spar", "PARALLEL TO" },
				1799	{ 0x2226, "npar", "NOT PARALLEL TO" },
				1800	{ 0x2226, "nspar", "NOT PARALLEL TO" },
				1801	{ 0x2227, "and", "" },
				1802	{ 0x2228, "or", "" },
				1803	{ 0x2229, "cap", "" },
				1804	{ 0x222A, "cup", "" },
				1805	{ 0x222B, "int", "" },
				1806	{ 0x222E, "conint", "" },
				1807	{ 0x2234, "there4", "" },
				1808	{ 0x2235, "becaus", "BECAUSE" },
				1809	{ 0x223C, "sim", "" },
				1810	{ 0x223C, "thksim", "TILDE OPERATOR" },
				1811	{ 0x223D, "bsim", "" },
				1812	{ 0x2240, "wreath", "WREATH PRODUCT" },
				1813	{ 0x2241, "nsim", "" },
				1814	{ 0x2243, "sime", "" },
				1815	{ 0x2244, "nsime", "" },
				1816	{ 0x2245, "cong", "" },
				1817	{ 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" },
				1818	{ 0x2248, "ap", "" },
				1819	{ 0x2248, "thkap", "ALMOST EQUAL TO" },
				1820	{ 0x2249, "nap", "NOT ALMOST EQUAL TO" },
				1821	{ 0x224A, "ape", "" },
				1822	{ 0x224C, "bcong", "ALL EQUAL TO" },
				1823	{ 0x224D, "asymp", "EQUIVALENT TO" },
				1824	{ 0x224E, "bump", "" },
				1825	{ 0x224F, "bumpe", "" },
				1826	{ 0x2250, "esdot", "" },
				1827	{ 0x2251, "eDot", "" },
				1828	{ 0x2252, "efDot", "" },
				1829	{ 0x2253, "erDot", "" },
				1830	{ 0x2254, "colone", "" },
				1831	{ 0x2255, "ecolon", "" },
				1832	{ 0x2256, "ecir", "" },
				1833	{ 0x2257, "cire", "" },
				1834	{ 0x2259, "wedgeq", "ESTIMATES" },
				1835	{ 0x225C, "trie", "" },
				1836	{ 0x2260, "ne", "" },
				1837	{ 0x2261, "equiv", "" },
				1838	{ 0x2262, "nequiv", "NOT IDENTICAL TO" },
				1839	{ 0x2264, "le", "" },
				1840	{ 0x2264, "les", "LESS-THAN OR EQUAL TO" },
				1841	{ 0x2265, "ge", "GREATER-THAN OR EQUAL TO" },
				1842	{ 0x2265, "ges", "GREATER-THAN OR EQUAL TO" },
				1843	{ 0x2266, "lE", "" },
				1844	{ 0x2267, "gE", "" },
				1845	{ 0x2268, "lnE", "" },
				1846	{ 0x2268, "lne", "" },
				1847	{ 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" },
				1848	{ 0x2269, "gnE", "" },
				1849	{ 0x2269, "gne", "" },
				1850	{ 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" },
				1851	{ 0x226A, "Lt", "MUCH LESS-THAN" },
				1852	{ 0x226B, "Gt", "MUCH GREATER-THAN" },
				1853	{ 0x226C, "twixt", "BETWEEN" },
				1854	{ 0x226E, "nlt", "NOT LESS-THAN" },
				1855	{ 0x226F, "ngt", "NOT GREATER-THAN" },
				1856	{ 0x2270, "nlE", "" },
				1857	{ 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" },
				1858	{ 0x2270, "nles", "" },
				1859	{ 0x2271, "ngE", "" },
				1860	{ 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" },
				1861	{ 0x2271, "nges", "" },
				1862	{ 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" },
				1863	{ 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" },
				1864	{ 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" },
				1865	{ 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" },
				1866	{ 0x2276, "lg", "LESS-THAN OR GREATER-THAN" },
				1867	{ 0x2277, "gl", "" },
				1868	{ 0x227A, "pr", "" },
				1869	{ 0x227B, "sc", "" },
				1870	{ 0x227C, "cupre", "" },
				1871	{ 0x227C, "pre", "" },
				1872	{ 0x227D, "sccue", "" },
				1873	{ 0x227D, "sce", "" },
				1874	{ 0x227E, "prap", "" },
				1875	{ 0x227E, "prsim", "" },
				1876	{ 0x227F, "scap", "" },
				1877	{ 0x227F, "scsim", "" },
				1878	{ 0x2280, "npr", "DOES NOT PRECEDE" },
				1879	{ 0x2281, "nsc", "DOES NOT SUCCEED" },
				1880	{ 0x2282, "sub", "" },
				1881	{ 0x2283, "sup", "" },
				1882	{ 0x2284, "nsub", "NOT A SUBSET OF" },
				1883	{ 0x2285, "nsup", "NOT A SUPERSET OF" },
				1884	{ 0x2286, "subE", "" },
				1885	{ 0x2286, "sube", "" },
				1886	{ 0x2287, "supE", "" },
				1887	{ 0x2287, "supe", "" },
				1888	{ 0x2288, "nsubE", "" },
				1889	{ 0x2288, "nsube", "" },
				1890	{ 0x2289, "nsupE", "" },
				1891	{ 0x2289, "nsupe", "" },
				1892	{ 0x228A, "subne", "" },
				1893	{ 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" },
				1894	{ 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" },
				1895	{ 0x228B, "supnE", "" },
				1896	{ 0x228B, "supne", "" },
				1897	{ 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" },
				1898	{ 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" },
				1899	{ 0x228E, "uplus", "MULTISET UNION" },
				1900	{ 0x228F, "sqsub", "" },
				1901	{ 0x2290, "sqsup", "" },
				1902	{ 0x2291, "sqsube", "" },
				1903	{ 0x2292, "sqsupe", "" },
				1904	{ 0x2293, "sqcap", "SQUARE CAP" },
				1905	{ 0x2294, "sqcup", "SQUARE CUP" },
				1906	{ 0x2295, "oplus", "CIRCLED PLUS" },
				1907	{ 0x2296, "ominus", "CIRCLED MINUS" },
				1908	{ 0x2297, "otimes", "CIRCLED TIMES" },
				1909	{ 0x2298, "osol", "CIRCLED DIVISION SLASH" },
				1910	{ 0x2299, "odot", "CIRCLED DOT OPERATOR" },
				1911	{ 0x229A, "ocir", "CIRCLED RING OPERATOR" },
				1912	{ 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" },
				1913	{ 0x229D, "odash", "CIRCLED DASH" },
				1914	{ 0x229E, "plusb", "SQUARED PLUS" },
				1915	{ 0x229F, "minusb", "SQUARED MINUS" },
				1916	{ 0x22A0, "timesb", "SQUARED TIMES" },
				1917	{ 0x22A1, "sdotb", "SQUARED DOT OPERATOR" },
				1918	{ 0x22A2, "vdash", "" },
				1919	{ 0x22A3, "dashv", "" },
				1920	{ 0x22A4, "top", "DOWN TACK" },
				1921	{ 0x22A5, "bottom", "" },
				1922	{ 0x22A5, "perp", "" },
				1923	{ 0x22A7, "models", "MODELS" },
				1924	{ 0x22A8, "vDash", "" },
				1925	{ 0x22A9, "Vdash", "" },
				1926	{ 0x22AA, "Vvdash", "" },
				1927	{ 0x22AC, "nvdash", "DOES NOT PROVE" },
				1928	{ 0x22AD, "nvDash", "NOT TRUE" },
				1929	{ 0x22AE, "nVdash", "DOES NOT FORCE" },
				1930	{ 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" },
				1931	{ 0x22B2, "vltri", "" },
				1932	{ 0x22B3, "vrtri", "" },
				1933	{ 0x22B4, "ltrie", "" },
				1934	{ 0x22B5, "rtrie", "" },
				1935	{ 0x22B8, "mumap", "MULTIMAP" },
				1936	{ 0x22BA, "intcal", "INTERCALATE" },
				1937	{ 0x22BB, "veebar", "" },
				1938	{ 0x22BC, "barwed", "NAND" },
				1939	{ 0x22C4, "diam", "DIAMOND OPERATOR" },
				1940	{ 0x22C5, "sdot", "DOT OPERATOR" },
				1941	{ 0x22C6, "sstarf", "STAR OPERATOR" },
				1942	{ 0x22C6, "star", "STAR OPERATOR" },
				1943	{ 0x22C7, "divonx", "DIVISION TIMES" },
				1944	{ 0x22C8, "bowtie", "" },
				1945	{ 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" },
				1946	{ 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" },
				1947	{ 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" },
				1948	{ 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" },
				1949	{ 0x22CD, "bsime", "" },
				1950	{ 0x22CE, "cuvee", "CURLY LOGICAL OR" },
				1951	{ 0x22CF, "cuwed", "CURLY LOGICAL AND" },
				1952	{ 0x22D0, "Sub", "" },
				1953	{ 0x22D1, "Sup", "" },
				1954	{ 0x22D2, "Cap", "DOUBLE INTERSECTION" },
				1955	{ 0x22D3, "Cup", "DOUBLE UNION" },
				1956	{ 0x22D4, "fork", "" },
				1957	{ 0x22D6, "ldot", "" },
				1958	{ 0x22D7, "gsdot", "" },
				1959	{ 0x22D8, "Ll", "" },
				1960	{ 0x22D9, "Gg", "VERY MUCH GREATER-THAN" },
				1961	{ 0x22DA, "lEg", "" },
				1962	{ 0x22DA, "leg", "" },
				1963	{ 0x22DB, "gEl", "" },
				1964	{ 0x22DB, "gel", "" },
				1965	{ 0x22DC, "els", "" },
				1966	{ 0x22DD, "egs", "" },
				1967	{ 0x22DE, "cuepr", "" },
				1968	{ 0x22DF, "cuesc", "" },
				1969	{ 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" },
				1970	{ 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" },
				1971	{ 0x22E6, "lnsim", "" },
				1972	{ 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" },
				1973	{ 0x22E8, "prnap", "" },
				1974	{ 0x22E8, "prnsim", "" },
				1975	{ 0x22E9, "scnap", "" },
				1976	{ 0x22E9, "scnsim", "" },
				1977	{ 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" },
				1978	{ 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" },
				1979	{ 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" },
				1980	{ 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" },
				1981	{ 0x22EE, "vellip", "" },
				1982	{ 0x2306, "Barwed", "PERSPECTIVE" },
				1983	{ 0x2308, "lceil", "LEFT CEILING" },
				1984	{ 0x2309, "rceil", "RIGHT CEILING" },
				1985	{ 0x230A, "lfloor", "LEFT FLOOR" },
				1986	{ 0x230B, "rfloor", "RIGHT FLOOR" },
				1987	{ 0x230C, "drcrop", "BOTTOM RIGHT CROP" },
				1988	{ 0x230D, "dlcrop", "BOTTOM LEFT CROP" },
				1989	{ 0x230E, "urcrop", "TOP RIGHT CROP" },
				1990	{ 0x230F, "ulcrop", "TOP LEFT CROP" },
				1991	{ 0x2315, "telrec", "TELEPHONE RECORDER" },
				1992	{ 0x2316, "target", "POSITION INDICATOR" },
				1993	{ 0x231C, "ulcorn", "TOP LEFT CORNER" },
				1994	{ 0x231D, "urcorn", "TOP RIGHT CORNER" },
				1995	{ 0x231E, "dlcorn", "BOTTOM LEFT CORNER" },
				1996	{ 0x231F, "drcorn", "BOTTOM RIGHT CORNER" },
				1997	{ 0x2322, "frown", "" },
				1998	{ 0x2322, "sfrown", "FROWN" },
				1999	{ 0x2323, "smile", "" },
				2000	{ 0x2323, "ssmile", "SMILE" },
				2001	{ 0x2423, "blank", "OPEN BOX" },
				2002	{ 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" },
				2003	{ 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" },
				2004	{ 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" },
				2005	{ 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" },
				2006	{ 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" },
				2007	{ 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" },
				2008	{ 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" },
				2009	{ 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" },
				2010	{ 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" },
				2011	{ 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" },
				2012	{ 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" },
				2013	{ 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" },
				2014	{ 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" },
				2015	{ 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" },
				2016	{ 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" },
				2017	{ 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" },
				2018	{ 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" },
				2019	{ 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" },
				2020	{ 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" },
				2021	{ 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" },
				2022	{ 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" },
				2023	{ 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" },
				2024	{ 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" },
				2025	{ 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" },
				2026	{ 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" },
				2027	{ 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" },
				2028	{ 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE" },
				2029	{ 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" },
				2030	{ 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" },
				2031	{ 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" },
				2032	{ 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" },
				2033	{ 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" },
				2034	{ 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" },
				2035	{ 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" },
				2036	{ 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" },
				2037	{ 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" },
				2038	{ 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" },
				2039	{ 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" },
				2040	{ 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" },
				2041	{ 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" },
				2042	{ 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" },
				2043	{ 0x2580, "uhblk", "UPPER HALF BLOCK" },
				2044	{ 0x2584, "lhblk", "LOWER HALF BLOCK" },
				2045	{ 0x2588, "block", "FULL BLOCK" },
				2046	{ 0x2591, "blk14", "LIGHT SHADE" },
				2047	{ 0x2592, "blk12", "MEDIUM SHADE" },
				2048	{ 0x2593, "blk34", "DARK SHADE" },
				2049	{ 0x25A1, "square", "WHITE SQUARE" },
				2050	{ 0x25A1, "squ", "WHITE SQUARE" },
				2051	{ 0x25AA, "squf", "" },
				2052	{ 0x25AD, "rect", "WHITE RECTANGLE" },
				2053	{ 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" },
				2054	{ 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" },
				2055	{ 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" },
				2056	{ 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" },
				2057	{ 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" },
				2058	{ 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" },
				2059	{ 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" },
				2060	{ 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" },
				2061	{ 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" },
				2062	{ 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" },
				2063	{ 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" },
				2064	{ 0x25CA, "loz", "LOZENGE" },
				2065	{ 0x25CB, "cir", "WHITE CIRCLE" },
				2066	{ 0x25CB, "xcirc", "WHITE CIRCLE" },
				2067	{ 0x2605, "starf", "BLACK STAR" },
				2068	{ 0x260E, "phone", "TELEPHONE SIGN" },
				2069	{ 0x2640, "female", "" },
				2070	{ 0x2642, "male", "MALE SIGN" },
				2071	{ 0x2660, "spades", "BLACK SPADE SUIT" },
				2072	{ 0x2663, "clubs", "BLACK CLUB SUIT" },
				2073	{ 0x2665, "hearts", "BLACK HEART SUIT" },
				2074	{ 0x2666, "diams", "BLACK DIAMOND SUIT" },
				2075	{ 0x2669, "sung", "" },
				2076	{ 0x266D, "flat", "MUSIC FLAT SIGN" },
				2077	{ 0x266E, "natur", "MUSIC NATURAL SIGN" },
				2078	{ 0x266F, "sharp", "MUSIC SHARP SIGN" },
				2079	{ 0x2713, "check", "CHECK MARK" },
				2080	{ 0x2717, "cross", "BALLOT X" },
				2081	{ 0x2720, "malt", "MALTESE CROSS" },
				2082	{ 0x2726, "lozf", "" },
				2083	{ 0x2736, "sext", "SIX POINTED BLACK STAR" },
				2084	{ 0x3008, "lang", "" },
				2085	{ 0x3009, "rang", "" },
				2086	{ 0xE291, "rpargt", "" },
				2087	{ 0xE2A2, "lnap", "" },
				2088	{ 0xE2AA, "nsmid", "" },
				2089	{ 0xE2B3, "prnE", "" },
				2090	{ 0xE2B5, "scnE", "" },
				2091	{ 0xE2B8, "vsubnE", "" },
				2092	{ 0xE301, "smid", "" },
				2093	{ 0xE411, "gnap", "" },
				2094	{ 0xFB00, "fflig", "" },
				2095	{ 0xFB01, "filig", "" },
				2096	{ 0xFB02, "fllig", "" },
				2097	{ 0xFB03, "ffilig", "" },
				2098	{ 0xFB04, "ffllig", "" },
				2099	{ 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" },
				2100	};
				2101
				2102	/************************************************************************
				2103	* *
				2104	* Commodity functions to handle entities *
				2105	* *
				2106	************************************************************************/
				2107
				/*
				 * growBuffer:
				 * @buffer: name of an xmlChar * variable; an int variable called
				 *          <buffer>_size holding its allocated size must be in scope
				 *
				 * Doubles <buffer>_size and reallocates @buffer to the new size.
				 * The macro expands to a block containing a return statement, so it can
				 * only be used inside a function returning a pointer: when the
				 * reallocation fails the failure is reported with perror(), the old
				 * allocation is released and the enclosing function returns NULL.
				 */
				#define growBuffer(buffer) { \
				    xmlChar *buffer##_grown; \
				    buffer##_size *= 2; \
				    /* keep the old pointer until the new one is known to be valid */ \
				    buffer##_grown = (xmlChar *) xmlRealloc(buffer, \
				                                buffer##_size * sizeof(xmlChar)); \
				    if (buffer##_grown == NULL) { \
				        perror("realloc failed"); \
				        xmlFree(buffer); \
				        return(NULL); \
				    } \
				    buffer = buffer##_grown; \
				}
				2119
				2120	/**
				2121	* docbEntityLookup:
				2122	* @name: the entity name
				2123	*
				2124	* Lookup the given entity in EntitiesTable
				2125	*
				2126	* TODO: the linear scan is really ugly, an hash table is really needed.
				2127	*
				2128	* Returns the associated docbEntityDescPtr if found, NULL otherwise.
				2129	*/
				2130	static docbEntityDescPtr
				2131	docbEntityLookup(const xmlChar *name) {
				2132	unsigned int i;
				2133
				2134	for (i = 0;i < (sizeof(docbookEntitiesTable)/
				2135	sizeof(docbookEntitiesTable[0]));i++) {
				2136	if (xmlStrEqual(name, BAD_CAST docbookEntitiesTable[i].name)) {
				2137	#ifdef DEBUG
				2138	xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", name);
				2139	#endif
				2140	return(&docbookEntitiesTable[i]);
				2141	}
				2142	}
				2143	return(NULL);
				2144	}
				2145
				2146	/**
				2147	* docbEntityValueLookup:
				2148	* @value: the entity's unicode value
				2149	*
				2150	* Lookup the given entity in EntitiesTable
				2151	*
				2152	* TODO: the linear scan is really ugly, an hash table is really needed.
				2153	*
				2154	* Returns the associated docbEntityDescPtr if found, NULL otherwise.
				2155	*/
				2156	static docbEntityDescPtr
				2157	docbEntityValueLookup(int value) {
				2158	unsigned int i;
				2159	#ifdef DEBUG
				2160	int lv = 0;
				2161	#endif
				2162
				2163	for (i = 0;i < (sizeof(docbookEntitiesTable)/
				2164	sizeof(docbookEntitiesTable[0]));i++) {
				2165	if (docbookEntitiesTable[i].value >= value) {
				2166	if (docbookEntitiesTable[i].value > value)
				2167	break;
				2168	#ifdef DEBUG
				2169	xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", docbookEntitiesTable[i].name);
				2170	#endif
				2171	return(&docbookEntitiesTable[i]);
				2172	}
				2173	#ifdef DEBUG
				2174	if (lv > docbookEntitiesTable[i].value) {
				2175	xmlGenericError(xmlGenericErrorContext,
				2176	"docbookEntitiesTable[] is not sorted (%d > %d)!\n",
				2177	lv, docbookEntitiesTable[i].value);
				2178	}
				2179	lv = docbookEntitiesTable[i].value;
				2180	#endif
				2181	}
				2182	return(NULL);
				2183	}
				2184
				#if 0
				/**
				 * UTF8ToSgml:
				 * @out:  a pointer to an array of bytes to store the result
				 * @outlen:  the length of @out
				 * @in:  a pointer to an array of UTF-8 chars
				 * @inlen:  the length of @in
				 *
				 * Take a block of UTF-8 chars in and try to convert it to an ASCII
				 * plus SGML entities block of chars out. Characters above 0x7F are
				 * written as "&name;" using docbEntityValueLookup(); a character with
				 * no entry in the table makes the conversion fail.
				 * Calling with @in set to NULL is an initialization request and only
				 * resets both lengths.
				 *
				 * Returns 0 if success (including when the conversion stopped early
				 *     because @out is full or the input ends inside a sequence),
				 *     -2 if the transcoding fails.
				 * The value of @inlen after return is the number of octets consumed.
				 * The value of @outlen after return is the number of octets produced.
				 */
				int
				UTF8ToSgml(unsigned char* out, int *outlen,
				           const unsigned char* in, int *inlen) {
				    const unsigned char* processed = in;
				    const unsigned char* outend;
				    const unsigned char* outstart = out;
				    const unsigned char* instart = in;
				    const unsigned char* inend;
				    unsigned int c, d;
				    int trailing;

				    if (in == NULL) {
				        /*
				         * initialization nothing to do
				         */
				        *outlen = 0;
				        *inlen = 0;
				        return(0);
				    }
				    inend = in + (*inlen);
				    outend = out + (*outlen);
				    while (in < inend) {
				        d = *in++;
				        if      (d < 0x80)  { c= d; trailing= 0; }
				        else if (d < 0xC0) {
				            /* trailing byte in leading position */
				            *outlen = out - outstart;
				            *inlen = processed - instart;
				            return(-2);
				        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
				        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
				        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
				        else {
				            /* no chance for this in Ascii */
				            *outlen = out - outstart;
				            *inlen = processed - instart;
				            return(-2);
				        }

				        /* incomplete sequence at the end of the input: stop here */
				        if (inend - in < trailing) {
				            break;
				        }

				        for ( ; trailing; trailing--) {
				            d = *in++;
				            if ((d & 0xC0) != 0x80) {
				                /* not a continuation byte: malformed input */
				                *outlen = out - outstart;
				                *inlen = processed - instart;
				                return(-2);
				            }
				            c <<= 6;
				            c |= d & 0x3F;
				        }

				        /* assertion: c is a single UCS-4 value */
				        if (c < 0x80) {
				            if (out + 1 >= outend)
				                break;
				            *out++ = c;
				        } else {
				            int len;
				            docbEntityDescPtr ent;

				            /*
				             * Try to lookup a predefined SGML entity for it
				             */
				            ent = docbEntityValueLookup(c);
				            if (ent == NULL) {
				                /* no chance for this in Ascii */
				                *outlen = out - outstart;
				                *inlen = processed - instart;
				                return(-2);
				            }
				            len = strlen(ent->name);
				            if (out + 2 + len >= outend)
				                break;
				            *out++ = '&';
				            memcpy(out, ent->name, len);
				            out += len;
				            *out++ = ';';
				        }
				        processed = in;
				    }
				    *outlen = out - outstart;
				    *inlen = processed - instart;
				    return(0);
				}
				#endif
				2286
				2287	/**
				2288	* docbEncodeEntities:
				2289	* @out: a pointer to an array of bytes to store the result
				2290	* @outlen: the length of @out
				2291	* @in: a pointer to an array of UTF-8 chars
				2292	* @inlen: the length of @in
				2293	* @quoteChar: the quote character to escape (' or ") or zero.
				2294	*
				2295	* Take a block of UTF-8 chars in and try to convert it to an ASCII
				2296	* plus SGML entities block of chars out.
				2297	*
				2298	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
				2299	* The value of @inlen after return is the number of octets consumed
				2300	* as the return value is positive, else unpredictiable.
				2301	* The value of @outlen after return is the number of octets consumed.
				2302	*/
				2303	int
				2304	docbEncodeEntities(unsigned char* out, int *outlen,
				2305	const unsigned char* in, int *inlen, int quoteChar) {
				2306	const unsigned char* processed = in;
				2307	const unsigned char* outend = out + (*outlen);
				2308	const unsigned char* outstart = out;
				2309	const unsigned char* instart = in;
				2310	const unsigned char* inend = in + (*inlen);
				2311	unsigned int c, d;
				2312	int trailing;
				2313
				2314	while (in < inend) {
				2315	d = *in++;
				2316	if (d < 0x80) { c= d; trailing= 0; }
				2317	else if (d < 0xC0) {
				2318	/* trailing byte in leading position */
				2319	*outlen = out - outstart;
				2320	*inlen = processed - instart;
				2321	return(-2);
				2322	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				2323	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				2324	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				2325	else {
				2326	/* no chance for this in Ascii */
				2327	*outlen = out - outstart;
				2328	*inlen = processed - instart;
				2329	return(-2);
				2330	}
				2331
				2332	if (inend - in < trailing)
				2333	break;
				2334
				2335	while (trailing--) {
				2336	if (((d= *in++) & 0xC0) != 0x80) {
				2337	*outlen = out - outstart;
				2338	*inlen = processed - instart;
				2339	return(-2);
				2340	}
				2341	c <<= 6;
				2342	c \|= d & 0x3F;
				2343	}
				2344
				2345	/* assertion: c is a single UTF-4 value */
				2346	if (c < 0x80 && c != (unsigned int) quoteChar && c != '&' && c != '<' && c != '>') {
				2347	if (out >= outend)
				2348	break;
				2349	*out++ = c;
				2350	} else {
				2351	docbEntityDescPtr ent;
				2352	const char *cp;
				2353	char nbuf[16];
				2354	int len;
				2355
				2356	/*
				2357	* Try to lookup a predefined SGML entity for it
				2358	*/
				2359	ent = docbEntityValueLookup(c);
				2360	if (ent == NULL) {
				2361	sprintf(nbuf, "#%u", c);
				2362	cp = nbuf;
				2363	}
				2364	else
				2365	cp = ent->name;
				2366	len = strlen(cp);
				2367	if (out + 2 + len > outend)
				2368	break;
				2369	*out++ = '&';
				2370	memcpy(out, cp, len);
				2371	out += len;
				2372	*out++ = ';';
				2373	}
				2374	processed = in;
				2375	}
				2376	*outlen = out - outstart;
				2377	*inlen = processed - instart;
				2378	return(0);
				2379	}
				2380
				2381
				2382	/************************************************************************
				2383	* *
				2384	* Commodity functions to handle streams *
				2385	* *
				2386	************************************************************************/
				2387
				2388	/**
				2389	* docbNewInputStream:
				2390	* @ctxt: an SGML parser context
				2391	*
				2392	* Create a new input stream structure
				2393	* Returns the new input stream or NULL
				2394	*/
				2395	static docbParserInputPtr
				2396	docbNewInputStream(docbParserCtxtPtr ctxt) {
				2397	docbParserInputPtr input;
				2398
				2399	input = (xmlParserInputPtr) xmlMalloc(sizeof(docbParserInput));
				2400	if (input == NULL) {
				2401	ctxt->errNo = XML_ERR_NO_MEMORY;
				2402	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2403	ctxt->sax->error(ctxt->userData,
				2404	"malloc: couldn't allocate a new input stream\n");
				2405	return(NULL);
				2406	}
				2407	memset(input, 0, sizeof(docbParserInput));
				2408	input->filename = NULL;
				2409	input->directory = NULL;
				2410	input->base = NULL;
				2411	input->cur = NULL;
				2412	input->buf = NULL;
				2413	input->line = 1;
				2414	input->col = 1;
				2415	input->buf = NULL;
				2416	input->free = NULL;
				2417	input->version = NULL;
				2418	input->consumed = 0;
				2419	input->length = 0;
				2420	return(input);
				2421	}
				2422
				2423
				2424	/************************************************************************
				2425	* *
				2426	* Commodity functions, cleanup needed ? *
				2427	* *
				2428	************************************************************************/
				2429
				2430	/**
				2431	* areBlanks:
				2432	* @ctxt: an SGML parser context
				2433	* @str: a xmlChar *
				2434	* @len: the size of @str
				2435	*
				2436	* Is this a sequence of blank chars that one can ignore ?
				2437	*
				2438	* Returns 1 if ignorable 0 otherwise.
				2439	*/
				2440
				2441	static int areBlanks(docbParserCtxtPtr ctxt, const xmlChar *str, int len) {
				2442	int i;
				2443	xmlNodePtr lastChild;
				2444
				2445	for (i = 0;i < len;i++)
				2446	if (!(IS_BLANK(str[i]))) return(0);
				2447
				2448	if (CUR == 0) return(1);
				2449	if (CUR != '<') return(0);
				2450	if (ctxt->name == NULL)
				2451	return(1);
				2452	if (ctxt->node == NULL) return(0);
				2453	lastChild = xmlGetLastChild(ctxt->node);
				2454	if (lastChild == NULL) {
				2455	if (ctxt->node->content != NULL) return(0);
				2456	} else if (xmlNodeIsText(lastChild))
				2457	return(0);
				2458	return(1);
				2459	}
				2460
				2461	/************************************************************************
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2462	* *
				2463	* External entities support *
				2464	* *
				2465	************************************************************************/
				2466
				2467	/**
				2468	* docbParseCtxtExternalEntity:
				2469	* @ctx: the existing parsing context
				2470	* @URL: the URL for the entity to load
				2471	* @ID: the System ID for the entity to load
				2472	* @list: the return value for the set of parsed nodes
				2473	*
				2474	* Parse an external general entity within an existing parsing context
				2475	*
				2476	* Returns 0 if the entity is well formed, -1 in case of args problem and
				2477	* the parser error code otherwise
				2478	*/
				2479
				2480	static int
				2481	docbParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
				2482	const xmlChar ID, xmlNodePtr list) {
				2483	xmlParserCtxtPtr ctxt;
				2484	xmlDocPtr newDoc;
				2485	xmlSAXHandlerPtr oldsax = NULL;
				2486	int ret = 0;
				2487
				2488	if (ctx->depth > 40) {
				2489	return(XML_ERR_ENTITY_LOOP);
				2490	}
				2491
				2492	if (list != NULL)
				2493	*list = NULL;
				2494	if ((URL == NULL) && (ID == NULL))
				2495	return(-1);
				2496	if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
				2497	return(-1);
				2498
				2499
				2500	ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL);
				2501	if (ctxt == NULL) return(-1);
				2502	ctxt->userData = ctxt;
				2503	oldsax = ctxt->sax;
				2504	ctxt->sax = ctx->sax;
				2505	newDoc = xmlNewDoc(BAD_CAST "1.0");
				2506	if (newDoc == NULL) {
				2507	xmlFreeParserCtxt(ctxt);
				2508	return(-1);
				2509	}
				2510	if (ctx->myDoc != NULL) {
				2511	newDoc->intSubset = ctx->myDoc->intSubset;
				2512	newDoc->extSubset = ctx->myDoc->extSubset;
				2513	}
				2514	if (ctx->myDoc->URL != NULL) {
				2515	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
				2516	}
				2517	newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
				2518	if (newDoc->children == NULL) {
				2519	ctxt->sax = oldsax;
				2520	xmlFreeParserCtxt(ctxt);
				2521	newDoc->intSubset = NULL;
				2522	newDoc->extSubset = NULL;
				2523	xmlFreeDoc(newDoc);
				2524	return(-1);
				2525	}
				2526	nodePush(ctxt, newDoc->children);
				2527	if (ctx->myDoc == NULL) {
				2528	ctxt->myDoc = newDoc;
				2529	} else {
				2530	ctxt->myDoc = ctx->myDoc;
				2531	newDoc->children->doc = ctx->myDoc;
				2532	}
				2533
				2534	/*
				2535	* Parse a possible text declaration first
				2536	*/
				2537	GROW;
				2538	if ((RAW == '<') && (NXT(1) == '?') &&
				2539	(NXT(2) == 'x') && (NXT(3) == 'm') &&
				2540	(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
				2541	xmlParseTextDecl(ctxt);
				2542	}
				2543
				2544	/*
				2545	* Doing validity checking on chunk doesn't make sense
				2546	*/
				2547	ctxt->instate = XML_PARSER_CONTENT;
				2548	ctxt->validate = ctx->validate;
				2549	ctxt->loadsubset = ctx->loadsubset;
				2550	ctxt->depth = ctx->depth + 1;
				2551	ctxt->replaceEntities = ctx->replaceEntities;
				2552	if (ctxt->validate) {
				2553	ctxt->vctxt.error = ctx->vctxt.error;
				2554	ctxt->vctxt.warning = ctx->vctxt.warning;
				2555	/* Allocate the Node stack */
				2556	ctxt->vctxt.nodeTab = (xmlNodePtr ) xmlMalloc(4 sizeof(xmlNodePtr));
				2557	if (ctxt->vctxt.nodeTab == NULL) {
				2558	xmlGenericError(xmlGenericErrorContext,
				2559	"docbParseCtxtExternalEntity: out of memory\n");
				2560	ctxt->validate = 0;
				2561	ctxt->vctxt.error = NULL;
				2562	ctxt->vctxt.warning = NULL;
				2563	} else {
				2564	ctxt->vctxt.nodeNr = 0;
				2565	ctxt->vctxt.nodeMax = 4;
				2566	ctxt->vctxt.node = NULL;
				2567	}
				2568	} else {
				2569	ctxt->vctxt.error = NULL;
				2570	ctxt->vctxt.warning = NULL;
				2571	}
				2572
				2573	docbParseContent(ctxt);
				2574
				2575	if ((RAW == '<') && (NXT(1) == '/')) {
				2576	ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
				2577	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2578	ctxt->sax->error(ctxt->userData,
				2579	"chunk is not well balanced\n");
				2580	ctxt->wellFormed = 0;
				2581	ctxt->disableSAX = 1;
				2582	} else if (RAW != 0) {
				2583	ctxt->errNo = XML_ERR_EXTRA_CONTENT;
				2584	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2585	ctxt->sax->error(ctxt->userData,
				2586	"extra content at the end of well balanced chunk\n");
				2587	ctxt->wellFormed = 0;
				2588	ctxt->disableSAX = 1;
				2589	}
				2590	if (ctxt->node != newDoc->children) {
				2591	ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
				2592	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2593	ctxt->sax->error(ctxt->userData,
				2594	"chunk is not well balanced\n");
				2595	ctxt->wellFormed = 0;
				2596	ctxt->disableSAX = 1;
				2597	}
				2598
				2599	if (!ctxt->wellFormed) {
				2600	if (ctxt->errNo == 0)
				2601	ret = 1;
				2602	else
				2603	ret = ctxt->errNo;
				2604	} else {
				2605	if (list != NULL) {
				2606	xmlNodePtr cur;
				2607
				2608	/*
				2609	* Return the newly created nodeset after unlinking it from
				2610	* they pseudo parent.
				2611	*/
				2612	cur = newDoc->children->children;
				2613	*list = cur;
				2614	while (cur != NULL) {
				2615	cur->parent = NULL;
				2616	cur = cur->next;
				2617	}
				2618	newDoc->children->children = NULL;
				2619	}
				2620	ret = 0;
				2621	}
				2622	ctxt->sax = oldsax;
				2623	xmlFreeParserCtxt(ctxt);
				2624	newDoc->intSubset = NULL;
				2625	newDoc->extSubset = NULL;
				2626	xmlFreeDoc(newDoc);
				2627
				2628	return(ret);
				2629	}
				2630
				2631	/************************************************************************
				2632	* *
				2633	* The parser itself *
				2634	* *
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2635	************************************************************************/
				2636
				2637	/**
				2638	* docbParseSGMLName:
				2639	* @ctxt: an SGML parser context
				2640	*
				2641	* parse an SGML tag or attribute name, note that we convert it to lowercase
				2642	* since SGML names are not case-sensitive.
				2643	*
				2644	* Returns the Tag Name parsed or NULL
				2645	*/
				2646
				2647	static xmlChar *
				2648	docbParseSGMLName(docbParserCtxtPtr ctxt) {
				2649	xmlChar *ret = NULL;
				2650	int i = 0;
				2651	xmlChar loc[DOCB_PARSER_BUFFER_SIZE];
				2652
				2653	if (!IS_LETTER(CUR) && (CUR != '_') &&
				2654	(CUR != ':')) return(NULL);
				2655
				2656	while ((i < DOCB_PARSER_BUFFER_SIZE) &&
				2657	((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				2658	(CUR == ':') \|\| (CUR == '_'))) {
				2659	if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
				2660	else loc[i] = CUR;
				2661	i++;
				2662
				2663	NEXT;
				2664	}
				2665
				2666	ret = xmlStrndup(loc, i);
				2667
				2668	return(ret);
				2669	}
				2670
				2671	/**
				2672	* docbParseName:
				2673	* @ctxt: an SGML parser context
				2674	*
				2675	* parse an SGML name, this routine is case sensistive.
				2676	*
				2677	* Returns the Name parsed or NULL
				2678	*/
				2679
				2680	static xmlChar *
				2681	docbParseName(docbParserCtxtPtr ctxt) {
				2682	xmlChar buf[DOCB_MAX_NAMELEN];
				2683	int len = 0;
				2684
				2685	GROW;
				2686	if (!IS_LETTER(CUR) && (CUR != '_')) {
				2687	return(NULL);
				2688	}
				2689
				2690	while ((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				2691	(CUR == '.') \|\| (CUR == '-') \|\|
				2692	(CUR == '_') \|\| (CUR == ':') \|\|
				2693	(IS_COMBINING(CUR)) \|\|
				2694	(IS_EXTENDER(CUR))) {
				2695	buf[len++] = CUR;
				2696	NEXT;
				2697	if (len >= DOCB_MAX_NAMELEN) {
				2698	xmlGenericError(xmlGenericErrorContext,
				2699	"docbParseName: reached DOCB_MAX_NAMELEN limit\n");
				2700	while ((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				2701	(CUR == '.') \|\| (CUR == '-') \|\|
				2702	(CUR == '_') \|\| (CUR == ':') \|\|
				2703	(IS_COMBINING(CUR)) \|\|
				2704	(IS_EXTENDER(CUR)))
				2705	NEXT;
				2706	break;
				2707	}
				2708	}
				2709	return(xmlStrndup(buf, len));
				2710	}
				2711
				2712	/**
				2713	* docbParseSGMLAttribute:
				2714	* @ctxt: an SGML parser context
				2715	* @stop: a char stop value
				2716	*
				2717	* parse an SGML attribute value till the stop (quote), if
				2718	* stop is 0 then it stops at the first space
				2719	*
				2720	* Returns the attribute parsed or NULL
				2721	*/
				2722
				2723	static xmlChar *
				2724	docbParseSGMLAttribute(docbParserCtxtPtr ctxt, const xmlChar stop) {
				2725	xmlChar *buffer = NULL;
				2726	int buffer_size = 0;
				2727	xmlChar *out = NULL;
				2728	xmlChar *name = NULL;
				2729
				2730	xmlChar *cur = NULL;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2731	xmlEntityPtr xent;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2732	docbEntityDescPtr ent;
				2733
				2734	/*
				2735	* allocate a translation buffer.
				2736	*/
				2737	buffer_size = DOCB_PARSER_BIG_BUFFER_SIZE;
				2738	buffer = (xmlChar ) xmlMalloc(buffer_size sizeof(xmlChar));
				2739	if (buffer == NULL) {
				2740	perror("docbParseSGMLAttribute: malloc failed");
				2741	return(NULL);
				2742	}
				2743	out = buffer;
				2744
				2745	/*
				2746	* Ok loop until we reach one of the ending chars
				2747	*/
				2748	while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
				2749	if ((stop == 0) && (IS_BLANK(CUR))) break;
				2750	if (CUR == '&') {
				2751	if (NXT(1) == '#') {
				2752	unsigned int c;
				2753	int bits;
				2754
				2755	c = docbParseCharRef(ctxt);
				2756	if (c < 0x80)
				2757	{ *out++ = c; bits= -6; }
				2758	else if (c < 0x800)
				2759	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				2760	else if (c < 0x10000)
				2761	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				2762	else
				2763	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				2764
				2765	for ( ; bits >= 0; bits-= 6) {
				2766	*out++ = ((c >> bits) & 0x3F) \| 0x80;
				2767	}
				2768	} else {
				2769	ent = docbParseEntityRef(ctxt, &name);
				2770	if (name == NULL) {
				2771	*out++ = '&';
				2772	if (out - buffer > buffer_size - 100) {
				2773	int indx = out - buffer;
				2774
				2775	growBuffer(buffer);
				2776	out = &buffer[indx];
				2777	}
				2778	} else if (ent == NULL) {
				2779	*out++ = '&';
				2780	cur = name;
				2781	while (*cur != 0) {
				2782	if (out - buffer > buffer_size - 100) {
				2783	int indx = out - buffer;
				2784
				2785	growBuffer(buffer);
				2786	out = &buffer[indx];
				2787	}
				2788	out++ = cur++;
				2789	}
				2790	xmlFree(name);
				2791	} else {
				2792	unsigned int c;
				2793	int bits;
				2794
				2795	if (out - buffer > buffer_size - 100) {
				2796	int indx = out - buffer;
				2797
				2798	growBuffer(buffer);
				2799	out = &buffer[indx];
				2800	}
				2801	c = (xmlChar)ent->value;
				2802	if (c < 0x80)
				2803	{ *out++ = c; bits= -6; }
				2804	else if (c < 0x800)
				2805	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				2806	else if (c < 0x10000)
				2807	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				2808	else
				2809	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				2810
				2811	for ( ; bits >= 0; bits-= 6) {
				2812	*out++ = ((c >> bits) & 0x3F) \| 0x80;
				2813	}
				2814	xmlFree(name);
				2815	}
				2816	}
				2817	} else {
				2818	unsigned int c;
				2819	int bits;
				2820
				2821	if (out - buffer > buffer_size - 100) {
				2822	int indx = out - buffer;
				2823
				2824	growBuffer(buffer);
				2825	out = &buffer[indx];
				2826	}
				2827	c = CUR;
				2828	if (c < 0x80)
				2829	{ *out++ = c; bits= -6; }
				2830	else if (c < 0x800)
				2831	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				2832	else if (c < 0x10000)
				2833	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				2834	else
				2835	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				2836
				2837	for ( ; bits >= 0; bits-= 6) {
				2838	*out++ = ((c >> bits) & 0x3F) \| 0x80;
				2839	}
				2840	NEXT;
				2841	}
				2842	}
				2843	*out++ = 0;
				2844	return(buffer);
				2845	}
				2846
				2847
				2848	/**
				2849	* docbParseEntityRef:
				2850	* @ctxt: an SGML parser context
				2851	* @str: location to store the entity name
				2852	*
				2853	* parse an SGML ENTITY references
				2854	*
				2855	* [68] EntityRef ::= '&' Name ';'
				2856	*
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2857	* Returns the associated xmlEntityPtr if found, or NULL otherwise,
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2858	* if non-NULL *str will have to be freed by the caller.
				2859	*/
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2860	static xmlEntityPtr
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2861	docbParseEntityRef(docbParserCtxtPtr ctxt, xmlChar **str) {
				2862	xmlChar *name;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2863	xmlEntityPtr ent = NULL;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2864	*str = NULL;
				2865
				2866	if (CUR == '&') {
				2867	NEXT;
				2868	name = docbParseName(ctxt);
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2869	if (name == NULL) {
				2870	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2871	ctxt->sax->error(ctxt->userData,
				2872	"docbParseEntityRef: no name\n");
				2873	ctxt->wellFormed = 0;
				2874	} else {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2875	GROW;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2876	if (CUR == ';') {
				2877	*str = name;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2878
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2879	/*
				2880	* Ask first SAX for entity resolution, otherwise try the
				2881	* predefined set.
				2882	*/
				2883	if (ctxt->sax != NULL) {
				2884	if (ctxt->sax->getEntity != NULL)
				2885	ent = ctxt->sax->getEntity(ctxt->userData, name);
				2886	if (ent == NULL)
				2887	ent = xmlGetPredefinedEntity(name);
				2888	}
				2889	NEXT;
				2890	} else {
				2891	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2892	ctxt->sax->error(ctxt->userData,
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2893	"docbParseEntityRef: expecting ';'\n");
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2894	*str = name;
				2895	}
				2896	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2897	}
				2898	return(ent);
				2899	}
				2900
				2901	/**
				2902	* docbParseAttValue:
				2903	* @ctxt: an SGML parser context
				2904	*
				2905	* parse a value for an attribute
				2906	* Note: the parser won't do substitution of entities here, this
				2907	* will be handled later in xmlStringGetNodeList, unless it was
				2908	* asked for ctxt->replaceEntities != 0
				2909	*
				2910	* Returns the AttValue parsed or NULL.
				2911	*/
				2912
				2913	static xmlChar *
				2914	docbParseAttValue(docbParserCtxtPtr ctxt) {
				2915	xmlChar *ret = NULL;
				2916
				2917	if (CUR == '"') {
				2918	NEXT;
				2919	ret = docbParseSGMLAttribute(ctxt, '"');
				2920	if (CUR != '"') {
				2921	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2922	ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
				2923	ctxt->wellFormed = 0;
				2924	} else
				2925	NEXT;
				2926	} else if (CUR == '\'') {
				2927	NEXT;
				2928	ret = docbParseSGMLAttribute(ctxt, '\'');
				2929	if (CUR != '\'') {
				2930	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2931	ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
				2932	ctxt->wellFormed = 0;
				2933	} else
				2934	NEXT;
				2935	} else {
				2936	/*
				2937	* That's an SGMLism, the attribute value may not be quoted
				2938	*/
				2939	ret = docbParseSGMLAttribute(ctxt, 0);
				2940	if (ret == NULL) {
				2941	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2942	ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");
				2943	ctxt->wellFormed = 0;
				2944	}
				2945	}
				2946	return(ret);
				2947	}
				2948
				2949	/**
				2950	* docbParseSystemLiteral:
				2951	* @ctxt: an SGML parser context
				2952	*
				2953	* parse an SGML Literal
				2954	*
				2955	* [11] SystemLiteral ::= ('"' [^"]* '"') \| ("'" [^']* "'")
				2956	*
				2957	* Returns the SystemLiteral parsed or NULL
				2958	*/
				2959
				2960	static xmlChar *
				2961	docbParseSystemLiteral(docbParserCtxtPtr ctxt) {
				2962	const xmlChar *q;
				2963	xmlChar *ret = NULL;
				2964
				2965	if (CUR == '"') {
				2966	NEXT;
				2967	q = CUR_PTR;
				2968	while ((IS_CHAR(CUR)) && (CUR != '"'))
				2969	NEXT;
				2970	if (!IS_CHAR(CUR)) {
				2971	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2972	ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
				2973	ctxt->wellFormed = 0;
				2974	} else {
				2975	ret = xmlStrndup(q, CUR_PTR - q);
				2976	NEXT;
				2977	}
				2978	} else if (CUR == '\'') {
				2979	NEXT;
				2980	q = CUR_PTR;
				2981	while ((IS_CHAR(CUR)) && (CUR != '\''))
				2982	NEXT;
				2983	if (!IS_CHAR(CUR)) {
				2984	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2985	ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
				2986	ctxt->wellFormed = 0;
				2987	} else {
				2988	ret = xmlStrndup(q, CUR_PTR - q);
				2989	NEXT;
				2990	}
				2991	} else {
				2992	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2993	ctxt->sax->error(ctxt->userData,
				2994	"SystemLiteral \" or ' expected\n");
				2995	ctxt->wellFormed = 0;
				2996	}
				2997
				2998	return(ret);
				2999	}
				3000
				3001	/**
				3002	* docbParsePubidLiteral:
				3003	* @ctxt: an SGML parser context
				3004	*
				3005	* parse an SGML public literal
				3006	*
				3007	* [12] PubidLiteral ::= '"' PubidChar* '"' \| "'" (PubidChar - "'")* "'"
				3008	*
				3009	* Returns the PubidLiteral parsed or NULL.
				3010	*/
				3011
				3012	static xmlChar *
				3013	docbParsePubidLiteral(docbParserCtxtPtr ctxt) {
				3014	const xmlChar *q;
				3015	xmlChar *ret = NULL;
				3016	/*
				3017	* Name ::= (Letter \| '_') (NameChar)*
				3018	*/
				3019	if (CUR == '"') {
				3020	NEXT;
				3021	q = CUR_PTR;
				3022	while (IS_PUBIDCHAR(CUR)) NEXT;
				3023	if (CUR != '"') {
				3024	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3025	ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
				3026	ctxt->wellFormed = 0;
				3027	} else {
				3028	ret = xmlStrndup(q, CUR_PTR - q);
				3029	NEXT;
				3030	}
				3031	} else if (CUR == '\'') {
				3032	NEXT;
				3033	q = CUR_PTR;
				3034	while ((IS_LETTER(CUR)) && (CUR != '\''))
				3035	NEXT;
				3036	if (!IS_LETTER(CUR)) {
				3037	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3038	ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
				3039	ctxt->wellFormed = 0;
				3040	} else {
				3041	ret = xmlStrndup(q, CUR_PTR - q);
				3042	NEXT;
				3043	}
				3044	} else {
				3045	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3046	ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
				3047	ctxt->wellFormed = 0;
				3048	}
				3049
				3050	return(ret);
				3051	}
				3052
				3053	/**
				3054	* docbParseCharData:
				3055	* @ctxt: an SGML parser context
				3056	* @cdata: int indicating whether we are within a CDATA section
				3057	*
				3058	* parse a CharData section.
				3059	* if we are within a CDATA section ']]>' marks an end of section.
				3060	*
				3061	* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
				3062	*/
				3063
				3064	static void
				3065	docbParseCharData(docbParserCtxtPtr ctxt) {
				3066	xmlChar buf[DOCB_PARSER_BIG_BUFFER_SIZE + 5];
				3067	int nbchar = 0;
				3068	int cur, l;
				3069
				3070	SHRINK;
				3071	cur = CUR_CHAR(l);
				3072	while (((cur != '<') \|\| (ctxt->token == '<')) &&
				3073	((cur != '&') \|\| (ctxt->token == '&')) &&
				3074	(IS_CHAR(cur))) {
				3075	COPY_BUF(l,buf,nbchar,cur);
				3076	if (nbchar >= DOCB_PARSER_BIG_BUFFER_SIZE) {
				3077	/*
				3078	* Ok the segment is to be consumed as chars.
				3079	*/
				3080	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
				3081	if (areBlanks(ctxt, buf, nbchar)) {
				3082	if (ctxt->sax->ignorableWhitespace != NULL)
				3083	ctxt->sax->ignorableWhitespace(ctxt->userData,
				3084	buf, nbchar);
				3085	} else {
				3086	docbCheckParagraph(ctxt);
				3087	if (ctxt->sax->characters != NULL)
				3088	ctxt->sax->characters(ctxt->userData, buf, nbchar);
				3089	}
				3090	}
				3091	nbchar = 0;
				3092	}
				3093	NEXTL(l);
				3094	cur = CUR_CHAR(l);
				3095	}
				3096	if (nbchar != 0) {
				3097	/*
				3098	* Ok the segment is to be consumed as chars.
				3099	*/
				3100	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
				3101	if (areBlanks(ctxt, buf, nbchar)) {
				3102	if (ctxt->sax->ignorableWhitespace != NULL)
				3103	ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
				3104	} else {
				3105	docbCheckParagraph(ctxt);
				3106	if (ctxt->sax->characters != NULL)
				3107	ctxt->sax->characters(ctxt->userData, buf, nbchar);
				3108	}
				3109	}
				3110	}
				3111	}
				3112
				3113	/**
				3114	* docbParseExternalID:
				3115	* @ctxt: an SGML parser context
				3116	* @publicID: a xmlChar** receiving PubidLiteral
				3117	*
				3118	* Parse an External ID or a Public ID
				3119	*
				3120	* Returns the function returns SystemLiteral and in the second
				3121	* case publicID receives PubidLiteral,
				3122	* it is possible to return NULL and have publicID set.
				3123	*/
				3124
				3125	static xmlChar *
				3126	docbParseExternalID(docbParserCtxtPtr ctxt, xmlChar **publicID) {
				3127	xmlChar *URI = NULL;
				3128
				3129	if ((UPPER == 'S') && (UPP(1) == 'Y') &&
				3130	(UPP(2) == 'S') && (UPP(3) == 'T') &&
				3131	(UPP(4) == 'E') && (UPP(5) == 'M')) {
				3132	SKIP(6);
				3133	if (!IS_BLANK(CUR)) {
				3134	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3135	ctxt->sax->error(ctxt->userData,
				3136	"Space required after 'SYSTEM'\n");
				3137	ctxt->wellFormed = 0;
				3138	}
				3139	SKIP_BLANKS;
				3140	URI = docbParseSystemLiteral(ctxt);
				3141	if (URI == NULL) {
				3142	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3143	ctxt->sax->error(ctxt->userData,
				3144	"docbParseExternalID: SYSTEM, no URI\n");
				3145	ctxt->wellFormed = 0;
				3146	}
				3147	} else if ((UPPER == 'P') && (UPP(1) == 'U') &&
				3148	(UPP(2) == 'B') && (UPP(3) == 'L') &&
				3149	(UPP(4) == 'I') && (UPP(5) == 'C')) {
				3150	SKIP(6);
				3151	if (!IS_BLANK(CUR)) {
				3152	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3153	ctxt->sax->error(ctxt->userData,
				3154	"Space required after 'PUBLIC'\n");
				3155	ctxt->wellFormed = 0;
				3156	}
				3157	SKIP_BLANKS;
				3158	*publicID = docbParsePubidLiteral(ctxt);
				3159	if (*publicID == NULL) {
				3160	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3161	ctxt->sax->error(ctxt->userData,
				3162	"docbParseExternalID: PUBLIC, no Public Identifier\n");
				3163	ctxt->wellFormed = 0;
				3164	}
				3165	SKIP_BLANKS;
				3166	if ((CUR == '"') \|\| (CUR == '\'')) {
				3167	URI = docbParseSystemLiteral(ctxt);
				3168	}
				3169	}
				3170	return(URI);
				3171	}
				3172
				3173	/**
				3174	* docbParseComment:
				3175	* @ctxt: an SGML parser context
				3176	*
				3177	* Parse an XML (SGML) comment <!-- .... -->
				3178	*
				3179	* [15] Comment ::= '<!--' ((Char - '-') \| ('-' (Char - '-')))* '-->'
				3180	*/
				3181	static void
				3182	docbParseComment(docbParserCtxtPtr ctxt) {
				3183	xmlChar *buf = NULL;
				3184	int len;
				3185	int size = DOCB_PARSER_BUFFER_SIZE;
				3186	int q, ql;
				3187	int r, rl;
				3188	int cur, l;
				3189	xmlParserInputState state;
				3190
				3191	/*
				3192	* Check that there is a comment right here.
				3193	*/
				3194	if ((RAW != '<') \|\| (NXT(1) != '!') \|\|
				3195	(NXT(2) != '-') \|\| (NXT(3) != '-')) return;
				3196
				3197	state = ctxt->instate;
				3198	ctxt->instate = XML_PARSER_COMMENT;
				3199	SHRINK;
				3200	SKIP(4);
				3201	buf = (xmlChar ) xmlMalloc(size sizeof(xmlChar));
				3202	if (buf == NULL) {
				3203	xmlGenericError(xmlGenericErrorContext,
				3204	"malloc of %d byte failed\n", size);
				3205	ctxt->instate = state;
				3206	return;
				3207	}
				3208	q = CUR_CHAR(ql);
				3209	NEXTL(ql);
				3210	r = CUR_CHAR(rl);
				3211	NEXTL(rl);
				3212	cur = CUR_CHAR(l);
				3213	len = 0;
				3214	while (IS_CHAR(cur) &&
				3215	((cur != '>') \|\|
				3216	(r != '-') \|\| (q != '-'))) {
				3217	if (len + 5 >= size) {
				3218	size *= 2;
				3219	buf = (xmlChar ) xmlRealloc(buf, size sizeof(xmlChar));
				3220	if (buf == NULL) {
				3221	xmlGenericError(xmlGenericErrorContext,
				3222	"realloc of %d byte failed\n", size);
				3223	ctxt->instate = state;
				3224	return;
				3225	}
				3226	}
				3227	COPY_BUF(ql,buf,len,q);
				3228	q = r;
				3229	ql = rl;
				3230	r = cur;
				3231	rl = l;
				3232	NEXTL(l);
				3233	cur = CUR_CHAR(l);
				3234	if (cur == 0) {
				3235	SHRINK;
				3236	GROW;
				3237	cur = CUR_CHAR(l);
				3238	}
				3239	}
				3240	buf[len] = 0;
				3241	if (!IS_CHAR(cur)) {
				3242	ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
				3243	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3244	ctxt->sax->error(ctxt->userData,
				3245	"Comment not terminated \n<!--%.50s\n", buf);
				3246	ctxt->wellFormed = 0;
				3247	xmlFree(buf);
				3248	} else {
				3249	NEXT;
				3250	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
				3251	(!ctxt->disableSAX))
				3252	ctxt->sax->comment(ctxt->userData, buf);
				3253	xmlFree(buf);
				3254	}
				3255	ctxt->instate = state;
				3256	}
				3257
				3258	/**
				3259	* docbParseCharRef:
				3260	* @ctxt: an SGML parser context
				3261	*
				3262	* parse Reference declarations
				3263	*
				3264	* [66] CharRef ::= '&#' [0-9]+ ';' \|
				3265	* '&#x' [0-9a-fA-F]+ ';'
				3266	*
				3267	* Returns the value parsed (as an int)
				3268	*/
				3269	static int
				3270	docbParseCharRef(docbParserCtxtPtr ctxt) {
				3271	int val = 0;
				3272
				3273	if ((CUR == '&') && (NXT(1) == '#') &&
				3274	(NXT(2) == 'x')) {
				3275	SKIP(3);
				3276	while (CUR != ';') {
				3277	if ((CUR >= '0') && (CUR <= '9'))
				3278	val = val * 16 + (CUR - '0');
				3279	else if ((CUR >= 'a') && (CUR <= 'f'))
				3280	val = val * 16 + (CUR - 'a') + 10;
				3281	else if ((CUR >= 'A') && (CUR <= 'F'))
				3282	val = val * 16 + (CUR - 'A') + 10;
				3283	else {
				3284	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3285	ctxt->sax->error(ctxt->userData,
				3286	"docbParseCharRef: invalid hexadecimal value\n");
				3287	ctxt->wellFormed = 0;
				3288	val = 0;
				3289	break;
				3290	}
				3291	NEXT;
				3292	}
				3293	if (CUR == ';')
				3294	NEXT;
				3295	} else if ((CUR == '&') && (NXT(1) == '#')) {
				3296	SKIP(2);
				3297	while (CUR != ';') {
				3298	if ((CUR >= '0') && (CUR <= '9'))
				3299	val = val * 10 + (CUR - '0');
				3300	else {
				3301	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3302	ctxt->sax->error(ctxt->userData,
				3303	"docbParseCharRef: invalid decimal value\n");
				3304	ctxt->wellFormed = 0;
				3305	val = 0;
				3306	break;
				3307	}
				3308	NEXT;
				3309	}
				3310	if (CUR == ';')
				3311	NEXT;
				3312	} else {
				3313	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3314	ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid value\n");
				3315	ctxt->wellFormed = 0;
				3316	}
				3317	/*
				3318	* Check the value IS_CHAR ...
				3319	*/
				3320	if (IS_CHAR(val)) {
				3321	return(val);
				3322	} else {
				3323	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3324	ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid xmlChar value %d\n",
				3325	val);
				3326	ctxt->wellFormed = 0;
				3327	}
				3328	return(0);
				3329	}
				3330
				3331
				3332	/**
				3333	* docbParseDocTypeDecl :
				3334	* @ctxt: an SGML parser context
				3335	*
				3336	* parse a DOCTYPE declaration
				3337	*
				3338	* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
				3339	* ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'
				3340	*/
				3341
				3342	static void
				3343	docbParseDocTypeDecl(docbParserCtxtPtr ctxt) {
				3344	xmlChar *name;
				3345	xmlChar *ExternalID = NULL;
				3346	xmlChar *URI = NULL;
				3347
				3348	/*
				3349	* We know that '<!DOCTYPE' has been detected.
				3350	*/
				3351	SKIP(9);
				3352
				3353	SKIP_BLANKS;
				3354
				3355	/*
				3356	* Parse the DOCTYPE name.
				3357	*/
				3358	name = docbParseName(ctxt);
				3359	if (name == NULL) {
				3360	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3361	ctxt->sax->error(ctxt->userData, "docbParseDocTypeDecl : no DOCTYPE name !\n");
				3362	ctxt->wellFormed = 0;
				3363	}
				3364	/*
				3365	* Check that upper(name) == "SGML" !!!!!!!!!!!!!
				3366	*/
				3367
				3368	SKIP_BLANKS;
				3369
				3370	/*
				3371	* Check for SystemID and ExternalID
				3372	*/
				3373	URI = docbParseExternalID(ctxt, &ExternalID);
				3374	SKIP_BLANKS;
				3375
				3376	/*
				3377	* Create or update the document accordingly to the DOCTYPE
				3378	*/
				3379	if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
				3380	(!ctxt->disableSAX))
				3381	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
				3382
				3383	/*
				3384	* Is there any internal subset declarations ?
				3385	* they are handled separately in docbParseInternalSubset()
				3386	*/
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3387	if (RAW != '[') {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3388	return;
				3389
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3390	/*
				3391	* We should be at the end of the DOCTYPE declaration.
				3392	*/
				3393	if (CUR != '>') {
				3394	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3395	ctxt->sax->error(ctxt->userData,
				3396	"DOCTYPE unproperly terminated\n");
				3397	ctxt->wellFormed = 0;
				3398	/* We shouldn't try to resynchronize ... */
				3399	}
				3400	NEXT;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3401	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3402
				3403	/*
				3404	* Cleanup, since we don't use all those identifiers
				3405	*/
				3406	if (URI != NULL) xmlFree(URI);
				3407	if (ExternalID != NULL) xmlFree(ExternalID);
				3408	if (name != NULL) xmlFree(name);
				3409	}
				3410
				3411	/**
				3412	* docbParseAttribute:
				3413	* @ctxt: an SGML parser context
				3414	* @value: a xmlChar ** used to store the value of the attribute
				3415	*
				3416	* parse an attribute
				3417	*
				3418	* [41] Attribute ::= Name Eq AttValue
				3419	*
				3420	* [25] Eq ::= S? '=' S?
				3421	*
				3422	* With namespace:
				3423	*
				3424	* [NS 11] Attribute ::= QName Eq AttValue
				3425	*
				3426	* Also the case QName == xmlns:??? is handled independently as a namespace
				3427	* definition.
				3428	*
				3429	* Returns the attribute name, and the value in *value.
				3430	*/
				3431
				3432	static xmlChar *
				3433	docbParseAttribute(docbParserCtxtPtr ctxt, xmlChar **value) {
				3434	xmlChar name, val = NULL;
				3435
				3436	*value = NULL;
				3437	name = docbParseName(ctxt);
				3438	if (name == NULL) {
				3439	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3440	ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
				3441	ctxt->wellFormed = 0;
				3442	return(NULL);
				3443	}
				3444
				3445	/*
				3446	* read the value
				3447	*/
				3448	SKIP_BLANKS;
				3449	if (CUR == '=') {
				3450	NEXT;
				3451	SKIP_BLANKS;
				3452	val = docbParseAttValue(ctxt);
				3453	/******
				3454	} else {
				3455	* TODO : some attribute must have values, some may not
				3456	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3457	ctxt->sax->warning(ctxt->userData,
				3458	"No value for attribute %s\n", name); */
				3459	}
				3460
				3461	*value = val;
				3462	return(name);
				3463	}
				3464
				3465	/**
				3466	* docbCheckEncoding:
				3467	* @ctxt: an SGML parser context
				3468	* @attvalue: the attribute value
				3469	*
				3470	* Checks an http-equiv attribute from a Meta tag to detect
				3471	* the encoding
				3472	* If a new encoding is detected the parser is switched to decode
				3473	* it and pass UTF8
				3474	*/
				3475	static void
				3476	docbCheckEncoding(docbParserCtxtPtr ctxt, const xmlChar *attvalue) {
				3477	const xmlChar *encoding;
				3478
				3479	if ((ctxt == NULL) \|\| (attvalue == NULL))
				3480	return;
				3481
				3482	encoding = xmlStrstr(attvalue, BAD_CAST"charset=");
				3483	if (encoding == NULL)
				3484	encoding = xmlStrstr(attvalue, BAD_CAST"Charset=");
				3485	if (encoding == NULL)
				3486	encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET=");
				3487	if (encoding != NULL) {
				3488	encoding += 8;
				3489	} else {
				3490	encoding = xmlStrstr(attvalue, BAD_CAST"charset =");
				3491	if (encoding == NULL)
				3492	encoding = xmlStrstr(attvalue, BAD_CAST"Charset =");
				3493	if (encoding == NULL)
				3494	encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET =");
				3495	if (encoding != NULL)
				3496	encoding += 9;
				3497	}
				3498	/*
				3499	* Restricted from 2.3.5 */
				3500	if (encoding != NULL) {
				3501	xmlCharEncoding enc;
				3502
				3503	if (ctxt->input->encoding != NULL)
				3504	xmlFree((xmlChar *) ctxt->input->encoding);
				3505	ctxt->input->encoding = encoding;
				3506
				3507	enc = xmlParseCharEncoding((const char *) encoding);
				3508	if (enc == XML_CHAR_ENCODING_8859_1) {
				3509	ctxt->charset = XML_CHAR_ENCODING_8859_1;
				3510	} else if (enc != XML_CHAR_ENCODING_UTF8) {
				3511	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3512	ctxt->sax->error(ctxt->userData,
				3513	"Unsupported encoding %s\n", encoding);
				3514	/* xmlFree(encoding); */
				3515	ctxt->wellFormed = 0;
				3516	ctxt->disableSAX = 1;
				3517	ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
				3518	}
				3519	}
				3520	}
				3521
				3522	/**
				3523	* docbCheckMeta:
				3524	* @ctxt: an SGML parser context
				3525	* @atts: the attributes values
				3526	*
				3527	* Checks an attributes from a Meta tag
				3528	*/
				3529	static void
				3530	docbCheckMeta(docbParserCtxtPtr ctxt, const xmlChar **atts) {
				3531	int i;
				3532	const xmlChar att, value;
				3533	int http = 0;
				3534	const xmlChar *content = NULL;
				3535
				3536	if ((ctxt == NULL) \|\| (atts == NULL))
				3537	return;
				3538
				3539	i = 0;
				3540	att = atts[i++];
				3541	while (att != NULL) {
				3542	value = atts[i++];
				3543	if ((value != NULL) &&
				3544	((xmlStrEqual(att, BAD_CAST"http-equiv")) \|\|
				3545	(xmlStrEqual(att, BAD_CAST"Http-Equiv")) \|\|
				3546	(xmlStrEqual(att, BAD_CAST"HTTP-EQUIV"))) &&
				3547	((xmlStrEqual(value, BAD_CAST"Content-Type")) \|\|
				3548	(xmlStrEqual(value, BAD_CAST"content-type")) \|\|
				3549	(xmlStrEqual(value, BAD_CAST"CONTENT-TYPE"))))
				3550	http = 1;
				3551	else if ((value != NULL) &&
				3552	((xmlStrEqual(att, BAD_CAST"content")) \|\|
				3553	(xmlStrEqual(att, BAD_CAST"Content")) \|\|
				3554	(xmlStrEqual(att, BAD_CAST"CONTENT"))))
				3555	content = value;
				3556	att = atts[i++];
				3557	}
				3558	if ((http) && (content != NULL))
				3559	docbCheckEncoding(ctxt, content);
				3560
				3561	}
				3562
				3563	/**
				3564	* docbParseStartTag:
				3565	* @ctxt: an SGML parser context
				3566	*
				3567	* parse a start of tag either for rule element or
				3568	* EmptyElement. In both case we don't parse the tag closing chars.
				3569	*
				3570	* [40] STag ::= '<' Name (S Attribute)* S? '>'
				3571	*
				3572	* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
				3573	*
				3574	* With namespace:
				3575	*
				3576	* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
				3577	*
				3578	* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
				3579	*
				3580	*/
				3581
				3582	static void
				3583	docbParseStartTag(docbParserCtxtPtr ctxt) {
				3584	xmlChar *name;
				3585	xmlChar *attname;
				3586	xmlChar *attvalue;
				3587	const xmlChar **atts = NULL;
				3588	int nbatts = 0;
				3589	int maxatts = 0;
				3590	int meta = 0;
				3591	int i;
				3592
				3593	if (CUR != '<') return;
				3594	NEXT;
				3595
				3596	GROW;
				3597	name = docbParseSGMLName(ctxt);
				3598	if (name == NULL) {
				3599	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3600	ctxt->sax->error(ctxt->userData,
				3601	"docbParseStartTag: invalid element name\n");
				3602	ctxt->wellFormed = 0;
				3603	return;
				3604	}
				3605	if (xmlStrEqual(name, BAD_CAST"meta"))
				3606	meta = 1;
				3607
				3608	/*
				3609	* Check for auto-closure of SGML elements.
				3610	*/
				3611	docbAutoClose(ctxt, name);
				3612
				3613	/*
				3614	* Now parse the attributes, it ends up with the ending
				3615	*
				3616	* (S Attribute)* S?
				3617	*/
				3618	SKIP_BLANKS;
				3619	while ((IS_CHAR(CUR)) &&
				3620	(CUR != '>') &&
				3621	((CUR != '/') \|\| (NXT(1) != '>'))) {
				3622	long cons = ctxt->nbChars;
				3623
				3624	GROW;
				3625	attname = docbParseAttribute(ctxt, &attvalue);
				3626	if (attname != NULL) {
				3627
				3628	/*
				3629	* Well formedness requires at most one declaration of an attribute
				3630	*/
				3631	for (i = 0; i < nbatts;i += 2) {
				3632	if (xmlStrEqual(atts[i], attname)) {
				3633	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3634	ctxt->sax->error(ctxt->userData,
				3635	"Attribute %s redefined\n",
				3636	attname);
				3637	ctxt->wellFormed = 0;
				3638	xmlFree(attname);
				3639	if (attvalue != NULL)
				3640	xmlFree(attvalue);
				3641	goto failed;
				3642	}
				3643	}
				3644
				3645	/*
				3646	* Add the pair to atts
				3647	*/
				3648	if (atts == NULL) {
				3649	maxatts = 10;
				3650	atts = (const xmlChar *) xmlMalloc(maxatts sizeof(xmlChar *));
				3651	if (atts == NULL) {
				3652	xmlGenericError(xmlGenericErrorContext,
				3653	"malloc of %ld byte failed\n",
				3654	maxatts * (long)sizeof(xmlChar *));
				3655	if (name != NULL) xmlFree(name);
				3656	return;
				3657	}
				3658	} else if (nbatts + 4 > maxatts) {
				3659	maxatts *= 2;
				3660	atts = (const xmlChar *) xmlRealloc(atts, maxatts sizeof(xmlChar *));
				3661	if (atts == NULL) {
				3662	xmlGenericError(xmlGenericErrorContext,
				3663	"realloc of %ld byte failed\n",
				3664	maxatts * (long)sizeof(xmlChar *));
				3665	if (name != NULL) xmlFree(name);
				3666	return;
				3667	}
				3668	}
				3669	atts[nbatts++] = attname;
				3670	atts[nbatts++] = attvalue;
				3671	atts[nbatts] = NULL;
				3672	atts[nbatts + 1] = NULL;
				3673	}
				3674
				3675	failed:
				3676	SKIP_BLANKS;
				3677	if (cons == ctxt->nbChars) {
				3678	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3679	ctxt->sax->error(ctxt->userData,
				3680	"docbParseStartTag: problem parsing attributes\n");
				3681	ctxt->wellFormed = 0;
				3682	break;
				3683	}
				3684	}
				3685
				3686	/*
				3687	* Handle specific association to the META tag
				3688	*/
				3689	if (meta)
				3690	docbCheckMeta(ctxt, atts);
				3691
				3692	/*
				3693	* SAX: Start of Element !
				3694	*/
				3695	docbnamePush(ctxt, xmlStrdup(name));
				3696	#ifdef DEBUG
				3697	xmlGenericError(xmlGenericErrorContext,"Start of element %s: pushed %s\n", name, ctxt->name);
				3698	#endif
				3699	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
				3700	ctxt->sax->startElement(ctxt->userData, name, atts);
				3701
				3702	if (atts != NULL) {
				3703	for (i = 0;i < nbatts;i++) {
				3704	if (atts[i] != NULL)
				3705	xmlFree((xmlChar *) atts[i]);
				3706	}
				3707	xmlFree((void *) atts);
				3708	}
				3709	if (name != NULL) xmlFree(name);
				3710	}
				3711
				3712	/**
				3713	* docbParseEndTag:
				3714	* @ctxt: an SGML parser context
				3715	*
				3716	* parse an end of tag
				3717	*
				3718	* [42] ETag ::= '</' Name S? '>'
				3719	*
				3720	* With namespace
				3721	*
				3722	* [NS 9] ETag ::= '</' QName S? '>'
				3723	*/
				3724
				3725	static void
				3726	docbParseEndTag(docbParserCtxtPtr ctxt) {
				3727	xmlChar *name;
				3728	xmlChar *oldname;
				3729	int i;
				3730
				3731	if ((CUR != '<') \|\| (NXT(1) != '/')) {
				3732	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3733	ctxt->sax->error(ctxt->userData, "docbParseEndTag: '</' not found\n");
				3734	ctxt->wellFormed = 0;
				3735	return;
				3736	}
				3737	SKIP(2);
				3738
				3739	name = docbParseSGMLName(ctxt);
				3740	if (name == NULL) {
				3741	if (CUR == '>') {
				3742	NEXT;
				3743	oldname = docbnamePop(ctxt);
				3744	if (oldname != NULL) {
				3745	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				3746	ctxt->sax->endElement(ctxt->userData, name);
				3747	#ifdef DEBUG
				3748	xmlGenericError(xmlGenericErrorContext,"End of tag </>: popping out %s\n", oldname);
				3749	#endif
				3750	xmlFree(oldname);
				3751	#ifdef DEBUG
				3752	} else {
				3753	xmlGenericError(xmlGenericErrorContext,"End of tag </>: stack empty !!!\n");
				3754	#endif
				3755	}
				3756	return;
				3757	} else
				3758	return;
				3759	}
				3760
				3761	/*
				3762	* We should definitely be at the ending "S? '>'" part
				3763	*/
				3764	SKIP_BLANKS;
				3765	if ((!IS_CHAR(CUR)) \|\| (CUR != '>')) {
				3766	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3767	ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
				3768	ctxt->wellFormed = 0;
				3769	} else
				3770	NEXT;
				3771
				3772	/*
				3773	* If the name read is not one of the element in the parsing stack
				3774	* then return, it's just an error.
				3775	*/
				3776	for (i = (ctxt->nameNr - 1);i >= 0;i--) {
				3777	if (xmlStrEqual(name, ctxt->nameTab[i])) break;
				3778	}
				3779	if (i < 0) {
				3780	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3781	ctxt->sax->error(ctxt->userData,
				3782	"Unexpected end tag : %s\n", name);
				3783	xmlFree(name);
				3784	ctxt->wellFormed = 0;
				3785	return;
				3786	}
				3787
				3788
				3789	/*
				3790	* Check for auto-closure of SGML elements.
				3791	*/
				3792
				3793	docbAutoCloseOnClose(ctxt, name);
				3794
				3795	/*
				3796	* Well formedness constraints, opening and closing must match.
				3797	* With the exception that the autoclose may have popped stuff out
				3798	* of the stack.
				3799	*/
				3800	if (((name[0] != '/') \|\| (name[1] != 0)) &&
				3801	(!xmlStrEqual(name, ctxt->name))) {
				3802	#ifdef DEBUG
				3803	xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting %s\n", name, ctxt->name);
				3804	#endif
				3805	if ((ctxt->name != NULL) &&
				3806	(!xmlStrEqual(ctxt->name, name))) {
				3807	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3808	ctxt->sax->error(ctxt->userData,
				3809	"Opening and ending tag mismatch: %s and %s\n",
				3810	name, ctxt->name);
				3811	ctxt->wellFormed = 0;
				3812	}
				3813	}
				3814
				3815	/*
				3816	* SAX: End of Tag
				3817	*/
				3818	oldname = ctxt->name;
				3819	if (((name[0] == '/') && (name[1] == 0)) \|\|
				3820	((oldname != NULL) && (xmlStrEqual(oldname, name)))) {
				3821	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				3822	ctxt->sax->endElement(ctxt->userData, name);
				3823	oldname = docbnamePop(ctxt);
				3824	if (oldname != NULL) {
				3825	#ifdef DEBUG
				3826	xmlGenericError(xmlGenericErrorContext,"End of tag %s: popping out %s\n", name, oldname);
				3827	#endif
				3828	xmlFree(oldname);
				3829	#ifdef DEBUG
				3830	} else {
				3831	xmlGenericError(xmlGenericErrorContext,"End of tag %s: stack empty !!!\n", name);
				3832	#endif
				3833	}
				3834	}
				3835
				3836	if (name != NULL)
				3837	xmlFree(name);
				3838
				3839	return;
				3840	}
				3841
				3842
				3843	/**
				3844	* docbParseReference:
				3845	* @ctxt: an SGML parser context
				3846	*
				3847	* parse and handle entity references in content,
				3848	* this will end-up in a call to character() since this is either a
				3849	* CharRef, or a predefined entity.
				3850	*/
				3851	static void
				3852	docbParseReference(docbParserCtxtPtr ctxt) {
				3853	docbEntityDescPtr ent;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3854	xmlEntityPtr xent;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3855	xmlChar out[6];
				3856	xmlChar *name;
				3857	if (CUR != '&') return;
				3858
				3859	if (NXT(1) == '#') {
				3860	unsigned int c;
				3861	int bits, i = 0;
				3862
				3863	c = docbParseCharRef(ctxt);
				3864	if (c < 0x80) { out[i++]= c; bits= -6; }
				3865	else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				3866	else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				3867	else { out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				3868
				3869	for ( ; bits >= 0; bits-= 6) {
				3870	out[i++]= ((c >> bits) & 0x3F) \| 0x80;
				3871	}
				3872	out[i] = 0;
				3873
				3874	docbCheckParagraph(ctxt);
				3875	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				3876	ctxt->sax->characters(ctxt->userData, out, i);
				3877	} else {
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3878	/*
				3879	* Lookup the entity in the table.
				3880	*/
				3881	xent = docbParseEntityRef(ctxt, &name);
				3882	if (xent != NULL) {
				3883	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
				3884	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
				3885	/*
				3886	* Create a node.
				3887	*/
				3888	ctxt->sax->reference(ctxt->userData, xent->name);
				3889	return;
				3890	} else if (ctxt->replaceEntities) {
				3891	if ((xent->children == NULL) &&
				3892	(xent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
				3893	/*
				3894	* we really need to fetch and parse the external entity
				3895	*/
				3896	int parse;
				3897	xmlNodePtr children = NULL;
				3898
				3899	parse = docbParseCtxtExternalEntity(ctxt,
				3900	xent->SystemID, xent->ExternalID, &children);
				3901	xmlAddChildList((xmlNodePtr) xent, children);
				3902	}
				3903	if ((ctxt->node != NULL) && (xent->children != NULL)) {
				3904	/*
				3905	* Seems we are generating the DOM content, do
				3906	* a simple tree copy
				3907	*/
				3908	xmlNodePtr new;
				3909	new = xmlCopyNodeList(xent->children);
				3910
				3911	xmlAddChildList(ctxt->node, new);
				3912	/*
				3913	* This is to avoid a nasty side effect, see
				3914	* characters() in SAX.c
				3915	*/
				3916	ctxt->nodemem = 0;
				3917	ctxt->nodelen = 0;
				3918	}
				3919	}
				3920	} else if (name != NULL) {
				3921	ent = docbEntityLookup(name);
				3922	if ((ent == NULL) \|\| (ent->value <= 0)) {
				3923	docbCheckParagraph(ctxt);
				3924	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
				3925	ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
				3926	ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
				3927	/* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
				3928	}
				3929	} else {
				3930	unsigned int c;
				3931	int bits, i = 0;
				3932
				3933	c = ent->value;
				3934	if (c < 0x80)
				3935	{ out[i++]= c; bits= -6; }
				3936	else if (c < 0x800)
				3937	{ out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				3938	else if (c < 0x10000)
				3939	{ out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				3940	else
				3941	{ out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				3942
				3943	for ( ; bits >= 0; bits-= 6) {
				3944	out[i++]= ((c >> bits) & 0x3F) \| 0x80;
				3945	}
				3946	out[i] = 0;
				3947
				3948	docbCheckParagraph(ctxt);
				3949	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				3950	ctxt->sax->characters(ctxt->userData, out, i);
				3951	}
				3952	} else {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3953	docbCheckParagraph(ctxt);
				3954	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				3955	ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
				3956	return;
				3957	}
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3958	if (name != NULL)
				3959	xmlFree(name);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3960	}
				3961	}
				3962
				3963	/**
				3964	* docbParseContent:
				3965	* @ctxt: an SGML parser context
				3966	* @name: the node name
				3967	*
				3968	* Parse a content: comment, sub-element, reference or text.
				3969	*
				3970	*/
				3971
				3972	static void
				3973	docbParseContent(docbParserCtxtPtr ctxt) {
				3974	xmlChar *currentNode;
				3975	int depth;
				3976
				3977	currentNode = xmlStrdup(ctxt->name);
				3978	depth = ctxt->nameNr;
				3979	while (1) {
				3980	long cons = ctxt->nbChars;
				3981
				3982	GROW;
				3983	/*
				3984	* Our tag or one of it's parent or children is ending.
				3985	*/
				3986	if ((CUR == '<') && (NXT(1) == '/')) {
				3987	docbParseEndTag(ctxt);
				3988	if (currentNode != NULL) xmlFree(currentNode);
				3989	return;
				3990	}
				3991
				3992	/*
				3993	* Has this node been popped out during parsing of
				3994	* the next element
				3995	*/
				3996	if ((!xmlStrEqual(currentNode, ctxt->name)) &&
				3997	(depth >= ctxt->nameNr)) {
				3998	if (currentNode != NULL) xmlFree(currentNode);
				3999	return;
				4000	}
				4001
				4002	/*
				4003	* Sometimes DOCTYPE arrives in the middle of the document
				4004	*/
				4005	if ((CUR == '<') && (NXT(1) == '!') &&
				4006	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				4007	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				4008	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				4009	(UPP(8) == 'E')) {
				4010	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4011	ctxt->sax->error(ctxt->userData,
				4012	"Misplaced DOCTYPE declaration\n");
				4013	ctxt->wellFormed = 0;
				4014	docbParseDocTypeDecl(ctxt);
				4015	}
				4016
				4017	/*
				4018	* First case : a comment
				4019	*/
				4020	if ((CUR == '<') && (NXT(1) == '!') &&
				4021	(NXT(2) == '-') && (NXT(3) == '-')) {
				4022	docbParseComment(ctxt);
				4023	}
				4024
				4025	/*
				4026	* Second case : a sub-element.
				4027	*/
				4028	else if (CUR == '<') {
				4029	docbParseElement(ctxt);
				4030	}
				4031
				4032	/*
				4033	* Third case : a reference. If if has not been resolved,
				4034	* parsing returns it's Name, create the node
				4035	*/
				4036	else if (CUR == '&') {
				4037	docbParseReference(ctxt);
				4038	}
				4039
				4040	/*
				4041	* Fourth : end of the resource
				4042	*/
				4043	else if (CUR == 0) {
				4044	docbAutoClose(ctxt, NULL);
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4045	if (ctxt->nameNr == 0)
				4046	break;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4047	}
				4048
				4049	/*
				4050	* Last case, text. Note that References are handled directly.
				4051	*/
				4052	else {
				4053	docbParseCharData(ctxt);
				4054	}
				4055
				4056	if (cons == ctxt->nbChars) {
				4057	if (ctxt->node != NULL) {
				4058	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4059	ctxt->sax->error(ctxt->userData,
				4060	"detected an error in element content\n");
				4061	ctxt->wellFormed = 0;
				4062	}
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4063	break;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4064	}
				4065
				4066	GROW;
				4067	}
				4068	if (currentNode != NULL) xmlFree(currentNode);
				4069	}
				4070
				4071	/**
				4072	* docbParseElement:
				4073	* @ctxt: an SGML parser context
				4074	*
				4075	* parse an SGML element, this is highly recursive
				4076	*
				4077	* [39] element ::= EmptyElemTag \| STag content ETag
				4078	*
				4079	* [41] Attribute ::= Name Eq AttValue
				4080	*/
				4081
				4082	static void
				4083	docbParseElement(docbParserCtxtPtr ctxt) {
				4084	xmlChar *name;
				4085	xmlChar *currentNode = NULL;
				4086	docbElemDescPtr info;
				4087	docbParserNodeInfo node_info;
				4088	xmlChar *oldname;
				4089	int depth = ctxt->nameNr;
				4090
				4091	/* Capture start position */
				4092	if (ctxt->record_info) {
				4093	node_info.begin_pos = ctxt->input->consumed +
				4094	(CUR_PTR - ctxt->input->base);
				4095	node_info.begin_line = ctxt->input->line;
				4096	}
				4097
				4098	oldname = xmlStrdup(ctxt->name);
				4099	docbParseStartTag(ctxt);
				4100	name = ctxt->name;
				4101	#ifdef DEBUG
				4102	if (oldname == NULL)
				4103	xmlGenericError(xmlGenericErrorContext,
				4104	"Start of element %s\n", name);
				4105	else if (name == NULL)
				4106	xmlGenericError(xmlGenericErrorContext,
				4107	"Start of element failed, was %s\n", oldname);
				4108	else
				4109	xmlGenericError(xmlGenericErrorContext,
				4110	"Start of element %s, was %s\n", name, oldname);
				4111	#endif
				4112	if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) \|\|
				4113	(name == NULL)) {
				4114	if (CUR == '>')
				4115	NEXT;
				4116	if (oldname != NULL)
				4117	xmlFree(oldname);
				4118	return;
				4119	}
				4120	if (oldname != NULL)
				4121	xmlFree(oldname);
				4122
				4123	/*
				4124	* Lookup the info for that element.
				4125	*/
				4126	info = docbTagLookup(name);
				4127	if (info == NULL) {
				4128	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4129	ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
				4130	name);
				4131	ctxt->wellFormed = 0;
				4132	} else if (info->depr) {
				4133	/***************************
				4134	if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
				4135	ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n",
				4136	name);
				4137	***************************/
				4138	}
				4139
				4140	/*
				4141	* Check for an Empty Element labelled the XML/SGML way
				4142	*/
				4143	if ((CUR == '/') && (NXT(1) == '>')) {
				4144	SKIP(2);
				4145	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				4146	ctxt->sax->endElement(ctxt->userData, name);
				4147	oldname = docbnamePop(ctxt);
				4148	#ifdef DEBUG
				4149	xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", oldname);
				4150	#endif
				4151	if (oldname != NULL)
				4152	xmlFree(oldname);
				4153	return;
				4154	}
				4155
				4156	if (CUR == '>') {
				4157	NEXT;
				4158	} else {
				4159	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4160	ctxt->sax->error(ctxt->userData,
				4161	"Couldn't find end of Start Tag %s\n",
				4162	name);
				4163	ctxt->wellFormed = 0;
				4164
				4165	/*
				4166	* end of parsing of this node.
				4167	*/
				4168	if (xmlStrEqual(name, ctxt->name)) {
				4169	nodePop(ctxt);
				4170	oldname = docbnamePop(ctxt);
				4171	#ifdef DEBUG
				4172	xmlGenericError(xmlGenericErrorContext,"End of start tag problem: popping out %s\n", oldname);
				4173	#endif
				4174	if (oldname != NULL)
				4175	xmlFree(oldname);
				4176	}
				4177
				4178	/*
				4179	* Capture end position and add node
				4180	*/
				4181	if ( currentNode != NULL && ctxt->record_info ) {
				4182	node_info.end_pos = ctxt->input->consumed +
				4183	(CUR_PTR - ctxt->input->base);
				4184	node_info.end_line = ctxt->input->line;
				4185	node_info.node = ctxt->node;
				4186	xmlParserAddNodeInfo(ctxt, &node_info);
				4187	}
				4188	return;
				4189	}
				4190
				4191	/*
				4192	* Check for an Empty Element from DTD definition
				4193	*/
				4194	if ((info != NULL) && (info->empty)) {
				4195	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				4196	ctxt->sax->endElement(ctxt->userData, name);
				4197	oldname = docbnamePop(ctxt);
				4198	#ifdef DEBUG
				4199	xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
				4200	#endif
				4201	if (oldname != NULL)
				4202	xmlFree(oldname);
				4203	return;
				4204	}
				4205
				4206	/*
				4207	* Parse the content of the element:
				4208	*/
				4209	currentNode = xmlStrdup(ctxt->name);
				4210	depth = ctxt->nameNr;
				4211	while (IS_CHAR(CUR)) {
				4212	docbParseContent(ctxt);
				4213	if (ctxt->nameNr < depth) break;
				4214	}
				4215
				4216	if (!IS_CHAR(CUR)) {
				4217	/************
				4218	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4219	ctxt->sax->error(ctxt->userData,
				4220	"Premature end of data in tag %s\n", currentNode);
				4221	ctxt->wellFormed = 0;
				4222	*************/
				4223
				4224	/*
				4225	* end of parsing of this node.
				4226	*/
				4227	nodePop(ctxt);
				4228	oldname = docbnamePop(ctxt);
				4229	#ifdef DEBUG
				4230	xmlGenericError(xmlGenericErrorContext,"Premature end of tag %s : popping out %s\n", name, oldname);
				4231	#endif
				4232	if (oldname != NULL)
				4233	xmlFree(oldname);
				4234	if (currentNode != NULL)
				4235	xmlFree(currentNode);
				4236	return;
				4237	}
				4238
				4239	/*
				4240	* Capture end position and add node
				4241	*/
				4242	if ( currentNode != NULL && ctxt->record_info ) {
				4243	node_info.end_pos = ctxt->input->consumed +
				4244	(CUR_PTR - ctxt->input->base);
				4245	node_info.end_line = ctxt->input->line;
				4246	node_info.node = ctxt->node;
				4247	xmlParserAddNodeInfo(ctxt, &node_info);
				4248	}
				4249	if (currentNode != NULL)
				4250	xmlFree(currentNode);
				4251	}
				4252
				4253	/**
				4254	* docbParseEntityDecl:
				4255	* @ctxt: an SGML parser context
				4256	*
				4257	* parse <!ENTITY declarations
				4258	*
				4259	*/
				4260
				4261	static void
				4262	docbParseEntityDecl(xmlParserCtxtPtr ctxt) {
				4263	xmlChar *name = NULL;
				4264	xmlChar *value = NULL;
				4265	xmlChar URI = NULL, literal = NULL;
				4266	xmlChar *ndata = NULL;
				4267	int isParameter = 0;
				4268	xmlChar *orig = NULL;
				4269
				4270	GROW;
				4271	if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4272	(UPP(2) == 'E') && (UPP(3) == 'N') &&
				4273	(UPP(4) == 'T') && (UPP(5) == 'I') &&
				4274	(UPP(6) == 'T') && (UPP(7) == 'Y')) {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4275	xmlParserInputPtr input = ctxt->input;
				4276	ctxt->instate = XML_PARSER_ENTITY_DECL;
				4277	SHRINK;
				4278	SKIP(8);
				4279	if (!IS_BLANK(CUR)) {
				4280	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4281	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4282	ctxt->sax->error(ctxt->userData,
				4283	"Space required after '<!ENTITY'\n");
				4284	ctxt->wellFormed = 0;
				4285	ctxt->disableSAX = 1;
				4286	}
				4287	SKIP_BLANKS;
				4288
				4289	if (RAW == '%') {
				4290	NEXT;
				4291	if (!IS_BLANK(CUR)) {
				4292	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4293	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4294	ctxt->sax->error(ctxt->userData,
				4295	"Space required after '%'\n");
				4296	ctxt->wellFormed = 0;
				4297	ctxt->disableSAX = 1;
				4298	}
				4299	SKIP_BLANKS;
				4300	isParameter = 1;
				4301	}
				4302
				4303	name = xmlParseName(ctxt);
				4304	if (name == NULL) {
				4305	ctxt->errNo = XML_ERR_NAME_REQUIRED;
				4306	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4307	ctxt->sax->error(ctxt->userData, "sgmlarseEntityDecl: no name\n");
				4308	ctxt->wellFormed = 0;
				4309	ctxt->disableSAX = 1;
				4310	return;
				4311	}
				4312	if (!IS_BLANK(CUR)) {
				4313	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4314	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4315	ctxt->sax->error(ctxt->userData,
				4316	"Space required after the entity name\n");
				4317	ctxt->wellFormed = 0;
				4318	ctxt->disableSAX = 1;
				4319	}
				4320	SKIP_BLANKS;
				4321
				4322	/*
				4323	* handle the various case of definitions...
				4324	*/
				4325	if (isParameter) {
				4326	if ((RAW == '"') \|\| (RAW == '\'')) {
				4327	value = xmlParseEntityValue(ctxt, &orig);
				4328	if (value) {
				4329	if ((ctxt->sax != NULL) &&
				4330	(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
				4331	ctxt->sax->entityDecl(ctxt->userData, name,
				4332	XML_INTERNAL_PARAMETER_ENTITY,
				4333	NULL, NULL, value);
				4334	}
				4335	} else {
				4336	URI = xmlParseExternalID(ctxt, &literal, 1);
				4337	if ((URI == NULL) && (literal == NULL)) {
				4338	ctxt->errNo = XML_ERR_VALUE_REQUIRED;
				4339	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4340	ctxt->sax->error(ctxt->userData,
				4341	"Entity value required\n");
				4342	ctxt->wellFormed = 0;
				4343	ctxt->disableSAX = 1;
				4344	}
				4345	if (URI) {
				4346	xmlURIPtr uri;
				4347
				4348	uri = xmlParseURI((const char *) URI);
				4349	if (uri == NULL) {
				4350	ctxt->errNo = XML_ERR_INVALID_URI;
				4351	if ((ctxt->sax != NULL) &&
				4352	(!ctxt->disableSAX) &&
				4353	(ctxt->sax->error != NULL))
				4354	ctxt->sax->error(ctxt->userData,
				4355	"Invalid URI: %s\n", URI);
				4356	ctxt->wellFormed = 0;
				4357	} else {
				4358	if (uri->fragment != NULL) {
				4359	ctxt->errNo = XML_ERR_URI_FRAGMENT;
				4360	if ((ctxt->sax != NULL) &&
				4361	(!ctxt->disableSAX) &&
				4362	(ctxt->sax->error != NULL))
				4363	ctxt->sax->error(ctxt->userData,
				4364	"Fragment not allowed: %s\n", URI);
				4365	ctxt->wellFormed = 0;
				4366	} else {
				4367	if ((ctxt->sax != NULL) &&
				4368	(!ctxt->disableSAX) &&
				4369	(ctxt->sax->entityDecl != NULL))
				4370	ctxt->sax->entityDecl(ctxt->userData, name,
				4371	XML_EXTERNAL_PARAMETER_ENTITY,
				4372	literal, URI, NULL);
				4373	}
				4374	xmlFreeURI(uri);
				4375	}
				4376	}
				4377	}
				4378	} else {
				4379	if ((RAW == '"') \|\| (RAW == '\'')) {
				4380	value = xmlParseEntityValue(ctxt, &orig);
				4381	if ((ctxt->sax != NULL) &&
				4382	(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
				4383	ctxt->sax->entityDecl(ctxt->userData, name,
				4384	XML_INTERNAL_GENERAL_ENTITY,
				4385	NULL, NULL, value);
				4386	} else {
				4387	URI = xmlParseExternalID(ctxt, &literal, 1);
				4388	if ((URI == NULL) && (literal == NULL)) {
				4389	ctxt->errNo = XML_ERR_VALUE_REQUIRED;
				4390	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4391	ctxt->sax->error(ctxt->userData,
				4392	"Entity value required\n");
				4393	ctxt->wellFormed = 0;
				4394	ctxt->disableSAX = 1;
				4395	}
				4396	if (URI) {
				4397	xmlURIPtr uri;
				4398
				4399	uri = xmlParseURI((const char *)URI);
				4400	if (uri == NULL) {
				4401	ctxt->errNo = XML_ERR_INVALID_URI;
				4402	if ((ctxt->sax != NULL) &&
				4403	(!ctxt->disableSAX) &&
				4404	(ctxt->sax->error != NULL))
				4405	ctxt->sax->error(ctxt->userData,
				4406	"Invalid URI: %s\n", URI);
				4407	ctxt->wellFormed = 0;
				4408	} else {
				4409	if (uri->fragment != NULL) {
				4410	ctxt->errNo = XML_ERR_URI_FRAGMENT;
				4411	if ((ctxt->sax != NULL) &&
				4412	(!ctxt->disableSAX) &&
				4413	(ctxt->sax->error != NULL))
				4414	ctxt->sax->error(ctxt->userData,
				4415	"Fragment not allowed: %s\n", URI);
				4416	ctxt->wellFormed = 0;
				4417	}
				4418	xmlFreeURI(uri);
				4419	}
				4420	}
				4421	if ((RAW != '>') && (!IS_BLANK(CUR))) {
				4422	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4423	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4424	ctxt->sax->error(ctxt->userData,
				4425	"Space required before content model\n");
				4426	ctxt->wellFormed = 0;
				4427	ctxt->disableSAX = 1;
				4428	}
				4429	SKIP_BLANKS;
				4430
				4431	/*
				4432	* SGML specific: here we can get the content model
				4433	*/
				4434	if (RAW != '>') {
				4435	xmlChar *contmod;
				4436
				4437	contmod = xmlParseName(ctxt);
				4438
				4439	if (contmod == NULL) {
				4440	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4441	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4442	ctxt->sax->error(ctxt->userData,
				4443	"Could not parse entity content model\n");
				4444	ctxt->wellFormed = 0;
				4445	ctxt->disableSAX = 1;
				4446	} else {
				4447	if (xmlStrEqual(contmod, BAD_CAST"NDATA")) {
				4448	if (!IS_BLANK(CUR)) {
				4449	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4450	if ((ctxt->sax != NULL) &&
				4451	(ctxt->sax->error != NULL))
				4452	ctxt->sax->error(ctxt->userData,
				4453	"Space required after 'NDATA'\n");
				4454	ctxt->wellFormed = 0;
				4455	ctxt->disableSAX = 1;
				4456	}
				4457	SKIP_BLANKS;
				4458	ndata = xmlParseName(ctxt);
				4459	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
				4460	(ctxt->sax->unparsedEntityDecl != NULL)) {
				4461	ctxt->sax->unparsedEntityDecl(ctxt->userData,
				4462	name, literal, URI, ndata);
				4463	}
				4464	} else if (xmlStrEqual(contmod, BAD_CAST"SUBDOC")) {
				4465	if ((ctxt->sax != NULL) &&
				4466	(ctxt->sax->warning != NULL))
				4467	ctxt->sax->warning(ctxt->userData,
				4468	"SUBDOC entities are not supported\n");
				4469	SKIP_BLANKS;
				4470	ndata = xmlParseName(ctxt);
				4471	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
				4472	(ctxt->sax->unparsedEntityDecl != NULL)) {
				4473	ctxt->sax->unparsedEntityDecl(ctxt->userData,
				4474	name, literal, URI, ndata);
				4475	}
				4476	} else if (xmlStrEqual(contmod, BAD_CAST"CDATA")) {
				4477	if ((ctxt->sax != NULL) &&
				4478	(ctxt->sax->warning != NULL))
				4479	ctxt->sax->warning(ctxt->userData,
				4480	"CDATA entities are not supported\n");
				4481	SKIP_BLANKS;
				4482	ndata = xmlParseName(ctxt);
				4483	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
				4484	(ctxt->sax->unparsedEntityDecl != NULL)) {
				4485	ctxt->sax->unparsedEntityDecl(ctxt->userData,
				4486	name, literal, URI, ndata);
				4487	}
				4488	}
				4489	xmlFree(contmod);
				4490	}
				4491	} else {
				4492	if ((ctxt->sax != NULL) &&
				4493	(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
				4494	ctxt->sax->entityDecl(ctxt->userData, name,
				4495	XML_EXTERNAL_GENERAL_PARSED_ENTITY,
				4496	literal, URI, NULL);
				4497	}
				4498	}
				4499	}
				4500	SKIP_BLANKS;
				4501	if (RAW != '>') {
				4502	ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
				4503	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4504	ctxt->sax->error(ctxt->userData,
				4505	"docbParseEntityDecl: entity %s not terminated\n", name);
				4506	ctxt->wellFormed = 0;
				4507	ctxt->disableSAX = 1;
				4508	} else {
				4509	if (input != ctxt->input) {
				4510	ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
				4511	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4512	ctxt->sax->error(ctxt->userData,
				4513	"Entity declaration doesn't start and stop in the same entity\n");
				4514	ctxt->wellFormed = 0;
				4515	ctxt->disableSAX = 1;
				4516	}
				4517	NEXT;
				4518	}
				4519	if (orig != NULL) {
				4520	/*
				4521	* Ugly mechanism to save the raw entity value.
				4522	*/
				4523	xmlEntityPtr cur = NULL;
				4524
				4525	if (isParameter) {
				4526	if ((ctxt->sax != NULL) &&
				4527	(ctxt->sax->getParameterEntity != NULL))
				4528	cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
				4529	} else {
				4530	if ((ctxt->sax != NULL) &&
				4531	(ctxt->sax->getEntity != NULL))
				4532	cur = ctxt->sax->getEntity(ctxt->userData, name);
				4533	}
				4534	if (cur != NULL) {
				4535	if (cur->orig != NULL)
				4536	xmlFree(orig);
				4537	else
				4538	cur->orig = orig;
				4539	} else
				4540	xmlFree(orig);
				4541	}
				4542	if (name != NULL) xmlFree(name);
				4543	if (value != NULL) xmlFree(value);
				4544	if (URI != NULL) xmlFree(URI);
				4545	if (literal != NULL) xmlFree(literal);
				4546	if (ndata != NULL) xmlFree(ndata);
				4547	}
				4548	}
				4549
				4550	/**
				4551	* docbParseMarkupDecl:
				4552	* @ctxt: an SGML parser context
				4553	*
				4554	* parse Markup declarations
				4555	*
				4556	* [29] markupdecl ::= elementdecl \| AttlistDecl \| EntityDecl \|
				4557	* NotationDecl \| PI \| Comment
				4558	*/
				4559	static void
				4560	docbParseMarkupDecl(xmlParserCtxtPtr ctxt) {
				4561	GROW;
				4562	xmlParseElementDecl(ctxt);
				4563	xmlParseAttributeListDecl(ctxt);
				4564	docbParseEntityDecl(ctxt);
				4565	xmlParseNotationDecl(ctxt);
				4566	xmlParsePI(ctxt);
				4567	xmlParseComment(ctxt);
				4568	/*
				4569	* This is only for internal subset. On external entities,
				4570	* the replacement is done before parsing stage
				4571	*/
				4572	if ((ctxt->external == 0) && (ctxt->inputNr == 1))
				4573	xmlParsePEReference(ctxt);
				4574	ctxt->instate = XML_PARSER_DTD;
				4575	}
				4576
				4577	/**
				4578	* docbParseInternalsubset:
				4579	* @ctxt: an SGML parser context
				4580	*
				4581	* parse the internal subset declaration
				4582	*
				4583	* [28 end] ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'
				4584	*/
				4585
				4586	static void
				4587	docbParseInternalSubset(xmlParserCtxtPtr ctxt) {
				4588	/*
				4589	* Is there any DTD definition ?
				4590	*/
				4591	if (RAW == '[') {
				4592	ctxt->instate = XML_PARSER_DTD;
				4593	NEXT;
				4594	/*
				4595	* Parse the succession of Markup declarations and
				4596	* PEReferences.
				4597	* Subsequence (markupdecl \| PEReference \| S)*
				4598	*/
				4599	while (RAW != ']') {
				4600	const xmlChar *check = CUR_PTR;
				4601	int cons = ctxt->input->consumed;
				4602
				4603	SKIP_BLANKS;
				4604	docbParseMarkupDecl(ctxt);
				4605	xmlParsePEReference(ctxt);
				4606
				4607	/*
				4608	* Pop-up of finished entities.
				4609	*/
				4610	while ((RAW == 0) && (ctxt->inputNr > 1))
				4611	xmlPopInput(ctxt);
				4612
				4613	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
				4614	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
				4615	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4616	ctxt->sax->error(ctxt->userData,
				4617	"docbParseInternalSubset: error detected in Markup declaration\n");
				4618	ctxt->wellFormed = 0;
				4619	ctxt->disableSAX = 1;
				4620	break;
				4621	}
				4622	}
				4623	if (RAW == ']') {
				4624	NEXT;
				4625	SKIP_BLANKS;
				4626	}
				4627	}
				4628
				4629	/*
				4630	* We should be at the end of the DOCTYPE declaration.
				4631	*/
				4632	if (RAW != '>') {
				4633	ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
				4634	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4635	ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
				4636	ctxt->wellFormed = 0;
				4637	ctxt->disableSAX = 1;
				4638	}
				4639	NEXT;
				4640	}
				4641
				4642	/**
				4643	* docbParseMisc:
				4644	* @ctxt: an XML parser context
				4645	*
				4646	* parse an XML Misc* optionnal field.
				4647	*
				4648	* [27] Misc ::= Comment \| PI \| S
				4649	*/
				4650
				4651	static void
				4652	docbParseMisc(xmlParserCtxtPtr ctxt) {
				4653	while (((RAW == '<') && (NXT(1) == '?')) \|\|
				4654	((RAW == '<') && (NXT(1) == '!') &&
				4655	(NXT(2) == '-') && (NXT(3) == '-')) \|\|
				4656	IS_BLANK(CUR)) {
				4657	if ((RAW == '<') && (NXT(1) == '?')) {
				4658	xmlParsePI(ctxt); /* TODO: SGML PIs differs */
				4659	} else if (IS_BLANK(CUR)) {
				4660	NEXT;
				4661	} else
				4662	xmlParseComment(ctxt);
				4663	}
				4664	}
				4665
				4666	/**
				4667	* docbParseDocument :
				4668	* @ctxt: an SGML parser context
				4669	*
				4670	* parse an SGML document (and build a tree if using the standard SAX
				4671	* interface).
				4672	*
				4673	* Returns 0, -1 in case of error. the parser context is augmented
				4674	* as a result of the parsing.
				4675	*/
				4676
				4677	int
				4678	docbParseDocument(docbParserCtxtPtr ctxt) {
				4679	xmlChar start[4];
				4680	xmlCharEncoding enc;
				4681	xmlDtdPtr dtd;
				4682
				4683	docbDefaultSAXHandlerInit();
				4684	ctxt->html = 2;
				4685
				4686	GROW;
				4687	/*
				4688	* SAX: beginning of the document processing.
				4689	*/
				4690	if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
				4691	ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
				4692
				4693	/*
				4694	* Get the 4 first bytes and decode the charset
				4695	* if enc != XML_CHAR_ENCODING_NONE
				4696	* plug some encoding conversion routines.
				4697	*/
				4698	start[0] = RAW;
				4699	start[1] = NXT(1);
				4700	start[2] = NXT(2);
				4701	start[3] = NXT(3);
				4702	enc = xmlDetectCharEncoding(start, 4);
				4703	if (enc != XML_CHAR_ENCODING_NONE) {
				4704	xmlSwitchEncoding(ctxt, enc);
				4705	}
				4706
				4707	/*
				4708	* Wipe out everything which is before the first '<'
				4709	*/
				4710	SKIP_BLANKS;
				4711	if (CUR == 0) {
				4712	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4713	ctxt->sax->error(ctxt->userData, "Document is empty\n");
				4714	ctxt->wellFormed = 0;
				4715	}
				4716
				4717	if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
				4718	ctxt->sax->startDocument(ctxt->userData);
				4719
				4720
				4721	/*
				4722	* The Misc part of the Prolog
				4723	*/
				4724	GROW;
				4725	docbParseMisc(ctxt);
				4726
				4727	/*
				4728	* Then possibly doc type declaration(s) and more Misc
				4729	* (doctypedecl Misc*)?
				4730	*/
				4731	GROW;
				4732	if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4733	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				4734	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				4735	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				4736	(UPP(8) == 'E')) {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4737
				4738	ctxt->inSubset = 1;
				4739	docbParseDocTypeDecl(ctxt);
				4740	if (RAW == '[') {
				4741	ctxt->instate = XML_PARSER_DTD;
				4742	docbParseInternalSubset(ctxt);
				4743	}
				4744
				4745	/*
				4746	* Create and update the external subset.
				4747	*/
				4748	ctxt->inSubset = 2;
				4749	if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
				4750	(!ctxt->disableSAX))
				4751	ctxt->sax->internalSubset(ctxt->userData, ctxt->intSubName,
				4752	ctxt->extSubSystem, ctxt->extSubURI);
				4753	ctxt->inSubset = 0;
				4754
				4755
				4756	ctxt->instate = XML_PARSER_PROLOG;
				4757	docbParseMisc(ctxt);
				4758	}
				4759
				4760	/*
				4761	* Time to start parsing the tree itself
				4762	*/
				4763	docbParseContent(ctxt);
				4764
				4765	/*
				4766	* autoclose
				4767	*/
				4768	if (CUR == 0)
				4769	docbAutoClose(ctxt, NULL);
				4770
				4771
				4772	/*
				4773	* SAX: end of the document processing.
				4774	*/
				4775	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				4776	ctxt->sax->endDocument(ctxt->userData);
				4777
				4778	if (ctxt->myDoc != NULL) {
				4779	dtd = ctxt->myDoc->intSubset;
				4780	if (dtd == NULL)
				4781	ctxt->myDoc->intSubset =
				4782	xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
				4783	BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
				4784	BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
				4785	}
				4786	if (! ctxt->wellFormed) return(-1);
				4787	return(0);
				4788	}
				4789
				4790
				4791	/************************************************************************
				4792	* *
				4793	* Parser contexts handling *
				4794	* *
				4795	************************************************************************/
				4796
				4797	/**
				4798	* xmlInitParserCtxt:
				4799	* @ctxt: an SGML parser context
				4800	*
				4801	* Initialize a parser context
				4802	*/
				4803
				4804	static void
				4805	docbInitParserCtxt(docbParserCtxtPtr ctxt)
				4806	{
				4807	docbSAXHandler *sax;
				4808
				4809	if (ctxt == NULL) return;
				4810	memset(ctxt, 0, sizeof(docbParserCtxt));
				4811
				4812	sax = (docbSAXHandler *) xmlMalloc(sizeof(docbSAXHandler));
				4813	if (sax == NULL) {
				4814	xmlGenericError(xmlGenericErrorContext,
				4815	"docbInitParserCtxt: out of memory\n");
				4816	}
				4817	memset(sax, 0, sizeof(docbSAXHandler));
				4818
				4819	/* Allocate the Input stack */
				4820	ctxt->inputTab = (docbParserInputPtr *)
				4821	xmlMalloc(5 * sizeof(docbParserInputPtr));
				4822	if (ctxt->inputTab == NULL) {
				4823	xmlGenericError(xmlGenericErrorContext,
				4824	"docbInitParserCtxt: out of memory\n");
				4825	}
				4826	ctxt->inputNr = 0;
				4827	ctxt->inputMax = 5;
				4828	ctxt->input = NULL;
				4829	ctxt->version = NULL;
				4830	ctxt->encoding = NULL;
				4831	ctxt->standalone = -1;
				4832	ctxt->instate = XML_PARSER_START;
				4833
				4834	/* Allocate the Node stack */
				4835	ctxt->nodeTab = (docbNodePtr ) xmlMalloc(10 sizeof(docbNodePtr));
				4836	ctxt->nodeNr = 0;
				4837	ctxt->nodeMax = 10;
				4838	ctxt->node = NULL;
				4839
				4840	/* Allocate the Name stack */
				4841	ctxt->nameTab = (xmlChar *) xmlMalloc(10 sizeof(xmlChar *));
				4842	ctxt->nameNr = 0;
				4843	ctxt->nameMax = 10;
				4844	ctxt->name = NULL;
				4845
				4846	if (sax == NULL) ctxt->sax = &docbDefaultSAXHandler;
				4847	else {
				4848	ctxt->sax = sax;
				4849	memcpy(sax, &docbDefaultSAXHandler, sizeof(docbSAXHandler));
				4850	}
				4851	ctxt->userData = ctxt;
				4852	ctxt->myDoc = NULL;
				4853	ctxt->wellFormed = 1;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4854	ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4855	ctxt->html = 2;
				4856	ctxt->record_info = 0;
				4857	ctxt->validate = 0;
				4858	ctxt->nbChars = 0;
				4859	ctxt->checkIndex = 0;
				4860	xmlInitNodeInfoSeq(&ctxt->node_seq);
				4861	}
				4862
				4863	/**
				4864	* docbFreeParserCtxt:
				4865	* @ctxt: an SGML parser context
				4866	*
				4867	* Free all the memory used by a parser context. However the parsed
				4868	* document in ctxt->myDoc is not freed.
				4869	*/
				4870
				4871	void
				4872	docbFreeParserCtxt(docbParserCtxtPtr ctxt)
				4873	{
				4874	xmlFreeParserCtxt(ctxt);
				4875	}
				4876
				4877	/**
				4878	* docbCreateDocParserCtxt :
				4879	* @cur: a pointer to an array of xmlChar
				4880	* @encoding: a free form C string describing the SGML document encoding, or NULL
				4881	*
				4882	* Create a parser context for an SGML document.
				4883	*
				4884	* Returns the new parser context or NULL
				4885	*/
				4886	static docbParserCtxtPtr
				4887	docbCreateDocParserCtxt(xmlChar cur, const char encoding) {
				4888	docbParserCtxtPtr ctxt;
				4889	docbParserInputPtr input;
				4890	/* sgmlCharEncoding enc; */
				4891
				4892	ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
				4893	if (ctxt == NULL) {
				4894	perror("malloc");
				4895	return(NULL);
				4896	}
				4897	docbInitParserCtxt(ctxt);
				4898	input = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput));
				4899	if (input == NULL) {
				4900	perror("malloc");
				4901	xmlFree(ctxt);
				4902	return(NULL);
				4903	}
				4904	memset(input, 0, sizeof(docbParserInput));
				4905
				4906	input->line = 1;
				4907	input->col = 1;
				4908	input->base = cur;
				4909	input->cur = cur;
				4910
				4911	inputPush(ctxt, input);
				4912	return(ctxt);
				4913	}
				4914
				4915	/************************************************************************
				4916	* *
				4917	* Progressive parsing interfaces *
				4918	* *
				4919	************************************************************************/
				4920
				4921	/**
				4922	* docbParseLookupSequence:
				4923	* @ctxt: an SGML parser context
				4924	* @first: the first char to lookup
				4925	* @next: the next char to lookup or zero
				4926	* @third: the next char to lookup or zero
				4927	*
				4928	* Try to find if a sequence (first, next, third) or just (first next) or
				4929	* (first) is available in the input stream.
				4930	* This function has a side effect of (possibly) incrementing ctxt->checkIndex
				4931	* to avoid rescanning sequences of bytes, it DOES change the state of the
				4932	* parser, do not use liberally.
				4933	* This is basically similar to xmlParseLookupSequence()
				4934	*
				4935	* Returns the index to the current parsing point if the full sequence
				4936	* is available, -1 otherwise.
				4937	*/
				4938	static int
				4939	docbParseLookupSequence(docbParserCtxtPtr ctxt, xmlChar first,
				4940	xmlChar next, xmlChar third) {
				4941	int base, len;
				4942	docbParserInputPtr in;
				4943	const xmlChar *buf;
				4944
				4945	in = ctxt->input;
				4946	if (in == NULL) return(-1);
				4947	base = in->cur - in->base;
				4948	if (base < 0) return(-1);
				4949	if (ctxt->checkIndex > base)
				4950	base = ctxt->checkIndex;
				4951	if (in->buf == NULL) {
				4952	buf = in->base;
				4953	len = in->length;
				4954	} else {
				4955	buf = in->buf->buffer->content;
				4956	len = in->buf->buffer->use;
				4957	}
				4958	/* take into account the sequence length */
				4959	if (third) len -= 2;
				4960	else if (next) len --;
				4961	for (;base < len;base++) {
				4962	if (buf[base] == first) {
				4963	if (third != 0) {
				4964	if ((buf[base + 1] != next) \|\|
				4965	(buf[base + 2] != third)) continue;
				4966	} else if (next != 0) {
				4967	if (buf[base + 1] != next) continue;
				4968	}
				4969	ctxt->checkIndex = 0;
				4970	#ifdef DEBUG_PUSH
				4971	if (next == 0)
				4972	xmlGenericError(xmlGenericErrorContext,
				4973	"HPP: lookup '%c' found at %d\n",
				4974	first, base);
				4975	else if (third == 0)
				4976	xmlGenericError(xmlGenericErrorContext,
				4977	"HPP: lookup '%c%c' found at %d\n",
				4978	first, next, base);
				4979	else
				4980	xmlGenericError(xmlGenericErrorContext,
				4981	"HPP: lookup '%c%c%c' found at %d\n",
				4982	first, next, third, base);
				4983	#endif
				4984	return(base - (in->cur - in->base));
				4985	}
				4986	}
				4987	ctxt->checkIndex = base;
				4988	#ifdef DEBUG_PUSH
				4989	if (next == 0)
				4990	xmlGenericError(xmlGenericErrorContext,
				4991	"HPP: lookup '%c' failed\n", first);
				4992	else if (third == 0)
				4993	xmlGenericError(xmlGenericErrorContext,
				4994	"HPP: lookup '%c%c' failed\n", first, next);
				4995	else
				4996	xmlGenericError(xmlGenericErrorContext,
				4997	"HPP: lookup '%c%c%c' failed\n", first, next, third);
				4998	#endif
				4999	return(-1);
				5000	}
				5001
				5002	/**
				5003	* docbParseTryOrFinish:
				5004	* @ctxt: an SGML parser context
				5005	* @terminate: last chunk indicator
				5006	*
				5007	* Try to progress on parsing
				5008	*
				5009	* Returns zero if no parsing was possible
				5010	*/
				5011	static int
				5012	docbParseTryOrFinish(docbParserCtxtPtr ctxt, int terminate) {
				5013	int ret = 0;
				5014	docbParserInputPtr in;
				5015	int avail = 0;
				5016	xmlChar cur, next;
				5017
				5018	#ifdef DEBUG_PUSH
				5019	switch (ctxt->instate) {
				5020	case XML_PARSER_EOF:
				5021	xmlGenericError(xmlGenericErrorContext,
				5022	"HPP: try EOF\n"); break;
				5023	case XML_PARSER_START:
				5024	xmlGenericError(xmlGenericErrorContext,
				5025	"HPP: try START\n"); break;
				5026	case XML_PARSER_MISC:
				5027	xmlGenericError(xmlGenericErrorContext,
				5028	"HPP: try MISC\n");break;
				5029	case XML_PARSER_COMMENT:
				5030	xmlGenericError(xmlGenericErrorContext,
				5031	"HPP: try COMMENT\n");break;
				5032	case XML_PARSER_PROLOG:
				5033	xmlGenericError(xmlGenericErrorContext,
				5034	"HPP: try PROLOG\n");break;
				5035	case XML_PARSER_START_TAG:
				5036	xmlGenericError(xmlGenericErrorContext,
				5037	"HPP: try START_TAG\n");break;
				5038	case XML_PARSER_CONTENT:
				5039	xmlGenericError(xmlGenericErrorContext,
				5040	"HPP: try CONTENT\n");break;
				5041	case XML_PARSER_CDATA_SECTION:
				5042	xmlGenericError(xmlGenericErrorContext,
				5043	"HPP: try CDATA_SECTION\n");break;
				5044	case XML_PARSER_END_TAG:
				5045	xmlGenericError(xmlGenericErrorContext,
				5046	"HPP: try END_TAG\n");break;
				5047	case XML_PARSER_ENTITY_DECL:
				5048	xmlGenericError(xmlGenericErrorContext,
				5049	"HPP: try ENTITY_DECL\n");break;
				5050	case XML_PARSER_ENTITY_VALUE:
				5051	xmlGenericError(xmlGenericErrorContext,
				5052	"HPP: try ENTITY_VALUE\n");break;
				5053	case XML_PARSER_ATTRIBUTE_VALUE:
				5054	xmlGenericError(xmlGenericErrorContext,
				5055	"HPP: try ATTRIBUTE_VALUE\n");break;
				5056	case XML_PARSER_DTD:
				5057	xmlGenericError(xmlGenericErrorContext,
				5058	"HPP: try DTD\n");break;
				5059	case XML_PARSER_EPILOG:
				5060	xmlGenericError(xmlGenericErrorContext,
				5061	"HPP: try EPILOG\n");break;
				5062	case XML_PARSER_PI:
				5063	xmlGenericError(xmlGenericErrorContext,
				5064	"HPP: try PI\n");break;
				5065	}
				5066	#endif
				5067
				5068	while (1) {
				5069
				5070	in = ctxt->input;
				5071	if (in == NULL) break;
				5072	if (in->buf == NULL)
				5073	avail = in->length - (in->cur - in->base);
				5074	else
				5075	avail = in->buf->buffer->use - (in->cur - in->base);
				5076	if ((avail == 0) && (terminate)) {
				5077	docbAutoClose(ctxt, NULL);
				5078	if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
				5079	/*
				5080	* SAX: end of the document processing.
				5081	*/
				5082	ctxt->instate = XML_PARSER_EOF;
				5083	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5084	ctxt->sax->endDocument(ctxt->userData);
				5085	}
				5086	}
				5087	if (avail < 1)
				5088	goto done;
				5089	switch (ctxt->instate) {
				5090	case XML_PARSER_EOF:
				5091	/*
				5092	* Document parsing is done !
				5093	*/
				5094	goto done;
				5095	case XML_PARSER_START:
				5096	/*
				5097	* Very first chars read from the document flow.
				5098	*/
				5099	cur = in->cur[0];
				5100	if (IS_BLANK(cur)) {
				5101	SKIP_BLANKS;
				5102	if (in->buf == NULL)
				5103	avail = in->length - (in->cur - in->base);
				5104	else
				5105	avail = in->buf->buffer->use - (in->cur - in->base);
				5106	}
				5107	if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
				5108	ctxt->sax->setDocumentLocator(ctxt->userData,
				5109	&xmlDefaultSAXLocator);
				5110	if ((ctxt->sax) && (ctxt->sax->startDocument) &&
				5111	(!ctxt->disableSAX))
				5112	ctxt->sax->startDocument(ctxt->userData);
				5113
				5114	cur = in->cur[0];
				5115	next = in->cur[1];
				5116	if ((cur == '<') && (next == '!') &&
				5117	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				5118	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				5119	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				5120	(UPP(8) == 'E')) {
				5121	if ((!terminate) &&
				5122	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5123	goto done;
				5124	#ifdef DEBUG_PUSH
				5125	xmlGenericError(xmlGenericErrorContext,
				5126	"HPP: Parsing internal subset\n");
				5127	#endif
				5128	docbParseDocTypeDecl(ctxt);
				5129	ctxt->instate = XML_PARSER_PROLOG;
				5130	#ifdef DEBUG_PUSH
				5131	xmlGenericError(xmlGenericErrorContext,
				5132	"HPP: entering PROLOG\n");
				5133	#endif
				5134	} else {
				5135	ctxt->instate = XML_PARSER_MISC;
				5136	}
				5137	#ifdef DEBUG_PUSH
				5138	xmlGenericError(xmlGenericErrorContext,
				5139	"HPP: entering MISC\n");
				5140	#endif
				5141	break;
				5142	case XML_PARSER_MISC:
				5143	SKIP_BLANKS;
				5144	if (in->buf == NULL)
				5145	avail = in->length - (in->cur - in->base);
				5146	else
				5147	avail = in->buf->buffer->use - (in->cur - in->base);
				5148	if (avail < 2)
				5149	goto done;
				5150	cur = in->cur[0];
				5151	next = in->cur[1];
				5152	if ((cur == '<') && (next == '!') &&
				5153	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5154	if ((!terminate) &&
				5155	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5156	goto done;
				5157	#ifdef DEBUG_PUSH
				5158	xmlGenericError(xmlGenericErrorContext,
				5159	"HPP: Parsing Comment\n");
				5160	#endif
				5161	docbParseComment(ctxt);
				5162	ctxt->instate = XML_PARSER_MISC;
				5163	} else if ((cur == '<') && (next == '!') &&
				5164	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				5165	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				5166	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				5167	(UPP(8) == 'E')) {
				5168	if ((!terminate) &&
				5169	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5170	goto done;
				5171	#ifdef DEBUG_PUSH
				5172	xmlGenericError(xmlGenericErrorContext,
				5173	"HPP: Parsing internal subset\n");
				5174	#endif
				5175	docbParseDocTypeDecl(ctxt);
				5176	ctxt->instate = XML_PARSER_PROLOG;
				5177	#ifdef DEBUG_PUSH
				5178	xmlGenericError(xmlGenericErrorContext,
				5179	"HPP: entering PROLOG\n");
				5180	#endif
				5181	} else if ((cur == '<') && (next == '!') &&
				5182	(avail < 9)) {
				5183	goto done;
				5184	} else {
				5185	ctxt->instate = XML_PARSER_START_TAG;
				5186	#ifdef DEBUG_PUSH
				5187	xmlGenericError(xmlGenericErrorContext,
				5188	"HPP: entering START_TAG\n");
				5189	#endif
				5190	}
				5191	break;
				5192	case XML_PARSER_PROLOG:
				5193	SKIP_BLANKS;
				5194	if (in->buf == NULL)
				5195	avail = in->length - (in->cur - in->base);
				5196	else
				5197	avail = in->buf->buffer->use - (in->cur - in->base);
				5198	if (avail < 2)
				5199	goto done;
				5200	cur = in->cur[0];
				5201	next = in->cur[1];
				5202	if ((cur == '<') && (next == '!') &&
				5203	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5204	if ((!terminate) &&
				5205	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5206	goto done;
				5207	#ifdef DEBUG_PUSH
				5208	xmlGenericError(xmlGenericErrorContext,
				5209	"HPP: Parsing Comment\n");
				5210	#endif
				5211	docbParseComment(ctxt);
				5212	ctxt->instate = XML_PARSER_PROLOG;
				5213	} else if ((cur == '<') && (next == '!') &&
				5214	(avail < 4)) {
				5215	goto done;
				5216	} else {
				5217	ctxt->instate = XML_PARSER_START_TAG;
				5218	#ifdef DEBUG_PUSH
				5219	xmlGenericError(xmlGenericErrorContext,
				5220	"HPP: entering START_TAG\n");
				5221	#endif
				5222	}
				5223	break;
				5224	case XML_PARSER_EPILOG:
				5225	if (in->buf == NULL)
				5226	avail = in->length - (in->cur - in->base);
				5227	else
				5228	avail = in->buf->buffer->use - (in->cur - in->base);
				5229	if (avail < 1)
				5230	goto done;
				5231	cur = in->cur[0];
				5232	if (IS_BLANK(cur)) {
				5233	docbParseCharData(ctxt);
				5234	goto done;
				5235	}
				5236	if (avail < 2)
				5237	goto done;
				5238	next = in->cur[1];
				5239	if ((cur == '<') && (next == '!') &&
				5240	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5241	if ((!terminate) &&
				5242	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5243	goto done;
				5244	#ifdef DEBUG_PUSH
				5245	xmlGenericError(xmlGenericErrorContext,
				5246	"HPP: Parsing Comment\n");
				5247	#endif
				5248	docbParseComment(ctxt);
				5249	ctxt->instate = XML_PARSER_EPILOG;
				5250	} else if ((cur == '<') && (next == '!') &&
				5251	(avail < 4)) {
				5252	goto done;
				5253	} else {
				5254	ctxt->errNo = XML_ERR_DOCUMENT_END;
				5255	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5256	ctxt->sax->error(ctxt->userData,
				5257	"Extra content at the end of the document\n");
				5258	ctxt->wellFormed = 0;
				5259	ctxt->instate = XML_PARSER_EOF;
				5260	#ifdef DEBUG_PUSH
				5261	xmlGenericError(xmlGenericErrorContext,
				5262	"HPP: entering EOF\n");
				5263	#endif
				5264	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5265	ctxt->sax->endDocument(ctxt->userData);
				5266	goto done;
				5267	}
				5268	break;
				5269	case XML_PARSER_START_TAG: {
				5270	xmlChar name, oldname;
				5271	int depth = ctxt->nameNr;
				5272	docbElemDescPtr info;
				5273
				5274	if (avail < 2)
				5275	goto done;
				5276	cur = in->cur[0];
				5277	if (cur != '<') {
				5278	ctxt->instate = XML_PARSER_CONTENT;
				5279	#ifdef DEBUG_PUSH
				5280	xmlGenericError(xmlGenericErrorContext,
				5281	"HPP: entering CONTENT\n");
				5282	#endif
				5283	break;
				5284	}
				5285	if ((!terminate) &&
				5286	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5287	goto done;
				5288
				5289	oldname = xmlStrdup(ctxt->name);
				5290	docbParseStartTag(ctxt);
				5291	name = ctxt->name;
				5292	#ifdef DEBUG
				5293	if (oldname == NULL)
				5294	xmlGenericError(xmlGenericErrorContext,
				5295	"Start of element %s\n", name);
				5296	else if (name == NULL)
				5297	xmlGenericError(xmlGenericErrorContext,
				5298	"Start of element failed, was %s\n",
				5299	oldname);
				5300	else
				5301	xmlGenericError(xmlGenericErrorContext,
				5302	"Start of element %s, was %s\n",
				5303	name, oldname);
				5304	#endif
				5305	if (((depth == ctxt->nameNr) &&
				5306	(xmlStrEqual(oldname, ctxt->name))) \|\|
				5307	(name == NULL)) {
				5308	if (CUR == '>')
				5309	NEXT;
				5310	if (oldname != NULL)
				5311	xmlFree(oldname);
				5312	break;
				5313	}
				5314	if (oldname != NULL)
				5315	xmlFree(oldname);
				5316
				5317	/*
				5318	* Lookup the info for that element.
				5319	*/
				5320	info = docbTagLookup(name);
				5321	if (info == NULL) {
				5322	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5323	ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
				5324	name);
				5325	ctxt->wellFormed = 0;
				5326	} else if (info->depr) {
				5327	/***************************
				5328	if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
				5329	ctxt->sax->warning(ctxt->userData,
				5330	"Tag %s is deprecated\n",
				5331	name);
				5332	***************************/
				5333	}
				5334
				5335	/*
				5336	* Check for an Empty Element labelled the XML/SGML way
				5337	*/
				5338	if ((CUR == '/') && (NXT(1) == '>')) {
				5339	SKIP(2);
				5340	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				5341	ctxt->sax->endElement(ctxt->userData, name);
				5342	oldname = docbnamePop(ctxt);
				5343	#ifdef DEBUG
				5344	xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n",
				5345	oldname);
				5346	#endif
				5347	if (oldname != NULL)
				5348	xmlFree(oldname);
				5349	ctxt->instate = XML_PARSER_CONTENT;
				5350	#ifdef DEBUG_PUSH
				5351	xmlGenericError(xmlGenericErrorContext,
				5352	"HPP: entering CONTENT\n");
				5353	#endif
				5354	break;
				5355	}
				5356
				5357	if (CUR == '>') {
				5358	NEXT;
				5359	} else {
				5360	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5361	ctxt->sax->error(ctxt->userData,
				5362	"Couldn't find end of Start Tag %s\n",
				5363	name);
				5364	ctxt->wellFormed = 0;
				5365
				5366	/*
				5367	* end of parsing of this node.
				5368	*/
				5369	if (xmlStrEqual(name, ctxt->name)) {
				5370	nodePop(ctxt);
				5371	oldname = docbnamePop(ctxt);
				5372	#ifdef DEBUG
				5373	xmlGenericError(xmlGenericErrorContext,
				5374	"End of start tag problem: popping out %s\n", oldname);
				5375	#endif
				5376	if (oldname != NULL)
				5377	xmlFree(oldname);
				5378	}
				5379
				5380	ctxt->instate = XML_PARSER_CONTENT;
				5381	#ifdef DEBUG_PUSH
				5382	xmlGenericError(xmlGenericErrorContext,
				5383	"HPP: entering CONTENT\n");
				5384	#endif
				5385	break;
				5386	}
				5387
				5388	/*
				5389	* Check for an Empty Element from DTD definition
				5390	*/
				5391	if ((info != NULL) && (info->empty)) {
				5392	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				5393	ctxt->sax->endElement(ctxt->userData, name);
				5394	oldname = docbnamePop(ctxt);
				5395	#ifdef DEBUG
				5396	xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
				5397	#endif
				5398	if (oldname != NULL)
				5399	xmlFree(oldname);
				5400	}
				5401	ctxt->instate = XML_PARSER_CONTENT;
				5402	#ifdef DEBUG_PUSH
				5403	xmlGenericError(xmlGenericErrorContext,
				5404	"HPP: entering CONTENT\n");
				5405	#endif
				5406	break;
				5407	}
				5408	case XML_PARSER_CONTENT: {
				5409	long cons;
				5410	/*
				5411	* Handle preparsed entities and charRef
				5412	*/
				5413	if (ctxt->token != 0) {
				5414	xmlChar chr[2] = { 0 , 0 } ;
				5415
				5416	chr[0] = (xmlChar) ctxt->token;
				5417	docbCheckParagraph(ctxt);
				5418	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				5419	ctxt->sax->characters(ctxt->userData, chr, 1);
				5420	ctxt->token = 0;
				5421	ctxt->checkIndex = 0;
				5422	}
				5423	if ((avail == 1) && (terminate)) {
				5424	cur = in->cur[0];
				5425	if ((cur != '<') && (cur != '&')) {
				5426	if (ctxt->sax != NULL) {
				5427	if (IS_BLANK(cur)) {
				5428	if (ctxt->sax->ignorableWhitespace != NULL)
				5429	ctxt->sax->ignorableWhitespace(
				5430	ctxt->userData, &cur, 1);
				5431	} else {
				5432	docbCheckParagraph(ctxt);
				5433	if (ctxt->sax->characters != NULL)
				5434	ctxt->sax->characters(
				5435	ctxt->userData, &cur, 1);
				5436	}
				5437	}
				5438	ctxt->token = 0;
				5439	ctxt->checkIndex = 0;
				5440	NEXT;
				5441	}
				5442	break;
				5443	}
				5444	if (avail < 2)
				5445	goto done;
				5446	cur = in->cur[0];
				5447	next = in->cur[1];
				5448	cons = ctxt->nbChars;
				5449	/*
				5450	* Sometimes DOCTYPE arrives in the middle of the document
				5451	*/
				5452	if ((cur == '<') && (next == '!') &&
				5453	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				5454	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				5455	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				5456	(UPP(8) == 'E')) {
				5457	if ((!terminate) &&
				5458	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5459	goto done;
				5460	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5461	ctxt->sax->error(ctxt->userData,
				5462	"Misplaced DOCTYPE declaration\n");
				5463	ctxt->wellFormed = 0;
				5464	docbParseDocTypeDecl(ctxt);
				5465	} else if ((cur == '<') && (next == '!') &&
				5466	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5467	if ((!terminate) &&
				5468	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5469	goto done;
				5470	#ifdef DEBUG_PUSH
				5471	xmlGenericError(xmlGenericErrorContext,
				5472	"HPP: Parsing Comment\n");
				5473	#endif
				5474	docbParseComment(ctxt);
				5475	ctxt->instate = XML_PARSER_CONTENT;
				5476	} else if ((cur == '<') && (next == '!') && (avail < 4)) {
				5477	goto done;
				5478	} else if ((cur == '<') && (next == '/')) {
				5479	ctxt->instate = XML_PARSER_END_TAG;
				5480	ctxt->checkIndex = 0;
				5481	#ifdef DEBUG_PUSH
				5482	xmlGenericError(xmlGenericErrorContext,
				5483	"HPP: entering END_TAG\n");
				5484	#endif
				5485	break;
				5486	} else if (cur == '<') {
				5487	ctxt->instate = XML_PARSER_START_TAG;
				5488	ctxt->checkIndex = 0;
				5489	#ifdef DEBUG_PUSH
				5490	xmlGenericError(xmlGenericErrorContext,
				5491	"HPP: entering START_TAG\n");
				5492	#endif
				5493	break;
				5494	} else if (cur == '&') {
				5495	if ((!terminate) &&
				5496	(docbParseLookupSequence(ctxt, ';', 0, 0) < 0))
				5497	goto done;
				5498	#ifdef DEBUG_PUSH
				5499	xmlGenericError(xmlGenericErrorContext,
				5500	"HPP: Parsing Reference\n");
				5501	#endif
				5502	/* TODO: check generation of subtrees if noent !!! */
				5503	docbParseReference(ctxt);
				5504	} else {
				5505	/* TODO Avoid the extra copy, handle directly !!!!!! */
				5506	/*
				5507	* Goal of the following test is :
				5508	* - minimize calls to the SAX 'character' callback
				5509	* when they are mergeable
				5510	*/
				5511	if ((ctxt->inputNr == 1) &&
				5512	(avail < DOCB_PARSER_BIG_BUFFER_SIZE)) {
				5513	if ((!terminate) &&
				5514	(docbParseLookupSequence(ctxt, '<', 0, 0) < 0))
				5515	goto done;
				5516	}
				5517	ctxt->checkIndex = 0;
				5518	#ifdef DEBUG_PUSH
				5519	xmlGenericError(xmlGenericErrorContext,
				5520	"HPP: Parsing char data\n");
				5521	#endif
				5522	docbParseCharData(ctxt);
				5523	}
				5524	if (cons == ctxt->nbChars) {
				5525	if (ctxt->node != NULL) {
				5526	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5527	ctxt->sax->error(ctxt->userData,
				5528	"detected an error in element content\n");
				5529	ctxt->wellFormed = 0;
				5530	NEXT;
				5531	}
				5532	break;
				5533	}
				5534
				5535	break;
				5536	}
				5537	case XML_PARSER_END_TAG:
				5538	if (avail < 2)
				5539	goto done;
				5540	if ((!terminate) &&
				5541	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5542	goto done;
				5543	docbParseEndTag(ctxt);
				5544	if (ctxt->nameNr == 0) {
				5545	ctxt->instate = XML_PARSER_EPILOG;
				5546	} else {
				5547	ctxt->instate = XML_PARSER_CONTENT;
				5548	}
				5549	ctxt->checkIndex = 0;
				5550	#ifdef DEBUG_PUSH
				5551	xmlGenericError(xmlGenericErrorContext,
				5552	"HPP: entering CONTENT\n");
				5553	#endif
				5554	break;
				5555	case XML_PARSER_CDATA_SECTION:
				5556	xmlGenericError(xmlGenericErrorContext,
				5557	"HPP: internal error, state == CDATA\n");
				5558	ctxt->instate = XML_PARSER_CONTENT;
				5559	ctxt->checkIndex = 0;
				5560	#ifdef DEBUG_PUSH
				5561	xmlGenericError(xmlGenericErrorContext,
				5562	"HPP: entering CONTENT\n");
				5563	#endif
				5564	break;
				5565	case XML_PARSER_DTD:
				5566	xmlGenericError(xmlGenericErrorContext,
				5567	"HPP: internal error, state == DTD\n");
				5568	ctxt->instate = XML_PARSER_CONTENT;
				5569	ctxt->checkIndex = 0;
				5570	#ifdef DEBUG_PUSH
				5571	xmlGenericError(xmlGenericErrorContext,
				5572	"HPP: entering CONTENT\n");
				5573	#endif
				5574	break;
				5575	case XML_PARSER_COMMENT:
				5576	xmlGenericError(xmlGenericErrorContext,
				5577	"HPP: internal error, state == COMMENT\n");
				5578	ctxt->instate = XML_PARSER_CONTENT;
				5579	ctxt->checkIndex = 0;
				5580	#ifdef DEBUG_PUSH
				5581	xmlGenericError(xmlGenericErrorContext,
				5582	"HPP: entering CONTENT\n");
				5583	#endif
				5584	break;
				5585	case XML_PARSER_PI:
				5586	xmlGenericError(xmlGenericErrorContext,
				5587	"HPP: internal error, state == PI\n");
				5588	ctxt->instate = XML_PARSER_CONTENT;
				5589	ctxt->checkIndex = 0;
				5590	#ifdef DEBUG_PUSH
				5591	xmlGenericError(xmlGenericErrorContext,
				5592	"HPP: entering CONTENT\n");
				5593	#endif
				5594	break;
				5595	case XML_PARSER_ENTITY_DECL:
				5596	xmlGenericError(xmlGenericErrorContext,
				5597	"HPP: internal error, state == ENTITY_DECL\n");
				5598	ctxt->instate = XML_PARSER_CONTENT;
				5599	ctxt->checkIndex = 0;
				5600	#ifdef DEBUG_PUSH
				5601	xmlGenericError(xmlGenericErrorContext,
				5602	"HPP: entering CONTENT\n");
				5603	#endif
				5604	break;
				5605	case XML_PARSER_ENTITY_VALUE:
				5606	xmlGenericError(xmlGenericErrorContext,
				5607	"HPP: internal error, state == ENTITY_VALUE\n");
				5608	ctxt->instate = XML_PARSER_CONTENT;
				5609	ctxt->checkIndex = 0;
				5610	#ifdef DEBUG_PUSH
				5611	xmlGenericError(xmlGenericErrorContext,
				5612	"HPP: entering DTD\n");
				5613	#endif
				5614	break;
				5615	case XML_PARSER_ATTRIBUTE_VALUE:
				5616	xmlGenericError(xmlGenericErrorContext,
				5617	"HPP: internal error, state == ATTRIBUTE_VALUE\n");
				5618	ctxt->instate = XML_PARSER_START_TAG;
				5619	ctxt->checkIndex = 0;
				5620	#ifdef DEBUG_PUSH
				5621	xmlGenericError(xmlGenericErrorContext,
				5622	"HPP: entering START_TAG\n");
				5623	#endif
				5624	break;
				5625	case XML_PARSER_SYSTEM_LITERAL:
				5626	xmlGenericError(xmlGenericErrorContext,
				5627	"HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n");
				5628	ctxt->instate = XML_PARSER_CONTENT;
				5629	ctxt->checkIndex = 0;
				5630	#ifdef DEBUG_PUSH
				5631	xmlGenericError(xmlGenericErrorContext,
				5632	"HPP: entering CONTENT\n");
				5633	#endif
				5634	break;
				5635
				5636	case XML_PARSER_IGNORE:
				5637	xmlGenericError(xmlGenericErrorContext,
				5638	"HPP: internal error, state == XML_PARSER_IGNORE\n");
				5639	ctxt->instate = XML_PARSER_CONTENT;
				5640	ctxt->checkIndex = 0;
				5641	#ifdef DEBUG_PUSH
				5642	xmlGenericError(xmlGenericErrorContext,
				5643	"HPP: entering CONTENT\n");
				5644	#endif
				5645	break;
				5646	}
				5647	}
				5648	done:
				5649	if ((avail == 0) && (terminate)) {
				5650	docbAutoClose(ctxt, NULL);
				5651	if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
				5652	/*
				5653	* SAX: end of the document processing.
				5654	*/
				5655	ctxt->instate = XML_PARSER_EOF;
				5656	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5657	ctxt->sax->endDocument(ctxt->userData);
				5658	}
				5659	}
				5660	if ((ctxt->myDoc != NULL) &&
				5661	((terminate) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|
				5662	(ctxt->instate == XML_PARSER_EPILOG))) {
				5663	xmlDtdPtr dtd;
				5664	dtd = ctxt->myDoc->intSubset;
				5665	if (dtd == NULL)
				5666	ctxt->myDoc->intSubset =
				5667	xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
				5668	BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
				5669	BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
				5670	}
				5671	#ifdef DEBUG_PUSH
				5672	xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
				5673	#endif
				5674	return(ret);
				5675	}
				5676
				5677	/**
				5678	* docbParseChunk:
				5679	* @ctxt: an XML parser context
				5680	* @chunk: an char array
				5681	* @size: the size in byte of the chunk
				5682	* @terminate: last chunk indicator
				5683	*
				5684	* Parse a Chunk of memory
				5685	*
				5686	* Returns zero if no error, the xmlParserErrors otherwise.
				5687	*/
				5688	int
				5689	docbParseChunk(docbParserCtxtPtr ctxt, const char *chunk, int size,
				5690	int terminate) {
				5691	if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
				5692	(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
				5693	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
				5694	int cur = ctxt->input->cur - ctxt->input->base;
				5695
				5696	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
				5697	ctxt->input->base = ctxt->input->buf->buffer->content + base;
				5698	ctxt->input->cur = ctxt->input->base + cur;
				5699	#ifdef DEBUG_PUSH
				5700	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
				5701	#endif
				5702
				5703	if ((terminate) \|\| (ctxt->input->buf->buffer->use > 80))
				5704	docbParseTryOrFinish(ctxt, terminate);
				5705	} else if (ctxt->instate != XML_PARSER_EOF) {
				5706	xmlParserInputBufferPush(ctxt->input->buf, 0, "");
				5707	docbParseTryOrFinish(ctxt, terminate);
				5708	}
				5709	if (terminate) {
				5710	if ((ctxt->instate != XML_PARSER_EOF) &&
				5711	(ctxt->instate != XML_PARSER_EPILOG) &&
				5712	(ctxt->instate != XML_PARSER_MISC)) {
				5713	ctxt->errNo = XML_ERR_DOCUMENT_END;
				5714	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5715	ctxt->sax->error(ctxt->userData,
				5716	"Extra content at the end of the document\n");
				5717	ctxt->wellFormed = 0;
				5718	}
				5719	if (ctxt->instate != XML_PARSER_EOF) {
				5720	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5721	ctxt->sax->endDocument(ctxt->userData);
				5722	}
				5723	ctxt->instate = XML_PARSER_EOF;
				5724	}
				5725	return((xmlParserErrors) ctxt->errNo);
				5726	}
				5727
				5728	/************************************************************************
				5729	* *
				5730	* User entry points *
				5731	* *
				5732	************************************************************************/
				5733
				5734	/**
				5735	* docbCreatePushParserCtxt :
				5736	* @sax: a SAX handler
				5737	* @user_data: The user data returned on SAX callbacks
				5738	* @chunk: a pointer to an array of chars
				5739	* @size: number of chars in the array
				5740	* @filename: an optional file name or URI
				5741	* @enc: an optional encoding
				5742	*
				5743	* Create a parser context for using the DocBook SGML parser in push mode
				5744	* To allow content encoding detection, @size should be >= 4
				5745	* The value of @filename is used for fetching external entities
				5746	* and error/warning reports.
				5747	*
				5748	* Returns the new parser context or NULL
				5749	*/
				5750	docbParserCtxtPtr
				5751	docbCreatePushParserCtxt(docbSAXHandlerPtr sax, void *user_data,
				5752	const char chunk, int size, const char filename,
				5753	xmlCharEncoding enc) {
				5754	docbParserCtxtPtr ctxt;
				5755	docbParserInputPtr inputStream;
				5756	xmlParserInputBufferPtr buf;
				5757
				5758	buf = xmlAllocParserInputBuffer(enc);
				5759	if (buf == NULL) return(NULL);
				5760
				5761	ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
				5762	if (ctxt == NULL) {
				5763	xmlFree(buf);
				5764	return(NULL);
				5765	}
				5766	memset(ctxt, 0, sizeof(docbParserCtxt));
				5767	docbInitParserCtxt(ctxt);
				5768	if (sax != NULL) {
				5769	if (ctxt->sax != &docbDefaultSAXHandler)
				5770	xmlFree(ctxt->sax);
				5771	ctxt->sax = (docbSAXHandlerPtr) xmlMalloc(sizeof(docbSAXHandler));
				5772	if (ctxt->sax == NULL) {
				5773	xmlFree(buf);
				5774	xmlFree(ctxt);
				5775	return(NULL);
				5776	}
				5777	memcpy(ctxt->sax, sax, sizeof(docbSAXHandler));
				5778	if (user_data != NULL)
				5779	ctxt->userData = user_data;
				5780	}
				5781	if (filename == NULL) {
				5782	ctxt->directory = NULL;
				5783	} else {
				5784	ctxt->directory = xmlParserGetDirectory(filename);
				5785	}
				5786
				5787	inputStream = docbNewInputStream(ctxt);
				5788	if (inputStream == NULL) {
				5789	xmlFreeParserCtxt(ctxt);
				5790	return(NULL);
				5791	}
				5792
				5793	if (filename == NULL)
				5794	inputStream->filename = NULL;
				5795	else
				5796	inputStream->filename = xmlMemStrdup(filename);
				5797	inputStream->buf = buf;
				5798	inputStream->base = inputStream->buf->buffer->content;
				5799	inputStream->cur = inputStream->buf->buffer->content;
				5800
				5801	inputPush(ctxt, inputStream);
				5802
				5803	if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
				5804	(ctxt->input->buf != NULL)) {
				5805	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
				5806	#ifdef DEBUG_PUSH
				5807	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
				5808	#endif
				5809	}
				5810
				5811	return(ctxt);
				5812	}
				5813
				5814	/**
				5815	* docbSAXParseDoc :
				5816	* @cur: a pointer to an array of xmlChar
				5817	* @encoding: a free form C string describing the SGML document encoding, or NULL
				5818	* @sax: the SAX handler block
				5819	* @userData: if using SAX, this pointer will be provided on callbacks.
				5820	*
				5821	* parse an SGML in-memory document and build a tree.
				5822	* It use the given SAX function block to handle the parsing callback.
				5823	* If sax is NULL, fallback to the default DOM tree building routines.
				5824	*
				5825	* Returns the resulting document tree
				5826	*/
				5827
				5828	docbDocPtr
				5829	docbSAXParseDoc(xmlChar cur, const char encoding, docbSAXHandlerPtr sax, void *userData) {
				5830	docbDocPtr ret;
				5831	docbParserCtxtPtr ctxt;
				5832
				5833	if (cur == NULL) return(NULL);
				5834
				5835
				5836	ctxt = docbCreateDocParserCtxt(cur, encoding);
				5837	if (ctxt == NULL) return(NULL);
				5838	if (sax != NULL) {
				5839	ctxt->sax = sax;
				5840	ctxt->userData = userData;
				5841	}
				5842
				5843	docbParseDocument(ctxt);
				5844	ret = ctxt->myDoc;
				5845	if (sax != NULL) {
				5846	ctxt->sax = NULL;
				5847	ctxt->userData = NULL;
				5848	}
				5849	docbFreeParserCtxt(ctxt);
				5850
				5851	return(ret);
				5852	}
				5853
				5854	/**
				5855	* docbParseDoc :
				5856	* @cur: a pointer to an array of xmlChar
				5857	* @encoding: a free form C string describing the SGML document encoding, or NULL
				5858	*
				5859	* parse an SGML in-memory document and build a tree.
				5860	*
				5861	* Returns the resulting document tree
				5862	*/
				5863
				5864	docbDocPtr
				5865	docbParseDoc(xmlChar cur, const char encoding) {
				5866	return(docbSAXParseDoc(cur, encoding, NULL, NULL));
				5867	}
				5868
				5869
				5870	/**
				5871	* docbCreateFileParserCtxt :
				5872	* @filename: the filename
				5873	* @encoding: a free form C string describing the SGML document encoding, or NULL
				5874	*
				5875	* Create a parser context for a file content.
				5876	* Automatic support for ZLIB/Compress compressed document is provided
				5877	* by default if found at compile-time.
				5878	*
				5879	* Returns the new parser context or NULL
				5880	*/
				5881	docbParserCtxtPtr
				5882	docbCreateFileParserCtxt(const char filename, const char encoding)
				5883	{
				5884	docbParserCtxtPtr ctxt;
				5885	docbParserInputPtr inputStream;
				5886	xmlParserInputBufferPtr buf;
				5887	/* sgmlCharEncoding enc; */
				5888
				5889	buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
				5890	if (buf == NULL) return(NULL);
				5891
				5892	ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
				5893	if (ctxt == NULL) {
				5894	perror("malloc");
				5895	return(NULL);
				5896	}
				5897	memset(ctxt, 0, sizeof(docbParserCtxt));
				5898	docbInitParserCtxt(ctxt);
				5899	inputStream = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput));
				5900	if (inputStream == NULL) {
				5901	perror("malloc");
				5902	xmlFree(ctxt);
				5903	return(NULL);
				5904	}
				5905	memset(inputStream, 0, sizeof(docbParserInput));
				5906
				5907	inputStream->filename = xmlMemStrdup(filename);
				5908	inputStream->line = 1;
				5909	inputStream->col = 1;
				5910	inputStream->buf = buf;
				5911	inputStream->directory = NULL;
				5912
				5913	inputStream->base = inputStream->buf->buffer->content;
				5914	inputStream->cur = inputStream->buf->buffer->content;
				5915	inputStream->free = NULL;
				5916
				5917	inputPush(ctxt, inputStream);
				5918	return(ctxt);
				5919	}
				5920
				5921	/**
				5922	* docbSAXParseFile :
				5923	* @filename: the filename
				5924	* @encoding: a free form C string describing the SGML document encoding, or NULL
				5925	* @sax: the SAX handler block
				5926	* @userData: if using SAX, this pointer will be provided on callbacks.
				5927	*
				5928	* parse an SGML file and build a tree. Automatic support for ZLIB/Compress
				5929	* compressed document is provided by default if found at compile-time.
				5930	* It use the given SAX function block to handle the parsing callback.
				5931	* If sax is NULL, fallback to the default DOM tree building routines.
				5932	*
				5933	* Returns the resulting document tree
				5934	*/
				5935
				5936	docbDocPtr
				5937	docbSAXParseFile(const char filename, const char encoding, docbSAXHandlerPtr sax,
				5938	void *userData) {
				5939	docbDocPtr ret;
				5940	docbParserCtxtPtr ctxt;
				5941	docbSAXHandlerPtr oldsax = NULL;
				5942
				5943	ctxt = docbCreateFileParserCtxt(filename, encoding);
				5944	if (ctxt == NULL) return(NULL);
				5945	if (sax != NULL) {
				5946	oldsax = ctxt->sax;
				5947	ctxt->sax = sax;
				5948	ctxt->userData = userData;
				5949	}
				5950
				5951	docbParseDocument(ctxt);
				5952
				5953	ret = ctxt->myDoc;
				5954	if (sax != NULL) {
				5955	ctxt->sax = oldsax;
				5956	ctxt->userData = NULL;
				5957	}
				5958	docbFreeParserCtxt(ctxt);
				5959
				5960	return(ret);
				5961	}
				5962
				5963	/**
				5964	* docbParseFile :
				5965	* @filename: the filename
				5966	* @encoding: a free form C string describing document encoding, or NULL
				5967	*
				5968	* parse a Docbook SGML file and build a tree. Automatic support for
				5969	* ZLIB/Compress compressed document is provided by default if found
				5970	* at compile-time.
				5971	*
				5972	* Returns the resulting document tree
				5973	*/
				5974
				5975	docbDocPtr
				5976	docbParseFile(const char filename, const char encoding) {
				5977	return(docbSAXParseFile(filename, encoding, NULL, NULL));
				5978	}
				5979
				5980	#endif /* LIBXML_DOCB_ENABLED */