Blame - DOCBparser.c - fp2-dev/platform/external/libxml2

blob: 93e17559623750cea94518acdb62a0214c75ff10 [file] [log] [blame]

Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	1	/*
				2	* DOCBparser.c : an attempt to parse SGML Docbook documents
				3	*
				4	* See Copyright for the status of this software.
				5	*
				6	* Daniel.Veillard@w3.org
				7	*/
				8
				9	#include "libxml.h"
				10	#ifdef LIBXML_DOCB_ENABLED
				11
				12	#include <string.h>
				13	#ifdef HAVE_CTYPE_H
				14	#include <ctype.h>
				15	#endif
				16	#ifdef HAVE_STDLIB_H
				17	#include <stdlib.h>
				18	#endif
				19	#ifdef HAVE_SYS_STAT_H
				20	#include <sys/stat.h>
				21	#endif
				22	#ifdef HAVE_FCNTL_H
				23	#include <fcntl.h>
				24	#endif
				25	#ifdef HAVE_UNISTD_H
				26	#include <unistd.h>
				27	#endif
				28	#ifdef HAVE_ZLIB_H
				29	#include <zlib.h>
				30	#endif
				31
				32	#include <libxml/xmlmemory.h>
				33	#include <libxml/tree.h>
				34	#include <libxml/SAX.h>
				35	#include <libxml/parser.h>
				36	#include <libxml/parserInternals.h>
				37	#include <libxml/xmlerror.h>
				38	#include <libxml/DOCBparser.h>
				39	#include <libxml/entities.h>
				40	#include <libxml/encoding.h>
				41	#include <libxml/valid.h>
				42	#include <libxml/xmlIO.h>
				43	#include <libxml/uri.h>
				44
				45	/*
				46	* Internal description of an SGML entity
				47	*/
				48	typedef struct _docbEntityDesc docbEntityDesc;
				49	typedef docbEntityDesc *docbEntityDescPtr;
				50	struct _docbEntityDesc {
				51	int value; /* the UNICODE value for the character */
				52	const char name; / The entity name */
				53	const char desc; / the description */
				54	};
				55
				56	#if 0
				57	docbElemDescPtr docbTagLookup (const xmlChar *tag);
				58	docbEntityDescPtr docbEntityLookup(const xmlChar *name);
				59	docbEntityDescPtr docbEntityValueLookup(int value);
				60
				61	int docbIsAutoClosed(docbDocPtr doc,
				62	docbNodePtr elem);
				63	int docbAutoCloseTag(docbDocPtr doc,
				64	const xmlChar *name,
				65	docbNodePtr elem);
				66
				67	#endif
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	68	static int docbParseCharRef(docbParserCtxtPtr ctxt);
				69	static xmlEntityPtr docbParseEntityRef(docbParserCtxtPtr ctxt,
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	70	xmlChar **str);
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	71	static void docbParseElement(docbParserCtxtPtr ctxt);
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame^]	72	static void docbParseContent(docbParserCtxtPtr ctxt);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	73
				74	/*
				75	* Internal description of an SGML element
				76	*/
				77	typedef struct _docbElemDesc docbElemDesc;
				78	typedef docbElemDesc *docbElemDescPtr;
				79	struct _docbElemDesc {
				80	const char name; / The tag name */
				81	int startTag; /* Whether the start tag can be implied */
				82	int endTag; /* Whether the end tag can be implied */
				83	int empty; /* Is this an empty element ? */
				84	int depr; /* Is this a deprecated element ? */
				85	int dtd; /* 1: only in Loose DTD, 2: only Frameset one */
				86	const char desc; / the description */
				87	};
				88
				89
				90	#define DOCB_MAX_NAMELEN 1000
				91	#define DOCB_PARSER_BIG_BUFFER_SIZE 1000
				92	#define DOCB_PARSER_BUFFER_SIZE 100
				93
				94	/* #define DEBUG */
				95	/* #define DEBUG_PUSH */
				96
				97	/************************************************************************
				98	* *
				99	* Parser stacks related functions and macros *
				100	* *
				101	************************************************************************/
				102
				103	/*
				104	* Generic function for accessing stacks in the Parser Context
				105	*/
				106
				107	#define PUSH_AND_POP(scope, type, name) \
				108	scope int docb##name##Push(docbParserCtxtPtr ctxt, type value) { \
				109	if (ctxt->name##Nr >= ctxt->name##Max) { \
				110	ctxt->name##Max *= 2; \
				111	ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
				112	ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
				113	if (ctxt->name##Tab == NULL) { \
				114	xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); \
				115	return(0); \
				116	} \
				117	} \
				118	ctxt->name##Tab[ctxt->name##Nr] = value; \
				119	ctxt->name = value; \
				120	return(ctxt->name##Nr++); \
				121	} \
				122	scope type docb##name##Pop(docbParserCtxtPtr ctxt) { \
				123	type ret; \
				124	if (ctxt->name##Nr < 0) return(0); \
				125	ctxt->name##Nr--; \
				126	if (ctxt->name##Nr < 0) return(0); \
				127	if (ctxt->name##Nr > 0) \
				128	ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
				129	else \
				130	ctxt->name = NULL; \
				131	ret = ctxt->name##Tab[ctxt->name##Nr]; \
				132	ctxt->name##Tab[ctxt->name##Nr] = 0; \
				133	return(ret); \
				134	} \
				135
				136	/* PUSH_AND_POP(static, xmlNodePtr, node) */
				137	PUSH_AND_POP(static, xmlChar*, name)
				138
				139	/*
				140	* Macros for accessing the content. Those should be used only by the parser,
				141	* and not exported.
				142	*
				143	* Dirty macros, i.e. one needs to make assumptions about the context to use them
				144	*
				145	* CUR_PTR return the current pointer to the xmlChar to be parsed.
				146	* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
				147	* in ISO-Latin or UTF-8, and the current 16 bit value if compiled
				148	* in UNICODE mode. This should be used internally by the parser
				149	* only to compare to ASCII values otherwise it would break when
				150	* running with UTF-8 encoding.
				151	* NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
				152	* to compare on ASCII based substring.
				153	* UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR
				154	* it should be used only to compare on ASCII based substring.
				155	* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
				156	* strings within the parser.
				157	*
				158	* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
				159	*
				160	* CURRENT Returns the current char value, with the full decoding of
				161	* UTF-8 if we are using this mode. It returns an int.
				162	* NEXT Skip to the next character, this does the proper decoding
				163	* in UTF-8 mode. It also pop-up unfinished entities on the fly.
				164	* COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
				165	*/
				166
				167	#define UPPER (toupper(*ctxt->input->cur))
				168
				169	#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
				170
				171	#define NXT(val) ctxt->input->cur[(val)]
				172
				173	#define UPP(val) (toupper(ctxt->input->cur[(val)]))
				174
				175	#define CUR_PTR ctxt->input->cur
				176
				177	#define SHRINK xmlParserInputShrink(ctxt->input)
				178
				179	#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
				180
				181	#define CURRENT ((int) (*ctxt->input->cur))
				182
				183	#define SKIP_BLANKS docbSkipBlankChars(ctxt)
				184
				185	/* Imported from XML */
				186
				187	/* #define CUR (ctxt->token ? ctxt->token : (int) (ctxt->input->cur)) /
				188	#define CUR ((int) (*ctxt->input->cur))
				189	#define NEXT xmlNextChar(ctxt),ctxt->nbChars++
				190
				191	#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
				192	#define NXT(val) ctxt->input->cur[(val)]
				193	#define CUR_PTR ctxt->input->cur
				194
				195
				196	#define NEXTL(l) do { \
				197	if (*(ctxt->input->cur) == '\n') { \
				198	ctxt->input->line++; ctxt->input->col = 1; \
				199	} else ctxt->input->col++; \
				200	ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
				201	} while (0)
				202
				203	/************
				204	\
				205	if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
				206	if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
				207	************/
				208
				209	#define CUR_CHAR(l) docbCurrentChar(ctxt, &l)
				210	#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
				211
				212	#define COPY_BUF(l,b,i,v) \
				213	if (l == 1) b[i++] = (xmlChar) v; \
				214	else i += xmlCopyChar(l,&b[i],v)
				215
				216	/**
				217	* docbCurrentChar:
				218	* @ctxt: the DocBook SGML parser context
				219	* @len: pointer to the length of the char read
				220	*
				221	* The current char value, if using UTF-8 this may actaully span multiple
				222	* bytes in the input buffer. Implement the end of line normalization:
				223	* 2.11 End-of-Line Handling
				224	* If the encoding is unspecified, in the case we find an ISO-Latin-1
				225	* char, then the encoding converter is plugged in automatically.
				226	*
				227	* Returns the current char value and its lenght
				228	*/
				229
				230	static int
				231	docbCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
				232	if (ctxt->instate == XML_PARSER_EOF)
				233	return(0);
				234
				235	if (ctxt->token != 0) {
				236	*len = 0;
				237	return(ctxt->token);
				238	}
				239	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
				240	/*
				241	* We are supposed to handle UTF8, check it's valid
				242	* From rfc2044: encoding of the Unicode values on UTF-8:
				243	*
				244	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
				245	* 0000 0000-0000 007F 0xxxxxxx
				246	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
				247	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
				248	*
				249	* Check for the 0x110000 limit too
				250	*/
				251	const unsigned char *cur = ctxt->input->cur;
				252	unsigned char c;
				253	unsigned int val;
				254
				255	c = *cur;
				256	if (c & 0x80) {
				257	if (cur[1] == 0)
				258	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				259	if ((cur[1] & 0xc0) != 0x80)
				260	goto encoding_error;
				261	if ((c & 0xe0) == 0xe0) {
				262
				263	if (cur[2] == 0)
				264	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				265	if ((cur[2] & 0xc0) != 0x80)
				266	goto encoding_error;
				267	if ((c & 0xf0) == 0xf0) {
				268	if (cur[3] == 0)
				269	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				270	if (((c & 0xf8) != 0xf0) \|\|
				271	((cur[3] & 0xc0) != 0x80))
				272	goto encoding_error;
				273	/* 4-byte code */
				274	*len = 4;
				275	val = (cur[0] & 0x7) << 18;
				276	val \|= (cur[1] & 0x3f) << 12;
				277	val \|= (cur[2] & 0x3f) << 6;
				278	val \|= cur[3] & 0x3f;
				279	} else {
				280	/* 3-byte code */
				281	*len = 3;
				282	val = (cur[0] & 0xf) << 12;
				283	val \|= (cur[1] & 0x3f) << 6;
				284	val \|= cur[2] & 0x3f;
				285	}
				286	} else {
				287	/* 2-byte code */
				288	*len = 2;
				289	val = (cur[0] & 0x1f) << 6;
				290	val \|= cur[1] & 0x3f;
				291	}
				292	if (!IS_CHAR(val)) {
				293	ctxt->errNo = XML_ERR_INVALID_ENCODING;
				294	if ((ctxt->sax != NULL) &&
				295	(ctxt->sax->error != NULL))
				296	ctxt->sax->error(ctxt->userData,
				297	"Char 0x%X out of allowed range\n", val);
				298	ctxt->wellFormed = 0;
				299	ctxt->disableSAX = 1;
				300	}
				301	return(val);
				302	} else {
				303	/* 1-byte code */
				304	*len = 1;
				305	return((int) *ctxt->input->cur);
				306	}
				307	}
				308	/*
				309	* Assume it's a fixed lenght encoding (1) with
				310	* a compatibke encoding for the ASCII set, since
				311	* XML constructs only use < 128 chars
				312	*/
				313	*len = 1;
				314	if ((int) *ctxt->input->cur < 0x80)
				315	return((int) *ctxt->input->cur);
				316
				317	/*
				318	* Humm this is bad, do an automatic flow conversion
				319	*/
				320	xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
				321	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				322	return(xmlCurrentChar(ctxt, len));
				323
				324	encoding_error:
				325	/*
				326	* If we detect an UTF8 error that probably mean that the
				327	* input encoding didn't get properly advertized in the
				328	* declaration header. Report the error and switch the encoding
				329	* to ISO-Latin-1 (if you don't like this policy, just declare the
				330	* encoding !)
				331	*/
				332	ctxt->errNo = XML_ERR_INVALID_ENCODING;
				333	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
				334	ctxt->sax->error(ctxt->userData,
				335	"Input is not proper UTF-8, indicate encoding !\n");
				336	ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				337	ctxt->input->cur[0], ctxt->input->cur[1],
				338	ctxt->input->cur[2], ctxt->input->cur[3]);
				339	}
				340
				341	ctxt->charset = XML_CHAR_ENCODING_8859_1;
				342	*len = 1;
				343	return((int) *ctxt->input->cur);
				344	}
				345
				346	#if 0
				347	/**
				348	* sgmlNextChar:
				349	* @ctxt: the DocBook SGML parser context
				350	*
				351	* Skip to the next char input char.
				352	*/
				353
				354	static void
				355	sgmlNextChar(docbParserCtxtPtr ctxt) {
				356	if (ctxt->instate == XML_PARSER_EOF)
				357	return;
				358	if ((*ctxt->input->cur == 0) &&
				359	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
				360	xmlPopInput(ctxt);
				361	} else {
				362	if (*(ctxt->input->cur) == '\n') {
				363	ctxt->input->line++; ctxt->input->col = 1;
				364	} else ctxt->input->col++;
				365	ctxt->input->cur++;
				366	ctxt->nbChars++;
				367	if (*ctxt->input->cur == 0)
				368	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				369	}
				370	}
				371	#endif
				372
				373	/**
				374	* docbSkipBlankChars:
				375	* @ctxt: the DocBook SGML parser context
				376	*
				377	* skip all blanks character found at that point in the input streams.
				378	*
				379	* Returns the number of space chars skipped
				380	*/
				381
				382	static int
				383	docbSkipBlankChars(xmlParserCtxtPtr ctxt) {
				384	int res = 0;
				385
				386	while (IS_BLANK(*(ctxt->input->cur))) {
				387	if ((*ctxt->input->cur == 0) &&
				388	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
				389	xmlPopInput(ctxt);
				390	} else {
				391	if (*(ctxt->input->cur) == '\n') {
				392	ctxt->input->line++; ctxt->input->col = 1;
				393	} else ctxt->input->col++;
				394	ctxt->input->cur++;
				395	ctxt->nbChars++;
				396	if (*ctxt->input->cur == 0)
				397	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				398	}
				399	res++;
				400	}
				401	return(res);
				402	}
				403
				404
				405
				406	/************************************************************************
				407	* *
				408	* The list of SGML elements and their properties *
				409	* *
				410	************************************************************************/
				411
				412	/*
				413	* Start Tag: 1 means the start tag can be omitted
				414	* End Tag: 1 means the end tag can be omitted
				415	* 2 means it's forbidden (empty elements)
				416	* Depr: this element is deprecated
				417	* DTD: 1 means that this element is valid only in the Loose DTD
				418	* 2 means that this element is valid only in the Frameset DTD
				419	*
				420	* Name,Start Tag,End Tag, Empty, Depr., DTD, Description
				421	*/
				422	static docbElemDesc
				423	docbookElementTable[] = {
				424	{ "abbrev", 0, 0, 0, 3, 0, "" }, /* word */
				425	{ "abstract", 0, 0, 0, 9, 0, "" }, /* title */
				426	{ "accel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				427	{ "ackno", 0, 0, 0, 4, 0, "" }, /* docinfo */
				428	{ "acronym", 0, 0, 0, 3, 0, "" }, /* word */
				429	{ "action", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				430	{ "address", 0, 0, 0, 1, 0, "" },
				431	{ "affiliation",0, 0, 0, 9, 0, "" }, /* shortaffil */
				432	{ "alt", 0, 0, 0, 1, 0, "" },
				433	{ "anchor", 0, 2, 1, 0, 0, "" },
				434	{ "answer", 0, 0, 0, 9, 0, "" }, /* label */
				435	{ "appendix", 0, 0, 0, 9, 0, "" }, /* appendixinfo */
				436	{ "appendixinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				437	{ "application",0, 0, 0, 2, 0, "" }, /* para */
				438	{ "area", 0, 2, 1, 0, 0, "" },
				439	{ "areaset", 0, 0, 0, 9, 0, "" }, /* area */
				440	{ "areaspec", 0, 0, 0, 9, 0, "" }, /* area */
				441	{ "arg", 0, 0, 0, 1, 0, "" },
Daniel Veillard	4ec0b0f	2001-04-25 15:53:40 +0000	[diff] [blame]	442	{ "artheader", 0, 0, 0, 9, 0, "" },
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	443	{ "article", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				444	{ "articleinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				445	{ "artpagenums",0, 0, 0, 4, 0, "" }, /* docinfo */
				446	{ "attribution",0, 0, 0, 2, 0, "" }, /* para */
				447	{ "audiodata", 0, 2, 1, 0, 0, "" },
				448	{ "audioobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				449	{ "authorblurb",0, 0, 0, 9, 0, "" }, /* title */
				450	{ "authorgroup",0, 0, 0, 9, 0, "" }, /* author */
				451	{ "authorinitials",0, 0, 0, 4, 0, "" }, /* docinfo */
				452	{ "author", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */
				453	{ "beginpage", 0, 2, 1, 0, 0, "" },
				454	{ "bibliodiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				455	{ "biblioentry",0, 0, 0, 9, 0, "" }, /* articleinfo */
				456	{ "bibliography",0, 0, 0, 9, 0, "" }, /* bibliographyinfo */
				457	{ "bibliographyinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				458	{ "bibliomisc", 0, 0, 0, 2, 0, "" }, /* para */
				459	{ "bibliomixed",0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix, bibliomset) */
				460	{ "bibliomset", 0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix; \| bibliomset) */
				461	{ "biblioset", 0, 0, 0, 9, 0, "" }, /* bibliocomponent.mix */
				462	{ "blockquote", 0, 0, 0, 9, 0, "" }, /* title */
				463	{ "book", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				464	{ "bookinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				465	{ "bridgehead", 0, 0, 0, 8, 0, "" }, /* title */
				466	{ "callout", 0, 0, 0, 9, 0, "" }, /* component.mix */
				467	{ "calloutlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				468	{ "caption", 0, 0, 0, 9, 0, "" }, /* textobject.mix */
				469	{ "caution", 0, 0, 0, 9, 0, "" }, /* title */
				470	{ "chapter", 0, 0, 0, 9, 0, "" }, /* chapterinfo */
				471	{ "chapterinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				472	{ "citation", 0, 0, 0, 2, 0, "" }, /* para */
				473	{ "citerefentry",0, 0, 0, 9, 0, "" }, /* refentrytitle */
				474	{ "citetitle", 0, 0, 0, 2, 0, "" }, /* para */
				475	{ "city", 0, 0, 0, 4, 0, "" }, /* docinfo */
				476	{ "classname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				477	{ "classsynopsisinfo",0,0, 0, 9, 0, "" }, /* cptr */
				478	{ "classsynopsis",0, 0, 0, 9, 0, "" }, /* ooclass */
				479	{ "cmdsynopsis",0, 0, 0, 9, 0, "" }, /* command */
				480	{ "co", 0, 2, 1, 0, 0, "" },
				481	{ "collab", 0, 0, 0, 9, 0, "" }, /* collabname */
				482	{ "collabname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				483	{ "colophon", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				484	{ "colspec", 0, 2, 1, 0, 0, "" },
				485	{ "colspec", 0, 2, 1, 0, 0, "" },
				486	{ "command", 0, 0, 0, 9, 0, "" }, /* cptr */
				487	{ "computeroutput",0, 0, 0, 9, 0, "" }, /* cptr */
				488	{ "confdates", 0, 0, 0, 4, 0, "" }, /* docinfo */
				489	{ "confgroup", 0, 0, 0, 9, 0, "" }, /* confdates */
				490	{ "confnum", 0, 0, 0, 4, 0, "" }, /* docinfo */
				491	{ "confsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */
				492	{ "conftitle", 0, 0, 0, 4, 0, "" }, /* docinfo */
				493	{ "constant", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				494	{ "constructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */
				495	{ "contractnum",0, 0, 0, 4, 0, "" }, /* docinfo */
				496	{ "contractsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */
				497	{ "contrib", 0, 0, 0, 4, 0, "" }, /* docinfo */
				498	{ "copyright", 0, 0, 0, 9, 0, "" }, /* year */
				499	{ "corpauthor", 0, 0, 0, 4, 0, "" }, /* docinfo */
				500	{ "corpname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				501	{ "country", 0, 0, 0, 4, 0, "" }, /* docinfo */
				502	{ "database", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				503	{ "date", 0, 0, 0, 4, 0, "" }, /* docinfo */
				504	{ "dedication", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				505	{ "destructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */
				506	{ "edition", 0, 0, 0, 4, 0, "" }, /* docinfo */
				507	{ "editor", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */
				508	{ "email", 0, 0, 0, 4, 0, "" }, /* docinfo */
				509	{ "emphasis", 0, 0, 0, 2, 0, "" }, /* para */
				510	{ "entry", 0, 0, 0, 9, 0, "" }, /* tbl.entry.mdl */
				511	{ "entrytbl", 0, 0, 0, 9, 0, "" }, /* tbl.entrytbl.mdl */
				512	{ "envar", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				513	{ "epigraph", 0, 0, 0, 9, 0, "" }, /* attribution */
				514	{ "equation", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				515	{ "errorcode", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				516	{ "errorname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				517	{ "errortype", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				518	{ "example", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				519	{ "exceptionname",0, 0, 0, 7, 0, "" }, /* smallcptr */
				520	{ "fax", 0, 0, 0, 4, 0, "" }, /* docinfo */
				521	{ "fieldsynopsis", 0, 0, 0, 9, 0, "" }, /* modifier */
				522	{ "figure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				523	{ "filename", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				524	{ "firstname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				525	{ "firstterm", 0, 0, 0, 3, 0, "" }, /* word */
				526	{ "footnote", 0, 0, 0, 9, 0, "" }, /* footnote.mix */
				527	{ "footnoteref",0, 2, 1, 0, 0, "" },
				528	{ "foreignphrase",0, 0, 0, 2, 0, "" }, /* para */
				529	{ "formalpara", 0, 0, 0, 9, 0, "" }, /* title */
				530	{ "funcdef", 0, 0, 0, 1, 0, "" },
				531	{ "funcparams", 0, 0, 0, 9, 0, "" }, /* cptr */
				532	{ "funcprototype",0, 0, 0, 9, 0, "" }, /* funcdef */
				533	{ "funcsynopsis",0, 0, 0, 9, 0, "" }, /* funcsynopsisinfo */
				534	{ "funcsynopsisinfo", 0, 0, 0, 9, 0, "" }, /* cptr */
				535	{ "function", 0, 0, 0, 9, 0, "" }, /* cptr */
				536	{ "glossary", 0, 0, 0, 9, 0, "" }, /* glossaryinfo */
				537	{ "glossaryinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				538	{ "glossdef", 0, 0, 0, 9, 0, "" }, /* glossdef.mix */
				539	{ "glossdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				540	{ "glossentry", 0, 0, 0, 9, 0, "" }, /* glossterm */
				541	{ "glosslist", 0, 0, 0, 9, 0, "" }, /* glossentry */
				542	{ "glossseealso",0, 0, 0, 2, 0, "" }, /* para */
				543	{ "glosssee", 0, 0, 0, 2, 0, "" }, /* para */
				544	{ "glossterm", 0, 0, 0, 2, 0, "" }, /* para */
Daniel Veillard	4ec0b0f	2001-04-25 15:53:40 +0000	[diff] [blame]	545	{ "graphic", 0, 0, 0, 9, 0, "" },
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	546	{ "graphicco", 0, 0, 0, 9, 0, "" }, /* areaspec */
				547	{ "group", 0, 0, 0, 9, 0, "" }, /* arg */
				548	{ "guibutton", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				549	{ "guiicon", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				550	{ "guilabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				551	{ "guimenuitem",0, 0, 0, 7, 0, "" }, /* smallcptr */
				552	{ "guimenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				553	{ "guisubmenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				554	{ "hardware", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				555	{ "highlights", 0, 0, 0, 9, 0, "" }, /* highlights.mix */
				556	{ "holder", 0, 0, 0, 4, 0, "" }, /* docinfo */
				557	{ "honorific", 0, 0, 0, 4, 0, "" }, /* docinfo */
				558	{ "imagedata", 0, 2, 1, 0, 0, "" },
				559	{ "imageobjectco",0, 0, 0, 9, 0, "" }, /* areaspec */
				560	{ "imageobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				561	{ "important", 0, 0, 0, 9, 0, "" }, /* title */
				562	{ "indexdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				563	{ "indexentry", 0, 0, 0, 9, 0, "" }, /* primaryie */
				564	{ "index", 0, 0, 0, 9, 0, "" }, /* indexinfo */
				565	{ "indexinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				566	{ "indexterm", 0, 0, 0, 9, 0, "" }, /* primary */
				567	{ "informalequation",0, 0, 0, 9, 0, "" }, /* equation.content */
				568	{ "informalexample",0, 0, 0, 9, 0, "" }, /* example.mix */
				569	{ "informalfigure",0, 0, 0, 9, 0, "" }, /* figure.mix */
				570	{ "informaltable",0, 0, 0, 9, 0, "" }, /* graphic */
				571	{ "initializer",0, 0, 0, 7, 0, "" }, /* smallcptr */
				572	{ "inlineequation",0, 0, 0, 9, 0, "" }, /* inlineequation.content */
				573	{ "inlinegraphic",0, 2, 1, 0, 0, "" },
				574	{ "inlinemediaobject",0,0, 0, 9, 0, "" }, /* objectinfo */
				575	{ "interfacename",0, 0, 0, 7, 0, "" }, /* smallcptr */
				576	{ "interface", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				577	{ "invpartnumber",0, 0, 0, 4, 0, "" }, /* docinfo */
				578	{ "isbn", 0, 0, 0, 4, 0, "" }, /* docinfo */
				579	{ "issn", 0, 0, 0, 4, 0, "" }, /* docinfo */
				580	{ "issuenum", 0, 0, 0, 4, 0, "" }, /* docinfo */
				581	{ "itemizedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				582	{ "itermset", 0, 0, 0, 9, 0, "" }, /* indexterm */
				583	{ "jobtitle", 0, 0, 0, 4, 0, "" }, /* docinfo */
				584	{ "keycap", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				585	{ "keycode", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				586	{ "keycombo", 0, 0, 0, 9, 0, "" }, /* keycap */
				587	{ "keysym", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				588	{ "keyword", 0, 0, 0, 1, 0, "" },
				589	{ "keywordset", 0, 0, 0, 9, 0, "" }, /* keyword */
				590	{ "label", 0, 0, 0, 3, 0, "" }, /* word */
				591	{ "legalnotice",0, 0, 0, 9, 0, "" }, /* title */
				592	{ "lineage", 0, 0, 0, 4, 0, "" }, /* docinfo */
				593	{ "lineannotation",0, 0, 0, 2, 0, "" }, /* para */
				594	{ "link", 0, 0, 0, 2, 0, "" }, /* para */
				595	{ "listitem", 0, 0, 0, 9, 0, "" }, /* component.mix */
				596	{ "literal", 0, 0, 0, 9, 0, "" }, /* cptr */
				597	{ "literallayout",0, 0, 0, 2, 0, "" }, /* para */
				598	{ "lot", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */
				599	{ "lotentry", 0, 0, 0, 2, 0, "" }, /* para */
				600	{ "manvolnum", 0, 0, 0, 3, 0, "" }, /* word */
				601	{ "markup", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				602	{ "medialabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				603	{ "mediaobjectco",0, 0, 0, 9, 0, "" }, /* objectinfo */
				604	{ "mediaobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				605	{ "member", 0, 0, 0, 2, 0, "" }, /* para */
				606	{ "menuchoice", 0, 0, 0, 9, 0, "" }, /* shortcut */
				607	{ "methodname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				608	{ "methodparam",0, 0, 0, 9, 0, "" }, /* modifier */
				609	{ "methodsynopsis",0, 0, 0, 9, 0, "" }, /* modifier */
				610	{ "modespec", 0, 0, 0, 4, 0, "" }, /* docinfo */
				611	{ "modifier", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				612	{ "mousebutton",0, 0, 0, 7, 0, "" }, /* smallcptr */
				613	{ "msgaud", 0, 0, 0, 2, 0, "" }, /* para */
				614	{ "msgentry", 0, 0, 0, 9, 0, "" }, /* msg */
				615	{ "msgexplan", 0, 0, 0, 9, 0, "" }, /* title */
				616	{ "msginfo", 0, 0, 0, 9, 0, "" }, /* msglevel */
				617	{ "msglevel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				618	{ "msgmain", 0, 0, 0, 9, 0, "" }, /* title */
				619	{ "msgorig", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				620	{ "msgrel", 0, 0, 0, 9, 0, "" }, /* title */
				621	{ "msgset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				622	{ "msgsub", 0, 0, 0, 9, 0, "" }, /* title */
				623	{ "msgtext", 0, 0, 0, 9, 0, "" }, /* component.mix */
				624	{ "msg", 0, 0, 0, 9, 0, "" }, /* title */
				625	{ "note", 0, 0, 0, 9, 0, "" }, /* title */
				626	{ "objectinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				627	{ "olink", 0, 0, 0, 2, 0, "" }, /* para */
				628	{ "ooclass", 0, 0, 0, 9, 0, "" }, /* modifier */
				629	{ "ooexception",0, 0, 0, 9, 0, "" }, /* modifier */
				630	{ "oointerface",0, 0, 0, 9, 0, "" }, /* modifier */
				631	{ "optional", 0, 0, 0, 9, 0, "" }, /* cptr */
				632	{ "option", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				633	{ "orderedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				634	{ "orgdiv", 0, 0, 0, 4, 0, "" }, /* docinfo */
				635	{ "orgname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				636	{ "otheraddr", 0, 0, 0, 4, 0, "" }, /* docinfo */
				637	{ "othercredit",0, 0, 0, 9, 0, "" }, /* person.ident.mix */
				638	{ "othername", 0, 0, 0, 4, 0, "" }, /* docinfo */
				639	{ "pagenums", 0, 0, 0, 4, 0, "" }, /* docinfo */
				640	{ "paramdef", 0, 0, 0, 1, 0, "" },
				641	{ "parameter", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				642	{ "para", 0, 0, 0, 2, 0, "" }, /* para */
				643	{ "partinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				644	{ "partintro", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				645	{ "part", 0, 0, 0, 9, 0, "" }, /* partinfo */
				646	{ "phone", 0, 0, 0, 4, 0, "" }, /* docinfo */
				647	{ "phrase", 0, 0, 0, 2, 0, "" }, /* para */
				648	{ "pob", 0, 0, 0, 4, 0, "" }, /* docinfo */
				649	{ "postcode", 0, 0, 0, 4, 0, "" }, /* docinfo */
				650	{ "prefaceinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				651	{ "preface", 0, 0, 0, 9, 0, "" }, /* prefaceinfo */
				652	{ "primaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				653	{ "primary ", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				654	{ "printhistory",0, 0, 0, 9, 0, "" }, /* para.class */
				655	{ "procedure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				656	{ "productname",0, 0, 0, 2, 0, "" }, /* para */
				657	{ "productnumber",0, 0, 0, 4, 0, "" }, /* docinfo */
				658	{ "programlistingco",0, 0, 0, 9, 0, "" }, /* areaspec */
				659	{ "programlisting",0, 0, 0, 2, 0, "" }, /* para */
				660	{ "prompt", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				661	{ "property", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				662	{ "pubdate", 0, 0, 0, 4, 0, "" }, /* docinfo */
				663	{ "publishername",0, 0, 0, 4, 0, "" }, /* docinfo */
				664	{ "publisher", 0, 0, 0, 9, 0, "" }, /* publishername */
				665	{ "pubsnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */
				666	{ "qandadiv", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				667	{ "qandaentry", 0, 0, 0, 9, 0, "" }, /* revhistory */
				668	{ "qandaset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				669	{ "question", 0, 0, 0, 9, 0, "" }, /* label */
				670	{ "quote", 0, 0, 0, 2, 0, "" }, /* para */
				671	{ "refclass", 0, 0, 0, 9, 0, "" }, /* refclass.char.mix */
				672	{ "refdescriptor",0, 0, 0, 9, 0, "" }, /* refname.char.mix */
				673	{ "refentryinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				674	{ "refentry", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */
				675	{ "refentrytitle",0, 0, 0, 2, 0, "" }, /* para */
				676	{ "referenceinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				677	{ "reference", 0, 0, 0, 9, 0, "" }, /* referenceinfo */
				678	{ "refmeta", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */
				679	{ "refmiscinfo",0, 0, 0, 4, 0, "" }, /* docinfo */
				680	{ "refnamediv", 0, 0, 0, 9, 0, "" }, /* refdescriptor */
				681	{ "refname", 0, 0, 0, 9, 0, "" }, /* refname.char.mix */
				682	{ "refpurpose", 0, 0, 0, 9, 0, "" }, /* refinline.char.mix */
				683	{ "refsect1info",0, 0, 0, 9, 0, "" }, /* graphic */
				684	{ "refsect1", 0, 0, 0, 9, 0, "" }, /* refsect */
				685	{ "refsect2info",0, 0, 0, 9, 0, "" }, /* graphic */
				686	{ "refsect2", 0, 0, 0, 9, 0, "" }, /* refsect */
				687	{ "refsect3info",0, 0, 0, 9, 0, "" }, /* graphic */
				688	{ "refsect3", 0, 0, 0, 9, 0, "" }, /* refsect */
				689	{ "refsynopsisdivinfo",0,0, 0, 9, 0, "" }, /* graphic */
				690	{ "refsynopsisdiv",0, 0, 0, 9, 0, "" }, /* refsynopsisdivinfo */
				691	{ "releaseinfo",0, 0, 0, 4, 0, "" }, /* docinfo */
				692	{ "remark", 0, 0, 0, 2, 0, "" }, /* para */
				693	{ "replaceable",0, 0, 0, 1, 0, "" },
				694	{ "returnvalue",0, 0, 0, 7, 0, "" }, /* smallcptr */
				695	{ "revdescription",0, 0, 0, 9, 0, "" }, /* revdescription.mix */
				696	{ "revhistory", 0, 0, 0, 9, 0, "" }, /* revision */
				697	{ "revision", 0, 0, 0, 9, 0, "" }, /* revnumber */
				698	{ "revnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */
				699	{ "revremark", 0, 0, 0, 4, 0, "" }, /* docinfo */
				700	{ "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */
				701	{ "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */
				702	{ "sbr", 0, 2, 1, 0, 0, "" },
				703	{ "screenco", 0, 0, 0, 9, 0, "" }, /* areaspec */
				704	{ "screeninfo", 0, 0, 0, 2, 0, "" }, /* para */
				705	{ "screen", 0, 0, 0, 2, 0, "" }, /* para */
				706	{ "screenshot", 0, 0, 0, 9, 0, "" }, /* screeninfo */
				707	{ "secondaryie",0, 0, 0, 4, 0, "" }, /* ndxterm */
				708	{ "secondary", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				709	{ "sect1info", 0, 0, 0, 9, 0, "" }, /* graphic */
				710	{ "sect1", 0, 0, 0, 9, 0, "" }, /* sect */
				711	{ "sect2info", 0, 0, 0, 9, 0, "" }, /* graphic */
				712	{ "sect2", 0, 0, 0, 9, 0, "" }, /* sect */
				713	{ "sect3info", 0, 0, 0, 9, 0, "" }, /* graphic */
				714	{ "sect3", 0, 0, 0, 9, 0, "" }, /* sect */
				715	{ "sect4info", 0, 0, 0, 9, 0, "" }, /* graphic */
				716	{ "sect4", 0, 0, 0, 9, 0, "" }, /* sect */
				717	{ "sect5info", 0, 0, 0, 9, 0, "" }, /* graphic */
				718	{ "sect5", 0, 0, 0, 9, 0, "" }, /* sect */
				719	{ "sectioninfo",0, 0, 0, 9, 0, "" }, /* graphic */
				720	{ "section", 0, 0, 0, 9, 0, "" }, /* sectioninfo */
				721	{ "seealsoie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				722	{ "seealso", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				723	{ "seeie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				724	{ "see", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				725	{ "seglistitem",0, 0, 0, 9, 0, "" }, /* seg */
				726	{ "segmentedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				727	{ "seg", 0, 0, 0, 2, 0, "" }, /* para */
				728	{ "segtitle", 0, 0, 0, 8, 0, "" }, /* title */
				729	{ "seriesvolnums", 0, 0, 0, 4, 0, "" }, /* docinfo */
				730	{ "set", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				731	{ "setindexinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				732	{ "setindex", 0, 0, 0, 9, 0, "" }, /* setindexinfo */
				733	{ "setinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				734	{ "sgmltag", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				735	{ "shortaffil", 0, 0, 0, 4, 0, "" }, /* docinfo */
				736	{ "shortcut", 0, 0, 0, 9, 0, "" }, /* keycap */
				737	{ "sidebarinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				738	{ "sidebar", 0, 0, 0, 9, 0, "" }, /* sidebarinfo */
				739	{ "simpara", 0, 0, 0, 2, 0, "" }, /* para */
				740	{ "simplelist", 0, 0, 0, 9, 0, "" }, /* member */
				741	{ "simplemsgentry", 0, 0, 0, 9, 0, "" }, /* msgtext */
				742	{ "simplesect", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				743	{ "spanspec", 0, 2, 1, 0, 0, "" },
				744	{ "state", 0, 0, 0, 4, 0, "" }, /* docinfo */
				745	{ "step", 0, 0, 0, 9, 0, "" }, /* title */
				746	{ "street", 0, 0, 0, 4, 0, "" }, /* docinfo */
				747	{ "structfield",0, 0, 0, 7, 0, "" }, /* smallcptr */
				748	{ "structname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				749	{ "subjectset", 0, 0, 0, 9, 0, "" }, /* subject */
				750	{ "subject", 0, 0, 0, 9, 0, "" }, /* subjectterm */
				751	{ "subjectterm",0, 0, 0, 1, 0, "" },
				752	{ "subscript", 0, 0, 0, 1, 0, "" },
				753	{ "substeps", 0, 0, 0, 9, 0, "" }, /* step */
				754	{ "subtitle", 0, 0, 0, 8, 0, "" }, /* title */
				755	{ "superscript", 0, 0, 0, 1, 0, "" },
				756	{ "surname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				757	{ "symbol", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				758	{ "synopfragment", 0, 0, 0, 9, 0, "" }, /* arg */
				759	{ "synopfragmentref", 0, 0, 0, 1, 0, "" },
				760	{ "synopsis", 0, 0, 0, 2, 0, "" }, /* para */
				761	{ "systemitem", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				762	{ "table", 0, 0, 0, 9, 0, "" }, /* tbl.table.mdl */
				763	/* { "%tbl.table.name;", 0, 0, 0, 9, 0, "" },/ / tbl.table.mdl */
				764	{ "tbody", 0, 0, 0, 9, 0, "" }, /* row */
				765	{ "tbody", 0, 0, 0, 9, 0, "" }, /* row */
				766	{ "term", 0, 0, 0, 2, 0, "" }, /* para */
				767	{ "tertiaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				768	{ "tertiary ", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				769	{ "textobject", 0, 0, 0, 9, 0, "" }, /* objectinfo */
				770	{ "tfoot", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */
				771	{ "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */
				772	{ "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */
				773	{ "thead", 0, 0, 0, 9, 0, "" }, /* row */
				774	{ "thead", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */
				775	{ "tip", 0, 0, 0, 9, 0, "" }, /* title */
				776	{ "titleabbrev",0, 0, 0, 8, 0, "" }, /* title */
				777	{ "title", 0, 0, 0, 8, 0, "" }, /* title */
				778	{ "tocback", 0, 0, 0, 2, 0, "" }, /* para */
				779	{ "toc", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */
				780	{ "tocchap", 0, 0, 0, 9, 0, "" }, /* tocentry */
				781	{ "tocentry", 0, 0, 0, 2, 0, "" }, /* para */
				782	{ "tocfront", 0, 0, 0, 2, 0, "" }, /* para */
				783	{ "toclevel1", 0, 0, 0, 9, 0, "" }, /* tocentry */
				784	{ "toclevel2", 0, 0, 0, 9, 0, "" }, /* tocentry */
				785	{ "toclevel3", 0, 0, 0, 9, 0, "" }, /* tocentry */
				786	{ "toclevel4", 0, 0, 0, 9, 0, "" }, /* tocentry */
				787	{ "toclevel5", 0, 0, 0, 9, 0, "" }, /* tocentry */
				788	{ "tocpart", 0, 0, 0, 9, 0, "" }, /* tocentry */
				789	{ "token", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				790	{ "trademark", 0, 0, 0, 1, 0, "" },
				791	{ "type", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				792	{ "ulink", 0, 0, 0, 2, 0, "" }, /* para */
				793	{ "userinput", 0, 0, 0, 9, 0, "" }, /* cptr */
				794	{ "varargs", 0, 2, 1, 0, 0, "" },
				795	{ "variablelist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				796	{ "varlistentry",0, 0, 0, 9, 0, "" }, /* term */
				797	{ "varname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				798	{ "videodata", 0, 2, 1, 0, 0, "" },
				799	{ "videoobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				800	{ "void", 0, 2, 1, 0, 0, "" },
				801	{ "volumenum", 0, 0, 0, 4, 0, "" }, /* docinfo */
				802	{ "warning", 0, 0, 0, 9, 0, "" }, /* title */
				803	{ "wordasword", 0, 0, 0, 3, 0, "" }, /* word */
				804	{ "xref", 0, 2, 1, 0, 0, "" },
				805	{ "year", 0, 0, 0, 4, 0, "" }, /* docinfo */
				806	};
				807
#if 0
/*
 * Start tags that imply the end of a current element:
 * any tag of a line implies the end of the current element if the type of
 * that element is on the same line.
 *
 * Each line is one NULL-terminated group, and an extra NULL ends the table.
 *
 * NOTE(review): the entries look like HTML tag names rather than DocBook
 * ones, and the table is compiled out by the surrounding #if 0.
 */
static const char *docbEquEnd[] = {
"dt", "dd", "li", "option", NULL,
"h1", "h2", "h3", "h4", "h5", "h6", NULL,
"ol", "menu", "dir", "address", "pre", "listing", "xmp", NULL,
NULL
};
#endif
				821
/*
 * According to the SGML DTD, HR should be added to the 2nd line above, as it
 * is not allowed within a H1, H2, H3, etc. But we should tolerate that case
 * because many documents contain rules in headings...
 */
				827
/*
 * Start tags that imply the end of the current element.
 *
 * The table is read as a sequence of NULL-terminated groups: the first
 * entry of a group is the new tag, the remaining entries are the open tags
 * it closes.  An empty group (a lone NULL) ends the table.  It currently
 * holds only that terminator, i.e. no auto-close rule is registered.
 */
static const char *docbStartClose[] = {
NULL
};
				834
/*
 * The list of SGML elements which are supposed not to have
 * CDATA content and where a p element will be implied.
 *
 * The list is NULL-terminated and currently empty.
 *
 * TODO: extend that list by reading the SGML DTD on
 *       implied paragraphs
 */
static char *docbNoContentElements[] = {
NULL
};
				845
				846
/*
 * Lookup index over docbStartClose: each used slot points at the first
 * entry of one group of that table; unused slots are NULL.  It is filled
 * by docbInitAutoClose().
 */
static const char** docbStartCloseIndex[100];
/* Guard flag tested before (re)building docbStartCloseIndex. */
static int docbStartCloseIndexinitialized = 0;
				849
				850	/************************************************************************
				851	* *
				852	* functions to handle SGML specific data *
				853	* *
				854	************************************************************************/
				855
				856	/**
				857	* docbInitAutoClose:
				858	*
				859	* Initialize the docbStartCloseIndex for fast lookup of closing tags names.
				860	*
				861	*/
				862	static void
				863	docbInitAutoClose(void) {
				864	int indx, i = 0;
				865
				866	if (docbStartCloseIndexinitialized) return;
				867
				868	for (indx = 0;indx < 100;indx ++) docbStartCloseIndex[indx] = NULL;
				869	indx = 0;
				870	while ((docbStartClose[i] != NULL) && (indx < 100 - 1)) {
				871	docbStartCloseIndex[indx++] = &docbStartClose[i];
				872	while (docbStartClose[i] != NULL) i++;
				873	i++;
				874	}
				875	}
				876
				877	/**
				878	* docbTagLookup:
				879	* @tag: The tag name
				880	*
				881	* Lookup the SGML tag in the ElementTable
				882	*
				883	* Returns the related docbElemDescPtr or NULL if not found.
				884	*/
				885	static docbElemDescPtr
				886	docbTagLookup(const xmlChar *tag) {
				887	unsigned int i;
				888
				889	for (i = 0; i < (sizeof(docbookElementTable) /
				890	sizeof(docbookElementTable[0]));i++) {
				891	if (xmlStrEqual(tag, BAD_CAST docbookElementTable[i].name))
				892	return(&docbookElementTable[i]);
				893	}
				894	return(NULL);
				895	}
				896
				897	/**
				898	* docbCheckAutoClose:
				899	* @newtag: The new tag name
				900	* @oldtag: The old tag name
				901	*
				902	* Checks wether the new tag is one of the registered valid tags for closing old.
				903	* Initialize the docbStartCloseIndex for fast lookup of closing tags names.
				904	*
				905	* Returns 0 if no, 1 if yes.
				906	*/
				907	static int
				908	docbCheckAutoClose(const xmlChar newtag, const xmlChar oldtag) {
				909	int i, indx;
				910	const char **closed = NULL;
				911
				912	if (docbStartCloseIndexinitialized == 0) docbInitAutoClose();
				913
				914	/* inefficient, but not a big deal */
				915	for (indx = 0; indx < 100;indx++) {
				916	closed = docbStartCloseIndex[indx];
				917	if (closed == NULL) return(0);
				918	if (xmlStrEqual(BAD_CAST *closed, newtag)) break;
				919	}
				920
				921	i = closed - docbStartClose;
				922	i++;
				923	while (docbStartClose[i] != NULL) {
				924	if (xmlStrEqual(BAD_CAST docbStartClose[i], oldtag)) {
				925	return(1);
				926	}
				927	i++;
				928	}
				929	return(0);
				930	}
				931
				932	/**
				933	* docbAutoCloseOnClose:
				934	* @ctxt: an SGML parser context
				935	* @newtag: The new tag name
				936	*
				937	* The HTmL DtD allows an ending tag to implicitely close other tags.
				938	*/
				939	static void
				940	docbAutoCloseOnClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) {
				941	docbElemDescPtr info;
				942	xmlChar *oldname;
				943	int i;
				944
				945	if ((newtag[0] == '/') && (newtag[1] == 0))
				946	return;
				947
				948	#ifdef DEBUG
				949	xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
				950	for (i = 0;i < ctxt->nameNr;i++)
				951	xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]);
				952	#endif
				953
				954	for (i = (ctxt->nameNr - 1);i >= 0;i--) {
				955	if (xmlStrEqual(newtag, ctxt->nameTab[i])) break;
				956	}
				957	if (i < 0) return;
				958
				959	while (!xmlStrEqual(newtag, ctxt->name)) {
				960	info = docbTagLookup(ctxt->name);
				961	if ((info == NULL) \|\| (info->endTag == 1)) {
				962	#ifdef DEBUG
				963	xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
				964	#endif
				965	} else {
				966	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				967	ctxt->sax->error(ctxt->userData,
				968	"Opening and ending tag mismatch: %s and %s\n",
				969	newtag, ctxt->name);
				970	ctxt->wellFormed = 0;
				971	}
				972	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				973	ctxt->sax->endElement(ctxt->userData, ctxt->name);
				974	oldname = docbnamePop(ctxt);
				975	if (oldname != NULL) {
				976	#ifdef DEBUG
				977	xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: popped %s\n", oldname);
				978	#endif
				979	xmlFree(oldname);
				980	}
				981	}
				982	}
				983
				984	/**
				985	* docbAutoClose:
				986	* @ctxt: an SGML parser context
				987	* @newtag: The new tag name or NULL
				988	*
				989	* The HTmL DtD allows a tag to implicitely close other tags.
				990	* The list is kept in docbStartClose array. This function is
				991	* called when a new tag has been detected and generates the
				992	* appropriates closes if possible/needed.
				993	* If newtag is NULL this mean we are at the end of the resource
				994	* and we should check
				995	*/
				996	static void
				997	docbAutoClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) {
				998	xmlChar *oldname;
				999	while ((newtag != NULL) && (ctxt->name != NULL) &&
				1000	(docbCheckAutoClose(newtag, ctxt->name))) {
				1001	#ifdef DEBUG
				1002	xmlGenericError(xmlGenericErrorContext,"docbAutoClose: %s closes %s\n", newtag, ctxt->name);
				1003	#endif
				1004	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				1005	ctxt->sax->endElement(ctxt->userData, ctxt->name);
				1006	oldname = docbnamePop(ctxt);
				1007	if (oldname != NULL) {
				1008	#ifdef DEBUG
				1009	xmlGenericError(xmlGenericErrorContext,"docbAutoClose: popped %s\n", oldname);
				1010	#endif
				1011	xmlFree(oldname);
				1012	}
				1013	}
				1014	}
				1015
				1016	/**
				1017	* docbAutoCloseTag:
				1018	* @doc: the SGML document
				1019	* @name: The tag name
				1020	* @elem: the SGML element
				1021	*
				1022	* The HTmL DtD allows a tag to implicitely close other tags.
				1023	* The list is kept in docbStartClose array. This function checks
				1024	* if the element or one of it's children would autoclose the
				1025	* given tag.
				1026	*
				1027	* Returns 1 if autoclose, 0 otherwise
				1028	*/
				1029	static int
				1030	docbAutoCloseTag(docbDocPtr doc, const xmlChar *name, docbNodePtr elem) {
				1031	docbNodePtr child;
				1032
				1033	if (elem == NULL) return(1);
				1034	if (xmlStrEqual(name, elem->name)) return(0);
				1035	if (docbCheckAutoClose(elem->name, name)) return(1);
				1036	child = elem->children;
				1037	while (child != NULL) {
				1038	if (docbAutoCloseTag(doc, name, child)) return(1);
				1039	child = child->next;
				1040	}
				1041	return(0);
				1042	}
				1043
				1044	#if 0
				1045	/**
				1046	* docbIsAutoClosed:
				1047	* @doc: the SGML document
				1048	* @elem: the SGML element
				1049	*
				1050	* The list is kept in docbStartClose array. This function checks
				1051	* if a tag is autoclosed by one of it's child
				1052	*
				1053	* Returns 1 if autoclosed, 0 otherwise
				1054	*/
				1055	static int
				1056	docbIsAutoClosed(docbDocPtr doc, docbNodePtr elem) {
				1057	docbNodePtr child;
				1058
				1059	if (elem == NULL) return(1);
				1060	child = elem->children;
				1061	while (child != NULL) {
				1062	if (docbAutoCloseTag(doc, elem->name, child)) return(1);
				1063	child = child->next;
				1064	}
				1065	return(0);
				1066	}
				1067	#endif
				1068
				1069	/**
				1070	* docbCheckParagraph
				1071	* @ctxt: an SGML parser context
				1072	*
				1073	* Check whether a p element need to be implied before inserting
				1074	* characters in the current element.
				1075	*
				1076	* Returns 1 if a paragraph has been inserted, 0 if not and -1
				1077	* in case of error.
				1078	*/
				1079
				1080	static int
				1081	docbCheckParagraph(docbParserCtxtPtr ctxt) {
				1082	const xmlChar *tag;
				1083	int i;
				1084
				1085	if (ctxt == NULL)
				1086	return(-1);
				1087	tag = ctxt->name;
				1088	if (tag == NULL) {
				1089	docbAutoClose(ctxt, BAD_CAST"p");
				1090	docbnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
				1091	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
				1092	ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
				1093	return(1);
				1094	}
				1095	for (i = 0; docbNoContentElements[i] != NULL; i++) {
				1096	if (xmlStrEqual(tag, BAD_CAST docbNoContentElements[i])) {
				1097	#ifdef DEBUG
				1098	xmlGenericError(xmlGenericErrorContext,"Implied element paragraph\n");
				1099	#endif
				1100	docbAutoClose(ctxt, BAD_CAST"p");
				1101	docbnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
				1102	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
				1103	ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
				1104	return(1);
				1105	}
				1106	}
				1107	return(0);
				1108	}
				1109
				1110	/************************************************************************
				1111	* *
				1112	* The list of SGML predefined entities *
				1113	* *
				1114	************************************************************************/
				1115
				1116
				1117	static docbEntityDesc
				1118	docbookEntitiesTable[] = {
				1119	/*
				1120	* the 4 absolute ones, plus apostrophe.
				1121	*/
				1122	{ 0x0026, "amp", "AMPERSAND" },
				1123	{ 0x003C, "lt", "LESS-THAN SIGN" },
				1124
				1125	/*
				1126	* Converted with VI macros from docbook ent files
				1127	*/
				1128	{ 0x0021, "excl", "EXCLAMATION MARK" },
				1129	{ 0x0022, "quot", "QUOTATION MARK" },
				1130	{ 0x0023, "num", "NUMBER SIGN" },
				1131	{ 0x0024, "dollar", "DOLLAR SIGN" },
				1132	{ 0x0025, "percnt", "PERCENT SIGN" },
				1133	{ 0x0027, "apos", "APOSTROPHE" },
				1134	{ 0x0028, "lpar", "LEFT PARENTHESIS" },
				1135	{ 0x0029, "rpar", "RIGHT PARENTHESIS" },
				1136	{ 0x002A, "ast", "ASTERISK OPERATOR" },
				1137	{ 0x002B, "plus", "PLUS SIGN" },
				1138	{ 0x002C, "comma", "COMMA" },
				1139	{ 0x002D, "hyphen", "HYPHEN-MINUS" },
				1140	{ 0x002E, "period", "FULL STOP" },
				1141	{ 0x002F, "sol", "SOLIDUS" },
				1142	{ 0x003A, "colon", "COLON" },
				1143	{ 0x003B, "semi", "SEMICOLON" },
				1144	{ 0x003D, "equals", "EQUALS SIGN" },
				1145	{ 0x003E, "gt", "GREATER-THAN SIGN" },
				1146	{ 0x003F, "quest", "QUESTION MARK" },
				1147	{ 0x0040, "commat", "COMMERCIAL AT" },
				1148	{ 0x005B, "lsqb", "LEFT SQUARE BRACKET" },
				1149	{ 0x005C, "bsol", "REVERSE SOLIDUS" },
				1150	{ 0x005D, "rsqb", "RIGHT SQUARE BRACKET" },
				1151	{ 0x005E, "circ", "RING OPERATOR" },
				1152	{ 0x005F, "lowbar", "LOW LINE" },
				1153	{ 0x0060, "grave", "GRAVE ACCENT" },
				1154	{ 0x007B, "lcub", "LEFT CURLY BRACKET" },
				1155	{ 0x007C, "verbar", "VERTICAL LINE" },
				1156	{ 0x007D, "rcub", "RIGHT CURLY BRACKET" },
				1157	{ 0x00A0, "nbsp", "NO-BREAK SPACE" },
				1158	{ 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" },
				1159	{ 0x00A2, "cent", "CENT SIGN" },
				1160	{ 0x00A3, "pound", "POUND SIGN" },
				1161	{ 0x00A4, "curren", "CURRENCY SIGN" },
				1162	{ 0x00A5, "yen", "YEN SIGN" },
				1163	{ 0x00A6, "brvbar", "BROKEN BAR" },
				1164	{ 0x00A7, "sect", "SECTION SIGN" },
				1165	{ 0x00A8, "die", "" },
				1166	{ 0x00A8, "Dot", "" },
				1167	{ 0x00A8, "uml", "" },
				1168	{ 0x00A9, "copy", "COPYRIGHT SIGN" },
				1169	{ 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" },
				1170	{ 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" },
				1171	{ 0x00AC, "not", "NOT SIGN" },
				1172	{ 0x00AD, "shy", "SOFT HYPHEN" },
				1173	{ 0x00AE, "reg", "REG TRADE MARK SIGN" },
				1174	{ 0x00AF, "macr", "MACRON" },
				1175	{ 0x00B0, "deg", "DEGREE SIGN" },
				1176	{ 0x00B1, "plusmn", "PLUS-MINUS SIGN" },
				1177	{ 0x00B2, "sup2", "SUPERSCRIPT TWO" },
				1178	{ 0x00B3, "sup3", "SUPERSCRIPT THREE" },
				1179	{ 0x00B4, "acute", "ACUTE ACCENT" },
				1180	{ 0x00B5, "micro", "MICRO SIGN" },
				1181	{ 0x00B6, "para", "PILCROW SIGN" },
				1182	{ 0x00B7, "middot", "MIDDLE DOT" },
				1183	{ 0x00B8, "cedil", "CEDILLA" },
				1184	{ 0x00B9, "sup1", "SUPERSCRIPT ONE" },
				1185	{ 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" },
				1186	{ 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" },
				1187	{ 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" },
				1188	{ 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" },
				1189	{ 0x00BD, "half", "VULGAR FRACTION ONE HALF" },
				1190	{ 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" },
				1191	{ 0x00BF, "iquest", "INVERTED QUESTION MARK" },
				1192	{ 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" },
				1193	{ 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" },
				1194	{ 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" },
				1195	{ 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" },
				1196	{ 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" },
				1197	{ 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" },
				1198	{ 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" },
				1199	{ 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" },
				1200	{ 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" },
				1201	{ 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" },
				1202	{ 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" },
				1203	{ 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" },
				1204	{ 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" },
				1205	{ 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" },
				1206	{ 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" },
				1207	{ 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" },
				1208	{ 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" },
				1209	{ 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" },
				1210	{ 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" },
				1211	{ 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" },
				1212	{ 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" },
				1213	{ 0x00D5, "Otilde", "LATIN CAPITAL LETTER O WITH TILDE" },
				1214	{ 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" },
				1215	{ 0x00D7, "times", "MULTIPLICATION SIGN" },
				1216	{ 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" },
				1217	{ 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" },
				1218	{ 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" },
				1219	{ 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" },
				1220	{ 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" },
				1221	{ 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" },
				1222	{ 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" },
				1223	{ 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" },
				1224	{ 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" },
				1225	{ 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" },
				1226	{ 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" },
				1227	{ 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" },
				1228	{ 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" },
				1229	{ 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" },
				1230	{ 0x00E6, "aelig", "LATIN SMALL LETTER AE" },
				1231	{ 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" },
				1232	{ 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" },
				1233	{ 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" },
				1234	{ 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" },
				1235	{ 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" },
				1236	{ 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" },
				1237	{ 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" },
				1238	{ 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" },
				1239	{ 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" },
				1240	{ 0x00F0, "eth", "LATIN SMALL LETTER ETH" },
				1241	{ 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" },
				1242	{ 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" },
				1243	{ 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" },
				1244	{ 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" },
				1245	{ 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" },
				1246	{ 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" },
				1247	{ 0x00F7, "divide", "DIVISION SIGN" },
				1248	{ 0x00F8, "oslash", "CIRCLED DIVISION SLASH" },
				1249	{ 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" },
				1250	{ 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" },
				1251	{ 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" },
				1252	{ 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" },
				1253	{ 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" },
				1254	{ 0x00FE, "thorn", "LATIN SMALL LETTER THORN" },
				1255	{ 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" },
				1256	{ 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" },
				1257	{ 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" },
				1258	{ 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" },
				1259	{ 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" },
				1260	{ 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" },
				1261	{ 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" },
				1262	{ 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" },
				1263	{ 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" },
				1264	{ 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" },
				1265	{ 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" },
				1266	{ 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" },
				1267	{ 0x010B, "cdot", "DOT OPERATOR" },
				1268	{ 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" },
				1269	{ 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" },
				1270	{ 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" },
				1271	{ 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" },
				1272	{ 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" },
				1273	{ 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" },
				1274	{ 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" },
				1275	{ 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" },
				1276	{ 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" },
				1277	{ 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" },
				1278	{ 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" },
				1279	{ 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" },
				1280	{ 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" },
				1281	{ 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" },
				1282	{ 0x011C, "Gcirc", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX" },
				1283	{ 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" },
				1284	{ 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" },
				1285	{ 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" },
				1286	{ 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" },
				1287	{ 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" },
				1288	{ 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" },
				1289	{ 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" },
				1290	{ 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" },
				1291	{ 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" },
				1292	{ 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" },
				1293	{ 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" },
				1294	{ 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" },
				1295	{ 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" },
				1296	{ 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" },
				1297	{ 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" },
				1298	{ 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" },
				1299	{ 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" },
				1300	{ 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
				1301	{ 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
				1302	{ 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" },
				1303	{ 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" },
				1304	{ 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" },
				1305	{ 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" },
				1306	{ 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" },
				1307	{ 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" },
				1308	{ 0x0138, "kgreen", "LATIN SMALL LETTER KRA" },
				1309	{ 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" },
				1310	{ 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" },
				1311	{ 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" },
				1312	{ 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" },
				1313	{ 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" },
				1314	{ 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" },
				1315	{ 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" },
				1316	{ 0x0140, "lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" },
				1317	{ 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" },
				1318	{ 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" },
				1319	{ 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" },
				1320	{ 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" },
				1321	{ 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" },
				1322	{ 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" },
				1323	{ 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" },
				1324	{ 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" },
				1325	{ 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" },
				1326	{ 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" },
				1327	{ 0x014B, "eng", "LATIN SMALL LETTER ENG" },
				1328	{ 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" },
				1329	{ 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" },
				1330	{ 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" },
				1331	{ 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" },
				1332	{ 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" },
				1333	{ 0x0153, "oelig", "LATIN SMALL LIGATURE OE" },
				1334	{ 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" },
				1335	{ 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" },
				1336	{ 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" },
				1337	{ 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" },
				1338	{ 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" },
				1339	{ 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" },
				1340	{ 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" },
				1341	{ 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" },
				1342	{ 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" },
				1343	{ 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" },
				1344	{ 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" },
				1345	{ 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" },
				1346	{ 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" },
				1347	{ 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" },
				1348	{ 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" },
				1349	{ 0x0163, "tcedil", "LATIN SMALL LETTER T WITH CEDILLA" },
				1350	{ 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" },
				1351	{ 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" },
				1352	{ 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" },
				1353	{ 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" },
				1354	{ 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" },
				1355	{ 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" },
				1356	{ 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" },
				1357	{ 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" },
				1358	{ 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" },
				1359	{ 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" },
				1360	{ 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" },
				1361	{ 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" },
				1362	{ 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" },
				1363	{ 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" },
				1364	{ 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" },
				1365	{ 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" },
				1366	{ 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" },
				1367	{ 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" },
				1368	{ 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" },
				1369	{ 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" },
				1370	{ 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" },
				1371	{ 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" },
				1372	{ 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" },
				1373	{ 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" },
				1374	{ 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" },
				1375	{ 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" },
				1376	{ 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" },
				1377	{ 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" },
				1378	{ 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" },
				1379	{ 0x02C7, "caron", "CARON" },
				1380	{ 0x02D8, "breve", "BREVE" },
				1381	{ 0x02D9, "dot", "DOT ABOVE" },
				1382	{ 0x02DA, "ring", "RING ABOVE" },
				1383	{ 0x02DB, "ogon", "OGONEK" },
				1384	{ 0x02DC, "tilde", "TILDE" },
				1385	{ 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" },
				1386	{ 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" },
				1387	{ 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" },
				1388	{ 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" },
				1389	{ 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" },
				1390	{ 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" },
				1391	{ 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" },
				1392	{ 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" },
				1393	{ 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" },
				1394	{ 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" },
				1395	{ 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" },
				1396	{ 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" },
				1397	{ 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" },
				1398	{ 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" },
				1399	{ 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" },
				1400	{ 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" },
				1401	{ 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" },
				1402	{ 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" },
				1403	{ 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" },
				1404	{ 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" },
				1405	{ 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" },
				1406	{ 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" },
				1407	{ 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" },
				1408	{ 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" },
				1409	{ 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" },
				1410	{ 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" },
				1411	{ 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" },
				1412	{ 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" },
				1413	{ 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" },
				1414	{ 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" },
				1415	{ 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" },
				1416	{ 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" },
				1417	{ 0x039E, "Xi", "GREEK CAPITAL LETTER XI" },
				1418	{ 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" },
				1419	{ 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" },
				1420	{ 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" },
				1421	{ 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" },
				1422	{ 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" },
				1423	{ 0x03A3, "b.Sigma", "GREEK CAPITAL LETTER SIGMA" },
				1424	{ 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" },
				1425	{ 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" },
				1426	{ 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" },
				1427	{ 0x03A5, "Ugr", "" },
				1428	{ 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" },
				1429	{ 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" },
				1430	{ 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" },
				1431	{ 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" },
				1432	{ 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" },
				1433	{ 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" },
				1434	{ 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" },
				1435	{ 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" },
				1436	{ 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" },
				1437	{ 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" },
				1438	{ 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" },
				1439	{ 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" },
				1440	{ 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" },
				1441	{ 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" },
				1442	{ 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" },
				1443	{ 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" },
				1444	{ 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" },
				1445	{ 0x03B1, "agr", "" },
				1446	{ 0x03B1, "alpha", "" },
				1447	{ 0x03B1, "b.alpha", "" },
				1448	{ 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" },
				1449	{ 0x03B2, "beta", "GREEK SMALL LETTER BETA" },
				1450	{ 0x03B2, "bgr", "GREEK SMALL LETTER BETA" },
				1451	{ 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" },
				1452	{ 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" },
				1453	{ 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" },
				1454	{ 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" },
				1455	{ 0x03B4, "delta", "GREEK SMALL LETTER DELTA" },
				1456	{ 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" },
				1457	{ 0x03B5, "b.epsi", "" },
				1458	{ 0x03B5, "b.epsis", "" },
				1459	{ 0x03B5, "b.epsiv", "" },
				1460	{ 0x03B5, "egr", "" },
				1461	{ 0x03B5, "epsiv", "" },
				1462	{ 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" },
				1463	{ 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" },
				1464	{ 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" },
				1465	{ 0x03B7, "b.eta", "GREEK SMALL LETTER ETA" },
				1466	{ 0x03B7, "eegr", "GREEK SMALL LETTER ETA" },
				1467	{ 0x03B7, "eta", "GREEK SMALL LETTER ETA" },
				1468	{ 0x03B8, "b.thetas", "" },
				1469	{ 0x03B8, "thetas", "" },
				1470	{ 0x03B8, "thgr", "" },
				1471	{ 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" },
				1472	{ 0x03B9, "igr", "GREEK SMALL LETTER IOTA" },
				1473	{ 0x03B9, "iota", "GREEK SMALL LETTER IOTA" },
				1474	{ 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" },
				1475	{ 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" },
				1476	{ 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" },
				1477	{ 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" },
				1478	{ 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" },
				1479	{ 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" },
				1480	{ 0x03BC, "b.mu", "GREEK SMALL LETTER MU" },
				1481	{ 0x03BC, "mgr", "GREEK SMALL LETTER MU" },
				1482	{ 0x03BC, "mu", "GREEK SMALL LETTER MU" },
				1483	{ 0x03BD, "b.nu", "GREEK SMALL LETTER NU" },
				1484	{ 0x03BD, "ngr", "GREEK SMALL LETTER NU" },
				1485	{ 0x03BD, "nu", "GREEK SMALL LETTER NU" },
				1486	{ 0x03BE, "b.xi", "GREEK SMALL LETTER XI" },
				1487	{ 0x03BE, "xgr", "GREEK SMALL LETTER XI" },
				1488	{ 0x03BE, "xi", "GREEK SMALL LETTER XI" },
				1489	{ 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" },
				1490	{ 0x03C0, "b.pi", "GREEK SMALL LETTER PI" },
				1491	{ 0x03C0, "pgr", "GREEK SMALL LETTER PI" },
				1492	{ 0x03C0, "pi", "GREEK SMALL LETTER PI" },
				1493	{ 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" },
				1494	{ 0x03C1, "rgr", "GREEK SMALL LETTER RHO" },
				1495	{ 0x03C1, "rho", "GREEK SMALL LETTER RHO" },
				1496	{ 0x03C2, "b.sigmav", "" },
				1497	{ 0x03C2, "sfgr", "" },
				1498	{ 0x03C2, "sigmav", "" },
				1499	{ 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" },
				1500	{ 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" },
				1501	{ 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" },
				1502	{ 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" },
				1503	{ 0x03C4, "tau", "GREEK SMALL LETTER TAU" },
				1504	{ 0x03C4, "tgr", "GREEK SMALL LETTER TAU" },
				1505	{ 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" },
				1506	{ 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" },
				1507	{ 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" },
				1508	{ 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" },
				1509	{ 0x03C6, "phgr", "GREEK SMALL LETTER PHI" },
				1510	{ 0x03C6, "phis", "GREEK SMALL LETTER PHI" },
				1511	{ 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" },
				1512	{ 0x03C7, "chi", "GREEK SMALL LETTER CHI" },
				1513	{ 0x03C7, "khgr", "GREEK SMALL LETTER CHI" },
				1514	{ 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" },
				1515	{ 0x03C8, "psgr", "GREEK SMALL LETTER PSI" },
				1516	{ 0x03C8, "psi", "GREEK SMALL LETTER PSI" },
				1517	{ 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" },
				1518	{ 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" },
				1519	{ 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" },
				1520	{ 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" },
				1521	{ 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" },
				1522	{ 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" },
				1523	{ 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" },
				1524	{ 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" },
				1525	{ 0x03D1, "b.thetav", "" },
				1526	{ 0x03D1, "thetav", "" },
				1527	{ 0x03D2, "b.Upsi", "" },
				1528	{ 0x03D2, "Upsi", "" },
				1529	{ 0x03D5, "b.phiv", "GREEK PHI SYMBOL" },
				1530	{ 0x03D5, "phiv", "GREEK PHI SYMBOL" },
				1531	{ 0x03D6, "b.piv", "GREEK PI SYMBOL" },
				1532	{ 0x03D6, "piv", "GREEK PI SYMBOL" },
				1533	{ 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" },
				1534	{ 0x03DC, "gammad", "GREEK LETTER DIGAMMA" },
				1535	{ 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" },
				1536	{ 0x03F0, "kappav", "GREEK KAPPA SYMBOL" },
				1537	{ 0x03F1, "b.rhov", "GREEK RHO SYMBOL" },
				1538	{ 0x03F1, "rhov", "GREEK RHO SYMBOL" },
				1539	{ 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" },
				1540	{ 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" },
				1541	{ 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" },
				1542	{ 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" },
				1543	{ 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" },
				1544	{ 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" },
				1545	{ 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" },
				1546	{ 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" },
				1547	{ 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" },
				1548	{ 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" },
				1549	{ 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" },
				1550	{ 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" },
				1551	{ 0x040E, "Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" },
				1552	{ 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" },
				1553	{ 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" },
				1554	{ 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" },
				1555	{ 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" },
				1556	{ 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" },
				1557	{ 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" },
				1558	{ 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" },
				1559	{ 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" },
				1560	{ 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" },
				1561	{ 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" },
				1562	{ 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" },
				1563	{ 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" },
				1564	{ 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" },
				1565	{ 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" },
				1566	{ 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" },
				1567	{ 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" },
				1568	{ 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" },
				1569	{ 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" },
				1570	{ 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" },
				1571	{ 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" },
				1572	{ 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" },
				1573	{ 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" },
				1574	{ 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" },
				1575	{ 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" },
				1576	{ 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" },
				1577	{ 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" },
				1578	{ 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" },
				1579	{ 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" },
				1580	{ 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" },
				1581	{ 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" },
				1582	{ 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" },
				1583	{ 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" },
				1584	{ 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" },
				1585	{ 0x0430, "acy", "CYRILLIC SMALL LETTER A" },
				1586	{ 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" },
				1587	{ 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" },
				1588	{ 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" },
				1589	{ 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" },
				1590	{ 0x0435, "iecy", "CYRILLIC SMALL LETTER IE" },
				1591	{ 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" },
				1592	{ 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" },
				1593	{ 0x0438, "icy", "CYRILLIC SMALL LETTER I" },
				1594	{ 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" },
				1595	{ 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" },
				1596	{ 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" },
				1597	{ 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" },
				1598	{ 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" },
				1599	{ 0x043E, "ocy", "CYRILLIC SMALL LETTER O" },
				1600	{ 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" },
				1601	{ 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" },
				1602	{ 0x0441, "scy", "CYRILLIC SMALL LETTER ES" },
				1603	{ 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" },
				1604	{ 0x0443, "ucy", "CYRILLIC SMALL LETTER U" },
				1605	{ 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" },
				1606	{ 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" },
				1607	{ 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" },
				1608	{ 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" },
				1609	{ 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" },
				1610	{ 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" },
				1611	{ 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" },
				1612	{ 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" },
				1613	{ 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" },
				1614	{ 0x044D, "ecy", "CYRILLIC SMALL LETTER E" },
				1615	{ 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" },
				1616	{ 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" },
				1617	{ 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" },
				1618	{ 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" },
				1619	{ 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" },
				1620	{ 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" },
				1621	{ 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" },
				1622	{ 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" },
				1623	{ 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" },
				1624	{ 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" },
				1625	{ 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" },
				1626	{ 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" },
				1627	{ 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" },
				1628	{ 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" },
				1629	{ 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT U" },
				1630	{ 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" },
				1631	{ 0x2002, "ensp", "EN SPACE" },
				1632	{ 0x2003, "emsp", "EM SPACE" },
				1633	{ 0x2004, "emsp13", "THREE-PER-EM SPACE" },
				1634	{ 0x2005, "emsp14", "FOUR-PER-EM SPACE" },
				1635	{ 0x2007, "numsp", "FIGURE SPACE" },
				1636	{ 0x2008, "puncsp", "PUNCTUATION SPACE" },
				1637	{ 0x2009, "thinsp", "THIN SPACE" },
				1638	{ 0x200A, "hairsp", "HAIR SPACE" },
				1639	{ 0x2010, "dash", "HYPHEN" },
				1640	{ 0x2013, "ndash", "EN DASH" },
				1641	{ 0x2014, "mdash", "EM DASH" },
				1642	{ 0x2015, "horbar", "HORIZONTAL BAR" },
				1643	{ 0x2016, "Verbar", "DOUBLE VERTICAL LINE" },
				1644	{ 0x2018, "lsquo", "" },
				1645	{ 0x2018, "rsquor", "" },
				1646	{ 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" },
				1647	{ 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" },
				1648	{ 0x201C, "ldquo", "" },
				1649	{ 0x201C, "rdquor", "" },
				1650	{ 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" },
				1651	{ 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" },
				1652	{ 0x2020, "dagger", "DAGGER" },
				1653	{ 0x2021, "Dagger", "DOUBLE DAGGER" },
				1654	{ 0x2022, "bull", "BULLET" },
				1655	{ 0x2025, "nldr", "TWO DOT LEADER" },
				1656	{ 0x2026, "hellip", "HORIZONTAL ELLIPSIS" },
				1657	{ 0x2026, "mldr", "HORIZONTAL ELLIPSIS" },
				1658	{ 0x2030, "permil", "PER MILLE SIGN" },
				1659	{ 0x2032, "prime", "PRIME" },
				1660	{ 0x2032, "vprime", "PRIME" },
				1661	{ 0x2033, "Prime", "DOUBLE PRIME" },
				1662	{ 0x2034, "tprime", "TRIPLE PRIME" },
				1663	{ 0x2035, "bprime", "REVERSED PRIME" },
				1664	{ 0x2041, "caret", "CARET" },
				1665	{ 0x2043, "hybull", "HYPHEN BULLET" },
				1666	{ 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" },
				1667	{ 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" },
				1668	{ 0x2105, "incare", "CARE OF" },
				1669	{ 0x210B, "hamilt", "SCRIPT CAPITAL H" },
				1670	{ 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" },
				1671	{ 0x2111, "image", "BLACK-LETTER CAPITAL I" },
				1672	{ 0x2112, "lagran", "SCRIPT CAPITAL L" },
				1673	{ 0x2113, "ell", "SCRIPT SMALL L" },
				1674	{ 0x2116, "numero", "NUMERO SIGN" },
				1675	{ 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" },
				1676	{ 0x2118, "weierp", "SCRIPT CAPITAL P" },
				1677	{ 0x211C, "real", "BLACK-LETTER CAPITAL R" },
				1678	{ 0x211E, "rx", "PRESCRIPTION TAKE" },
				1679	{ 0x2122, "trade", "TRADE MARK SIGN" },
				1680	{ 0x2126, "ohm", "OHM SIGN" },
				1681	{ 0x212B, "angst", "ANGSTROM SIGN" },
				1682	{ 0x212C, "bernou", "SCRIPT CAPITAL B" },
				1683	{ 0x2133, "phmmat", "SCRIPT CAPITAL M" },
				1684	{ 0x2134, "order", "SCRIPT SMALL O" },
				1685	{ 0x2135, "aleph", "ALEF SYMBOL" },
				1686	{ 0x2136, "beth", "BET SYMBOL" },
				1687	{ 0x2137, "gimel", "GIMEL SYMBOL" },
				1688	{ 0x2138, "daleth", "DALET SYMBOL" },
				1689	{ 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" },
				1690	{ 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" },
				1691	{ 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" },
				1692	{ 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" },
				1693	{ 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" },
				1694	{ 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" },
				1695	{ 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" },
				1696	{ 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" },
				1697	{ 0x215B, "frac18", "" },
				1698	{ 0x215C, "frac38", "" },
				1699	{ 0x215D, "frac58", "" },
				1700	{ 0x215E, "frac78", "" },
				1701	{ 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" },
				1702	{ 0x2191, "uarr", "UPWARDS ARROW" },
				1703	{ 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" },
				1704	{ 0x2193, "darr", "DOWNWARDS ARROW" },
				1705	{ 0x2194, "harr", "LEFT RIGHT ARROW" },
				1706	{ 0x2194, "xhArr", "LEFT RIGHT ARROW" },
				1707	{ 0x2194, "xharr", "LEFT RIGHT ARROW" },
				1708	{ 0x2195, "varr", "UP DOWN ARROW" },
				1709	{ 0x2196, "nwarr", "NORTH WEST ARROW" },
				1710	{ 0x2197, "nearr", "NORTH EAST ARROW" },
				1711	{ 0x2198, "drarr", "SOUTH EAST ARROW" },
				1712	{ 0x2199, "dlarr", "SOUTH WEST ARROW" },
				1713	{ 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" },
				1714	{ 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" },
				1715	{ 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" },
				1716	{ 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" },
				1717	{ 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" },
				1718	{ 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" },
				1719	{ 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" },
				1720	{ 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" },
				1721	{ 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" },
				1722	{ 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" },
				1723	{ 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" },
				1724	{ 0x21AC, "rarrlp", "RIGHTWARDS ARROW WITH LOOP" },
				1725	{ 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" },
				1726	{ 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" },
				1727	{ 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" },
				1728	{ 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" },
				1729	{ 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" },
				1730	{ 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" },
				1731	{ 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" },
				1732	{ 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" },
				1733	{ 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" },
				1734	{ 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" },
				1735	{ 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" },
				1736	{ 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" },
				1737	{ 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" },
				1738	{ 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" },
				1739	{ 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" },
				1740	{ 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" },
				1741	{ 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" },
				1742	{ 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" },
				1743	{ 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" },
				1744	{ 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" },
				1745	{ 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" },
				1746	{ 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" },
				1747	{ 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" },
				1748	{ 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" },
				1749	{ 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" },
				1750	{ 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" },
				1751	{ 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" },
				1752	{ 0x21D0, "lArr", "LEFTWARDS ARROW" },
				1753	{ 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" },
				1754	{ 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" },
				1755	{ 0x21D2, "rArr", "RIGHTWARDS ARROW" },
				1756	{ 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" },
				1757	{ 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" },
				1758	{ 0x21D4, "hArr", "" },
				1759	{ 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" },
				1760	{ 0x21D5, "vArr", "UP DOWN DOUBLE ARROW" },
				1761	{ 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" },
				1762	{ 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" },
				1763	{ 0x2200, "forall", "" },
				1764	{ 0x2201, "comp", "COMPLEMENT" },
				1765	{ 0x2202, "part", "" },
				1766	{ 0x2203, "exist", "" },
				1767	{ 0x2204, "nexist", "THERE DOES NOT EXIST" },
				1768	{ 0x2205, "empty", "" },
				1769	{ 0x2207, "nabla", "NABLA" },
				1770	{ 0x2209, "notin", "" },
				1771	{ 0x220A, "epsi", "" },
				1772	{ 0x220A, "epsis", "" },
				1773	{ 0x220A, "isin", "" },
				1774	{ 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" },
				1775	{ 0x220D, "ni", "" },
				1776	{ 0x220F, "prod", "N-ARY PRODUCT" },
				1777	{ 0x2210, "amalg", "N-ARY COPRODUCT" },
				1778	{ 0x2210, "coprod", "N-ARY COPRODUCT" },
				1779	{ 0x2210, "samalg", "" },
				1780	{ 0x2211, "sum", "N-ARY SUMMATION" },
				1781	{ 0x2212, "minus", "MINUS SIGN" },
				1782	{ 0x2213, "mnplus", "" },
				1783	{ 0x2214, "plusdo", "DOT PLUS" },
				1784	{ 0x2216, "setmn", "SET MINUS" },
				1785	{ 0x2216, "ssetmn", "SET MINUS" },
				1786	{ 0x2217, "lowast", "ASTERISK OPERATOR" },
				1787	{ 0x2218, "compfn", "RING OPERATOR" },
				1788	{ 0x221A, "radic", "" },
				1789	{ 0x221D, "prop", "" },
				1790	{ 0x221D, "vprop", "" },
				1791	{ 0x221E, "infin", "" },
				1792	{ 0x221F, "ang90", "RIGHT ANGLE" },
				1793	{ 0x2220, "ang", "ANGLE" },
				1794	{ 0x2221, "angmsd", "MEASURED ANGLE" },
				1795	{ 0x2222, "angsph", "" },
				1796	{ 0x2223, "mid", "" },
				1797	{ 0x2224, "nmid", "DOES NOT DIVIDE" },
				1798	{ 0x2225, "par", "PARALLEL TO" },
				1799	{ 0x2225, "spar", "PARALLEL TO" },
				1800	{ 0x2226, "npar", "NOT PARALLEL TO" },
				1801	{ 0x2226, "nspar", "NOT PARALLEL TO" },
				1802	{ 0x2227, "and", "" },
				1803	{ 0x2228, "or", "" },
				1804	{ 0x2229, "cap", "" },
				1805	{ 0x222A, "cup", "" },
				1806	{ 0x222B, "int", "" },
				1807	{ 0x222E, "conint", "" },
				1808	{ 0x2234, "there4", "" },
				1809	{ 0x2235, "becaus", "BECAUSE" },
				1810	{ 0x223C, "sim", "" },
				1811	{ 0x223C, "thksim", "TILDE OPERATOR" },
				1812	{ 0x223D, "bsim", "" },
				1813	{ 0x2240, "wreath", "WREATH PRODUCT" },
				1814	{ 0x2241, "nsim", "" },
				1815	{ 0x2243, "sime", "" },
				1816	{ 0x2244, "nsime", "" },
				1817	{ 0x2245, "cong", "" },
				1818	{ 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" },
				1819	{ 0x2248, "ap", "" },
				1820	{ 0x2248, "thkap", "ALMOST EQUAL TO" },
				1821	{ 0x2249, "nap", "NOT ALMOST EQUAL TO" },
				1822	{ 0x224A, "ape", "" },
				1823	{ 0x224C, "bcong", "ALL EQUAL TO" },
				1824	{ 0x224D, "asymp", "EQUIVALENT TO" },
				1825	{ 0x224E, "bump", "" },
				1826	{ 0x224F, "bumpe", "" },
				1827	{ 0x2250, "esdot", "" },
				1828	{ 0x2251, "eDot", "" },
				1829	{ 0x2252, "efDot", "" },
				1830	{ 0x2253, "erDot", "" },
				1831	{ 0x2254, "colone", "" },
				1832	{ 0x2255, "ecolon", "" },
				1833	{ 0x2256, "ecir", "" },
				1834	{ 0x2257, "cire", "" },
				1835	{ 0x2259, "wedgeq", "ESTIMATES" },
				1836	{ 0x225C, "trie", "" },
				1837	{ 0x2260, "ne", "" },
				1838	{ 0x2261, "equiv", "" },
				1839	{ 0x2262, "nequiv", "NOT IDENTICAL TO" },
				1840	{ 0x2264, "le", "" },
				1841	{ 0x2264, "les", "LESS-THAN OR EQUAL TO" },
				1842	{ 0x2265, "ge", "GREATER-THAN OR EQUAL TO" },
				1843	{ 0x2265, "ges", "GREATER-THAN OR EQUAL TO" },
				1844	{ 0x2266, "lE", "" },
				1845	{ 0x2267, "gE", "" },
				1846	{ 0x2268, "lnE", "" },
				1847	{ 0x2268, "lne", "" },
				1848	{ 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" },
				1849	{ 0x2269, "gnE", "" },
				1850	{ 0x2269, "gne", "" },
				1851	{ 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" },
				1852	{ 0x226A, "Lt", "MUCH LESS-THAN" },
				1853	{ 0x226B, "Gt", "MUCH GREATER-THAN" },
				1854	{ 0x226C, "twixt", "BETWEEN" },
				1855	{ 0x226E, "nlt", "NOT LESS-THAN" },
				1856	{ 0x226F, "ngt", "NOT GREATER-THAN" },
				1857	{ 0x2270, "nlE", "" },
				1858	{ 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" },
				1859	{ 0x2270, "nles", "" },
				1860	{ 0x2271, "ngE", "" },
				1861	{ 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" },
				1862	{ 0x2271, "nges", "" },
				1863	{ 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" },
				1864	{ 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" },
				1865	{ 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" },
				1866	{ 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" },
				1867	{ 0x2276, "lg", "LESS-THAN OR GREATER-THAN" },
				1868	{ 0x2277, "gl", "" },
				1869	{ 0x227A, "pr", "" },
				1870	{ 0x227B, "sc", "" },
				1871	{ 0x227C, "cupre", "" },
				1872	{ 0x227C, "pre", "" },
				1873	{ 0x227D, "sccue", "" },
				1874	{ 0x227D, "sce", "" },
				1875	{ 0x227E, "prap", "" },
				1876	{ 0x227E, "prsim", "" },
				1877	{ 0x227F, "scap", "" },
				1878	{ 0x227F, "scsim", "" },
				1879	{ 0x2280, "npr", "DOES NOT PRECEDE" },
				1880	{ 0x2281, "nsc", "DOES NOT SUCCEED" },
				1881	{ 0x2282, "sub", "" },
				1882	{ 0x2283, "sup", "" },
				1883	{ 0x2284, "nsub", "NOT A SUBSET OF" },
				1884	{ 0x2285, "nsup", "NOT A SUPERSET OF" },
				1885	{ 0x2286, "subE", "" },
				1886	{ 0x2286, "sube", "" },
				1887	{ 0x2287, "supE", "" },
				1888	{ 0x2287, "supe", "" },
				1889	{ 0x2288, "nsubE", "" },
				1890	{ 0x2288, "nsube", "" },
				1891	{ 0x2289, "nsupE", "" },
				1892	{ 0x2289, "nsupe", "" },
				1893	{ 0x228A, "subne", "" },
				1894	{ 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" },
				1895	{ 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" },
				1896	{ 0x228B, "supnE", "" },
				1897	{ 0x228B, "supne", "" },
				1898	{ 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" },
				1899	{ 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" },
				1900	{ 0x228E, "uplus", "MULTISET UNION" },
				1901	{ 0x228F, "sqsub", "" },
				1902	{ 0x2290, "sqsup", "" },
				1903	{ 0x2291, "sqsube", "" },
				1904	{ 0x2292, "sqsupe", "" },
				1905	{ 0x2293, "sqcap", "SQUARE CAP" },
				1906	{ 0x2294, "sqcup", "SQUARE CUP" },
				1907	{ 0x2295, "oplus", "CIRCLED PLUS" },
				1908	{ 0x2296, "ominus", "CIRCLED MINUS" },
				1909	{ 0x2297, "otimes", "CIRCLED TIMES" },
				1910	{ 0x2298, "osol", "CIRCLED DIVISION SLASH" },
				1911	{ 0x2299, "odot", "CIRCLED DOT OPERATOR" },
				1912	{ 0x229A, "ocir", "CIRCLED RING OPERATOR" },
				1913	{ 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" },
				1914	{ 0x229D, "odash", "CIRCLED DASH" },
				1915	{ 0x229E, "plusb", "SQUARED PLUS" },
				1916	{ 0x229F, "minusb", "SQUARED MINUS" },
				1917	{ 0x22A0, "timesb", "SQUARED TIMES" },
				1918	{ 0x22A1, "sdotb", "SQUARED DOT OPERATOR" },
				1919	{ 0x22A2, "vdash", "" },
				1920	{ 0x22A3, "dashv", "" },
				1921	{ 0x22A4, "top", "DOWN TACK" },
				1922	{ 0x22A5, "bottom", "" },
				1923	{ 0x22A5, "perp", "" },
				1924	{ 0x22A7, "models", "MODELS" },
				1925	{ 0x22A8, "vDash", "" },
				1926	{ 0x22A9, "Vdash", "" },
				1927	{ 0x22AA, "Vvdash", "" },
				1928	{ 0x22AC, "nvdash", "DOES NOT PROVE" },
				1929	{ 0x22AD, "nvDash", "NOT TRUE" },
				1930	{ 0x22AE, "nVdash", "DOES NOT FORCE" },
				1931	{ 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" },
				1932	{ 0x22B2, "vltri", "" },
				1933	{ 0x22B3, "vrtri", "" },
				1934	{ 0x22B4, "ltrie", "" },
				1935	{ 0x22B5, "rtrie", "" },
				1936	{ 0x22B8, "mumap", "MULTIMAP" },
				1937	{ 0x22BA, "intcal", "INTERCALATE" },
				1938	{ 0x22BB, "veebar", "" },
				1939	{ 0x22BC, "barwed", "NAND" },
				1940	{ 0x22C4, "diam", "DIAMOND OPERATOR" },
				1941	{ 0x22C5, "sdot", "DOT OPERATOR" },
				1942	{ 0x22C6, "sstarf", "STAR OPERATOR" },
				1943	{ 0x22C6, "star", "STAR OPERATOR" },
				1944	{ 0x22C7, "divonx", "DIVISION TIMES" },
				1945	{ 0x22C8, "bowtie", "" },
				1946	{ 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" },
				1947	{ 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" },
				1948	{ 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" },
				1949	{ 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" },
				1950	{ 0x22CD, "bsime", "" },
				1951	{ 0x22CE, "cuvee", "CURLY LOGICAL OR" },
				1952	{ 0x22CF, "cuwed", "CURLY LOGICAL AND" },
				1953	{ 0x22D0, "Sub", "" },
				1954	{ 0x22D1, "Sup", "" },
				1955	{ 0x22D2, "Cap", "DOUBLE INTERSECTION" },
				1956	{ 0x22D3, "Cup", "DOUBLE UNION" },
				1957	{ 0x22D4, "fork", "" },
				1958	{ 0x22D6, "ldot", "" },
				1959	{ 0x22D7, "gsdot", "" },
				1960	{ 0x22D8, "Ll", "" },
				1961	{ 0x22D9, "Gg", "VERY MUCH GREATER-THAN" },
				1962	{ 0x22DA, "lEg", "" },
				1963	{ 0x22DA, "leg", "" },
				1964	{ 0x22DB, "gEl", "" },
				1965	{ 0x22DB, "gel", "" },
				1966	{ 0x22DC, "els", "" },
				1967	{ 0x22DD, "egs", "" },
				1968	{ 0x22DE, "cuepr", "" },
				1969	{ 0x22DF, "cuesc", "" },
				1970	{ 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" },
				1971	{ 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" },
				1972	{ 0x22E6, "lnsim", "" },
				1973	{ 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" },
				1974	{ 0x22E8, "prnap", "" },
				1975	{ 0x22E8, "prnsim", "" },
				1976	{ 0x22E9, "scnap", "" },
				1977	{ 0x22E9, "scnsim", "" },
				1978	{ 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" },
				1979	{ 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" },
				1980	{ 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" },
				1981	{ 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" },
				1982	{ 0x22EE, "vellip", "" },
				1983	{ 0x2306, "Barwed", "PERSPECTIVE" },
				1984	{ 0x2308, "lceil", "LEFT CEILING" },
				1985	{ 0x2309, "rceil", "RIGHT CEILING" },
				1986	{ 0x230A, "lfloor", "LEFT FLOOR" },
				1987	{ 0x230B, "rfloor", "RIGHT FLOOR" },
				1988	{ 0x230C, "drcrop", "BOTTOM RIGHT CROP" },
				1989	{ 0x230D, "dlcrop", "BOTTOM LEFT CROP" },
				1990	{ 0x230E, "urcrop", "TOP RIGHT CROP" },
				1991	{ 0x230F, "ulcrop", "TOP LEFT CROP" },
				1992	{ 0x2315, "telrec", "TELEPHONE RECORDER" },
				1993	{ 0x2316, "target", "POSITION INDICATOR" },
				1994	{ 0x231C, "ulcorn", "TOP LEFT CORNER" },
				1995	{ 0x231D, "urcorn", "TOP RIGHT CORNER" },
				1996	{ 0x231E, "dlcorn", "BOTTOM LEFT CORNER" },
				1997	{ 0x231F, "drcorn", "BOTTOM RIGHT CORNER" },
				1998	{ 0x2322, "frown", "" },
				1999	{ 0x2322, "sfrown", "FROWN" },
				2000	{ 0x2323, "smile", "" },
				2001	{ 0x2323, "ssmile", "SMILE" },
				2002	{ 0x2423, "blank", "OPEN BOX" },
				2003	{ 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" },
				2004	{ 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" },
				2005	{ 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" },
				2006	{ 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" },
				2007	{ 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" },
				2008	{ 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" },
				2009	{ 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" },
				2010	{ 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" },
				2011	{ 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" },
				2012	{ 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" },
				2013	{ 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" },
				2014	{ 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" },
				2015	{ 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" },
				2016	{ 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" },
				2017	{ 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" },
				2018	{ 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" },
				2019	{ 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" },
				2020	{ 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" },
				2021	{ 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" },
				2022	{ 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" },
				2023	{ 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" },
				2024	{ 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" },
				2025	{ 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" },
				2026	{ 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" },
				2027	{ 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" },
				2028	{ 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" },
				2029	{ 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE" },
				2030	{ 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" },
				2031	{ 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" },
				2032	{ 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" },
				2033	{ 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" },
				2034	{ 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" },
				2035	{ 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" },
				2036	{ 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" },
				2037	{ 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" },
				2038	{ 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" },
				2039	{ 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" },
				2040	{ 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" },
				2041	{ 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" },
				2042	{ 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" },
				2043	{ 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" },
				2044	{ 0x2580, "uhblk", "UPPER HALF BLOCK" },
				2045	{ 0x2584, "lhblk", "LOWER HALF BLOCK" },
				2046	{ 0x2588, "block", "FULL BLOCK" },
				2047	{ 0x2591, "blk14", "LIGHT SHADE" },
				2048	{ 0x2592, "blk12", "MEDIUM SHADE" },
				2049	{ 0x2593, "blk34", "DARK SHADE" },
				2050	{ 0x25A1, "square", "WHITE SQUARE" },
				2051	{ 0x25A1, "squ", "WHITE SQUARE" },
				2052	{ 0x25AA, "squf", "" },
				2053	{ 0x25AD, "rect", "WHITE RECTANGLE" },
				2054	{ 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" },
				2055	{ 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" },
				2056	{ 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" },
				2057	{ 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" },
				2058	{ 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" },
				2059	{ 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" },
				2060	{ 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" },
				2061	{ 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" },
				2062	{ 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" },
				2063	{ 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" },
				2064	{ 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" },
				2065	{ 0x25CA, "loz", "LOZENGE" },
				2066	{ 0x25CB, "cir", "WHITE CIRCLE" },
				2067	{ 0x25CB, "xcirc", "WHITE CIRCLE" },
				2068	{ 0x2605, "starf", "BLACK STAR" },
				2069	{ 0x260E, "phone", "TELEPHONE SIGN" },
				2070	{ 0x2640, "female", "" },
				2071	{ 0x2642, "male", "MALE SIGN" },
				2072	{ 0x2660, "spades", "BLACK SPADE SUIT" },
				2073	{ 0x2663, "clubs", "BLACK CLUB SUIT" },
				2074	{ 0x2665, "hearts", "BLACK HEART SUIT" },
				2075	{ 0x2666, "diams", "BLACK DIAMOND SUIT" },
				2076	{ 0x2669, "sung", "" },
				2077	{ 0x266D, "flat", "MUSIC FLAT SIGN" },
				2078	{ 0x266E, "natur", "MUSIC NATURAL SIGN" },
				2079	{ 0x266F, "sharp", "MUSIC SHARP SIGN" },
				2080	{ 0x2713, "check", "CHECK MARK" },
				2081	{ 0x2717, "cross", "BALLOT X" },
				2082	{ 0x2720, "malt", "MALTESE CROSS" },
				2083	{ 0x2726, "lozf", "" },
				2084	{ 0x2736, "sext", "SIX POINTED BLACK STAR" },
				2085	{ 0x3008, "lang", "" },
				2086	{ 0x3009, "rang", "" },
				2087	{ 0xE291, "rpargt", "" },
				2088	{ 0xE2A2, "lnap", "" },
				2089	{ 0xE2AA, "nsmid", "" },
				2090	{ 0xE2B3, "prnE", "" },
				2091	{ 0xE2B5, "scnE", "" },
				2092	{ 0xE2B8, "vsubnE", "" },
				2093	{ 0xE301, "smid", "" },
				2094	{ 0xE411, "gnap", "" },
				2095	{ 0xFB00, "fflig", "" },
				2096	{ 0xFB01, "filig", "" },
				2097	{ 0xFB02, "fllig", "" },
				2098	{ 0xFB03, "ffilig", "" },
				2099	{ 0xFB04, "ffllig", "" },
				2100	{ 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" },
				2101	};
				2102
				2103	/************************************************************************
				2104	* *
				2105	* Commodity functions to handle entities *
				2106	* *
				2107	************************************************************************/
				2108
				2109	/*
				2110	* Macro used to grow the current buffer.
				2111	*/
				2112	#define growBuffer(buffer) { \
				2113	buffer##_size *= 2; \
				2114	buffer = (xmlChar ) xmlRealloc(buffer, buffer##_size sizeof(xmlChar)); \
				2115	if (buffer == NULL) { \
				2116	perror("realloc failed"); \
				2117	return(NULL); \
				2118	} \
				2119	}
				2120
				2121	/**
				2122	* docbEntityLookup:
				2123	* @name: the entity name
				2124	*
				2125	* Lookup the given entity in EntitiesTable
				2126	*
				2127	* TODO: the linear scan is really ugly, an hash table is really needed.
				2128	*
				2129	* Returns the associated docbEntityDescPtr if found, NULL otherwise.
				2130	*/
				2131	static docbEntityDescPtr
				2132	docbEntityLookup(const xmlChar *name) {
				2133	unsigned int i;
				2134
				2135	for (i = 0;i < (sizeof(docbookEntitiesTable)/
				2136	sizeof(docbookEntitiesTable[0]));i++) {
				2137	if (xmlStrEqual(name, BAD_CAST docbookEntitiesTable[i].name)) {
				2138	#ifdef DEBUG
				2139	xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", name);
				2140	#endif
				2141	return(&docbookEntitiesTable[i]);
				2142	}
				2143	}
				2144	return(NULL);
				2145	}
				2146
				2147	/**
				2148	* docbEntityValueLookup:
				2149	* @value: the entity's unicode value
				2150	*
				2151	* Lookup the given entity in EntitiesTable
				2152	*
				2153	* TODO: the linear scan is really ugly, an hash table is really needed.
				2154	*
				2155	* Returns the associated docbEntityDescPtr if found, NULL otherwise.
				2156	*/
				2157	static docbEntityDescPtr
				2158	docbEntityValueLookup(int value) {
				2159	unsigned int i;
				2160	#ifdef DEBUG
				2161	int lv = 0;
				2162	#endif
				2163
				2164	for (i = 0;i < (sizeof(docbookEntitiesTable)/
				2165	sizeof(docbookEntitiesTable[0]));i++) {
				2166	if (docbookEntitiesTable[i].value >= value) {
				2167	if (docbookEntitiesTable[i].value > value)
				2168	break;
				2169	#ifdef DEBUG
				2170	xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", docbookEntitiesTable[i].name);
				2171	#endif
				2172	return(&docbookEntitiesTable[i]);
				2173	}
				2174	#ifdef DEBUG
				2175	if (lv > docbookEntitiesTable[i].value) {
				2176	xmlGenericError(xmlGenericErrorContext,
				2177	"docbookEntitiesTable[] is not sorted (%d > %d)!\n",
				2178	lv, docbookEntitiesTable[i].value);
				2179	}
				2180	lv = docbookEntitiesTable[i].value;
				2181	#endif
				2182	}
				2183	return(NULL);
				2184	}
				2185
				2186	#if 0
				2187	/**
				2188	* UTF8ToSgml:
				2189	* @out: a pointer to an array of bytes to store the result
				2190	* @outlen: the length of @out
				2191	* @in: a pointer to an array of UTF-8 chars
				2192	* @inlen: the length of @in
				2193	*
				2194	* Take a block of UTF-8 chars in and try to convert it to an ASCII
				2195	* plus SGML entities block of chars out.
				2196	*
				2197	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
				2198	* The value of @inlen after return is the number of octets consumed
				2199	* as the return value is positive, else unpredictiable.
				2200	* The value of @outlen after return is the number of octets consumed.
				2201	*/
				2202	int
				2203	UTF8ToSgml(unsigned char* out, int *outlen,
				2204	const unsigned char* in, int *inlen) {
				2205	const unsigned char* processed = in;
				2206	const unsigned char* outend;
				2207	const unsigned char* outstart = out;
				2208	const unsigned char* instart = in;
				2209	const unsigned char* inend;
				2210	unsigned int c, d;
				2211	int trailing;
				2212
				2213	if (in == NULL) {
				2214	/*
				2215	* initialization nothing to do
				2216	*/
				2217	*outlen = 0;
				2218	*inlen = 0;
				2219	return(0);
				2220	}
				2221	inend = in + (*inlen);
				2222	outend = out + (*outlen);
				2223	while (in < inend) {
				2224	d = *in++;
				2225	if (d < 0x80) { c= d; trailing= 0; }
				2226	else if (d < 0xC0) {
				2227	/* trailing byte in leading position */
				2228	*outlen = out - outstart;
				2229	*inlen = processed - instart;
				2230	return(-2);
				2231	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				2232	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				2233	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				2234	else {
				2235	/* no chance for this in Ascii */
				2236	*outlen = out - outstart;
				2237	*inlen = processed - instart;
				2238	return(-2);
				2239	}
				2240
				2241	if (inend - in < trailing) {
				2242	break;
				2243	}
				2244
				2245	for ( ; trailing; trailing--) {
				2246	if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80))
				2247	break;
				2248	c <<= 6;
				2249	c \|= d & 0x3F;
				2250	}
				2251
				2252	/* assertion: c is a single UTF-4 value */
				2253	if (c < 0x80) {
				2254	if (out + 1 >= outend)
				2255	break;
				2256	*out++ = c;
				2257	} else {
				2258	int len;
				2259	docbEntityDescPtr ent;
				2260
				2261	/*
				2262	* Try to lookup a predefined SGML entity for it
				2263	*/
				2264
				2265	ent = docbEntityValueLookup(c);
				2266	if (ent == NULL) {
				2267	/* no chance for this in Ascii */
				2268	*outlen = out - outstart;
				2269	*inlen = processed - instart;
				2270	return(-2);
				2271	}
				2272	len = strlen(ent->name);
				2273	if (out + 2 + len >= outend)
				2274	break;
				2275	*out++ = '&';
				2276	memcpy(out, ent->name, len);
				2277	out += len;
				2278	*out++ = ';';
				2279	}
				2280	processed = in;
				2281	}
				2282	*outlen = out - outstart;
				2283	*inlen = processed - instart;
				2284	return(0);
				2285	}
				2286	#endif
				2287
				2288	/**
				2289	* docbEncodeEntities:
				2290	* @out: a pointer to an array of bytes to store the result
				2291	* @outlen: the length of @out
				2292	* @in: a pointer to an array of UTF-8 chars
				2293	* @inlen: the length of @in
				2294	* @quoteChar: the quote character to escape (' or ") or zero.
				2295	*
				2296	* Take a block of UTF-8 chars in and try to convert it to an ASCII
				2297	* plus SGML entities block of chars out.
				2298	*
				2299	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
				2300	* The value of @inlen after return is the number of octets consumed
				2301	* as the return value is positive, else unpredictiable.
				2302	* The value of @outlen after return is the number of octets consumed.
				2303	*/
				2304	int
				2305	docbEncodeEntities(unsigned char* out, int *outlen,
				2306	const unsigned char* in, int *inlen, int quoteChar) {
				2307	const unsigned char* processed = in;
				2308	const unsigned char* outend = out + (*outlen);
				2309	const unsigned char* outstart = out;
				2310	const unsigned char* instart = in;
				2311	const unsigned char* inend = in + (*inlen);
				2312	unsigned int c, d;
				2313	int trailing;
				2314
				2315	while (in < inend) {
				2316	d = *in++;
				2317	if (d < 0x80) { c= d; trailing= 0; }
				2318	else if (d < 0xC0) {
				2319	/* trailing byte in leading position */
				2320	*outlen = out - outstart;
				2321	*inlen = processed - instart;
				2322	return(-2);
				2323	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				2324	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				2325	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				2326	else {
				2327	/* no chance for this in Ascii */
				2328	*outlen = out - outstart;
				2329	*inlen = processed - instart;
				2330	return(-2);
				2331	}
				2332
				2333	if (inend - in < trailing)
				2334	break;
				2335
				2336	while (trailing--) {
				2337	if (((d= *in++) & 0xC0) != 0x80) {
				2338	*outlen = out - outstart;
				2339	*inlen = processed - instart;
				2340	return(-2);
				2341	}
				2342	c <<= 6;
				2343	c \|= d & 0x3F;
				2344	}
				2345
				2346	/* assertion: c is a single UTF-4 value */
				2347	if (c < 0x80 && c != (unsigned int) quoteChar && c != '&' && c != '<' && c != '>') {
				2348	if (out >= outend)
				2349	break;
				2350	*out++ = c;
				2351	} else {
				2352	docbEntityDescPtr ent;
				2353	const char *cp;
				2354	char nbuf[16];
				2355	int len;
				2356
				2357	/*
				2358	* Try to lookup a predefined SGML entity for it
				2359	*/
				2360	ent = docbEntityValueLookup(c);
				2361	if (ent == NULL) {
				2362	sprintf(nbuf, "#%u", c);
				2363	cp = nbuf;
				2364	}
				2365	else
				2366	cp = ent->name;
				2367	len = strlen(cp);
				2368	if (out + 2 + len > outend)
				2369	break;
				2370	*out++ = '&';
				2371	memcpy(out, cp, len);
				2372	out += len;
				2373	*out++ = ';';
				2374	}
				2375	processed = in;
				2376	}
				2377	*outlen = out - outstart;
				2378	*inlen = processed - instart;
				2379	return(0);
				2380	}
				2381
				2382
				2383	/************************************************************************
				2384	* *
				2385	* Commodity functions to handle streams *
				2386	* *
				2387	************************************************************************/
				2388
				2389	/**
				2390	* docbNewInputStream:
				2391	* @ctxt: an SGML parser context
				2392	*
				2393	* Create a new input stream structure
				2394	* Returns the new input stream or NULL
				2395	*/
				2396	static docbParserInputPtr
				2397	docbNewInputStream(docbParserCtxtPtr ctxt) {
				2398	docbParserInputPtr input;
				2399
				2400	input = (xmlParserInputPtr) xmlMalloc(sizeof(docbParserInput));
				2401	if (input == NULL) {
				2402	ctxt->errNo = XML_ERR_NO_MEMORY;
				2403	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2404	ctxt->sax->error(ctxt->userData,
				2405	"malloc: couldn't allocate a new input stream\n");
				2406	return(NULL);
				2407	}
				2408	memset(input, 0, sizeof(docbParserInput));
				2409	input->filename = NULL;
				2410	input->directory = NULL;
				2411	input->base = NULL;
				2412	input->cur = NULL;
				2413	input->buf = NULL;
				2414	input->line = 1;
				2415	input->col = 1;
				2416	input->buf = NULL;
				2417	input->free = NULL;
				2418	input->version = NULL;
				2419	input->consumed = 0;
				2420	input->length = 0;
				2421	return(input);
				2422	}
				2423
				2424
				2425	/************************************************************************
				2426	* *
				2427	* Commodity functions, cleanup needed ? *
				2428	* *
				2429	************************************************************************/
				2430
				2431	/**
				2432	* areBlanks:
				2433	* @ctxt: an SGML parser context
				2434	* @str: a xmlChar *
				2435	* @len: the size of @str
				2436	*
				2437	* Is this a sequence of blank chars that one can ignore ?
				2438	*
				2439	* Returns 1 if ignorable 0 otherwise.
				2440	*/
				2441
				2442	static int areBlanks(docbParserCtxtPtr ctxt, const xmlChar *str, int len) {
				2443	int i;
				2444	xmlNodePtr lastChild;
				2445
				2446	for (i = 0;i < len;i++)
				2447	if (!(IS_BLANK(str[i]))) return(0);
				2448
				2449	if (CUR == 0) return(1);
				2450	if (CUR != '<') return(0);
				2451	if (ctxt->name == NULL)
				2452	return(1);
				2453	if (ctxt->node == NULL) return(0);
				2454	lastChild = xmlGetLastChild(ctxt->node);
				2455	if (lastChild == NULL) {
				2456	if (ctxt->node->content != NULL) return(0);
				2457	} else if (xmlNodeIsText(lastChild))
				2458	return(0);
				2459	return(1);
				2460	}
				2461
				2462	/************************************************************************
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2463	* *
				2464	* External entities support *
				2465	* *
				2466	************************************************************************/
				2467
				2468	/**
				2469	* docbParseCtxtExternalEntity:
				2470	* @ctx: the existing parsing context
				2471	* @URL: the URL for the entity to load
				2472	* @ID: the System ID for the entity to load
				2473	* @list: the return value for the set of parsed nodes
				2474	*
				2475	* Parse an external general entity within an existing parsing context
				2476	*
				2477	* Returns 0 if the entity is well formed, -1 in case of args problem and
				2478	* the parser error code otherwise
				2479	*/
				2480
				2481	static int
				2482	docbParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
				2483	const xmlChar ID, xmlNodePtr list) {
				2484	xmlParserCtxtPtr ctxt;
				2485	xmlDocPtr newDoc;
				2486	xmlSAXHandlerPtr oldsax = NULL;
				2487	int ret = 0;
				2488
				2489	if (ctx->depth > 40) {
				2490	return(XML_ERR_ENTITY_LOOP);
				2491	}
				2492
				2493	if (list != NULL)
				2494	*list = NULL;
				2495	if ((URL == NULL) && (ID == NULL))
				2496	return(-1);
				2497	if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
				2498	return(-1);
				2499
				2500
				2501	ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL);
				2502	if (ctxt == NULL) return(-1);
				2503	ctxt->userData = ctxt;
				2504	oldsax = ctxt->sax;
				2505	ctxt->sax = ctx->sax;
				2506	newDoc = xmlNewDoc(BAD_CAST "1.0");
				2507	if (newDoc == NULL) {
				2508	xmlFreeParserCtxt(ctxt);
				2509	return(-1);
				2510	}
				2511	if (ctx->myDoc != NULL) {
				2512	newDoc->intSubset = ctx->myDoc->intSubset;
				2513	newDoc->extSubset = ctx->myDoc->extSubset;
				2514	}
				2515	if (ctx->myDoc->URL != NULL) {
				2516	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
				2517	}
				2518	newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
				2519	if (newDoc->children == NULL) {
				2520	ctxt->sax = oldsax;
				2521	xmlFreeParserCtxt(ctxt);
				2522	newDoc->intSubset = NULL;
				2523	newDoc->extSubset = NULL;
				2524	xmlFreeDoc(newDoc);
				2525	return(-1);
				2526	}
				2527	nodePush(ctxt, newDoc->children);
				2528	if (ctx->myDoc == NULL) {
				2529	ctxt->myDoc = newDoc;
				2530	} else {
				2531	ctxt->myDoc = ctx->myDoc;
				2532	newDoc->children->doc = ctx->myDoc;
				2533	}
				2534
				2535	/*
				2536	* Parse a possible text declaration first
				2537	*/
				2538	GROW;
				2539	if ((RAW == '<') && (NXT(1) == '?') &&
				2540	(NXT(2) == 'x') && (NXT(3) == 'm') &&
				2541	(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
				2542	xmlParseTextDecl(ctxt);
				2543	}
				2544
				2545	/*
				2546	* Doing validity checking on chunk doesn't make sense
				2547	*/
				2548	ctxt->instate = XML_PARSER_CONTENT;
				2549	ctxt->validate = ctx->validate;
				2550	ctxt->loadsubset = ctx->loadsubset;
				2551	ctxt->depth = ctx->depth + 1;
				2552	ctxt->replaceEntities = ctx->replaceEntities;
				2553	if (ctxt->validate) {
				2554	ctxt->vctxt.error = ctx->vctxt.error;
				2555	ctxt->vctxt.warning = ctx->vctxt.warning;
				2556	/* Allocate the Node stack */
				2557	ctxt->vctxt.nodeTab = (xmlNodePtr ) xmlMalloc(4 sizeof(xmlNodePtr));
				2558	if (ctxt->vctxt.nodeTab == NULL) {
				2559	xmlGenericError(xmlGenericErrorContext,
				2560	"docbParseCtxtExternalEntity: out of memory\n");
				2561	ctxt->validate = 0;
				2562	ctxt->vctxt.error = NULL;
				2563	ctxt->vctxt.warning = NULL;
				2564	} else {
				2565	ctxt->vctxt.nodeNr = 0;
				2566	ctxt->vctxt.nodeMax = 4;
				2567	ctxt->vctxt.node = NULL;
				2568	}
				2569	} else {
				2570	ctxt->vctxt.error = NULL;
				2571	ctxt->vctxt.warning = NULL;
				2572	}
				2573
				2574	docbParseContent(ctxt);
				2575
				2576	if ((RAW == '<') && (NXT(1) == '/')) {
				2577	ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
				2578	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2579	ctxt->sax->error(ctxt->userData,
				2580	"chunk is not well balanced\n");
				2581	ctxt->wellFormed = 0;
				2582	ctxt->disableSAX = 1;
				2583	} else if (RAW != 0) {
				2584	ctxt->errNo = XML_ERR_EXTRA_CONTENT;
				2585	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2586	ctxt->sax->error(ctxt->userData,
				2587	"extra content at the end of well balanced chunk\n");
				2588	ctxt->wellFormed = 0;
				2589	ctxt->disableSAX = 1;
				2590	}
				2591	if (ctxt->node != newDoc->children) {
				2592	ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
				2593	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2594	ctxt->sax->error(ctxt->userData,
				2595	"chunk is not well balanced\n");
				2596	ctxt->wellFormed = 0;
				2597	ctxt->disableSAX = 1;
				2598	}
				2599
				2600	if (!ctxt->wellFormed) {
				2601	if (ctxt->errNo == 0)
				2602	ret = 1;
				2603	else
				2604	ret = ctxt->errNo;
				2605	} else {
				2606	if (list != NULL) {
				2607	xmlNodePtr cur;
				2608
				2609	/*
				2610	* Return the newly created nodeset after unlinking it from
				2611	* they pseudo parent.
				2612	*/
				2613	cur = newDoc->children->children;
				2614	*list = cur;
				2615	while (cur != NULL) {
				2616	cur->parent = NULL;
				2617	cur = cur->next;
				2618	}
				2619	newDoc->children->children = NULL;
				2620	}
				2621	ret = 0;
				2622	}
				2623	ctxt->sax = oldsax;
				2624	xmlFreeParserCtxt(ctxt);
				2625	newDoc->intSubset = NULL;
				2626	newDoc->extSubset = NULL;
				2627	xmlFreeDoc(newDoc);
				2628
				2629	return(ret);
				2630	}
				2631
				2632	/************************************************************************
				2633	* *
				2634	* The parser itself *
				2635	* *
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2636	************************************************************************/
				2637
				2638	/**
				2639	* docbParseSGMLName:
				2640	* @ctxt: an SGML parser context
				2641	*
				2642	* parse an SGML tag or attribute name, note that we convert it to lowercase
				2643	* since SGML names are not case-sensitive.
				2644	*
				2645	* Returns the Tag Name parsed or NULL
				2646	*/
				2647
				2648	static xmlChar *
				2649	docbParseSGMLName(docbParserCtxtPtr ctxt) {
				2650	xmlChar *ret = NULL;
				2651	int i = 0;
				2652	xmlChar loc[DOCB_PARSER_BUFFER_SIZE];
				2653
				2654	if (!IS_LETTER(CUR) && (CUR != '_') &&
				2655	(CUR != ':')) return(NULL);
				2656
				2657	while ((i < DOCB_PARSER_BUFFER_SIZE) &&
				2658	((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				2659	(CUR == ':') \|\| (CUR == '_'))) {
				2660	if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
				2661	else loc[i] = CUR;
				2662	i++;
				2663
				2664	NEXT;
				2665	}
				2666
				2667	ret = xmlStrndup(loc, i);
				2668
				2669	return(ret);
				2670	}
				2671
				2672	/**
				2673	* docbParseName:
				2674	* @ctxt: an SGML parser context
				2675	*
				2676	* parse an SGML name, this routine is case sensistive.
				2677	*
				2678	* Returns the Name parsed or NULL
				2679	*/
				2680
				2681	static xmlChar *
				2682	docbParseName(docbParserCtxtPtr ctxt) {
				2683	xmlChar buf[DOCB_MAX_NAMELEN];
				2684	int len = 0;
				2685
				2686	GROW;
				2687	if (!IS_LETTER(CUR) && (CUR != '_')) {
				2688	return(NULL);
				2689	}
				2690
				2691	while ((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				2692	(CUR == '.') \|\| (CUR == '-') \|\|
				2693	(CUR == '_') \|\| (CUR == ':') \|\|
				2694	(IS_COMBINING(CUR)) \|\|
				2695	(IS_EXTENDER(CUR))) {
				2696	buf[len++] = CUR;
				2697	NEXT;
				2698	if (len >= DOCB_MAX_NAMELEN) {
				2699	xmlGenericError(xmlGenericErrorContext,
				2700	"docbParseName: reached DOCB_MAX_NAMELEN limit\n");
				2701	while ((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				2702	(CUR == '.') \|\| (CUR == '-') \|\|
				2703	(CUR == '_') \|\| (CUR == ':') \|\|
				2704	(IS_COMBINING(CUR)) \|\|
				2705	(IS_EXTENDER(CUR)))
				2706	NEXT;
				2707	break;
				2708	}
				2709	}
				2710	return(xmlStrndup(buf, len));
				2711	}
				2712
				2713	/**
				2714	* docbParseSGMLAttribute:
				2715	* @ctxt: an SGML parser context
				2716	* @stop: a char stop value
				2717	*
				2718	* parse an SGML attribute value till the stop (quote), if
				2719	* stop is 0 then it stops at the first space
				2720	*
				2721	* Returns the attribute parsed or NULL
				2722	*/
				2723
				2724	static xmlChar *
				2725	docbParseSGMLAttribute(docbParserCtxtPtr ctxt, const xmlChar stop) {
				2726	xmlChar *buffer = NULL;
				2727	int buffer_size = 0;
				2728	xmlChar *out = NULL;
				2729	xmlChar *name = NULL;
				2730
				2731	xmlChar *cur = NULL;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2732	xmlEntityPtr xent;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2733	docbEntityDescPtr ent;
				2734
				2735	/*
				2736	* allocate a translation buffer.
				2737	*/
				2738	buffer_size = DOCB_PARSER_BIG_BUFFER_SIZE;
				2739	buffer = (xmlChar ) xmlMalloc(buffer_size sizeof(xmlChar));
				2740	if (buffer == NULL) {
				2741	perror("docbParseSGMLAttribute: malloc failed");
				2742	return(NULL);
				2743	}
				2744	out = buffer;
				2745
				2746	/*
				2747	* Ok loop until we reach one of the ending chars
				2748	*/
				2749	while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
				2750	if ((stop == 0) && (IS_BLANK(CUR))) break;
				2751	if (CUR == '&') {
				2752	if (NXT(1) == '#') {
				2753	unsigned int c;
				2754	int bits;
				2755
				2756	c = docbParseCharRef(ctxt);
				2757	if (c < 0x80)
				2758	{ *out++ = c; bits= -6; }
				2759	else if (c < 0x800)
				2760	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				2761	else if (c < 0x10000)
				2762	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				2763	else
				2764	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				2765
				2766	for ( ; bits >= 0; bits-= 6) {
				2767	*out++ = ((c >> bits) & 0x3F) \| 0x80;
				2768	}
				2769	} else {
				2770	ent = docbParseEntityRef(ctxt, &name);
				2771	if (name == NULL) {
				2772	*out++ = '&';
				2773	if (out - buffer > buffer_size - 100) {
				2774	int indx = out - buffer;
				2775
				2776	growBuffer(buffer);
				2777	out = &buffer[indx];
				2778	}
				2779	} else if (ent == NULL) {
				2780	*out++ = '&';
				2781	cur = name;
				2782	while (*cur != 0) {
				2783	if (out - buffer > buffer_size - 100) {
				2784	int indx = out - buffer;
				2785
				2786	growBuffer(buffer);
				2787	out = &buffer[indx];
				2788	}
				2789	out++ = cur++;
				2790	}
				2791	xmlFree(name);
				2792	} else {
				2793	unsigned int c;
				2794	int bits;
				2795
				2796	if (out - buffer > buffer_size - 100) {
				2797	int indx = out - buffer;
				2798
				2799	growBuffer(buffer);
				2800	out = &buffer[indx];
				2801	}
				2802	c = (xmlChar)ent->value;
				2803	if (c < 0x80)
				2804	{ *out++ = c; bits= -6; }
				2805	else if (c < 0x800)
				2806	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				2807	else if (c < 0x10000)
				2808	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				2809	else
				2810	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				2811
				2812	for ( ; bits >= 0; bits-= 6) {
				2813	*out++ = ((c >> bits) & 0x3F) \| 0x80;
				2814	}
				2815	xmlFree(name);
				2816	}
				2817	}
				2818	} else {
				2819	unsigned int c;
				2820	int bits;
				2821
				2822	if (out - buffer > buffer_size - 100) {
				2823	int indx = out - buffer;
				2824
				2825	growBuffer(buffer);
				2826	out = &buffer[indx];
				2827	}
				2828	c = CUR;
				2829	if (c < 0x80)
				2830	{ *out++ = c; bits= -6; }
				2831	else if (c < 0x800)
				2832	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				2833	else if (c < 0x10000)
				2834	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				2835	else
				2836	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				2837
				2838	for ( ; bits >= 0; bits-= 6) {
				2839	*out++ = ((c >> bits) & 0x3F) \| 0x80;
				2840	}
				2841	NEXT;
				2842	}
				2843	}
				2844	*out++ = 0;
				2845	return(buffer);
				2846	}
				2847
				2848
				2849	/**
				2850	* docbParseEntityRef:
				2851	* @ctxt: an SGML parser context
				2852	* @str: location to store the entity name
				2853	*
				2854	* parse an SGML ENTITY references
				2855	*
				2856	* [68] EntityRef ::= '&' Name ';'
				2857	*
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2858	* Returns the associated xmlEntityPtr if found, or NULL otherwise,
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2859	* if non-NULL *str will have to be freed by the caller.
				2860	*/
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2861	static xmlEntityPtr
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2862	docbParseEntityRef(docbParserCtxtPtr ctxt, xmlChar **str) {
				2863	xmlChar *name;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2864	xmlEntityPtr ent = NULL;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2865	*str = NULL;
				2866
				2867	if (CUR == '&') {
				2868	NEXT;
				2869	name = docbParseName(ctxt);
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2870	if (name == NULL) {
				2871	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2872	ctxt->sax->error(ctxt->userData,
				2873	"docbParseEntityRef: no name\n");
				2874	ctxt->wellFormed = 0;
				2875	} else {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2876	GROW;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2877	if (CUR == ';') {
				2878	*str = name;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2879
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2880	/*
				2881	* Ask first SAX for entity resolution, otherwise try the
				2882	* predefined set.
				2883	*/
				2884	if (ctxt->sax != NULL) {
				2885	if (ctxt->sax->getEntity != NULL)
				2886	ent = ctxt->sax->getEntity(ctxt->userData, name);
				2887	if (ent == NULL)
				2888	ent = xmlGetPredefinedEntity(name);
				2889	}
				2890	NEXT;
				2891	} else {
				2892	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2893	ctxt->sax->error(ctxt->userData,
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2894	"docbParseEntityRef: expecting ';'\n");
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2895	*str = name;
				2896	}
				2897	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2898	}
				2899	return(ent);
				2900	}
				2901
				2902	/**
				2903	* docbParseAttValue:
				2904	* @ctxt: an SGML parser context
				2905	*
				2906	* parse a value for an attribute
				2907	* Note: the parser won't do substitution of entities here, this
				2908	* will be handled later in xmlStringGetNodeList, unless it was
				2909	* asked for ctxt->replaceEntities != 0
				2910	*
				2911	* Returns the AttValue parsed or NULL.
				2912	*/
				2913
				2914	static xmlChar *
				2915	docbParseAttValue(docbParserCtxtPtr ctxt) {
				2916	xmlChar *ret = NULL;
				2917
				2918	if (CUR == '"') {
				2919	NEXT;
				2920	ret = docbParseSGMLAttribute(ctxt, '"');
				2921	if (CUR != '"') {
				2922	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2923	ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
				2924	ctxt->wellFormed = 0;
				2925	} else
				2926	NEXT;
				2927	} else if (CUR == '\'') {
				2928	NEXT;
				2929	ret = docbParseSGMLAttribute(ctxt, '\'');
				2930	if (CUR != '\'') {
				2931	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2932	ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
				2933	ctxt->wellFormed = 0;
				2934	} else
				2935	NEXT;
				2936	} else {
				2937	/*
				2938	* That's an SGMLism, the attribute value may not be quoted
				2939	*/
				2940	ret = docbParseSGMLAttribute(ctxt, 0);
				2941	if (ret == NULL) {
				2942	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2943	ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");
				2944	ctxt->wellFormed = 0;
				2945	}
				2946	}
				2947	return(ret);
				2948	}
				2949
				2950	/**
				2951	* docbParseSystemLiteral:
				2952	* @ctxt: an SGML parser context
				2953	*
				2954	* parse an SGML Literal
				2955	*
				2956	* [11] SystemLiteral ::= ('"' [^"]* '"') \| ("'" [^']* "'")
				2957	*
				2958	* Returns the SystemLiteral parsed or NULL
				2959	*/
				2960
				2961	static xmlChar *
				2962	docbParseSystemLiteral(docbParserCtxtPtr ctxt) {
				2963	const xmlChar *q;
				2964	xmlChar *ret = NULL;
				2965
				2966	if (CUR == '"') {
				2967	NEXT;
				2968	q = CUR_PTR;
				2969	while ((IS_CHAR(CUR)) && (CUR != '"'))
				2970	NEXT;
				2971	if (!IS_CHAR(CUR)) {
				2972	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2973	ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
				2974	ctxt->wellFormed = 0;
				2975	} else {
				2976	ret = xmlStrndup(q, CUR_PTR - q);
				2977	NEXT;
				2978	}
				2979	} else if (CUR == '\'') {
				2980	NEXT;
				2981	q = CUR_PTR;
				2982	while ((IS_CHAR(CUR)) && (CUR != '\''))
				2983	NEXT;
				2984	if (!IS_CHAR(CUR)) {
				2985	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2986	ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
				2987	ctxt->wellFormed = 0;
				2988	} else {
				2989	ret = xmlStrndup(q, CUR_PTR - q);
				2990	NEXT;
				2991	}
				2992	} else {
				2993	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2994	ctxt->sax->error(ctxt->userData,
				2995	"SystemLiteral \" or ' expected\n");
				2996	ctxt->wellFormed = 0;
				2997	}
				2998
				2999	return(ret);
				3000	}
				3001
				3002	/**
				3003	* docbParsePubidLiteral:
				3004	* @ctxt: an SGML parser context
				3005	*
				3006	* parse an SGML public literal
				3007	*
				3008	* [12] PubidLiteral ::= '"' PubidChar* '"' \| "'" (PubidChar - "'")* "'"
				3009	*
				3010	* Returns the PubidLiteral parsed or NULL.
				3011	*/
				3012
				3013	static xmlChar *
				3014	docbParsePubidLiteral(docbParserCtxtPtr ctxt) {
				3015	const xmlChar *q;
				3016	xmlChar *ret = NULL;
				3017	/*
				3018	* Name ::= (Letter \| '_') (NameChar)*
				3019	*/
				3020	if (CUR == '"') {
				3021	NEXT;
				3022	q = CUR_PTR;
				3023	while (IS_PUBIDCHAR(CUR)) NEXT;
				3024	if (CUR != '"') {
				3025	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3026	ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
				3027	ctxt->wellFormed = 0;
				3028	} else {
				3029	ret = xmlStrndup(q, CUR_PTR - q);
				3030	NEXT;
				3031	}
				3032	} else if (CUR == '\'') {
				3033	NEXT;
				3034	q = CUR_PTR;
				3035	while ((IS_LETTER(CUR)) && (CUR != '\''))
				3036	NEXT;
				3037	if (!IS_LETTER(CUR)) {
				3038	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3039	ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
				3040	ctxt->wellFormed = 0;
				3041	} else {
				3042	ret = xmlStrndup(q, CUR_PTR - q);
				3043	NEXT;
				3044	}
				3045	} else {
				3046	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3047	ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
				3048	ctxt->wellFormed = 0;
				3049	}
				3050
				3051	return(ret);
				3052	}
				3053
				3054	/**
				3055	* docbParseCharData:
				3056	* @ctxt: an SGML parser context
				3057	* @cdata: int indicating whether we are within a CDATA section
				3058	*
				3059	* parse a CharData section.
				3060	* if we are within a CDATA section ']]>' marks an end of section.
				3061	*
				3062	* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
				3063	*/
				3064
				3065	static void
				3066	docbParseCharData(docbParserCtxtPtr ctxt) {
				3067	xmlChar buf[DOCB_PARSER_BIG_BUFFER_SIZE + 5];
				3068	int nbchar = 0;
				3069	int cur, l;
				3070
				3071	SHRINK;
				3072	cur = CUR_CHAR(l);
				3073	while (((cur != '<') \|\| (ctxt->token == '<')) &&
				3074	((cur != '&') \|\| (ctxt->token == '&')) &&
				3075	(IS_CHAR(cur))) {
				3076	COPY_BUF(l,buf,nbchar,cur);
				3077	if (nbchar >= DOCB_PARSER_BIG_BUFFER_SIZE) {
				3078	/*
				3079	* Ok the segment is to be consumed as chars.
				3080	*/
				3081	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
				3082	if (areBlanks(ctxt, buf, nbchar)) {
				3083	if (ctxt->sax->ignorableWhitespace != NULL)
				3084	ctxt->sax->ignorableWhitespace(ctxt->userData,
				3085	buf, nbchar);
				3086	} else {
				3087	docbCheckParagraph(ctxt);
				3088	if (ctxt->sax->characters != NULL)
				3089	ctxt->sax->characters(ctxt->userData, buf, nbchar);
				3090	}
				3091	}
				3092	nbchar = 0;
				3093	}
				3094	NEXTL(l);
				3095	cur = CUR_CHAR(l);
				3096	}
				3097	if (nbchar != 0) {
				3098	/*
				3099	* Ok the segment is to be consumed as chars.
				3100	*/
				3101	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
				3102	if (areBlanks(ctxt, buf, nbchar)) {
				3103	if (ctxt->sax->ignorableWhitespace != NULL)
				3104	ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
				3105	} else {
				3106	docbCheckParagraph(ctxt);
				3107	if (ctxt->sax->characters != NULL)
				3108	ctxt->sax->characters(ctxt->userData, buf, nbchar);
				3109	}
				3110	}
				3111	}
				3112	}
				3113
				3114	/**
				3115	* docbParseExternalID:
				3116	* @ctxt: an SGML parser context
				3117	* @publicID: a xmlChar** receiving PubidLiteral
				3118	*
				3119	* Parse an External ID or a Public ID
				3120	*
				3121	* Returns the function returns SystemLiteral and in the second
				3122	* case publicID receives PubidLiteral,
				3123	* it is possible to return NULL and have publicID set.
				3124	*/
				3125
				3126	static xmlChar *
				3127	docbParseExternalID(docbParserCtxtPtr ctxt, xmlChar **publicID) {
				3128	xmlChar *URI = NULL;
				3129
				3130	if ((UPPER == 'S') && (UPP(1) == 'Y') &&
				3131	(UPP(2) == 'S') && (UPP(3) == 'T') &&
				3132	(UPP(4) == 'E') && (UPP(5) == 'M')) {
				3133	SKIP(6);
				3134	if (!IS_BLANK(CUR)) {
				3135	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3136	ctxt->sax->error(ctxt->userData,
				3137	"Space required after 'SYSTEM'\n");
				3138	ctxt->wellFormed = 0;
				3139	}
				3140	SKIP_BLANKS;
				3141	URI = docbParseSystemLiteral(ctxt);
				3142	if (URI == NULL) {
				3143	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3144	ctxt->sax->error(ctxt->userData,
				3145	"docbParseExternalID: SYSTEM, no URI\n");
				3146	ctxt->wellFormed = 0;
				3147	}
				3148	} else if ((UPPER == 'P') && (UPP(1) == 'U') &&
				3149	(UPP(2) == 'B') && (UPP(3) == 'L') &&
				3150	(UPP(4) == 'I') && (UPP(5) == 'C')) {
				3151	SKIP(6);
				3152	if (!IS_BLANK(CUR)) {
				3153	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3154	ctxt->sax->error(ctxt->userData,
				3155	"Space required after 'PUBLIC'\n");
				3156	ctxt->wellFormed = 0;
				3157	}
				3158	SKIP_BLANKS;
				3159	*publicID = docbParsePubidLiteral(ctxt);
				3160	if (*publicID == NULL) {
				3161	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3162	ctxt->sax->error(ctxt->userData,
				3163	"docbParseExternalID: PUBLIC, no Public Identifier\n");
				3164	ctxt->wellFormed = 0;
				3165	}
				3166	SKIP_BLANKS;
				3167	if ((CUR == '"') \|\| (CUR == '\'')) {
				3168	URI = docbParseSystemLiteral(ctxt);
				3169	}
				3170	}
				3171	return(URI);
				3172	}
				3173
				3174	/**
				3175	* docbParseComment:
				3176	* @ctxt: an SGML parser context
				3177	*
				3178	* Parse an XML (SGML) comment <!-- .... -->
				3179	*
				3180	* [15] Comment ::= '<!--' ((Char - '-') \| ('-' (Char - '-')))* '-->'
				3181	*/
				3182	static void
				3183	docbParseComment(docbParserCtxtPtr ctxt) {
				3184	xmlChar *buf = NULL;
				3185	int len;
				3186	int size = DOCB_PARSER_BUFFER_SIZE;
				3187	int q, ql;
				3188	int r, rl;
				3189	int cur, l;
				3190	xmlParserInputState state;
				3191
				3192	/*
				3193	* Check that there is a comment right here.
				3194	*/
				3195	if ((RAW != '<') \|\| (NXT(1) != '!') \|\|
				3196	(NXT(2) != '-') \|\| (NXT(3) != '-')) return;
				3197
				3198	state = ctxt->instate;
				3199	ctxt->instate = XML_PARSER_COMMENT;
				3200	SHRINK;
				3201	SKIP(4);
				3202	buf = (xmlChar ) xmlMalloc(size sizeof(xmlChar));
				3203	if (buf == NULL) {
				3204	xmlGenericError(xmlGenericErrorContext,
				3205	"malloc of %d byte failed\n", size);
				3206	ctxt->instate = state;
				3207	return;
				3208	}
				3209	q = CUR_CHAR(ql);
				3210	NEXTL(ql);
				3211	r = CUR_CHAR(rl);
				3212	NEXTL(rl);
				3213	cur = CUR_CHAR(l);
				3214	len = 0;
				3215	while (IS_CHAR(cur) &&
				3216	((cur != '>') \|\|
				3217	(r != '-') \|\| (q != '-'))) {
				3218	if (len + 5 >= size) {
				3219	size *= 2;
				3220	buf = (xmlChar ) xmlRealloc(buf, size sizeof(xmlChar));
				3221	if (buf == NULL) {
				3222	xmlGenericError(xmlGenericErrorContext,
				3223	"realloc of %d byte failed\n", size);
				3224	ctxt->instate = state;
				3225	return;
				3226	}
				3227	}
				3228	COPY_BUF(ql,buf,len,q);
				3229	q = r;
				3230	ql = rl;
				3231	r = cur;
				3232	rl = l;
				3233	NEXTL(l);
				3234	cur = CUR_CHAR(l);
				3235	if (cur == 0) {
				3236	SHRINK;
				3237	GROW;
				3238	cur = CUR_CHAR(l);
				3239	}
				3240	}
				3241	buf[len] = 0;
				3242	if (!IS_CHAR(cur)) {
				3243	ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
				3244	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3245	ctxt->sax->error(ctxt->userData,
				3246	"Comment not terminated \n<!--%.50s\n", buf);
				3247	ctxt->wellFormed = 0;
				3248	xmlFree(buf);
				3249	} else {
				3250	NEXT;
				3251	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
				3252	(!ctxt->disableSAX))
				3253	ctxt->sax->comment(ctxt->userData, buf);
				3254	xmlFree(buf);
				3255	}
				3256	ctxt->instate = state;
				3257	}
				3258
				3259	/**
				3260	* docbParseCharRef:
				3261	* @ctxt: an SGML parser context
				3262	*
				3263	* parse Reference declarations
				3264	*
				3265	* [66] CharRef ::= '&#' [0-9]+ ';' \|
				3266	* '&#x' [0-9a-fA-F]+ ';'
				3267	*
				3268	* Returns the value parsed (as an int)
				3269	*/
				3270	static int
				3271	docbParseCharRef(docbParserCtxtPtr ctxt) {
				3272	int val = 0;
				3273
				3274	if ((CUR == '&') && (NXT(1) == '#') &&
				3275	(NXT(2) == 'x')) {
				3276	SKIP(3);
				3277	while (CUR != ';') {
				3278	if ((CUR >= '0') && (CUR <= '9'))
				3279	val = val * 16 + (CUR - '0');
				3280	else if ((CUR >= 'a') && (CUR <= 'f'))
				3281	val = val * 16 + (CUR - 'a') + 10;
				3282	else if ((CUR >= 'A') && (CUR <= 'F'))
				3283	val = val * 16 + (CUR - 'A') + 10;
				3284	else {
				3285	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3286	ctxt->sax->error(ctxt->userData,
				3287	"docbParseCharRef: invalid hexadecimal value\n");
				3288	ctxt->wellFormed = 0;
				3289	val = 0;
				3290	break;
				3291	}
				3292	NEXT;
				3293	}
				3294	if (CUR == ';')
				3295	NEXT;
				3296	} else if ((CUR == '&') && (NXT(1) == '#')) {
				3297	SKIP(2);
				3298	while (CUR != ';') {
				3299	if ((CUR >= '0') && (CUR <= '9'))
				3300	val = val * 10 + (CUR - '0');
				3301	else {
				3302	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3303	ctxt->sax->error(ctxt->userData,
				3304	"docbParseCharRef: invalid decimal value\n");
				3305	ctxt->wellFormed = 0;
				3306	val = 0;
				3307	break;
				3308	}
				3309	NEXT;
				3310	}
				3311	if (CUR == ';')
				3312	NEXT;
				3313	} else {
				3314	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3315	ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid value\n");
				3316	ctxt->wellFormed = 0;
				3317	}
				3318	/*
				3319	* Check the value IS_CHAR ...
				3320	*/
				3321	if (IS_CHAR(val)) {
				3322	return(val);
				3323	} else {
				3324	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3325	ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid xmlChar value %d\n",
				3326	val);
				3327	ctxt->wellFormed = 0;
				3328	}
				3329	return(0);
				3330	}
				3331
				3332
				3333	/**
				3334	* docbParseDocTypeDecl :
				3335	* @ctxt: an SGML parser context
				3336	*
				3337	* parse a DOCTYPE declaration
				3338	*
				3339	* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
				3340	* ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'
				3341	*/
				3342
				3343	static void
				3344	docbParseDocTypeDecl(docbParserCtxtPtr ctxt) {
				3345	xmlChar *name;
				3346	xmlChar *ExternalID = NULL;
				3347	xmlChar *URI = NULL;
				3348
				3349	/*
				3350	* We know that '<!DOCTYPE' has been detected.
				3351	*/
				3352	SKIP(9);
				3353
				3354	SKIP_BLANKS;
				3355
				3356	/*
				3357	* Parse the DOCTYPE name.
				3358	*/
				3359	name = docbParseName(ctxt);
				3360	if (name == NULL) {
				3361	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3362	ctxt->sax->error(ctxt->userData, "docbParseDocTypeDecl : no DOCTYPE name !\n");
				3363	ctxt->wellFormed = 0;
				3364	}
				3365	/*
				3366	* Check that upper(name) == "SGML" !!!!!!!!!!!!!
				3367	*/
				3368
				3369	SKIP_BLANKS;
				3370
				3371	/*
				3372	* Check for SystemID and ExternalID
				3373	*/
				3374	URI = docbParseExternalID(ctxt, &ExternalID);
				3375	SKIP_BLANKS;
				3376
				3377	/*
				3378	* Create or update the document accordingly to the DOCTYPE
				3379	*/
				3380	if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
				3381	(!ctxt->disableSAX))
				3382	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
				3383
				3384	/*
				3385	* Is there any internal subset declarations ?
				3386	* they are handled separately in docbParseInternalSubset()
				3387	*/
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3388	if (RAW != '[') {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3389	return;
				3390
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3391	/*
				3392	* We should be at the end of the DOCTYPE declaration.
				3393	*/
				3394	if (CUR != '>') {
				3395	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3396	ctxt->sax->error(ctxt->userData,
				3397	"DOCTYPE unproperly terminated\n");
				3398	ctxt->wellFormed = 0;
				3399	/* We shouldn't try to resynchronize ... */
				3400	}
				3401	NEXT;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3402	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3403
				3404	/*
				3405	* Cleanup, since we don't use all those identifiers
				3406	*/
				3407	if (URI != NULL) xmlFree(URI);
				3408	if (ExternalID != NULL) xmlFree(ExternalID);
				3409	if (name != NULL) xmlFree(name);
				3410	}
				3411
				3412	/**
				3413	* docbParseAttribute:
				3414	* @ctxt: an SGML parser context
				3415	* @value: a xmlChar ** used to store the value of the attribute
				3416	*
				3417	* parse an attribute
				3418	*
				3419	* [41] Attribute ::= Name Eq AttValue
				3420	*
				3421	* [25] Eq ::= S? '=' S?
				3422	*
				3423	* With namespace:
				3424	*
				3425	* [NS 11] Attribute ::= QName Eq AttValue
				3426	*
				3427	* Also the case QName == xmlns:??? is handled independently as a namespace
				3428	* definition.
				3429	*
				3430	* Returns the attribute name, and the value in *value.
				3431	*/
				3432
				3433	static xmlChar *
				3434	docbParseAttribute(docbParserCtxtPtr ctxt, xmlChar **value) {
				3435	xmlChar name, val = NULL;
				3436
				3437	*value = NULL;
				3438	name = docbParseName(ctxt);
				3439	if (name == NULL) {
				3440	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3441	ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
				3442	ctxt->wellFormed = 0;
				3443	return(NULL);
				3444	}
				3445
				3446	/*
				3447	* read the value
				3448	*/
				3449	SKIP_BLANKS;
				3450	if (CUR == '=') {
				3451	NEXT;
				3452	SKIP_BLANKS;
				3453	val = docbParseAttValue(ctxt);
				3454	/******
				3455	} else {
				3456	* TODO : some attribute must have values, some may not
				3457	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3458	ctxt->sax->warning(ctxt->userData,
				3459	"No value for attribute %s\n", name); */
				3460	}
				3461
				3462	*value = val;
				3463	return(name);
				3464	}
				3465
				3466	/**
				3467	* docbCheckEncoding:
				3468	* @ctxt: an SGML parser context
				3469	* @attvalue: the attribute value
				3470	*
				3471	* Checks an http-equiv attribute from a Meta tag to detect
				3472	* the encoding
				3473	* If a new encoding is detected the parser is switched to decode
				3474	* it and pass UTF8
				3475	*/
				3476	static void
				3477	docbCheckEncoding(docbParserCtxtPtr ctxt, const xmlChar *attvalue) {
				3478	const xmlChar *encoding;
				3479
				3480	if ((ctxt == NULL) \|\| (attvalue == NULL))
				3481	return;
				3482
				3483	encoding = xmlStrstr(attvalue, BAD_CAST"charset=");
				3484	if (encoding == NULL)
				3485	encoding = xmlStrstr(attvalue, BAD_CAST"Charset=");
				3486	if (encoding == NULL)
				3487	encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET=");
				3488	if (encoding != NULL) {
				3489	encoding += 8;
				3490	} else {
				3491	encoding = xmlStrstr(attvalue, BAD_CAST"charset =");
				3492	if (encoding == NULL)
				3493	encoding = xmlStrstr(attvalue, BAD_CAST"Charset =");
				3494	if (encoding == NULL)
				3495	encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET =");
				3496	if (encoding != NULL)
				3497	encoding += 9;
				3498	}
				3499	/*
				3500	* Restricted from 2.3.5 */
				3501	if (encoding != NULL) {
				3502	xmlCharEncoding enc;
				3503
				3504	if (ctxt->input->encoding != NULL)
				3505	xmlFree((xmlChar *) ctxt->input->encoding);
				3506	ctxt->input->encoding = encoding;
				3507
				3508	enc = xmlParseCharEncoding((const char *) encoding);
				3509	if (enc == XML_CHAR_ENCODING_8859_1) {
				3510	ctxt->charset = XML_CHAR_ENCODING_8859_1;
				3511	} else if (enc != XML_CHAR_ENCODING_UTF8) {
				3512	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3513	ctxt->sax->error(ctxt->userData,
				3514	"Unsupported encoding %s\n", encoding);
				3515	/* xmlFree(encoding); */
				3516	ctxt->wellFormed = 0;
				3517	ctxt->disableSAX = 1;
				3518	ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
				3519	}
				3520	}
				3521	}
				3522
				3523	/**
				3524	* docbCheckMeta:
				3525	* @ctxt: an SGML parser context
				3526	* @atts: the attributes values
				3527	*
				3528	* Checks an attributes from a Meta tag
				3529	*/
				3530	static void
				3531	docbCheckMeta(docbParserCtxtPtr ctxt, const xmlChar **atts) {
				3532	int i;
				3533	const xmlChar att, value;
				3534	int http = 0;
				3535	const xmlChar *content = NULL;
				3536
				3537	if ((ctxt == NULL) \|\| (atts == NULL))
				3538	return;
				3539
				3540	i = 0;
				3541	att = atts[i++];
				3542	while (att != NULL) {
				3543	value = atts[i++];
				3544	if ((value != NULL) &&
				3545	((xmlStrEqual(att, BAD_CAST"http-equiv")) \|\|
				3546	(xmlStrEqual(att, BAD_CAST"Http-Equiv")) \|\|
				3547	(xmlStrEqual(att, BAD_CAST"HTTP-EQUIV"))) &&
				3548	((xmlStrEqual(value, BAD_CAST"Content-Type")) \|\|
				3549	(xmlStrEqual(value, BAD_CAST"content-type")) \|\|
				3550	(xmlStrEqual(value, BAD_CAST"CONTENT-TYPE"))))
				3551	http = 1;
				3552	else if ((value != NULL) &&
				3553	((xmlStrEqual(att, BAD_CAST"content")) \|\|
				3554	(xmlStrEqual(att, BAD_CAST"Content")) \|\|
				3555	(xmlStrEqual(att, BAD_CAST"CONTENT"))))
				3556	content = value;
				3557	att = atts[i++];
				3558	}
				3559	if ((http) && (content != NULL))
				3560	docbCheckEncoding(ctxt, content);
				3561
				3562	}
				3563
				3564	/**
				3565	* docbParseStartTag:
				3566	* @ctxt: an SGML parser context
				3567	*
				3568	* parse a start of tag either for rule element or
				3569	* EmptyElement. In both case we don't parse the tag closing chars.
				3570	*
				3571	* [40] STag ::= '<' Name (S Attribute)* S? '>'
				3572	*
				3573	* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
				3574	*
				3575	* With namespace:
				3576	*
				3577	* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
				3578	*
				3579	* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
				3580	*
				3581	*/
				3582
				3583	static void
				3584	docbParseStartTag(docbParserCtxtPtr ctxt) {
				3585	xmlChar *name;
				3586	xmlChar *attname;
				3587	xmlChar *attvalue;
				3588	const xmlChar **atts = NULL;
				3589	int nbatts = 0;
				3590	int maxatts = 0;
				3591	int meta = 0;
				3592	int i;
				3593
				3594	if (CUR != '<') return;
				3595	NEXT;
				3596
				3597	GROW;
				3598	name = docbParseSGMLName(ctxt);
				3599	if (name == NULL) {
				3600	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3601	ctxt->sax->error(ctxt->userData,
				3602	"docbParseStartTag: invalid element name\n");
				3603	ctxt->wellFormed = 0;
				3604	return;
				3605	}
				3606	if (xmlStrEqual(name, BAD_CAST"meta"))
				3607	meta = 1;
				3608
				3609	/*
				3610	* Check for auto-closure of SGML elements.
				3611	*/
				3612	docbAutoClose(ctxt, name);
				3613
				3614	/*
				3615	* Now parse the attributes, it ends up with the ending
				3616	*
				3617	* (S Attribute)* S?
				3618	*/
				3619	SKIP_BLANKS;
				3620	while ((IS_CHAR(CUR)) &&
				3621	(CUR != '>') &&
				3622	((CUR != '/') \|\| (NXT(1) != '>'))) {
				3623	long cons = ctxt->nbChars;
				3624
				3625	GROW;
				3626	attname = docbParseAttribute(ctxt, &attvalue);
				3627	if (attname != NULL) {
				3628
				3629	/*
				3630	* Well formedness requires at most one declaration of an attribute
				3631	*/
				3632	for (i = 0; i < nbatts;i += 2) {
				3633	if (xmlStrEqual(atts[i], attname)) {
				3634	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3635	ctxt->sax->error(ctxt->userData,
				3636	"Attribute %s redefined\n",
				3637	attname);
				3638	ctxt->wellFormed = 0;
				3639	xmlFree(attname);
				3640	if (attvalue != NULL)
				3641	xmlFree(attvalue);
				3642	goto failed;
				3643	}
				3644	}
				3645
				3646	/*
				3647	* Add the pair to atts
				3648	*/
				3649	if (atts == NULL) {
				3650	maxatts = 10;
				3651	atts = (const xmlChar *) xmlMalloc(maxatts sizeof(xmlChar *));
				3652	if (atts == NULL) {
				3653	xmlGenericError(xmlGenericErrorContext,
				3654	"malloc of %ld byte failed\n",
				3655	maxatts * (long)sizeof(xmlChar *));
				3656	if (name != NULL) xmlFree(name);
				3657	return;
				3658	}
				3659	} else if (nbatts + 4 > maxatts) {
				3660	maxatts *= 2;
				3661	atts = (const xmlChar *) xmlRealloc(atts, maxatts sizeof(xmlChar *));
				3662	if (atts == NULL) {
				3663	xmlGenericError(xmlGenericErrorContext,
				3664	"realloc of %ld byte failed\n",
				3665	maxatts * (long)sizeof(xmlChar *));
				3666	if (name != NULL) xmlFree(name);
				3667	return;
				3668	}
				3669	}
				3670	atts[nbatts++] = attname;
				3671	atts[nbatts++] = attvalue;
				3672	atts[nbatts] = NULL;
				3673	atts[nbatts + 1] = NULL;
				3674	}
				3675
				3676	failed:
				3677	SKIP_BLANKS;
				3678	if (cons == ctxt->nbChars) {
				3679	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3680	ctxt->sax->error(ctxt->userData,
				3681	"docbParseStartTag: problem parsing attributes\n");
				3682	ctxt->wellFormed = 0;
				3683	break;
				3684	}
				3685	}
				3686
				3687	/*
				3688	* Handle specific association to the META tag
				3689	*/
				3690	if (meta)
				3691	docbCheckMeta(ctxt, atts);
				3692
				3693	/*
				3694	* SAX: Start of Element !
				3695	*/
				3696	docbnamePush(ctxt, xmlStrdup(name));
				3697	#ifdef DEBUG
				3698	xmlGenericError(xmlGenericErrorContext,"Start of element %s: pushed %s\n", name, ctxt->name);
				3699	#endif
				3700	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
				3701	ctxt->sax->startElement(ctxt->userData, name, atts);
				3702
				3703	if (atts != NULL) {
				3704	for (i = 0;i < nbatts;i++) {
				3705	if (atts[i] != NULL)
				3706	xmlFree((xmlChar *) atts[i]);
				3707	}
				3708	xmlFree((void *) atts);
				3709	}
				3710	if (name != NULL) xmlFree(name);
				3711	}
				3712
				3713	/**
				3714	* docbParseEndTag:
				3715	* @ctxt: an SGML parser context
				3716	*
				3717	* parse an end of tag
				3718	*
				3719	* [42] ETag ::= '</' Name S? '>'
				3720	*
				3721	* With namespace
				3722	*
				3723	* [NS 9] ETag ::= '</' QName S? '>'
				3724	*/
				3725
				3726	static void
				3727	docbParseEndTag(docbParserCtxtPtr ctxt) {
				3728	xmlChar *name;
				3729	xmlChar *oldname;
				3730	int i;
				3731
				3732	if ((CUR != '<') \|\| (NXT(1) != '/')) {
				3733	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3734	ctxt->sax->error(ctxt->userData, "docbParseEndTag: '</' not found\n");
				3735	ctxt->wellFormed = 0;
				3736	return;
				3737	}
				3738	SKIP(2);
				3739
				3740	name = docbParseSGMLName(ctxt);
				3741	if (name == NULL) {
				3742	if (CUR == '>') {
				3743	NEXT;
				3744	oldname = docbnamePop(ctxt);
				3745	if (oldname != NULL) {
				3746	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				3747	ctxt->sax->endElement(ctxt->userData, name);
				3748	#ifdef DEBUG
				3749	xmlGenericError(xmlGenericErrorContext,"End of tag </>: popping out %s\n", oldname);
				3750	#endif
				3751	xmlFree(oldname);
				3752	#ifdef DEBUG
				3753	} else {
				3754	xmlGenericError(xmlGenericErrorContext,"End of tag </>: stack empty !!!\n");
				3755	#endif
				3756	}
				3757	return;
				3758	} else
				3759	return;
				3760	}
				3761
				3762	/*
				3763	* We should definitely be at the ending "S? '>'" part
				3764	*/
				3765	SKIP_BLANKS;
				3766	if ((!IS_CHAR(CUR)) \|\| (CUR != '>')) {
				3767	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3768	ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
				3769	ctxt->wellFormed = 0;
				3770	} else
				3771	NEXT;
				3772
				3773	/*
				3774	* If the name read is not one of the element in the parsing stack
				3775	* then return, it's just an error.
				3776	*/
				3777	for (i = (ctxt->nameNr - 1);i >= 0;i--) {
				3778	if (xmlStrEqual(name, ctxt->nameTab[i])) break;
				3779	}
				3780	if (i < 0) {
				3781	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3782	ctxt->sax->error(ctxt->userData,
				3783	"Unexpected end tag : %s\n", name);
				3784	xmlFree(name);
				3785	ctxt->wellFormed = 0;
				3786	return;
				3787	}
				3788
				3789
				3790	/*
				3791	* Check for auto-closure of SGML elements.
				3792	*/
				3793
				3794	docbAutoCloseOnClose(ctxt, name);
				3795
				3796	/*
				3797	* Well formedness constraints, opening and closing must match.
				3798	* With the exception that the autoclose may have popped stuff out
				3799	* of the stack.
				3800	*/
				3801	if (((name[0] != '/') \|\| (name[1] != 0)) &&
				3802	(!xmlStrEqual(name, ctxt->name))) {
				3803	#ifdef DEBUG
				3804	xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting %s\n", name, ctxt->name);
				3805	#endif
				3806	if ((ctxt->name != NULL) &&
				3807	(!xmlStrEqual(ctxt->name, name))) {
				3808	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3809	ctxt->sax->error(ctxt->userData,
				3810	"Opening and ending tag mismatch: %s and %s\n",
				3811	name, ctxt->name);
				3812	ctxt->wellFormed = 0;
				3813	}
				3814	}
				3815
				3816	/*
				3817	* SAX: End of Tag
				3818	*/
				3819	oldname = ctxt->name;
				3820	if (((name[0] == '/') && (name[1] == 0)) \|\|
				3821	((oldname != NULL) && (xmlStrEqual(oldname, name)))) {
				3822	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				3823	ctxt->sax->endElement(ctxt->userData, name);
				3824	oldname = docbnamePop(ctxt);
				3825	if (oldname != NULL) {
				3826	#ifdef DEBUG
				3827	xmlGenericError(xmlGenericErrorContext,"End of tag %s: popping out %s\n", name, oldname);
				3828	#endif
				3829	xmlFree(oldname);
				3830	#ifdef DEBUG
				3831	} else {
				3832	xmlGenericError(xmlGenericErrorContext,"End of tag %s: stack empty !!!\n", name);
				3833	#endif
				3834	}
				3835	}
				3836
				3837	if (name != NULL)
				3838	xmlFree(name);
				3839
				3840	return;
				3841	}
				3842
				3843
				3844	/**
				3845	* docbParseReference:
				3846	* @ctxt: an SGML parser context
				3847	*
				3848	* parse and handle entity references in content,
				3849	* this will end-up in a call to character() since this is either a
				3850	* CharRef, or a predefined entity.
				3851	*/
				3852	static void
				3853	docbParseReference(docbParserCtxtPtr ctxt) {
				3854	docbEntityDescPtr ent;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3855	xmlEntityPtr xent;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3856	xmlChar out[6];
				3857	xmlChar *name;
				3858	if (CUR != '&') return;
				3859
				3860	if (NXT(1) == '#') {
				3861	unsigned int c;
				3862	int bits, i = 0;
				3863
				3864	c = docbParseCharRef(ctxt);
				3865	if (c < 0x80) { out[i++]= c; bits= -6; }
				3866	else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				3867	else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				3868	else { out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				3869
				3870	for ( ; bits >= 0; bits-= 6) {
				3871	out[i++]= ((c >> bits) & 0x3F) \| 0x80;
				3872	}
				3873	out[i] = 0;
				3874
				3875	docbCheckParagraph(ctxt);
				3876	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				3877	ctxt->sax->characters(ctxt->userData, out, i);
				3878	} else {
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3879	/*
				3880	* Lookup the entity in the table.
				3881	*/
				3882	xent = docbParseEntityRef(ctxt, &name);
				3883	if (xent != NULL) {
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame^]	3884	if (((ctxt->replaceEntities) \|\| (ctxt->loadsubset)) &&
				3885	((xent->children == NULL) &&
				3886	(xent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))) {
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3887	/*
				3888	* we really need to fetch and parse the external entity
				3889	*/
				3890	int parse;
				3891	xmlNodePtr children = NULL;
				3892
				3893	parse = docbParseCtxtExternalEntity(ctxt,
				3894	xent->SystemID, xent->ExternalID, &children);
				3895	xmlAddChildList((xmlNodePtr) xent, children);
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame^]	3896	}
				3897	if (ctxt->replaceEntities) {
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3898	if ((ctxt->node != NULL) && (xent->children != NULL)) {
				3899	/*
				3900	* Seems we are generating the DOM content, do
				3901	* a simple tree copy
				3902	*/
				3903	xmlNodePtr new;
				3904	new = xmlCopyNodeList(xent->children);
				3905
				3906	xmlAddChildList(ctxt->node, new);
				3907	/*
				3908	* This is to avoid a nasty side effect, see
				3909	* characters() in SAX.c
				3910	*/
				3911	ctxt->nodemem = 0;
				3912	ctxt->nodelen = 0;
				3913	}
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame^]	3914	} else {
				3915	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
				3916	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
				3917	/*
				3918	* Create a node.
				3919	*/
				3920	ctxt->sax->reference(ctxt->userData, xent->name);
				3921	}
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3922	}
				3923	} else if (name != NULL) {
				3924	ent = docbEntityLookup(name);
				3925	if ((ent == NULL) \|\| (ent->value <= 0)) {
				3926	docbCheckParagraph(ctxt);
				3927	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
				3928	ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
				3929	ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
				3930	/* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
				3931	}
				3932	} else {
				3933	unsigned int c;
				3934	int bits, i = 0;
				3935
				3936	c = ent->value;
				3937	if (c < 0x80)
				3938	{ out[i++]= c; bits= -6; }
				3939	else if (c < 0x800)
				3940	{ out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				3941	else if (c < 0x10000)
				3942	{ out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				3943	else
				3944	{ out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				3945
				3946	for ( ; bits >= 0; bits-= 6) {
				3947	out[i++]= ((c >> bits) & 0x3F) \| 0x80;
				3948	}
				3949	out[i] = 0;
				3950
				3951	docbCheckParagraph(ctxt);
				3952	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				3953	ctxt->sax->characters(ctxt->userData, out, i);
				3954	}
				3955	} else {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3956	docbCheckParagraph(ctxt);
				3957	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				3958	ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
				3959	return;
				3960	}
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3961	if (name != NULL)
				3962	xmlFree(name);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3963	}
				3964	}
				3965
				3966	/**
				3967	* docbParseContent:
				3968	* @ctxt: an SGML parser context
				3969	* @name: the node name
				3970	*
				3971	* Parse a content: comment, sub-element, reference or text.
				3972	*
				3973	*/
				3974
				3975	static void
				3976	docbParseContent(docbParserCtxtPtr ctxt) {
				3977	xmlChar *currentNode;
				3978	int depth;
				3979
				3980	currentNode = xmlStrdup(ctxt->name);
				3981	depth = ctxt->nameNr;
				3982	while (1) {
				3983	long cons = ctxt->nbChars;
				3984
				3985	GROW;
				3986	/*
				3987	* Our tag or one of it's parent or children is ending.
				3988	*/
				3989	if ((CUR == '<') && (NXT(1) == '/')) {
				3990	docbParseEndTag(ctxt);
				3991	if (currentNode != NULL) xmlFree(currentNode);
				3992	return;
				3993	}
				3994
				3995	/*
				3996	* Has this node been popped out during parsing of
				3997	* the next element
				3998	*/
				3999	if ((!xmlStrEqual(currentNode, ctxt->name)) &&
				4000	(depth >= ctxt->nameNr)) {
				4001	if (currentNode != NULL) xmlFree(currentNode);
				4002	return;
				4003	}
				4004
				4005	/*
				4006	* Sometimes DOCTYPE arrives in the middle of the document
				4007	*/
				4008	if ((CUR == '<') && (NXT(1) == '!') &&
				4009	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				4010	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				4011	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				4012	(UPP(8) == 'E')) {
				4013	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4014	ctxt->sax->error(ctxt->userData,
				4015	"Misplaced DOCTYPE declaration\n");
				4016	ctxt->wellFormed = 0;
				4017	docbParseDocTypeDecl(ctxt);
				4018	}
				4019
				4020	/*
				4021	* First case : a comment
				4022	*/
				4023	if ((CUR == '<') && (NXT(1) == '!') &&
				4024	(NXT(2) == '-') && (NXT(3) == '-')) {
				4025	docbParseComment(ctxt);
				4026	}
				4027
				4028	/*
				4029	* Second case : a sub-element.
				4030	*/
				4031	else if (CUR == '<') {
				4032	docbParseElement(ctxt);
				4033	}
				4034
				4035	/*
				4036	* Third case : a reference. If if has not been resolved,
				4037	* parsing returns it's Name, create the node
				4038	*/
				4039	else if (CUR == '&') {
				4040	docbParseReference(ctxt);
				4041	}
				4042
				4043	/*
				4044	* Fourth : end of the resource
				4045	*/
				4046	else if (CUR == 0) {
				4047	docbAutoClose(ctxt, NULL);
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4048	if (ctxt->nameNr == 0)
				4049	break;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4050	}
				4051
				4052	/*
				4053	* Last case, text. Note that References are handled directly.
				4054	*/
				4055	else {
				4056	docbParseCharData(ctxt);
				4057	}
				4058
				4059	if (cons == ctxt->nbChars) {
				4060	if (ctxt->node != NULL) {
				4061	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4062	ctxt->sax->error(ctxt->userData,
				4063	"detected an error in element content\n");
				4064	ctxt->wellFormed = 0;
				4065	}
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4066	break;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4067	}
				4068
				4069	GROW;
				4070	}
				4071	if (currentNode != NULL) xmlFree(currentNode);
				4072	}
				4073
				4074	/**
				4075	* docbParseElement:
				4076	* @ctxt: an SGML parser context
				4077	*
				4078	* parse an SGML element, this is highly recursive
				4079	*
				4080	* [39] element ::= EmptyElemTag \| STag content ETag
				4081	*
				4082	* [41] Attribute ::= Name Eq AttValue
				4083	*/
				4084
				4085	static void
				4086	docbParseElement(docbParserCtxtPtr ctxt) {
				4087	xmlChar *name;
				4088	xmlChar *currentNode = NULL;
				4089	docbElemDescPtr info;
				4090	docbParserNodeInfo node_info;
				4091	xmlChar *oldname;
				4092	int depth = ctxt->nameNr;
				4093
				4094	/* Capture start position */
				4095	if (ctxt->record_info) {
				4096	node_info.begin_pos = ctxt->input->consumed +
				4097	(CUR_PTR - ctxt->input->base);
				4098	node_info.begin_line = ctxt->input->line;
				4099	}
				4100
				4101	oldname = xmlStrdup(ctxt->name);
				4102	docbParseStartTag(ctxt);
				4103	name = ctxt->name;
				4104	#ifdef DEBUG
				4105	if (oldname == NULL)
				4106	xmlGenericError(xmlGenericErrorContext,
				4107	"Start of element %s\n", name);
				4108	else if (name == NULL)
				4109	xmlGenericError(xmlGenericErrorContext,
				4110	"Start of element failed, was %s\n", oldname);
				4111	else
				4112	xmlGenericError(xmlGenericErrorContext,
				4113	"Start of element %s, was %s\n", name, oldname);
				4114	#endif
				4115	if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) \|\|
				4116	(name == NULL)) {
				4117	if (CUR == '>')
				4118	NEXT;
				4119	if (oldname != NULL)
				4120	xmlFree(oldname);
				4121	return;
				4122	}
				4123	if (oldname != NULL)
				4124	xmlFree(oldname);
				4125
				4126	/*
				4127	* Lookup the info for that element.
				4128	*/
				4129	info = docbTagLookup(name);
				4130	if (info == NULL) {
				4131	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4132	ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
				4133	name);
				4134	ctxt->wellFormed = 0;
				4135	} else if (info->depr) {
				4136	/***************************
				4137	if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
				4138	ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n",
				4139	name);
				4140	***************************/
				4141	}
				4142
				4143	/*
				4144	* Check for an Empty Element labelled the XML/SGML way
				4145	*/
				4146	if ((CUR == '/') && (NXT(1) == '>')) {
				4147	SKIP(2);
				4148	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				4149	ctxt->sax->endElement(ctxt->userData, name);
				4150	oldname = docbnamePop(ctxt);
				4151	#ifdef DEBUG
				4152	xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", oldname);
				4153	#endif
				4154	if (oldname != NULL)
				4155	xmlFree(oldname);
				4156	return;
				4157	}
				4158
				4159	if (CUR == '>') {
				4160	NEXT;
				4161	} else {
				4162	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4163	ctxt->sax->error(ctxt->userData,
				4164	"Couldn't find end of Start Tag %s\n",
				4165	name);
				4166	ctxt->wellFormed = 0;
				4167
				4168	/*
				4169	* end of parsing of this node.
				4170	*/
				4171	if (xmlStrEqual(name, ctxt->name)) {
				4172	nodePop(ctxt);
				4173	oldname = docbnamePop(ctxt);
				4174	#ifdef DEBUG
				4175	xmlGenericError(xmlGenericErrorContext,"End of start tag problem: popping out %s\n", oldname);
				4176	#endif
				4177	if (oldname != NULL)
				4178	xmlFree(oldname);
				4179	}
				4180
				4181	/*
				4182	* Capture end position and add node
				4183	*/
				4184	if ( currentNode != NULL && ctxt->record_info ) {
				4185	node_info.end_pos = ctxt->input->consumed +
				4186	(CUR_PTR - ctxt->input->base);
				4187	node_info.end_line = ctxt->input->line;
				4188	node_info.node = ctxt->node;
				4189	xmlParserAddNodeInfo(ctxt, &node_info);
				4190	}
				4191	return;
				4192	}
				4193
				4194	/*
				4195	* Check for an Empty Element from DTD definition
				4196	*/
				4197	if ((info != NULL) && (info->empty)) {
				4198	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				4199	ctxt->sax->endElement(ctxt->userData, name);
				4200	oldname = docbnamePop(ctxt);
				4201	#ifdef DEBUG
				4202	xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
				4203	#endif
				4204	if (oldname != NULL)
				4205	xmlFree(oldname);
				4206	return;
				4207	}
				4208
				4209	/*
				4210	* Parse the content of the element:
				4211	*/
				4212	currentNode = xmlStrdup(ctxt->name);
				4213	depth = ctxt->nameNr;
				4214	while (IS_CHAR(CUR)) {
				4215	docbParseContent(ctxt);
				4216	if (ctxt->nameNr < depth) break;
				4217	}
				4218
				4219	if (!IS_CHAR(CUR)) {
				4220	/************
				4221	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4222	ctxt->sax->error(ctxt->userData,
				4223	"Premature end of data in tag %s\n", currentNode);
				4224	ctxt->wellFormed = 0;
				4225	*************/
				4226
				4227	/*
				4228	* end of parsing of this node.
				4229	*/
				4230	nodePop(ctxt);
				4231	oldname = docbnamePop(ctxt);
				4232	#ifdef DEBUG
				4233	xmlGenericError(xmlGenericErrorContext,"Premature end of tag %s : popping out %s\n", name, oldname);
				4234	#endif
				4235	if (oldname != NULL)
				4236	xmlFree(oldname);
				4237	if (currentNode != NULL)
				4238	xmlFree(currentNode);
				4239	return;
				4240	}
				4241
				4242	/*
				4243	* Capture end position and add node
				4244	*/
				4245	if ( currentNode != NULL && ctxt->record_info ) {
				4246	node_info.end_pos = ctxt->input->consumed +
				4247	(CUR_PTR - ctxt->input->base);
				4248	node_info.end_line = ctxt->input->line;
				4249	node_info.node = ctxt->node;
				4250	xmlParserAddNodeInfo(ctxt, &node_info);
				4251	}
				4252	if (currentNode != NULL)
				4253	xmlFree(currentNode);
				4254	}
				4255
				4256	/**
				4257	* docbParseEntityDecl:
				4258	* @ctxt: an SGML parser context
				4259	*
				4260	* parse <!ENTITY declarations
				4261	*
				4262	*/
				4263
				4264	static void
				4265	docbParseEntityDecl(xmlParserCtxtPtr ctxt) {
				4266	xmlChar *name = NULL;
				4267	xmlChar *value = NULL;
				4268	xmlChar URI = NULL, literal = NULL;
				4269	xmlChar *ndata = NULL;
				4270	int isParameter = 0;
				4271	xmlChar *orig = NULL;
				4272
				4273	GROW;
				4274	if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4275	(UPP(2) == 'E') && (UPP(3) == 'N') &&
				4276	(UPP(4) == 'T') && (UPP(5) == 'I') &&
				4277	(UPP(6) == 'T') && (UPP(7) == 'Y')) {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4278	xmlParserInputPtr input = ctxt->input;
				4279	ctxt->instate = XML_PARSER_ENTITY_DECL;
				4280	SHRINK;
				4281	SKIP(8);
				4282	if (!IS_BLANK(CUR)) {
				4283	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4284	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4285	ctxt->sax->error(ctxt->userData,
				4286	"Space required after '<!ENTITY'\n");
				4287	ctxt->wellFormed = 0;
				4288	ctxt->disableSAX = 1;
				4289	}
				4290	SKIP_BLANKS;
				4291
				4292	if (RAW == '%') {
				4293	NEXT;
				4294	if (!IS_BLANK(CUR)) {
				4295	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4296	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4297	ctxt->sax->error(ctxt->userData,
				4298	"Space required after '%'\n");
				4299	ctxt->wellFormed = 0;
				4300	ctxt->disableSAX = 1;
				4301	}
				4302	SKIP_BLANKS;
				4303	isParameter = 1;
				4304	}
				4305
				4306	name = xmlParseName(ctxt);
				4307	if (name == NULL) {
				4308	ctxt->errNo = XML_ERR_NAME_REQUIRED;
				4309	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4310	ctxt->sax->error(ctxt->userData, "sgmlarseEntityDecl: no name\n");
				4311	ctxt->wellFormed = 0;
				4312	ctxt->disableSAX = 1;
				4313	return;
				4314	}
				4315	if (!IS_BLANK(CUR)) {
				4316	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4317	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4318	ctxt->sax->error(ctxt->userData,
				4319	"Space required after the entity name\n");
				4320	ctxt->wellFormed = 0;
				4321	ctxt->disableSAX = 1;
				4322	}
				4323	SKIP_BLANKS;
				4324
				4325	/*
				4326	* handle the various case of definitions...
				4327	*/
				4328	if (isParameter) {
				4329	if ((RAW == '"') \|\| (RAW == '\'')) {
				4330	value = xmlParseEntityValue(ctxt, &orig);
				4331	if (value) {
				4332	if ((ctxt->sax != NULL) &&
				4333	(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
				4334	ctxt->sax->entityDecl(ctxt->userData, name,
				4335	XML_INTERNAL_PARAMETER_ENTITY,
				4336	NULL, NULL, value);
				4337	}
				4338	} else {
				4339	URI = xmlParseExternalID(ctxt, &literal, 1);
				4340	if ((URI == NULL) && (literal == NULL)) {
				4341	ctxt->errNo = XML_ERR_VALUE_REQUIRED;
				4342	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4343	ctxt->sax->error(ctxt->userData,
				4344	"Entity value required\n");
				4345	ctxt->wellFormed = 0;
				4346	ctxt->disableSAX = 1;
				4347	}
				4348	if (URI) {
				4349	xmlURIPtr uri;
				4350
				4351	uri = xmlParseURI((const char *) URI);
				4352	if (uri == NULL) {
				4353	ctxt->errNo = XML_ERR_INVALID_URI;
				4354	if ((ctxt->sax != NULL) &&
				4355	(!ctxt->disableSAX) &&
				4356	(ctxt->sax->error != NULL))
				4357	ctxt->sax->error(ctxt->userData,
				4358	"Invalid URI: %s\n", URI);
				4359	ctxt->wellFormed = 0;
				4360	} else {
				4361	if (uri->fragment != NULL) {
				4362	ctxt->errNo = XML_ERR_URI_FRAGMENT;
				4363	if ((ctxt->sax != NULL) &&
				4364	(!ctxt->disableSAX) &&
				4365	(ctxt->sax->error != NULL))
				4366	ctxt->sax->error(ctxt->userData,
				4367	"Fragment not allowed: %s\n", URI);
				4368	ctxt->wellFormed = 0;
				4369	} else {
				4370	if ((ctxt->sax != NULL) &&
				4371	(!ctxt->disableSAX) &&
				4372	(ctxt->sax->entityDecl != NULL))
				4373	ctxt->sax->entityDecl(ctxt->userData, name,
				4374	XML_EXTERNAL_PARAMETER_ENTITY,
				4375	literal, URI, NULL);
				4376	}
				4377	xmlFreeURI(uri);
				4378	}
				4379	}
				4380	}
				4381	} else {
				4382	if ((RAW == '"') \|\| (RAW == '\'')) {
				4383	value = xmlParseEntityValue(ctxt, &orig);
				4384	if ((ctxt->sax != NULL) &&
				4385	(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
				4386	ctxt->sax->entityDecl(ctxt->userData, name,
				4387	XML_INTERNAL_GENERAL_ENTITY,
				4388	NULL, NULL, value);
				4389	} else {
				4390	URI = xmlParseExternalID(ctxt, &literal, 1);
				4391	if ((URI == NULL) && (literal == NULL)) {
				4392	ctxt->errNo = XML_ERR_VALUE_REQUIRED;
				4393	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4394	ctxt->sax->error(ctxt->userData,
				4395	"Entity value required\n");
				4396	ctxt->wellFormed = 0;
				4397	ctxt->disableSAX = 1;
				4398	}
				4399	if (URI) {
				4400	xmlURIPtr uri;
				4401
				4402	uri = xmlParseURI((const char *)URI);
				4403	if (uri == NULL) {
				4404	ctxt->errNo = XML_ERR_INVALID_URI;
				4405	if ((ctxt->sax != NULL) &&
				4406	(!ctxt->disableSAX) &&
				4407	(ctxt->sax->error != NULL))
				4408	ctxt->sax->error(ctxt->userData,
				4409	"Invalid URI: %s\n", URI);
				4410	ctxt->wellFormed = 0;
				4411	} else {
				4412	if (uri->fragment != NULL) {
				4413	ctxt->errNo = XML_ERR_URI_FRAGMENT;
				4414	if ((ctxt->sax != NULL) &&
				4415	(!ctxt->disableSAX) &&
				4416	(ctxt->sax->error != NULL))
				4417	ctxt->sax->error(ctxt->userData,
				4418	"Fragment not allowed: %s\n", URI);
				4419	ctxt->wellFormed = 0;
				4420	}
				4421	xmlFreeURI(uri);
				4422	}
				4423	}
				4424	if ((RAW != '>') && (!IS_BLANK(CUR))) {
				4425	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4426	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4427	ctxt->sax->error(ctxt->userData,
				4428	"Space required before content model\n");
				4429	ctxt->wellFormed = 0;
				4430	ctxt->disableSAX = 1;
				4431	}
				4432	SKIP_BLANKS;
				4433
				4434	/*
				4435	* SGML specific: here we can get the content model
				4436	*/
				4437	if (RAW != '>') {
				4438	xmlChar *contmod;
				4439
				4440	contmod = xmlParseName(ctxt);
				4441
				4442	if (contmod == NULL) {
				4443	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4444	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4445	ctxt->sax->error(ctxt->userData,
				4446	"Could not parse entity content model\n");
				4447	ctxt->wellFormed = 0;
				4448	ctxt->disableSAX = 1;
				4449	} else {
				4450	if (xmlStrEqual(contmod, BAD_CAST"NDATA")) {
				4451	if (!IS_BLANK(CUR)) {
				4452	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4453	if ((ctxt->sax != NULL) &&
				4454	(ctxt->sax->error != NULL))
				4455	ctxt->sax->error(ctxt->userData,
				4456	"Space required after 'NDATA'\n");
				4457	ctxt->wellFormed = 0;
				4458	ctxt->disableSAX = 1;
				4459	}
				4460	SKIP_BLANKS;
				4461	ndata = xmlParseName(ctxt);
				4462	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
				4463	(ctxt->sax->unparsedEntityDecl != NULL)) {
				4464	ctxt->sax->unparsedEntityDecl(ctxt->userData,
				4465	name, literal, URI, ndata);
				4466	}
				4467	} else if (xmlStrEqual(contmod, BAD_CAST"SUBDOC")) {
				4468	if ((ctxt->sax != NULL) &&
				4469	(ctxt->sax->warning != NULL))
				4470	ctxt->sax->warning(ctxt->userData,
				4471	"SUBDOC entities are not supported\n");
				4472	SKIP_BLANKS;
				4473	ndata = xmlParseName(ctxt);
				4474	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
				4475	(ctxt->sax->unparsedEntityDecl != NULL)) {
				4476	ctxt->sax->unparsedEntityDecl(ctxt->userData,
				4477	name, literal, URI, ndata);
				4478	}
				4479	} else if (xmlStrEqual(contmod, BAD_CAST"CDATA")) {
				4480	if ((ctxt->sax != NULL) &&
				4481	(ctxt->sax->warning != NULL))
				4482	ctxt->sax->warning(ctxt->userData,
				4483	"CDATA entities are not supported\n");
				4484	SKIP_BLANKS;
				4485	ndata = xmlParseName(ctxt);
				4486	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
				4487	(ctxt->sax->unparsedEntityDecl != NULL)) {
				4488	ctxt->sax->unparsedEntityDecl(ctxt->userData,
				4489	name, literal, URI, ndata);
				4490	}
				4491	}
				4492	xmlFree(contmod);
				4493	}
				4494	} else {
				4495	if ((ctxt->sax != NULL) &&
				4496	(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
				4497	ctxt->sax->entityDecl(ctxt->userData, name,
				4498	XML_EXTERNAL_GENERAL_PARSED_ENTITY,
				4499	literal, URI, NULL);
				4500	}
				4501	}
				4502	}
				4503	SKIP_BLANKS;
				4504	if (RAW != '>') {
				4505	ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
				4506	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4507	ctxt->sax->error(ctxt->userData,
				4508	"docbParseEntityDecl: entity %s not terminated\n", name);
				4509	ctxt->wellFormed = 0;
				4510	ctxt->disableSAX = 1;
				4511	} else {
				4512	if (input != ctxt->input) {
				4513	ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
				4514	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4515	ctxt->sax->error(ctxt->userData,
				4516	"Entity declaration doesn't start and stop in the same entity\n");
				4517	ctxt->wellFormed = 0;
				4518	ctxt->disableSAX = 1;
				4519	}
				4520	NEXT;
				4521	}
				4522	if (orig != NULL) {
				4523	/*
				4524	* Ugly mechanism to save the raw entity value.
				4525	*/
				4526	xmlEntityPtr cur = NULL;
				4527
				4528	if (isParameter) {
				4529	if ((ctxt->sax != NULL) &&
				4530	(ctxt->sax->getParameterEntity != NULL))
				4531	cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
				4532	} else {
				4533	if ((ctxt->sax != NULL) &&
				4534	(ctxt->sax->getEntity != NULL))
				4535	cur = ctxt->sax->getEntity(ctxt->userData, name);
				4536	}
				4537	if (cur != NULL) {
				4538	if (cur->orig != NULL)
				4539	xmlFree(orig);
				4540	else
				4541	cur->orig = orig;
				4542	} else
				4543	xmlFree(orig);
				4544	}
				4545	if (name != NULL) xmlFree(name);
				4546	if (value != NULL) xmlFree(value);
				4547	if (URI != NULL) xmlFree(URI);
				4548	if (literal != NULL) xmlFree(literal);
				4549	if (ndata != NULL) xmlFree(ndata);
				4550	}
				4551	}
				4552
				4553	/**
				4554	* docbParseMarkupDecl:
				4555	* @ctxt: an SGML parser context
				4556	*
				4557	* parse Markup declarations
				4558	*
				4559	* [29] markupdecl ::= elementdecl \| AttlistDecl \| EntityDecl \|
				4560	* NotationDecl \| PI \| Comment
				4561	*/
				4562	static void
				4563	docbParseMarkupDecl(xmlParserCtxtPtr ctxt) {
				4564	GROW;
				4565	xmlParseElementDecl(ctxt);
				4566	xmlParseAttributeListDecl(ctxt);
				4567	docbParseEntityDecl(ctxt);
				4568	xmlParseNotationDecl(ctxt);
				4569	xmlParsePI(ctxt);
				4570	xmlParseComment(ctxt);
				4571	/*
				4572	* This is only for internal subset. On external entities,
				4573	* the replacement is done before parsing stage
				4574	*/
				4575	if ((ctxt->external == 0) && (ctxt->inputNr == 1))
				4576	xmlParsePEReference(ctxt);
				4577	ctxt->instate = XML_PARSER_DTD;
				4578	}
				4579
				4580	/**
				4581	* docbParseInternalsubset:
				4582	* @ctxt: an SGML parser context
				4583	*
				4584	* parse the internal subset declaration
				4585	*
				4586	* [28 end] ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'
				4587	*/
				4588
				4589	static void
				4590	docbParseInternalSubset(xmlParserCtxtPtr ctxt) {
				4591	/*
				4592	* Is there any DTD definition ?
				4593	*/
				4594	if (RAW == '[') {
				4595	ctxt->instate = XML_PARSER_DTD;
				4596	NEXT;
				4597	/*
				4598	* Parse the succession of Markup declarations and
				4599	* PEReferences.
				4600	* Subsequence (markupdecl \| PEReference \| S)*
				4601	*/
				4602	while (RAW != ']') {
				4603	const xmlChar *check = CUR_PTR;
				4604	int cons = ctxt->input->consumed;
				4605
				4606	SKIP_BLANKS;
				4607	docbParseMarkupDecl(ctxt);
				4608	xmlParsePEReference(ctxt);
				4609
				4610	/*
				4611	* Pop-up of finished entities.
				4612	*/
				4613	while ((RAW == 0) && (ctxt->inputNr > 1))
				4614	xmlPopInput(ctxt);
				4615
				4616	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
				4617	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
				4618	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4619	ctxt->sax->error(ctxt->userData,
				4620	"docbParseInternalSubset: error detected in Markup declaration\n");
				4621	ctxt->wellFormed = 0;
				4622	ctxt->disableSAX = 1;
				4623	break;
				4624	}
				4625	}
				4626	if (RAW == ']') {
				4627	NEXT;
				4628	SKIP_BLANKS;
				4629	}
				4630	}
				4631
				4632	/*
				4633	* We should be at the end of the DOCTYPE declaration.
				4634	*/
				4635	if (RAW != '>') {
				4636	ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
				4637	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4638	ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
				4639	ctxt->wellFormed = 0;
				4640	ctxt->disableSAX = 1;
				4641	}
				4642	NEXT;
				4643	}
				4644
				4645	/**
				4646	* docbParseMisc:
				4647	* @ctxt: an XML parser context
				4648	*
				4649	* parse an XML Misc* optionnal field.
				4650	*
				4651	* [27] Misc ::= Comment \| PI \| S
				4652	*/
				4653
				4654	static void
				4655	docbParseMisc(xmlParserCtxtPtr ctxt) {
				4656	while (((RAW == '<') && (NXT(1) == '?')) \|\|
				4657	((RAW == '<') && (NXT(1) == '!') &&
				4658	(NXT(2) == '-') && (NXT(3) == '-')) \|\|
				4659	IS_BLANK(CUR)) {
				4660	if ((RAW == '<') && (NXT(1) == '?')) {
				4661	xmlParsePI(ctxt); /* TODO: SGML PIs differs */
				4662	} else if (IS_BLANK(CUR)) {
				4663	NEXT;
				4664	} else
				4665	xmlParseComment(ctxt);
				4666	}
				4667	}
				4668
				4669	/**
				4670	* docbParseDocument :
				4671	* @ctxt: an SGML parser context
				4672	*
				4673	* parse an SGML document (and build a tree if using the standard SAX
				4674	* interface).
				4675	*
				4676	* Returns 0, -1 in case of error. the parser context is augmented
				4677	* as a result of the parsing.
				4678	*/
				4679
				4680	int
				4681	docbParseDocument(docbParserCtxtPtr ctxt) {
				4682	xmlChar start[4];
				4683	xmlCharEncoding enc;
				4684	xmlDtdPtr dtd;
				4685
				4686	docbDefaultSAXHandlerInit();
				4687	ctxt->html = 2;
				4688
				4689	GROW;
				4690	/*
				4691	* SAX: beginning of the document processing.
				4692	*/
				4693	if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
				4694	ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
				4695
				4696	/*
				4697	* Get the 4 first bytes and decode the charset
				4698	* if enc != XML_CHAR_ENCODING_NONE
				4699	* plug some encoding conversion routines.
				4700	*/
				4701	start[0] = RAW;
				4702	start[1] = NXT(1);
				4703	start[2] = NXT(2);
				4704	start[3] = NXT(3);
				4705	enc = xmlDetectCharEncoding(start, 4);
				4706	if (enc != XML_CHAR_ENCODING_NONE) {
				4707	xmlSwitchEncoding(ctxt, enc);
				4708	}
				4709
				4710	/*
				4711	* Wipe out everything which is before the first '<'
				4712	*/
				4713	SKIP_BLANKS;
				4714	if (CUR == 0) {
				4715	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4716	ctxt->sax->error(ctxt->userData, "Document is empty\n");
				4717	ctxt->wellFormed = 0;
				4718	}
				4719
				4720	if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
				4721	ctxt->sax->startDocument(ctxt->userData);
				4722
				4723
				4724	/*
				4725	* The Misc part of the Prolog
				4726	*/
				4727	GROW;
				4728	docbParseMisc(ctxt);
				4729
				4730	/*
				4731	* Then possibly doc type declaration(s) and more Misc
				4732	* (doctypedecl Misc*)?
				4733	*/
				4734	GROW;
				4735	if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4736	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				4737	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				4738	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				4739	(UPP(8) == 'E')) {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4740
				4741	ctxt->inSubset = 1;
				4742	docbParseDocTypeDecl(ctxt);
				4743	if (RAW == '[') {
				4744	ctxt->instate = XML_PARSER_DTD;
				4745	docbParseInternalSubset(ctxt);
				4746	}
				4747
				4748	/*
				4749	* Create and update the external subset.
				4750	*/
				4751	ctxt->inSubset = 2;
				4752	if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
				4753	(!ctxt->disableSAX))
				4754	ctxt->sax->internalSubset(ctxt->userData, ctxt->intSubName,
				4755	ctxt->extSubSystem, ctxt->extSubURI);
				4756	ctxt->inSubset = 0;
				4757
				4758
				4759	ctxt->instate = XML_PARSER_PROLOG;
				4760	docbParseMisc(ctxt);
				4761	}
				4762
				4763	/*
				4764	* Time to start parsing the tree itself
				4765	*/
				4766	docbParseContent(ctxt);
				4767
				4768	/*
				4769	* autoclose
				4770	*/
				4771	if (CUR == 0)
				4772	docbAutoClose(ctxt, NULL);
				4773
				4774
				4775	/*
				4776	* SAX: end of the document processing.
				4777	*/
				4778	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				4779	ctxt->sax->endDocument(ctxt->userData);
				4780
				4781	if (ctxt->myDoc != NULL) {
				4782	dtd = ctxt->myDoc->intSubset;
				4783	if (dtd == NULL)
				4784	ctxt->myDoc->intSubset =
				4785	xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
				4786	BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
				4787	BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
				4788	}
				4789	if (! ctxt->wellFormed) return(-1);
				4790	return(0);
				4791	}
				4792
				4793
				4794	/************************************************************************
				4795	* *
				4796	* Parser contexts handling *
				4797	* *
				4798	************************************************************************/
				4799
				4800	/**
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame^]	4801	* docbInitParserCtxt:
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4802	* @ctxt: an SGML parser context
				4803	*
				4804	* Initialize a parser context
				4805	*/
				4806
				4807	static void
				4808	docbInitParserCtxt(docbParserCtxtPtr ctxt)
				4809	{
				4810	docbSAXHandler *sax;
				4811
				4812	if (ctxt == NULL) return;
				4813	memset(ctxt, 0, sizeof(docbParserCtxt));
				4814
				4815	sax = (docbSAXHandler *) xmlMalloc(sizeof(docbSAXHandler));
				4816	if (sax == NULL) {
				4817	xmlGenericError(xmlGenericErrorContext,
				4818	"docbInitParserCtxt: out of memory\n");
				4819	}
				4820	memset(sax, 0, sizeof(docbSAXHandler));
				4821
				4822	/* Allocate the Input stack */
				4823	ctxt->inputTab = (docbParserInputPtr *)
				4824	xmlMalloc(5 * sizeof(docbParserInputPtr));
				4825	if (ctxt->inputTab == NULL) {
				4826	xmlGenericError(xmlGenericErrorContext,
				4827	"docbInitParserCtxt: out of memory\n");
				4828	}
				4829	ctxt->inputNr = 0;
				4830	ctxt->inputMax = 5;
				4831	ctxt->input = NULL;
				4832	ctxt->version = NULL;
				4833	ctxt->encoding = NULL;
				4834	ctxt->standalone = -1;
				4835	ctxt->instate = XML_PARSER_START;
				4836
				4837	/* Allocate the Node stack */
				4838	ctxt->nodeTab = (docbNodePtr ) xmlMalloc(10 sizeof(docbNodePtr));
				4839	ctxt->nodeNr = 0;
				4840	ctxt->nodeMax = 10;
				4841	ctxt->node = NULL;
				4842
				4843	/* Allocate the Name stack */
				4844	ctxt->nameTab = (xmlChar *) xmlMalloc(10 sizeof(xmlChar *));
				4845	ctxt->nameNr = 0;
				4846	ctxt->nameMax = 10;
				4847	ctxt->name = NULL;
				4848
				4849	if (sax == NULL) ctxt->sax = &docbDefaultSAXHandler;
				4850	else {
				4851	ctxt->sax = sax;
				4852	memcpy(sax, &docbDefaultSAXHandler, sizeof(docbSAXHandler));
				4853	}
				4854	ctxt->userData = ctxt;
				4855	ctxt->myDoc = NULL;
				4856	ctxt->wellFormed = 1;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4857	ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4858	ctxt->html = 2;
				4859	ctxt->record_info = 0;
				4860	ctxt->validate = 0;
				4861	ctxt->nbChars = 0;
				4862	ctxt->checkIndex = 0;
				4863	xmlInitNodeInfoSeq(&ctxt->node_seq);
				4864	}
				4865
				4866	/**
				4867	* docbFreeParserCtxt:
				4868	* @ctxt: an SGML parser context
				4869	*
				4870	* Free all the memory used by a parser context. However the parsed
				4871	* document in ctxt->myDoc is not freed.
				4872	*/
				4873
				4874	void
				4875	docbFreeParserCtxt(docbParserCtxtPtr ctxt)
				4876	{
				4877	xmlFreeParserCtxt(ctxt);
				4878	}
				4879
				4880	/**
				4881	* docbCreateDocParserCtxt :
				4882	* @cur: a pointer to an array of xmlChar
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame^]	4883	* @encoding: the SGML document encoding, or NULL
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4884	*
				4885	* Create a parser context for an SGML document.
				4886	*
				4887	* Returns the new parser context or NULL
				4888	*/
				4889	static docbParserCtxtPtr
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame^]	4890	docbCreateDocParserCtxt(xmlChar cur, const char encoding ATTRIBUTE_UNUSED) {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4891	docbParserCtxtPtr ctxt;
				4892	docbParserInputPtr input;
				4893	/* sgmlCharEncoding enc; */
				4894
				4895	ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
				4896	if (ctxt == NULL) {
				4897	perror("malloc");
				4898	return(NULL);
				4899	}
				4900	docbInitParserCtxt(ctxt);
				4901	input = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput));
				4902	if (input == NULL) {
				4903	perror("malloc");
				4904	xmlFree(ctxt);
				4905	return(NULL);
				4906	}
				4907	memset(input, 0, sizeof(docbParserInput));
				4908
				4909	input->line = 1;
				4910	input->col = 1;
				4911	input->base = cur;
				4912	input->cur = cur;
				4913
				4914	inputPush(ctxt, input);
				4915	return(ctxt);
				4916	}
				4917
				4918	/************************************************************************
				4919	* *
				4920	* Progressive parsing interfaces *
				4921	* *
				4922	************************************************************************/
				4923
				4924	/**
				4925	* docbParseLookupSequence:
				4926	* @ctxt: an SGML parser context
				4927	* @first: the first char to lookup
				4928	* @next: the next char to lookup or zero
				4929	* @third: the next char to lookup or zero
				4930	*
				4931	* Try to find if a sequence (first, next, third) or just (first next) or
				4932	* (first) is available in the input stream.
				4933	* This function has a side effect of (possibly) incrementing ctxt->checkIndex
				4934	* to avoid rescanning sequences of bytes, it DOES change the state of the
				4935	* parser, do not use liberally.
				4936	* This is basically similar to xmlParseLookupSequence()
				4937	*
				4938	* Returns the index to the current parsing point if the full sequence
				4939	* is available, -1 otherwise.
				4940	*/
				4941	static int
				4942	docbParseLookupSequence(docbParserCtxtPtr ctxt, xmlChar first,
				4943	xmlChar next, xmlChar third) {
				4944	int base, len;
				4945	docbParserInputPtr in;
				4946	const xmlChar *buf;
				4947
				4948	in = ctxt->input;
				4949	if (in == NULL) return(-1);
				4950	base = in->cur - in->base;
				4951	if (base < 0) return(-1);
				4952	if (ctxt->checkIndex > base)
				4953	base = ctxt->checkIndex;
				4954	if (in->buf == NULL) {
				4955	buf = in->base;
				4956	len = in->length;
				4957	} else {
				4958	buf = in->buf->buffer->content;
				4959	len = in->buf->buffer->use;
				4960	}
				4961	/* take into account the sequence length */
				4962	if (third) len -= 2;
				4963	else if (next) len --;
				4964	for (;base < len;base++) {
				4965	if (buf[base] == first) {
				4966	if (third != 0) {
				4967	if ((buf[base + 1] != next) \|\|
				4968	(buf[base + 2] != third)) continue;
				4969	} else if (next != 0) {
				4970	if (buf[base + 1] != next) continue;
				4971	}
				4972	ctxt->checkIndex = 0;
				4973	#ifdef DEBUG_PUSH
				4974	if (next == 0)
				4975	xmlGenericError(xmlGenericErrorContext,
				4976	"HPP: lookup '%c' found at %d\n",
				4977	first, base);
				4978	else if (third == 0)
				4979	xmlGenericError(xmlGenericErrorContext,
				4980	"HPP: lookup '%c%c' found at %d\n",
				4981	first, next, base);
				4982	else
				4983	xmlGenericError(xmlGenericErrorContext,
				4984	"HPP: lookup '%c%c%c' found at %d\n",
				4985	first, next, third, base);
				4986	#endif
				4987	return(base - (in->cur - in->base));
				4988	}
				4989	}
				4990	ctxt->checkIndex = base;
				4991	#ifdef DEBUG_PUSH
				4992	if (next == 0)
				4993	xmlGenericError(xmlGenericErrorContext,
				4994	"HPP: lookup '%c' failed\n", first);
				4995	else if (third == 0)
				4996	xmlGenericError(xmlGenericErrorContext,
				4997	"HPP: lookup '%c%c' failed\n", first, next);
				4998	else
				4999	xmlGenericError(xmlGenericErrorContext,
				5000	"HPP: lookup '%c%c%c' failed\n", first, next, third);
				5001	#endif
				5002	return(-1);
				5003	}
				5004
				5005	/**
				5006	* docbParseTryOrFinish:
				5007	* @ctxt: an SGML parser context
				5008	* @terminate: last chunk indicator
				5009	*
				5010	* Try to progress on parsing
				5011	*
				5012	* Returns zero if no parsing was possible
				5013	*/
				5014	static int
				5015	docbParseTryOrFinish(docbParserCtxtPtr ctxt, int terminate) {
				5016	int ret = 0;
				5017	docbParserInputPtr in;
				5018	int avail = 0;
				5019	xmlChar cur, next;
				5020
				5021	#ifdef DEBUG_PUSH
				5022	switch (ctxt->instate) {
				5023	case XML_PARSER_EOF:
				5024	xmlGenericError(xmlGenericErrorContext,
				5025	"HPP: try EOF\n"); break;
				5026	case XML_PARSER_START:
				5027	xmlGenericError(xmlGenericErrorContext,
				5028	"HPP: try START\n"); break;
				5029	case XML_PARSER_MISC:
				5030	xmlGenericError(xmlGenericErrorContext,
				5031	"HPP: try MISC\n");break;
				5032	case XML_PARSER_COMMENT:
				5033	xmlGenericError(xmlGenericErrorContext,
				5034	"HPP: try COMMENT\n");break;
				5035	case XML_PARSER_PROLOG:
				5036	xmlGenericError(xmlGenericErrorContext,
				5037	"HPP: try PROLOG\n");break;
				5038	case XML_PARSER_START_TAG:
				5039	xmlGenericError(xmlGenericErrorContext,
				5040	"HPP: try START_TAG\n");break;
				5041	case XML_PARSER_CONTENT:
				5042	xmlGenericError(xmlGenericErrorContext,
				5043	"HPP: try CONTENT\n");break;
				5044	case XML_PARSER_CDATA_SECTION:
				5045	xmlGenericError(xmlGenericErrorContext,
				5046	"HPP: try CDATA_SECTION\n");break;
				5047	case XML_PARSER_END_TAG:
				5048	xmlGenericError(xmlGenericErrorContext,
				5049	"HPP: try END_TAG\n");break;
				5050	case XML_PARSER_ENTITY_DECL:
				5051	xmlGenericError(xmlGenericErrorContext,
				5052	"HPP: try ENTITY_DECL\n");break;
				5053	case XML_PARSER_ENTITY_VALUE:
				5054	xmlGenericError(xmlGenericErrorContext,
				5055	"HPP: try ENTITY_VALUE\n");break;
				5056	case XML_PARSER_ATTRIBUTE_VALUE:
				5057	xmlGenericError(xmlGenericErrorContext,
				5058	"HPP: try ATTRIBUTE_VALUE\n");break;
				5059	case XML_PARSER_DTD:
				5060	xmlGenericError(xmlGenericErrorContext,
				5061	"HPP: try DTD\n");break;
				5062	case XML_PARSER_EPILOG:
				5063	xmlGenericError(xmlGenericErrorContext,
				5064	"HPP: try EPILOG\n");break;
				5065	case XML_PARSER_PI:
				5066	xmlGenericError(xmlGenericErrorContext,
				5067	"HPP: try PI\n");break;
				5068	}
				5069	#endif
				5070
				5071	while (1) {
				5072
				5073	in = ctxt->input;
				5074	if (in == NULL) break;
				5075	if (in->buf == NULL)
				5076	avail = in->length - (in->cur - in->base);
				5077	else
				5078	avail = in->buf->buffer->use - (in->cur - in->base);
				5079	if ((avail == 0) && (terminate)) {
				5080	docbAutoClose(ctxt, NULL);
				5081	if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
				5082	/*
				5083	* SAX: end of the document processing.
				5084	*/
				5085	ctxt->instate = XML_PARSER_EOF;
				5086	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5087	ctxt->sax->endDocument(ctxt->userData);
				5088	}
				5089	}
				5090	if (avail < 1)
				5091	goto done;
				5092	switch (ctxt->instate) {
				5093	case XML_PARSER_EOF:
				5094	/*
				5095	* Document parsing is done !
				5096	*/
				5097	goto done;
				5098	case XML_PARSER_START:
				5099	/*
				5100	* Very first chars read from the document flow.
				5101	*/
				5102	cur = in->cur[0];
				5103	if (IS_BLANK(cur)) {
				5104	SKIP_BLANKS;
				5105	if (in->buf == NULL)
				5106	avail = in->length - (in->cur - in->base);
				5107	else
				5108	avail = in->buf->buffer->use - (in->cur - in->base);
				5109	}
				5110	if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
				5111	ctxt->sax->setDocumentLocator(ctxt->userData,
				5112	&xmlDefaultSAXLocator);
				5113	if ((ctxt->sax) && (ctxt->sax->startDocument) &&
				5114	(!ctxt->disableSAX))
				5115	ctxt->sax->startDocument(ctxt->userData);
				5116
				5117	cur = in->cur[0];
				5118	next = in->cur[1];
				5119	if ((cur == '<') && (next == '!') &&
				5120	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				5121	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				5122	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				5123	(UPP(8) == 'E')) {
				5124	if ((!terminate) &&
				5125	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5126	goto done;
				5127	#ifdef DEBUG_PUSH
				5128	xmlGenericError(xmlGenericErrorContext,
				5129	"HPP: Parsing internal subset\n");
				5130	#endif
				5131	docbParseDocTypeDecl(ctxt);
				5132	ctxt->instate = XML_PARSER_PROLOG;
				5133	#ifdef DEBUG_PUSH
				5134	xmlGenericError(xmlGenericErrorContext,
				5135	"HPP: entering PROLOG\n");
				5136	#endif
				5137	} else {
				5138	ctxt->instate = XML_PARSER_MISC;
				5139	}
				5140	#ifdef DEBUG_PUSH
				5141	xmlGenericError(xmlGenericErrorContext,
				5142	"HPP: entering MISC\n");
				5143	#endif
				5144	break;
				5145	case XML_PARSER_MISC:
				5146	SKIP_BLANKS;
				5147	if (in->buf == NULL)
				5148	avail = in->length - (in->cur - in->base);
				5149	else
				5150	avail = in->buf->buffer->use - (in->cur - in->base);
				5151	if (avail < 2)
				5152	goto done;
				5153	cur = in->cur[0];
				5154	next = in->cur[1];
				5155	if ((cur == '<') && (next == '!') &&
				5156	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5157	if ((!terminate) &&
				5158	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5159	goto done;
				5160	#ifdef DEBUG_PUSH
				5161	xmlGenericError(xmlGenericErrorContext,
				5162	"HPP: Parsing Comment\n");
				5163	#endif
				5164	docbParseComment(ctxt);
				5165	ctxt->instate = XML_PARSER_MISC;
				5166	} else if ((cur == '<') && (next == '!') &&
				5167	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				5168	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				5169	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				5170	(UPP(8) == 'E')) {
				5171	if ((!terminate) &&
				5172	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5173	goto done;
				5174	#ifdef DEBUG_PUSH
				5175	xmlGenericError(xmlGenericErrorContext,
				5176	"HPP: Parsing internal subset\n");
				5177	#endif
				5178	docbParseDocTypeDecl(ctxt);
				5179	ctxt->instate = XML_PARSER_PROLOG;
				5180	#ifdef DEBUG_PUSH
				5181	xmlGenericError(xmlGenericErrorContext,
				5182	"HPP: entering PROLOG\n");
				5183	#endif
				5184	} else if ((cur == '<') && (next == '!') &&
				5185	(avail < 9)) {
				5186	goto done;
				5187	} else {
				5188	ctxt->instate = XML_PARSER_START_TAG;
				5189	#ifdef DEBUG_PUSH
				5190	xmlGenericError(xmlGenericErrorContext,
				5191	"HPP: entering START_TAG\n");
				5192	#endif
				5193	}
				5194	break;
				5195	case XML_PARSER_PROLOG:
				5196	SKIP_BLANKS;
				5197	if (in->buf == NULL)
				5198	avail = in->length - (in->cur - in->base);
				5199	else
				5200	avail = in->buf->buffer->use - (in->cur - in->base);
				5201	if (avail < 2)
				5202	goto done;
				5203	cur = in->cur[0];
				5204	next = in->cur[1];
				5205	if ((cur == '<') && (next == '!') &&
				5206	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5207	if ((!terminate) &&
				5208	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5209	goto done;
				5210	#ifdef DEBUG_PUSH
				5211	xmlGenericError(xmlGenericErrorContext,
				5212	"HPP: Parsing Comment\n");
				5213	#endif
				5214	docbParseComment(ctxt);
				5215	ctxt->instate = XML_PARSER_PROLOG;
				5216	} else if ((cur == '<') && (next == '!') &&
				5217	(avail < 4)) {
				5218	goto done;
				5219	} else {
				5220	ctxt->instate = XML_PARSER_START_TAG;
				5221	#ifdef DEBUG_PUSH
				5222	xmlGenericError(xmlGenericErrorContext,
				5223	"HPP: entering START_TAG\n");
				5224	#endif
				5225	}
				5226	break;
				5227	case XML_PARSER_EPILOG:
				5228	if (in->buf == NULL)
				5229	avail = in->length - (in->cur - in->base);
				5230	else
				5231	avail = in->buf->buffer->use - (in->cur - in->base);
				5232	if (avail < 1)
				5233	goto done;
				5234	cur = in->cur[0];
				5235	if (IS_BLANK(cur)) {
				5236	docbParseCharData(ctxt);
				5237	goto done;
				5238	}
				5239	if (avail < 2)
				5240	goto done;
				5241	next = in->cur[1];
				5242	if ((cur == '<') && (next == '!') &&
				5243	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5244	if ((!terminate) &&
				5245	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5246	goto done;
				5247	#ifdef DEBUG_PUSH
				5248	xmlGenericError(xmlGenericErrorContext,
				5249	"HPP: Parsing Comment\n");
				5250	#endif
				5251	docbParseComment(ctxt);
				5252	ctxt->instate = XML_PARSER_EPILOG;
				5253	} else if ((cur == '<') && (next == '!') &&
				5254	(avail < 4)) {
				5255	goto done;
				5256	} else {
				5257	ctxt->errNo = XML_ERR_DOCUMENT_END;
				5258	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5259	ctxt->sax->error(ctxt->userData,
				5260	"Extra content at the end of the document\n");
				5261	ctxt->wellFormed = 0;
				5262	ctxt->instate = XML_PARSER_EOF;
				5263	#ifdef DEBUG_PUSH
				5264	xmlGenericError(xmlGenericErrorContext,
				5265	"HPP: entering EOF\n");
				5266	#endif
				5267	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5268	ctxt->sax->endDocument(ctxt->userData);
				5269	goto done;
				5270	}
				5271	break;
				5272	case XML_PARSER_START_TAG: {
				5273	xmlChar name, oldname;
				5274	int depth = ctxt->nameNr;
				5275	docbElemDescPtr info;
				5276
				5277	if (avail < 2)
				5278	goto done;
				5279	cur = in->cur[0];
				5280	if (cur != '<') {
				5281	ctxt->instate = XML_PARSER_CONTENT;
				5282	#ifdef DEBUG_PUSH
				5283	xmlGenericError(xmlGenericErrorContext,
				5284	"HPP: entering CONTENT\n");
				5285	#endif
				5286	break;
				5287	}
				5288	if ((!terminate) &&
				5289	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5290	goto done;
				5291
				5292	oldname = xmlStrdup(ctxt->name);
				5293	docbParseStartTag(ctxt);
				5294	name = ctxt->name;
				5295	#ifdef DEBUG
				5296	if (oldname == NULL)
				5297	xmlGenericError(xmlGenericErrorContext,
				5298	"Start of element %s\n", name);
				5299	else if (name == NULL)
				5300	xmlGenericError(xmlGenericErrorContext,
				5301	"Start of element failed, was %s\n",
				5302	oldname);
				5303	else
				5304	xmlGenericError(xmlGenericErrorContext,
				5305	"Start of element %s, was %s\n",
				5306	name, oldname);
				5307	#endif
				5308	if (((depth == ctxt->nameNr) &&
				5309	(xmlStrEqual(oldname, ctxt->name))) \|\|
				5310	(name == NULL)) {
				5311	if (CUR == '>')
				5312	NEXT;
				5313	if (oldname != NULL)
				5314	xmlFree(oldname);
				5315	break;
				5316	}
				5317	if (oldname != NULL)
				5318	xmlFree(oldname);
				5319
				5320	/*
				5321	* Lookup the info for that element.
				5322	*/
				5323	info = docbTagLookup(name);
				5324	if (info == NULL) {
				5325	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5326	ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
				5327	name);
				5328	ctxt->wellFormed = 0;
				5329	} else if (info->depr) {
				5330	/***************************
				5331	if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
				5332	ctxt->sax->warning(ctxt->userData,
				5333	"Tag %s is deprecated\n",
				5334	name);
				5335	***************************/
				5336	}
				5337
				5338	/*
				5339	* Check for an Empty Element labelled the XML/SGML way
				5340	*/
				5341	if ((CUR == '/') && (NXT(1) == '>')) {
				5342	SKIP(2);
				5343	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				5344	ctxt->sax->endElement(ctxt->userData, name);
				5345	oldname = docbnamePop(ctxt);
				5346	#ifdef DEBUG
				5347	xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n",
				5348	oldname);
				5349	#endif
				5350	if (oldname != NULL)
				5351	xmlFree(oldname);
				5352	ctxt->instate = XML_PARSER_CONTENT;
				5353	#ifdef DEBUG_PUSH
				5354	xmlGenericError(xmlGenericErrorContext,
				5355	"HPP: entering CONTENT\n");
				5356	#endif
				5357	break;
				5358	}
				5359
				5360	if (CUR == '>') {
				5361	NEXT;
				5362	} else {
				5363	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5364	ctxt->sax->error(ctxt->userData,
				5365	"Couldn't find end of Start Tag %s\n",
				5366	name);
				5367	ctxt->wellFormed = 0;
				5368
				5369	/*
				5370	* end of parsing of this node.
				5371	*/
				5372	if (xmlStrEqual(name, ctxt->name)) {
				5373	nodePop(ctxt);
				5374	oldname = docbnamePop(ctxt);
				5375	#ifdef DEBUG
				5376	xmlGenericError(xmlGenericErrorContext,
				5377	"End of start tag problem: popping out %s\n", oldname);
				5378	#endif
				5379	if (oldname != NULL)
				5380	xmlFree(oldname);
				5381	}
				5382
				5383	ctxt->instate = XML_PARSER_CONTENT;
				5384	#ifdef DEBUG_PUSH
				5385	xmlGenericError(xmlGenericErrorContext,
				5386	"HPP: entering CONTENT\n");
				5387	#endif
				5388	break;
				5389	}
				5390
				5391	/*
				5392	* Check for an Empty Element from DTD definition
				5393	*/
				5394	if ((info != NULL) && (info->empty)) {
				5395	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				5396	ctxt->sax->endElement(ctxt->userData, name);
				5397	oldname = docbnamePop(ctxt);
				5398	#ifdef DEBUG
				5399	xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
				5400	#endif
				5401	if (oldname != NULL)
				5402	xmlFree(oldname);
				5403	}
				5404	ctxt->instate = XML_PARSER_CONTENT;
				5405	#ifdef DEBUG_PUSH
				5406	xmlGenericError(xmlGenericErrorContext,
				5407	"HPP: entering CONTENT\n");
				5408	#endif
				5409	break;
				5410	}
				5411	case XML_PARSER_CONTENT: {
				5412	long cons;
				5413	/*
				5414	* Handle preparsed entities and charRef
				5415	*/
				5416	if (ctxt->token != 0) {
				5417	xmlChar chr[2] = { 0 , 0 } ;
				5418
				5419	chr[0] = (xmlChar) ctxt->token;
				5420	docbCheckParagraph(ctxt);
				5421	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				5422	ctxt->sax->characters(ctxt->userData, chr, 1);
				5423	ctxt->token = 0;
				5424	ctxt->checkIndex = 0;
				5425	}
				5426	if ((avail == 1) && (terminate)) {
				5427	cur = in->cur[0];
				5428	if ((cur != '<') && (cur != '&')) {
				5429	if (ctxt->sax != NULL) {
				5430	if (IS_BLANK(cur)) {
				5431	if (ctxt->sax->ignorableWhitespace != NULL)
				5432	ctxt->sax->ignorableWhitespace(
				5433	ctxt->userData, &cur, 1);
				5434	} else {
				5435	docbCheckParagraph(ctxt);
				5436	if (ctxt->sax->characters != NULL)
				5437	ctxt->sax->characters(
				5438	ctxt->userData, &cur, 1);
				5439	}
				5440	}
				5441	ctxt->token = 0;
				5442	ctxt->checkIndex = 0;
				5443	NEXT;
				5444	}
				5445	break;
				5446	}
				5447	if (avail < 2)
				5448	goto done;
				5449	cur = in->cur[0];
				5450	next = in->cur[1];
				5451	cons = ctxt->nbChars;
				5452	/*
				5453	* Sometimes DOCTYPE arrives in the middle of the document
				5454	*/
				5455	if ((cur == '<') && (next == '!') &&
				5456	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				5457	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				5458	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				5459	(UPP(8) == 'E')) {
				5460	if ((!terminate) &&
				5461	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5462	goto done;
				5463	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5464	ctxt->sax->error(ctxt->userData,
				5465	"Misplaced DOCTYPE declaration\n");
				5466	ctxt->wellFormed = 0;
				5467	docbParseDocTypeDecl(ctxt);
				5468	} else if ((cur == '<') && (next == '!') &&
				5469	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5470	if ((!terminate) &&
				5471	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5472	goto done;
				5473	#ifdef DEBUG_PUSH
				5474	xmlGenericError(xmlGenericErrorContext,
				5475	"HPP: Parsing Comment\n");
				5476	#endif
				5477	docbParseComment(ctxt);
				5478	ctxt->instate = XML_PARSER_CONTENT;
				5479	} else if ((cur == '<') && (next == '!') && (avail < 4)) {
				5480	goto done;
				5481	} else if ((cur == '<') && (next == '/')) {
				5482	ctxt->instate = XML_PARSER_END_TAG;
				5483	ctxt->checkIndex = 0;
				5484	#ifdef DEBUG_PUSH
				5485	xmlGenericError(xmlGenericErrorContext,
				5486	"HPP: entering END_TAG\n");
				5487	#endif
				5488	break;
				5489	} else if (cur == '<') {
				5490	ctxt->instate = XML_PARSER_START_TAG;
				5491	ctxt->checkIndex = 0;
				5492	#ifdef DEBUG_PUSH
				5493	xmlGenericError(xmlGenericErrorContext,
				5494	"HPP: entering START_TAG\n");
				5495	#endif
				5496	break;
				5497	} else if (cur == '&') {
				5498	if ((!terminate) &&
				5499	(docbParseLookupSequence(ctxt, ';', 0, 0) < 0))
				5500	goto done;
				5501	#ifdef DEBUG_PUSH
				5502	xmlGenericError(xmlGenericErrorContext,
				5503	"HPP: Parsing Reference\n");
				5504	#endif
				5505	/* TODO: check generation of subtrees if noent !!! */
				5506	docbParseReference(ctxt);
				5507	} else {
				5508	/* TODO Avoid the extra copy, handle directly !!!!!! */
				5509	/*
				5510	* Goal of the following test is :
				5511	* - minimize calls to the SAX 'character' callback
				5512	* when they are mergeable
				5513	*/
				5514	if ((ctxt->inputNr == 1) &&
				5515	(avail < DOCB_PARSER_BIG_BUFFER_SIZE)) {
				5516	if ((!terminate) &&
				5517	(docbParseLookupSequence(ctxt, '<', 0, 0) < 0))
				5518	goto done;
				5519	}
				5520	ctxt->checkIndex = 0;
				5521	#ifdef DEBUG_PUSH
				5522	xmlGenericError(xmlGenericErrorContext,
				5523	"HPP: Parsing char data\n");
				5524	#endif
				5525	docbParseCharData(ctxt);
				5526	}
				5527	if (cons == ctxt->nbChars) {
				5528	if (ctxt->node != NULL) {
				5529	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5530	ctxt->sax->error(ctxt->userData,
				5531	"detected an error in element content\n");
				5532	ctxt->wellFormed = 0;
				5533	NEXT;
				5534	}
				5535	break;
				5536	}
				5537
				5538	break;
				5539	}
				5540	case XML_PARSER_END_TAG:
				5541	if (avail < 2)
				5542	goto done;
				5543	if ((!terminate) &&
				5544	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5545	goto done;
				5546	docbParseEndTag(ctxt);
				5547	if (ctxt->nameNr == 0) {
				5548	ctxt->instate = XML_PARSER_EPILOG;
				5549	} else {
				5550	ctxt->instate = XML_PARSER_CONTENT;
				5551	}
				5552	ctxt->checkIndex = 0;
				5553	#ifdef DEBUG_PUSH
				5554	xmlGenericError(xmlGenericErrorContext,
				5555	"HPP: entering CONTENT\n");
				5556	#endif
				5557	break;
				5558	case XML_PARSER_CDATA_SECTION:
				5559	xmlGenericError(xmlGenericErrorContext,
				5560	"HPP: internal error, state == CDATA\n");
				5561	ctxt->instate = XML_PARSER_CONTENT;
				5562	ctxt->checkIndex = 0;
				5563	#ifdef DEBUG_PUSH
				5564	xmlGenericError(xmlGenericErrorContext,
				5565	"HPP: entering CONTENT\n");
				5566	#endif
				5567	break;
				5568	case XML_PARSER_DTD:
				5569	xmlGenericError(xmlGenericErrorContext,
				5570	"HPP: internal error, state == DTD\n");
				5571	ctxt->instate = XML_PARSER_CONTENT;
				5572	ctxt->checkIndex = 0;
				5573	#ifdef DEBUG_PUSH
				5574	xmlGenericError(xmlGenericErrorContext,
				5575	"HPP: entering CONTENT\n");
				5576	#endif
				5577	break;
				5578	case XML_PARSER_COMMENT:
				5579	xmlGenericError(xmlGenericErrorContext,
				5580	"HPP: internal error, state == COMMENT\n");
				5581	ctxt->instate = XML_PARSER_CONTENT;
				5582	ctxt->checkIndex = 0;
				5583	#ifdef DEBUG_PUSH
				5584	xmlGenericError(xmlGenericErrorContext,
				5585	"HPP: entering CONTENT\n");
				5586	#endif
				5587	break;
				5588	case XML_PARSER_PI:
				5589	xmlGenericError(xmlGenericErrorContext,
				5590	"HPP: internal error, state == PI\n");
				5591	ctxt->instate = XML_PARSER_CONTENT;
				5592	ctxt->checkIndex = 0;
				5593	#ifdef DEBUG_PUSH
				5594	xmlGenericError(xmlGenericErrorContext,
				5595	"HPP: entering CONTENT\n");
				5596	#endif
				5597	break;
				5598	case XML_PARSER_ENTITY_DECL:
				5599	xmlGenericError(xmlGenericErrorContext,
				5600	"HPP: internal error, state == ENTITY_DECL\n");
				5601	ctxt->instate = XML_PARSER_CONTENT;
				5602	ctxt->checkIndex = 0;
				5603	#ifdef DEBUG_PUSH
				5604	xmlGenericError(xmlGenericErrorContext,
				5605	"HPP: entering CONTENT\n");
				5606	#endif
				5607	break;
				5608	case XML_PARSER_ENTITY_VALUE:
				5609	xmlGenericError(xmlGenericErrorContext,
				5610	"HPP: internal error, state == ENTITY_VALUE\n");
				5611	ctxt->instate = XML_PARSER_CONTENT;
				5612	ctxt->checkIndex = 0;
				5613	#ifdef DEBUG_PUSH
				5614	xmlGenericError(xmlGenericErrorContext,
				5615	"HPP: entering DTD\n");
				5616	#endif
				5617	break;
				5618	case XML_PARSER_ATTRIBUTE_VALUE:
				5619	xmlGenericError(xmlGenericErrorContext,
				5620	"HPP: internal error, state == ATTRIBUTE_VALUE\n");
				5621	ctxt->instate = XML_PARSER_START_TAG;
				5622	ctxt->checkIndex = 0;
				5623	#ifdef DEBUG_PUSH
				5624	xmlGenericError(xmlGenericErrorContext,
				5625	"HPP: entering START_TAG\n");
				5626	#endif
				5627	break;
				5628	case XML_PARSER_SYSTEM_LITERAL:
				5629	xmlGenericError(xmlGenericErrorContext,
				5630	"HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n");
				5631	ctxt->instate = XML_PARSER_CONTENT;
				5632	ctxt->checkIndex = 0;
				5633	#ifdef DEBUG_PUSH
				5634	xmlGenericError(xmlGenericErrorContext,
				5635	"HPP: entering CONTENT\n");
				5636	#endif
				5637	break;
				5638
				5639	case XML_PARSER_IGNORE:
				5640	xmlGenericError(xmlGenericErrorContext,
				5641	"HPP: internal error, state == XML_PARSER_IGNORE\n");
				5642	ctxt->instate = XML_PARSER_CONTENT;
				5643	ctxt->checkIndex = 0;
				5644	#ifdef DEBUG_PUSH
				5645	xmlGenericError(xmlGenericErrorContext,
				5646	"HPP: entering CONTENT\n");
				5647	#endif
				5648	break;
				5649	}
				5650	}
				5651	done:
				5652	if ((avail == 0) && (terminate)) {
				5653	docbAutoClose(ctxt, NULL);
				5654	if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
				5655	/*
				5656	* SAX: end of the document processing.
				5657	*/
				5658	ctxt->instate = XML_PARSER_EOF;
				5659	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5660	ctxt->sax->endDocument(ctxt->userData);
				5661	}
				5662	}
				5663	if ((ctxt->myDoc != NULL) &&
				5664	((terminate) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|
				5665	(ctxt->instate == XML_PARSER_EPILOG))) {
				5666	xmlDtdPtr dtd;
				5667	dtd = ctxt->myDoc->intSubset;
				5668	if (dtd == NULL)
				5669	ctxt->myDoc->intSubset =
				5670	xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
				5671	BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
				5672	BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
				5673	}
				5674	#ifdef DEBUG_PUSH
				5675	xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
				5676	#endif
				5677	return(ret);
				5678	}
				5679
				5680	/**
				5681	* docbParseChunk:
				5682	* @ctxt: an XML parser context
				5683	* @chunk: an char array
				5684	* @size: the size in byte of the chunk
				5685	* @terminate: last chunk indicator
				5686	*
				5687	* Parse a Chunk of memory
				5688	*
				5689	* Returns zero if no error, the xmlParserErrors otherwise.
				5690	*/
				5691	int
				5692	docbParseChunk(docbParserCtxtPtr ctxt, const char *chunk, int size,
				5693	int terminate) {
				5694	if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
				5695	(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
				5696	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
				5697	int cur = ctxt->input->cur - ctxt->input->base;
				5698
				5699	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
				5700	ctxt->input->base = ctxt->input->buf->buffer->content + base;
				5701	ctxt->input->cur = ctxt->input->base + cur;
				5702	#ifdef DEBUG_PUSH
				5703	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
				5704	#endif
				5705
				5706	if ((terminate) \|\| (ctxt->input->buf->buffer->use > 80))
				5707	docbParseTryOrFinish(ctxt, terminate);
				5708	} else if (ctxt->instate != XML_PARSER_EOF) {
				5709	xmlParserInputBufferPush(ctxt->input->buf, 0, "");
				5710	docbParseTryOrFinish(ctxt, terminate);
				5711	}
				5712	if (terminate) {
				5713	if ((ctxt->instate != XML_PARSER_EOF) &&
				5714	(ctxt->instate != XML_PARSER_EPILOG) &&
				5715	(ctxt->instate != XML_PARSER_MISC)) {
				5716	ctxt->errNo = XML_ERR_DOCUMENT_END;
				5717	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5718	ctxt->sax->error(ctxt->userData,
				5719	"Extra content at the end of the document\n");
				5720	ctxt->wellFormed = 0;
				5721	}
				5722	if (ctxt->instate != XML_PARSER_EOF) {
				5723	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5724	ctxt->sax->endDocument(ctxt->userData);
				5725	}
				5726	ctxt->instate = XML_PARSER_EOF;
				5727	}
				5728	return((xmlParserErrors) ctxt->errNo);
				5729	}
				5730
				5731	/************************************************************************
				5732	* *
				5733	* User entry points *
				5734	* *
				5735	************************************************************************/
				5736
				5737	/**
				5738	* docbCreatePushParserCtxt :
				5739	* @sax: a SAX handler
				5740	* @user_data: The user data returned on SAX callbacks
				5741	* @chunk: a pointer to an array of chars
				5742	* @size: number of chars in the array
				5743	* @filename: an optional file name or URI
				5744	* @enc: an optional encoding
				5745	*
				5746	* Create a parser context for using the DocBook SGML parser in push mode
				5747	* To allow content encoding detection, @size should be >= 4
				5748	* The value of @filename is used for fetching external entities
				5749	* and error/warning reports.
				5750	*
				5751	* Returns the new parser context or NULL
				5752	*/
				5753	docbParserCtxtPtr
				5754	docbCreatePushParserCtxt(docbSAXHandlerPtr sax, void *user_data,
				5755	const char chunk, int size, const char filename,
				5756	xmlCharEncoding enc) {
				5757	docbParserCtxtPtr ctxt;
				5758	docbParserInputPtr inputStream;
				5759	xmlParserInputBufferPtr buf;
				5760
				5761	buf = xmlAllocParserInputBuffer(enc);
				5762	if (buf == NULL) return(NULL);
				5763
				5764	ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
				5765	if (ctxt == NULL) {
				5766	xmlFree(buf);
				5767	return(NULL);
				5768	}
				5769	memset(ctxt, 0, sizeof(docbParserCtxt));
				5770	docbInitParserCtxt(ctxt);
				5771	if (sax != NULL) {
				5772	if (ctxt->sax != &docbDefaultSAXHandler)
				5773	xmlFree(ctxt->sax);
				5774	ctxt->sax = (docbSAXHandlerPtr) xmlMalloc(sizeof(docbSAXHandler));
				5775	if (ctxt->sax == NULL) {
				5776	xmlFree(buf);
				5777	xmlFree(ctxt);
				5778	return(NULL);
				5779	}
				5780	memcpy(ctxt->sax, sax, sizeof(docbSAXHandler));
				5781	if (user_data != NULL)
				5782	ctxt->userData = user_data;
				5783	}
				5784	if (filename == NULL) {
				5785	ctxt->directory = NULL;
				5786	} else {
				5787	ctxt->directory = xmlParserGetDirectory(filename);
				5788	}
				5789
				5790	inputStream = docbNewInputStream(ctxt);
				5791	if (inputStream == NULL) {
				5792	xmlFreeParserCtxt(ctxt);
				5793	return(NULL);
				5794	}
				5795
				5796	if (filename == NULL)
				5797	inputStream->filename = NULL;
				5798	else
				5799	inputStream->filename = xmlMemStrdup(filename);
				5800	inputStream->buf = buf;
				5801	inputStream->base = inputStream->buf->buffer->content;
				5802	inputStream->cur = inputStream->buf->buffer->content;
				5803
				5804	inputPush(ctxt, inputStream);
				5805
				5806	if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
				5807	(ctxt->input->buf != NULL)) {
				5808	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
				5809	#ifdef DEBUG_PUSH
				5810	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
				5811	#endif
				5812	}
				5813
				5814	return(ctxt);
				5815	}
				5816
				5817	/**
				5818	* docbSAXParseDoc :
				5819	* @cur: a pointer to an array of xmlChar
				5820	* @encoding: a free form C string describing the SGML document encoding, or NULL
				5821	* @sax: the SAX handler block
				5822	* @userData: if using SAX, this pointer will be provided on callbacks.
				5823	*
				5824	* parse an SGML in-memory document and build a tree.
				5825	* It use the given SAX function block to handle the parsing callback.
				5826	* If sax is NULL, fallback to the default DOM tree building routines.
				5827	*
				5828	* Returns the resulting document tree
				5829	*/
				5830
				5831	docbDocPtr
				5832	docbSAXParseDoc(xmlChar cur, const char encoding, docbSAXHandlerPtr sax, void *userData) {
				5833	docbDocPtr ret;
				5834	docbParserCtxtPtr ctxt;
				5835
				5836	if (cur == NULL) return(NULL);
				5837
				5838
				5839	ctxt = docbCreateDocParserCtxt(cur, encoding);
				5840	if (ctxt == NULL) return(NULL);
				5841	if (sax != NULL) {
				5842	ctxt->sax = sax;
				5843	ctxt->userData = userData;
				5844	}
				5845
				5846	docbParseDocument(ctxt);
				5847	ret = ctxt->myDoc;
				5848	if (sax != NULL) {
				5849	ctxt->sax = NULL;
				5850	ctxt->userData = NULL;
				5851	}
				5852	docbFreeParserCtxt(ctxt);
				5853
				5854	return(ret);
				5855	}
				5856
				5857	/**
				5858	* docbParseDoc :
				5859	* @cur: a pointer to an array of xmlChar
				5860	* @encoding: a free form C string describing the SGML document encoding, or NULL
				5861	*
				5862	* parse an SGML in-memory document and build a tree.
				5863	*
				5864	* Returns the resulting document tree
				5865	*/
				5866
				5867	docbDocPtr
				5868	docbParseDoc(xmlChar cur, const char encoding) {
				5869	return(docbSAXParseDoc(cur, encoding, NULL, NULL));
				5870	}
				5871
				5872
				5873	/**
				5874	* docbCreateFileParserCtxt :
				5875	* @filename: the filename
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame^]	5876	* @encoding: the SGML document encoding, or NULL
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	5877	*
				5878	* Create a parser context for a file content.
				5879	* Automatic support for ZLIB/Compress compressed document is provided
				5880	* by default if found at compile-time.
				5881	*
				5882	* Returns the new parser context or NULL
				5883	*/
				5884	docbParserCtxtPtr
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame^]	5885	docbCreateFileParserCtxt(const char *filename,
				5886	const char *encoding ATTRIBUTE_UNUSED)
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	5887	{
				5888	docbParserCtxtPtr ctxt;
				5889	docbParserInputPtr inputStream;
				5890	xmlParserInputBufferPtr buf;
				5891	/* sgmlCharEncoding enc; */
				5892
				5893	buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
				5894	if (buf == NULL) return(NULL);
				5895
				5896	ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
				5897	if (ctxt == NULL) {
				5898	perror("malloc");
				5899	return(NULL);
				5900	}
				5901	memset(ctxt, 0, sizeof(docbParserCtxt));
				5902	docbInitParserCtxt(ctxt);
				5903	inputStream = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput));
				5904	if (inputStream == NULL) {
				5905	perror("malloc");
				5906	xmlFree(ctxt);
				5907	return(NULL);
				5908	}
				5909	memset(inputStream, 0, sizeof(docbParserInput));
				5910
				5911	inputStream->filename = xmlMemStrdup(filename);
				5912	inputStream->line = 1;
				5913	inputStream->col = 1;
				5914	inputStream->buf = buf;
				5915	inputStream->directory = NULL;
				5916
				5917	inputStream->base = inputStream->buf->buffer->content;
				5918	inputStream->cur = inputStream->buf->buffer->content;
				5919	inputStream->free = NULL;
				5920
				5921	inputPush(ctxt, inputStream);
				5922	return(ctxt);
				5923	}
				5924
				5925	/**
				5926	* docbSAXParseFile :
				5927	* @filename: the filename
				5928	* @encoding: a free form C string describing the SGML document encoding, or NULL
				5929	* @sax: the SAX handler block
				5930	* @userData: if using SAX, this pointer will be provided on callbacks.
				5931	*
				5932	* parse an SGML file and build a tree. Automatic support for ZLIB/Compress
				5933	* compressed document is provided by default if found at compile-time.
				5934	* It use the given SAX function block to handle the parsing callback.
				5935	* If sax is NULL, fallback to the default DOM tree building routines.
				5936	*
				5937	* Returns the resulting document tree
				5938	*/
				5939
				5940	docbDocPtr
				5941	docbSAXParseFile(const char filename, const char encoding, docbSAXHandlerPtr sax,
				5942	void *userData) {
				5943	docbDocPtr ret;
				5944	docbParserCtxtPtr ctxt;
				5945	docbSAXHandlerPtr oldsax = NULL;
				5946
				5947	ctxt = docbCreateFileParserCtxt(filename, encoding);
				5948	if (ctxt == NULL) return(NULL);
				5949	if (sax != NULL) {
				5950	oldsax = ctxt->sax;
				5951	ctxt->sax = sax;
				5952	ctxt->userData = userData;
				5953	}
				5954
				5955	docbParseDocument(ctxt);
				5956
				5957	ret = ctxt->myDoc;
				5958	if (sax != NULL) {
				5959	ctxt->sax = oldsax;
				5960	ctxt->userData = NULL;
				5961	}
				5962	docbFreeParserCtxt(ctxt);
				5963
				5964	return(ret);
				5965	}
				5966
				5967	/**
				5968	* docbParseFile :
				5969	* @filename: the filename
				5970	* @encoding: a free form C string describing document encoding, or NULL
				5971	*
				5972	* parse a Docbook SGML file and build a tree. Automatic support for
				5973	* ZLIB/Compress compressed document is provided by default if found
				5974	* at compile-time.
				5975	*
				5976	* Returns the resulting document tree
				5977	*/
				5978
				5979	docbDocPtr
				5980	docbParseFile(const char filename, const char encoding) {
				5981	return(docbSAXParseFile(filename, encoding, NULL, NULL));
				5982	}
				5983
				5984	#endif /* LIBXML_DOCB_ENABLED */