Blame - DOCBparser.c - platform/external/libxml2

blob: 0b4012a6c250a63e5805cd1b5c435805f5433e5b [file] [log] [blame]

Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	1	/*
				2	* DOCBparser.c : an attempt to parse SGML Docbook documents
				3	*
Daniel Veillard	e95e239	2001-06-06 10:46:28 +0000	[diff] [blame]	4	* This is extremely hackish. It also adds one extension
				5	* <?sgml-declaration encoding="ISO-8859-1"?>
				6	* allowing to store the encoding of the document within the instance.
				7	*
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	8	* See Copyright for the status of this software.
				9	*
Daniel Veillard	c5d6434	2001-06-24 12:13:24 +0000	[diff] [blame]	10	* daniel@veillard.com
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	11	*/
				12
				13	#include "libxml.h"
				14	#ifdef LIBXML_DOCB_ENABLED
				15
				16	#include <string.h>
				17	#ifdef HAVE_CTYPE_H
				18	#include <ctype.h>
				19	#endif
				20	#ifdef HAVE_STDLIB_H
				21	#include <stdlib.h>
				22	#endif
				23	#ifdef HAVE_SYS_STAT_H
				24	#include <sys/stat.h>
				25	#endif
				26	#ifdef HAVE_FCNTL_H
				27	#include <fcntl.h>
				28	#endif
				29	#ifdef HAVE_UNISTD_H
				30	#include <unistd.h>
				31	#endif
				32	#ifdef HAVE_ZLIB_H
				33	#include <zlib.h>
				34	#endif
				35
				36	#include <libxml/xmlmemory.h>
				37	#include <libxml/tree.h>
				38	#include <libxml/SAX.h>
				39	#include <libxml/parser.h>
				40	#include <libxml/parserInternals.h>
				41	#include <libxml/xmlerror.h>
				42	#include <libxml/DOCBparser.h>
				43	#include <libxml/entities.h>
				44	#include <libxml/encoding.h>
				45	#include <libxml/valid.h>
				46	#include <libxml/xmlIO.h>
				47	#include <libxml/uri.h>
Daniel Veillard	3c01b1d	2001-10-17 15:58:35 +0000	[diff] [blame]	48	#include <libxml/globals.h>
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	49
				50	/*
Daniel Veillard	89cad53	2001-10-22 09:46:13 +0000	[diff] [blame]	51	* DocBook XML current versions
				52	*/
				53
				54	#define XML_DOCBOOK_XML_PUBLIC (const xmlChar *) \
				55	"-//OASIS//DTD DocBook XML V4.1.2//EN"
				56	#define XML_DOCBOOK_XML_SYSTEM (const xmlChar *) \
				57	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd"
				58
				59	/*
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	60	* Internal description of an SGML entity
				61	*/
				62	typedef struct _docbEntityDesc docbEntityDesc;
				63	typedef docbEntityDesc *docbEntityDescPtr;
				64	struct _docbEntityDesc {
				65	int value; /* the UNICODE value for the character */
				66	const char name; / The entity name */
				67	const char desc; / the description */
				68	};
				69
				70	#if 0
				71	docbElemDescPtr docbTagLookup (const xmlChar *tag);
				72	docbEntityDescPtr docbEntityLookup(const xmlChar *name);
				73	docbEntityDescPtr docbEntityValueLookup(int value);
				74
				75	int docbIsAutoClosed(docbDocPtr doc,
				76	docbNodePtr elem);
				77	int docbAutoCloseTag(docbDocPtr doc,
				78	const xmlChar *name,
				79	docbNodePtr elem);
				80
				81	#endif
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	82	static int docbParseCharRef(docbParserCtxtPtr ctxt);
				83	static xmlEntityPtr docbParseEntityRef(docbParserCtxtPtr ctxt,
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	84	xmlChar **str);
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	85	static void docbParseElement(docbParserCtxtPtr ctxt);
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame]	86	static void docbParseContent(docbParserCtxtPtr ctxt);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	87
				88	/*
				89	* Internal description of an SGML element
				90	*/
				91	typedef struct _docbElemDesc docbElemDesc;
				92	typedef docbElemDesc *docbElemDescPtr;
				93	struct _docbElemDesc {
				94	const char name; / The tag name */
				95	int startTag; /* Whether the start tag can be implied */
				96	int endTag; /* Whether the end tag can be implied */
				97	int empty; /* Is this an empty element ? */
				98	int depr; /* Is this a deprecated element ? */
				99	int dtd; /* 1: only in Loose DTD, 2: only Frameset one */
				100	const char desc; / the description */
				101	};
				102
				103
				104	#define DOCB_MAX_NAMELEN 1000
				105	#define DOCB_PARSER_BIG_BUFFER_SIZE 1000
				106	#define DOCB_PARSER_BUFFER_SIZE 100
				107
				108	/* #define DEBUG */
				109	/* #define DEBUG_PUSH */
				110
				111	/************************************************************************
				112	* *
				113	* Parser stacks related functions and macros *
				114	* *
				115	************************************************************************/
				116
				117	/*
				118	* Generic function for accessing stacks in the Parser Context
				119	*/
				120
				121	#define PUSH_AND_POP(scope, type, name) \
				122	scope int docb##name##Push(docbParserCtxtPtr ctxt, type value) { \
				123	if (ctxt->name##Nr >= ctxt->name##Max) { \
				124	ctxt->name##Max *= 2; \
				125	ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
				126	ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
				127	if (ctxt->name##Tab == NULL) { \
				128	xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); \
				129	return(0); \
				130	} \
				131	} \
				132	ctxt->name##Tab[ctxt->name##Nr] = value; \
				133	ctxt->name = value; \
				134	return(ctxt->name##Nr++); \
				135	} \
				136	scope type docb##name##Pop(docbParserCtxtPtr ctxt) { \
				137	type ret; \
				138	if (ctxt->name##Nr < 0) return(0); \
				139	ctxt->name##Nr--; \
				140	if (ctxt->name##Nr < 0) return(0); \
				141	if (ctxt->name##Nr > 0) \
				142	ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
				143	else \
				144	ctxt->name = NULL; \
				145	ret = ctxt->name##Tab[ctxt->name##Nr]; \
				146	ctxt->name##Tab[ctxt->name##Nr] = 0; \
				147	return(ret); \
				148	} \
				149
				150	/* PUSH_AND_POP(static, xmlNodePtr, node) */
				151	PUSH_AND_POP(static, xmlChar*, name)
				152
				153	/*
				154	* Macros for accessing the content. Those should be used only by the parser,
				155	* and not exported.
				156	*
				157	* Dirty macros, i.e. one need to make assumption on the context to use them
				158	*
				159	* CUR_PTR return the current pointer to the xmlChar to be parsed.
				160	* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
				161	* in ISO-Latin or UTF-8, and the current 16 bit value if compiled
				162	* in UNICODE mode. This should be used internally by the parser
				163	* only to compare to ASCII values otherwise it would break when
				164	* running with UTF-8 encoding.
				165	* NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
				166	* to compare on ASCII based substring.
				167	* UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR
				168	* it should be used only to compare on ASCII based substring.
				169	* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
				170	* strings within the parser.
				171	*
				172	* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
				173	*
				174	* CURRENT Returns the current char value, with the full decoding of
				175	* UTF-8 if we are using this mode. It returns an int.
				176	* NEXT Skip to the next character, this does the proper decoding
				177	* in UTF-8 mode. It also pop-up unfinished entities on the fly.
				178	* COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
				179	*/
				180
				181	#define UPPER (toupper(*ctxt->input->cur))
				182
				183	#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
				184
				185	#define NXT(val) ctxt->input->cur[(val)]
				186
				187	#define UPP(val) (toupper(ctxt->input->cur[(val)]))
				188
				189	#define CUR_PTR ctxt->input->cur
				190
				191	#define SHRINK xmlParserInputShrink(ctxt->input)
				192
				193	#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
				194
				195	#define CURRENT ((int) (*ctxt->input->cur))
				196
				197	#define SKIP_BLANKS docbSkipBlankChars(ctxt)
				198
				199	/* Imported from XML */
				200
				201	/* #define CUR (ctxt->token ? ctxt->token : (int) (ctxt->input->cur)) /
				202	#define CUR ((int) (*ctxt->input->cur))
				203	#define NEXT xmlNextChar(ctxt),ctxt->nbChars++
				204
				205	#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
				206	#define NXT(val) ctxt->input->cur[(val)]
				207	#define CUR_PTR ctxt->input->cur
				208
				209
				210	#define NEXTL(l) do { \
				211	if (*(ctxt->input->cur) == '\n') { \
				212	ctxt->input->line++; ctxt->input->col = 1; \
				213	} else ctxt->input->col++; \
				214	ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
				215	} while (0)
				216
				217	/************
				218	\
				219	if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
				220	if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
				221	************/
				222
				223	#define CUR_CHAR(l) docbCurrentChar(ctxt, &l)
				224	#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
				225
				226	#define COPY_BUF(l,b,i,v) \
				227	if (l == 1) b[i++] = (xmlChar) v; \
				228	else i += xmlCopyChar(l,&b[i],v)
				229
				230	/**
				231	* docbCurrentChar:
				232	* @ctxt: the DocBook SGML parser context
				233	* @len: pointer to the length of the char read
				234	*
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	235	* The current char value, if using UTF-8 this may actually span multiple
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	236	* bytes in the input buffer. Implement the end of line normalization:
				237	* 2.11 End-of-Line Handling
				238	* If the encoding is unspecified, in the case we find an ISO-Latin-1
				239	* char, then the encoding converter is plugged in automatically.
				240	*
Daniel Veillard	60087f3	2001-10-10 09:45:09 +0000	[diff] [blame]	241	* Returns the current char value and its length
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	242	*/
				243
				244	static int
				245	docbCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
				246	if (ctxt->instate == XML_PARSER_EOF)
				247	return(0);
				248
				249	if (ctxt->token != 0) {
				250	*len = 0;
				251	return(ctxt->token);
				252	}
				253	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
				254	/*
				255	* We are supposed to handle UTF8, check it's valid
				256	* From rfc2044: encoding of the Unicode values on UTF-8:
				257	*
				258	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
				259	* 0000 0000-0000 007F 0xxxxxxx
				260	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
				261	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
				262	*
				263	* Check for the 0x110000 limit too
				264	*/
				265	const unsigned char *cur = ctxt->input->cur;
				266	unsigned char c;
				267	unsigned int val;
				268
				269	c = *cur;
				270	if (c & 0x80) {
				271	if (cur[1] == 0)
				272	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				273	if ((cur[1] & 0xc0) != 0x80)
				274	goto encoding_error;
				275	if ((c & 0xe0) == 0xe0) {
				276
				277	if (cur[2] == 0)
				278	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				279	if ((cur[2] & 0xc0) != 0x80)
				280	goto encoding_error;
				281	if ((c & 0xf0) == 0xf0) {
				282	if (cur[3] == 0)
				283	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				284	if (((c & 0xf8) != 0xf0) \|\|
				285	((cur[3] & 0xc0) != 0x80))
				286	goto encoding_error;
				287	/* 4-byte code */
				288	*len = 4;
				289	val = (cur[0] & 0x7) << 18;
				290	val \|= (cur[1] & 0x3f) << 12;
				291	val \|= (cur[2] & 0x3f) << 6;
				292	val \|= cur[3] & 0x3f;
				293	} else {
				294	/* 3-byte code */
				295	*len = 3;
				296	val = (cur[0] & 0xf) << 12;
				297	val \|= (cur[1] & 0x3f) << 6;
				298	val \|= cur[2] & 0x3f;
				299	}
				300	} else {
				301	/* 2-byte code */
				302	*len = 2;
				303	val = (cur[0] & 0x1f) << 6;
				304	val \|= cur[1] & 0x3f;
				305	}
				306	if (!IS_CHAR(val)) {
				307	ctxt->errNo = XML_ERR_INVALID_ENCODING;
				308	if ((ctxt->sax != NULL) &&
				309	(ctxt->sax->error != NULL))
				310	ctxt->sax->error(ctxt->userData,
				311	"Char 0x%X out of allowed range\n", val);
				312	ctxt->wellFormed = 0;
				313	ctxt->disableSAX = 1;
				314	}
				315	return(val);
				316	} else {
				317	/* 1-byte code */
				318	*len = 1;
				319	return((int) *ctxt->input->cur);
				320	}
				321	}
				322	/*
Daniel Veillard	60087f3	2001-10-10 09:45:09 +0000	[diff] [blame]	323	* Assume it's a fixed length encoding (1) with
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	324	* a compatible encoding for the ASCII set, since
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	325	* XML constructs only use < 128 chars
				326	*/
				327	*len = 1;
				328	if ((int) *ctxt->input->cur < 0x80)
				329	return((int) *ctxt->input->cur);
				330
				331	/*
				332	* Humm this is bad, do an automatic flow conversion
				333	*/
				334	xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
				335	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				336	return(xmlCurrentChar(ctxt, len));
				337
				338	encoding_error:
				339	/*
				340	* If we detect an UTF8 error that probably mean that the
				341	* input encoding didn't get properly advertized in the
				342	* declaration header. Report the error and switch the encoding
				343	* to ISO-Latin-1 (if you don't like this policy, just declare the
				344	* encoding !)
				345	*/
				346	ctxt->errNo = XML_ERR_INVALID_ENCODING;
				347	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
				348	ctxt->sax->error(ctxt->userData,
				349	"Input is not proper UTF-8, indicate encoding !\n");
				350	ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				351	ctxt->input->cur[0], ctxt->input->cur[1],
				352	ctxt->input->cur[2], ctxt->input->cur[3]);
				353	}
				354
				355	ctxt->charset = XML_CHAR_ENCODING_8859_1;
				356	*len = 1;
				357	return((int) *ctxt->input->cur);
				358	}
				359
				360	#if 0
				361	/**
				362	* sgmlNextChar:
				363	* @ctxt: the DocBook SGML parser context
				364	*
				365	* Skip to the next char input char.
				366	*/
				367
				368	static void
				369	sgmlNextChar(docbParserCtxtPtr ctxt) {
				370	if (ctxt->instate == XML_PARSER_EOF)
				371	return;
				372	if ((*ctxt->input->cur == 0) &&
				373	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
				374	xmlPopInput(ctxt);
				375	} else {
				376	if (*(ctxt->input->cur) == '\n') {
				377	ctxt->input->line++; ctxt->input->col = 1;
				378	} else ctxt->input->col++;
				379	ctxt->input->cur++;
				380	ctxt->nbChars++;
				381	if (*ctxt->input->cur == 0)
				382	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				383	}
				384	}
				385	#endif
				386
				387	/**
				388	* docbSkipBlankChars:
				389	* @ctxt: the DocBook SGML parser context
				390	*
				391	* skip all blanks character found at that point in the input streams.
				392	*
				393	* Returns the number of space chars skipped
				394	*/
				395
				396	static int
				397	docbSkipBlankChars(xmlParserCtxtPtr ctxt) {
				398	int res = 0;
				399
				400	while (IS_BLANK(*(ctxt->input->cur))) {
				401	if ((*ctxt->input->cur == 0) &&
				402	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
				403	xmlPopInput(ctxt);
				404	} else {
				405	if (*(ctxt->input->cur) == '\n') {
				406	ctxt->input->line++; ctxt->input->col = 1;
				407	} else ctxt->input->col++;
				408	ctxt->input->cur++;
				409	ctxt->nbChars++;
				410	if (*ctxt->input->cur == 0)
				411	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				412	}
				413	res++;
				414	}
				415	return(res);
				416	}
				417
				418
				419
				420	/************************************************************************
				421	* *
				422	* The list of SGML elements and their properties *
				423	* *
				424	************************************************************************/
				425
				426	/*
				427	* Start Tag: 1 means the start tag can be omitted
				428	* End Tag: 1 means the end tag can be omitted
				429	* 2 means it's forbidden (empty elements)
				430	* Depr: this element is deprecated
				431	* DTD: 1 means that this element is valid only in the Loose DTD
				432	* 2 means that this element is valid only in the Frameset DTD
				433	*
				434	* Name,Start Tag,End Tag, Empty, Depr., DTD, Description
				435	*/
				436	static docbElemDesc
				437	docbookElementTable[] = {
				438	{ "abbrev", 0, 0, 0, 3, 0, "" }, /* word */
				439	{ "abstract", 0, 0, 0, 9, 0, "" }, /* title */
				440	{ "accel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				441	{ "ackno", 0, 0, 0, 4, 0, "" }, /* docinfo */
				442	{ "acronym", 0, 0, 0, 3, 0, "" }, /* word */
				443	{ "action", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				444	{ "address", 0, 0, 0, 1, 0, "" },
				445	{ "affiliation",0, 0, 0, 9, 0, "" }, /* shortaffil */
				446	{ "alt", 0, 0, 0, 1, 0, "" },
				447	{ "anchor", 0, 2, 1, 0, 0, "" },
				448	{ "answer", 0, 0, 0, 9, 0, "" }, /* label */
				449	{ "appendix", 0, 0, 0, 9, 0, "" }, /* appendixinfo */
				450	{ "appendixinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				451	{ "application",0, 0, 0, 2, 0, "" }, /* para */
				452	{ "area", 0, 2, 1, 0, 0, "" },
				453	{ "areaset", 0, 0, 0, 9, 0, "" }, /* area */
				454	{ "areaspec", 0, 0, 0, 9, 0, "" }, /* area */
				455	{ "arg", 0, 0, 0, 1, 0, "" },
Daniel Veillard	4ec0b0f	2001-04-25 15:53:40 +0000	[diff] [blame]	456	{ "artheader", 0, 0, 0, 9, 0, "" },
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	457	{ "article", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				458	{ "articleinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				459	{ "artpagenums",0, 0, 0, 4, 0, "" }, /* docinfo */
				460	{ "attribution",0, 0, 0, 2, 0, "" }, /* para */
				461	{ "audiodata", 0, 2, 1, 0, 0, "" },
				462	{ "audioobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				463	{ "authorblurb",0, 0, 0, 9, 0, "" }, /* title */
				464	{ "authorgroup",0, 0, 0, 9, 0, "" }, /* author */
				465	{ "authorinitials",0, 0, 0, 4, 0, "" }, /* docinfo */
				466	{ "author", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */
				467	{ "beginpage", 0, 2, 1, 0, 0, "" },
				468	{ "bibliodiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				469	{ "biblioentry",0, 0, 0, 9, 0, "" }, /* articleinfo */
				470	{ "bibliography",0, 0, 0, 9, 0, "" }, /* bibliographyinfo */
				471	{ "bibliographyinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				472	{ "bibliomisc", 0, 0, 0, 2, 0, "" }, /* para */
				473	{ "bibliomixed",0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix, bibliomset) */
				474	{ "bibliomset", 0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix; \| bibliomset) */
				475	{ "biblioset", 0, 0, 0, 9, 0, "" }, /* bibliocomponent.mix */
				476	{ "blockquote", 0, 0, 0, 9, 0, "" }, /* title */
				477	{ "book", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				478	{ "bookinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				479	{ "bridgehead", 0, 0, 0, 8, 0, "" }, /* title */
				480	{ "callout", 0, 0, 0, 9, 0, "" }, /* component.mix */
				481	{ "calloutlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				482	{ "caption", 0, 0, 0, 9, 0, "" }, /* textobject.mix */
				483	{ "caution", 0, 0, 0, 9, 0, "" }, /* title */
				484	{ "chapter", 0, 0, 0, 9, 0, "" }, /* chapterinfo */
				485	{ "chapterinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				486	{ "citation", 0, 0, 0, 2, 0, "" }, /* para */
				487	{ "citerefentry",0, 0, 0, 9, 0, "" }, /* refentrytitle */
				488	{ "citetitle", 0, 0, 0, 2, 0, "" }, /* para */
				489	{ "city", 0, 0, 0, 4, 0, "" }, /* docinfo */
				490	{ "classname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				491	{ "classsynopsisinfo",0,0, 0, 9, 0, "" }, /* cptr */
				492	{ "classsynopsis",0, 0, 0, 9, 0, "" }, /* ooclass */
				493	{ "cmdsynopsis",0, 0, 0, 9, 0, "" }, /* command */
				494	{ "co", 0, 2, 1, 0, 0, "" },
				495	{ "collab", 0, 0, 0, 9, 0, "" }, /* collabname */
				496	{ "collabname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				497	{ "colophon", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				498	{ "colspec", 0, 2, 1, 0, 0, "" },
				499	{ "colspec", 0, 2, 1, 0, 0, "" },
				500	{ "command", 0, 0, 0, 9, 0, "" }, /* cptr */
				501	{ "computeroutput",0, 0, 0, 9, 0, "" }, /* cptr */
				502	{ "confdates", 0, 0, 0, 4, 0, "" }, /* docinfo */
				503	{ "confgroup", 0, 0, 0, 9, 0, "" }, /* confdates */
				504	{ "confnum", 0, 0, 0, 4, 0, "" }, /* docinfo */
				505	{ "confsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */
				506	{ "conftitle", 0, 0, 0, 4, 0, "" }, /* docinfo */
				507	{ "constant", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				508	{ "constructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */
				509	{ "contractnum",0, 0, 0, 4, 0, "" }, /* docinfo */
				510	{ "contractsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */
				511	{ "contrib", 0, 0, 0, 4, 0, "" }, /* docinfo */
				512	{ "copyright", 0, 0, 0, 9, 0, "" }, /* year */
				513	{ "corpauthor", 0, 0, 0, 4, 0, "" }, /* docinfo */
				514	{ "corpname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				515	{ "country", 0, 0, 0, 4, 0, "" }, /* docinfo */
				516	{ "database", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				517	{ "date", 0, 0, 0, 4, 0, "" }, /* docinfo */
				518	{ "dedication", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				519	{ "destructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */
Daniel Veillard	c057c5d	2001-05-02 12:41:24 +0000	[diff] [blame]	520	{ "docinfo", 0, 0, 0, 9, 0, "" },
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	521	{ "edition", 0, 0, 0, 4, 0, "" }, /* docinfo */
				522	{ "editor", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */
				523	{ "email", 0, 0, 0, 4, 0, "" }, /* docinfo */
				524	{ "emphasis", 0, 0, 0, 2, 0, "" }, /* para */
				525	{ "entry", 0, 0, 0, 9, 0, "" }, /* tbl.entry.mdl */
				526	{ "entrytbl", 0, 0, 0, 9, 0, "" }, /* tbl.entrytbl.mdl */
				527	{ "envar", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				528	{ "epigraph", 0, 0, 0, 9, 0, "" }, /* attribution */
				529	{ "equation", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				530	{ "errorcode", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				531	{ "errorname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				532	{ "errortype", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				533	{ "example", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				534	{ "exceptionname",0, 0, 0, 7, 0, "" }, /* smallcptr */
				535	{ "fax", 0, 0, 0, 4, 0, "" }, /* docinfo */
				536	{ "fieldsynopsis", 0, 0, 0, 9, 0, "" }, /* modifier */
				537	{ "figure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				538	{ "filename", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				539	{ "firstname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				540	{ "firstterm", 0, 0, 0, 3, 0, "" }, /* word */
				541	{ "footnote", 0, 0, 0, 9, 0, "" }, /* footnote.mix */
				542	{ "footnoteref",0, 2, 1, 0, 0, "" },
				543	{ "foreignphrase",0, 0, 0, 2, 0, "" }, /* para */
				544	{ "formalpara", 0, 0, 0, 9, 0, "" }, /* title */
				545	{ "funcdef", 0, 0, 0, 1, 0, "" },
				546	{ "funcparams", 0, 0, 0, 9, 0, "" }, /* cptr */
				547	{ "funcprototype",0, 0, 0, 9, 0, "" }, /* funcdef */
				548	{ "funcsynopsis",0, 0, 0, 9, 0, "" }, /* funcsynopsisinfo */
				549	{ "funcsynopsisinfo", 0, 0, 0, 9, 0, "" }, /* cptr */
				550	{ "function", 0, 0, 0, 9, 0, "" }, /* cptr */
				551	{ "glossary", 0, 0, 0, 9, 0, "" }, /* glossaryinfo */
				552	{ "glossaryinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				553	{ "glossdef", 0, 0, 0, 9, 0, "" }, /* glossdef.mix */
				554	{ "glossdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				555	{ "glossentry", 0, 0, 0, 9, 0, "" }, /* glossterm */
				556	{ "glosslist", 0, 0, 0, 9, 0, "" }, /* glossentry */
Daniel Veillard	c057c5d	2001-05-02 12:41:24 +0000	[diff] [blame]	557	{ "glossseealso",0, 0, 1, 2, 0, "" }, /* para */
				558	{ "glosssee", 0, 0, 1, 2, 0, "" }, /* para */
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	559	{ "glossterm", 0, 0, 0, 2, 0, "" }, /* para */
Daniel Veillard	4ec0b0f	2001-04-25 15:53:40 +0000	[diff] [blame]	560	{ "graphic", 0, 0, 0, 9, 0, "" },
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	561	{ "graphicco", 0, 0, 0, 9, 0, "" }, /* areaspec */
				562	{ "group", 0, 0, 0, 9, 0, "" }, /* arg */
				563	{ "guibutton", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				564	{ "guiicon", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				565	{ "guilabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				566	{ "guimenuitem",0, 0, 0, 7, 0, "" }, /* smallcptr */
				567	{ "guimenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				568	{ "guisubmenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				569	{ "hardware", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				570	{ "highlights", 0, 0, 0, 9, 0, "" }, /* highlights.mix */
				571	{ "holder", 0, 0, 0, 4, 0, "" }, /* docinfo */
				572	{ "honorific", 0, 0, 0, 4, 0, "" }, /* docinfo */
				573	{ "imagedata", 0, 2, 1, 0, 0, "" },
				574	{ "imageobjectco",0, 0, 0, 9, 0, "" }, /* areaspec */
				575	{ "imageobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				576	{ "important", 0, 0, 0, 9, 0, "" }, /* title */
				577	{ "indexdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				578	{ "indexentry", 0, 0, 0, 9, 0, "" }, /* primaryie */
				579	{ "index", 0, 0, 0, 9, 0, "" }, /* indexinfo */
				580	{ "indexinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				581	{ "indexterm", 0, 0, 0, 9, 0, "" }, /* primary */
				582	{ "informalequation",0, 0, 0, 9, 0, "" }, /* equation.content */
				583	{ "informalexample",0, 0, 0, 9, 0, "" }, /* example.mix */
				584	{ "informalfigure",0, 0, 0, 9, 0, "" }, /* figure.mix */
				585	{ "informaltable",0, 0, 0, 9, 0, "" }, /* graphic */
				586	{ "initializer",0, 0, 0, 7, 0, "" }, /* smallcptr */
				587	{ "inlineequation",0, 0, 0, 9, 0, "" }, /* inlineequation.content */
Daniel Veillard	02f077a	2001-04-26 10:59:11 +0000	[diff] [blame]	588	{ "inlinegraphic",0, 0, 0, 9, 0, "" },
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	589	{ "inlinemediaobject",0,0, 0, 9, 0, "" }, /* objectinfo */
				590	{ "interfacename",0, 0, 0, 7, 0, "" }, /* smallcptr */
				591	{ "interface", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				592	{ "invpartnumber",0, 0, 0, 4, 0, "" }, /* docinfo */
				593	{ "isbn", 0, 0, 0, 4, 0, "" }, /* docinfo */
				594	{ "issn", 0, 0, 0, 4, 0, "" }, /* docinfo */
				595	{ "issuenum", 0, 0, 0, 4, 0, "" }, /* docinfo */
				596	{ "itemizedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				597	{ "itermset", 0, 0, 0, 9, 0, "" }, /* indexterm */
				598	{ "jobtitle", 0, 0, 0, 4, 0, "" }, /* docinfo */
				599	{ "keycap", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				600	{ "keycode", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				601	{ "keycombo", 0, 0, 0, 9, 0, "" }, /* keycap */
				602	{ "keysym", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				603	{ "keyword", 0, 0, 0, 1, 0, "" },
				604	{ "keywordset", 0, 0, 0, 9, 0, "" }, /* keyword */
				605	{ "label", 0, 0, 0, 3, 0, "" }, /* word */
				606	{ "legalnotice",0, 0, 0, 9, 0, "" }, /* title */
				607	{ "lineage", 0, 0, 0, 4, 0, "" }, /* docinfo */
				608	{ "lineannotation",0, 0, 0, 2, 0, "" }, /* para */
				609	{ "link", 0, 0, 0, 2, 0, "" }, /* para */
				610	{ "listitem", 0, 0, 0, 9, 0, "" }, /* component.mix */
				611	{ "literal", 0, 0, 0, 9, 0, "" }, /* cptr */
				612	{ "literallayout",0, 0, 0, 2, 0, "" }, /* para */
				613	{ "lot", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */
				614	{ "lotentry", 0, 0, 0, 2, 0, "" }, /* para */
				615	{ "manvolnum", 0, 0, 0, 3, 0, "" }, /* word */
				616	{ "markup", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				617	{ "medialabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				618	{ "mediaobjectco",0, 0, 0, 9, 0, "" }, /* objectinfo */
				619	{ "mediaobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				620	{ "member", 0, 0, 0, 2, 0, "" }, /* para */
				621	{ "menuchoice", 0, 0, 0, 9, 0, "" }, /* shortcut */
				622	{ "methodname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				623	{ "methodparam",0, 0, 0, 9, 0, "" }, /* modifier */
				624	{ "methodsynopsis",0, 0, 0, 9, 0, "" }, /* modifier */
				625	{ "modespec", 0, 0, 0, 4, 0, "" }, /* docinfo */
				626	{ "modifier", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				627	{ "mousebutton",0, 0, 0, 7, 0, "" }, /* smallcptr */
				628	{ "msgaud", 0, 0, 0, 2, 0, "" }, /* para */
				629	{ "msgentry", 0, 0, 0, 9, 0, "" }, /* msg */
				630	{ "msgexplan", 0, 0, 0, 9, 0, "" }, /* title */
				631	{ "msginfo", 0, 0, 0, 9, 0, "" }, /* msglevel */
				632	{ "msglevel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				633	{ "msgmain", 0, 0, 0, 9, 0, "" }, /* title */
				634	{ "msgorig", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				635	{ "msgrel", 0, 0, 0, 9, 0, "" }, /* title */
				636	{ "msgset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				637	{ "msgsub", 0, 0, 0, 9, 0, "" }, /* title */
				638	{ "msgtext", 0, 0, 0, 9, 0, "" }, /* component.mix */
				639	{ "msg", 0, 0, 0, 9, 0, "" }, /* title */
				640	{ "note", 0, 0, 0, 9, 0, "" }, /* title */
				641	{ "objectinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				642	{ "olink", 0, 0, 0, 2, 0, "" }, /* para */
				643	{ "ooclass", 0, 0, 0, 9, 0, "" }, /* modifier */
				644	{ "ooexception",0, 0, 0, 9, 0, "" }, /* modifier */
				645	{ "oointerface",0, 0, 0, 9, 0, "" }, /* modifier */
				646	{ "optional", 0, 0, 0, 9, 0, "" }, /* cptr */
				647	{ "option", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				648	{ "orderedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				649	{ "orgdiv", 0, 0, 0, 4, 0, "" }, /* docinfo */
				650	{ "orgname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				651	{ "otheraddr", 0, 0, 0, 4, 0, "" }, /* docinfo */
				652	{ "othercredit",0, 0, 0, 9, 0, "" }, /* person.ident.mix */
				653	{ "othername", 0, 0, 0, 4, 0, "" }, /* docinfo */
				654	{ "pagenums", 0, 0, 0, 4, 0, "" }, /* docinfo */
				655	{ "paramdef", 0, 0, 0, 1, 0, "" },
				656	{ "parameter", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				657	{ "para", 0, 0, 0, 2, 0, "" }, /* para */
				658	{ "partinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				659	{ "partintro", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				660	{ "part", 0, 0, 0, 9, 0, "" }, /* partinfo */
				661	{ "phone", 0, 0, 0, 4, 0, "" }, /* docinfo */
				662	{ "phrase", 0, 0, 0, 2, 0, "" }, /* para */
				663	{ "pob", 0, 0, 0, 4, 0, "" }, /* docinfo */
				664	{ "postcode", 0, 0, 0, 4, 0, "" }, /* docinfo */
				665	{ "prefaceinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				666	{ "preface", 0, 0, 0, 9, 0, "" }, /* prefaceinfo */
				667	{ "primaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
Daniel Veillard	c057c5d	2001-05-02 12:41:24 +0000	[diff] [blame]	668	{ "primary", 0, 0, 0, 9, 0, "" }, /* ndxterm */
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	669	{ "printhistory",0, 0, 0, 9, 0, "" }, /* para.class */
				670	{ "procedure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				671	{ "productname",0, 0, 0, 2, 0, "" }, /* para */
				672	{ "productnumber",0, 0, 0, 4, 0, "" }, /* docinfo */
				673	{ "programlistingco",0, 0, 0, 9, 0, "" }, /* areaspec */
				674	{ "programlisting",0, 0, 0, 2, 0, "" }, /* para */
				675	{ "prompt", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				676	{ "property", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				677	{ "pubdate", 0, 0, 0, 4, 0, "" }, /* docinfo */
				678	{ "publishername",0, 0, 0, 4, 0, "" }, /* docinfo */
				679	{ "publisher", 0, 0, 0, 9, 0, "" }, /* publishername */
				680	{ "pubsnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */
				681	{ "qandadiv", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				682	{ "qandaentry", 0, 0, 0, 9, 0, "" }, /* revhistory */
				683	{ "qandaset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				684	{ "question", 0, 0, 0, 9, 0, "" }, /* label */
				685	{ "quote", 0, 0, 0, 2, 0, "" }, /* para */
				686	{ "refclass", 0, 0, 0, 9, 0, "" }, /* refclass.char.mix */
				687	{ "refdescriptor",0, 0, 0, 9, 0, "" }, /* refname.char.mix */
				688	{ "refentryinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				689	{ "refentry", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */
				690	{ "refentrytitle",0, 0, 0, 2, 0, "" }, /* para */
				691	{ "referenceinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				692	{ "reference", 0, 0, 0, 9, 0, "" }, /* referenceinfo */
				693	{ "refmeta", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */
				694	{ "refmiscinfo",0, 0, 0, 4, 0, "" }, /* docinfo */
				695	{ "refnamediv", 0, 0, 0, 9, 0, "" }, /* refdescriptor */
				696	{ "refname", 0, 0, 0, 9, 0, "" }, /* refname.char.mix */
				697	{ "refpurpose", 0, 0, 0, 9, 0, "" }, /* refinline.char.mix */
				698	{ "refsect1info",0, 0, 0, 9, 0, "" }, /* graphic */
				699	{ "refsect1", 0, 0, 0, 9, 0, "" }, /* refsect */
				700	{ "refsect2info",0, 0, 0, 9, 0, "" }, /* graphic */
				701	{ "refsect2", 0, 0, 0, 9, 0, "" }, /* refsect */
				702	{ "refsect3info",0, 0, 0, 9, 0, "" }, /* graphic */
				703	{ "refsect3", 0, 0, 0, 9, 0, "" }, /* refsect */
				704	{ "refsynopsisdivinfo",0,0, 0, 9, 0, "" }, /* graphic */
				705	{ "refsynopsisdiv",0, 0, 0, 9, 0, "" }, /* refsynopsisdivinfo */
				706	{ "releaseinfo",0, 0, 0, 4, 0, "" }, /* docinfo */
				707	{ "remark", 0, 0, 0, 2, 0, "" }, /* para */
				708	{ "replaceable",0, 0, 0, 1, 0, "" },
				709	{ "returnvalue",0, 0, 0, 7, 0, "" }, /* smallcptr */
				710	{ "revdescription",0, 0, 0, 9, 0, "" }, /* revdescription.mix */
				711	{ "revhistory", 0, 0, 0, 9, 0, "" }, /* revision */
				712	{ "revision", 0, 0, 0, 9, 0, "" }, /* revnumber */
				713	{ "revnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */
				714	{ "revremark", 0, 0, 0, 4, 0, "" }, /* docinfo */
				715	{ "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */
				716	{ "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */
				717	{ "sbr", 0, 2, 1, 0, 0, "" },
				718	{ "screenco", 0, 0, 0, 9, 0, "" }, /* areaspec */
				719	{ "screeninfo", 0, 0, 0, 2, 0, "" }, /* para */
				720	{ "screen", 0, 0, 0, 2, 0, "" }, /* para */
				721	{ "screenshot", 0, 0, 0, 9, 0, "" }, /* screeninfo */
				722	{ "secondaryie",0, 0, 0, 4, 0, "" }, /* ndxterm */
				723	{ "secondary", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				724	{ "sect1info", 0, 0, 0, 9, 0, "" }, /* graphic */
				725	{ "sect1", 0, 0, 0, 9, 0, "" }, /* sect */
				726	{ "sect2info", 0, 0, 0, 9, 0, "" }, /* graphic */
				727	{ "sect2", 0, 0, 0, 9, 0, "" }, /* sect */
				728	{ "sect3info", 0, 0, 0, 9, 0, "" }, /* graphic */
				729	{ "sect3", 0, 0, 0, 9, 0, "" }, /* sect */
				730	{ "sect4info", 0, 0, 0, 9, 0, "" }, /* graphic */
				731	{ "sect4", 0, 0, 0, 9, 0, "" }, /* sect */
				732	{ "sect5info", 0, 0, 0, 9, 0, "" }, /* graphic */
				733	{ "sect5", 0, 0, 0, 9, 0, "" }, /* sect */
				734	{ "sectioninfo",0, 0, 0, 9, 0, "" }, /* graphic */
				735	{ "section", 0, 0, 0, 9, 0, "" }, /* sectioninfo */
				736	{ "seealsoie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				737	{ "seealso", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				738	{ "seeie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				739	{ "see", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				740	{ "seglistitem",0, 0, 0, 9, 0, "" }, /* seg */
				741	{ "segmentedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				742	{ "seg", 0, 0, 0, 2, 0, "" }, /* para */
				743	{ "segtitle", 0, 0, 0, 8, 0, "" }, /* title */
				744	{ "seriesvolnums", 0, 0, 0, 4, 0, "" }, /* docinfo */
				745	{ "set", 0, 0, 0, 9, 0, "" }, /* div.title.content */
				746	{ "setindexinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				747	{ "setindex", 0, 0, 0, 9, 0, "" }, /* setindexinfo */
				748	{ "setinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
				749	{ "sgmltag", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				750	{ "shortaffil", 0, 0, 0, 4, 0, "" }, /* docinfo */
				751	{ "shortcut", 0, 0, 0, 9, 0, "" }, /* keycap */
				752	{ "sidebarinfo",0, 0, 0, 9, 0, "" }, /* graphic */
				753	{ "sidebar", 0, 0, 0, 9, 0, "" }, /* sidebarinfo */
				754	{ "simpara", 0, 0, 0, 2, 0, "" }, /* para */
				755	{ "simplelist", 0, 0, 0, 9, 0, "" }, /* member */
				756	{ "simplemsgentry", 0, 0, 0, 9, 0, "" }, /* msgtext */
				757	{ "simplesect", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
				758	{ "spanspec", 0, 2, 1, 0, 0, "" },
				759	{ "state", 0, 0, 0, 4, 0, "" }, /* docinfo */
				760	{ "step", 0, 0, 0, 9, 0, "" }, /* title */
				761	{ "street", 0, 0, 0, 4, 0, "" }, /* docinfo */
				762	{ "structfield",0, 0, 0, 7, 0, "" }, /* smallcptr */
				763	{ "structname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				764	{ "subjectset", 0, 0, 0, 9, 0, "" }, /* subject */
				765	{ "subject", 0, 0, 0, 9, 0, "" }, /* subjectterm */
				766	{ "subjectterm",0, 0, 0, 1, 0, "" },
				767	{ "subscript", 0, 0, 0, 1, 0, "" },
				768	{ "substeps", 0, 0, 0, 9, 0, "" }, /* step */
				769	{ "subtitle", 0, 0, 0, 8, 0, "" }, /* title */
				770	{ "superscript", 0, 0, 0, 1, 0, "" },
				771	{ "surname", 0, 0, 0, 4, 0, "" }, /* docinfo */
				772	{ "symbol", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				773	{ "synopfragment", 0, 0, 0, 9, 0, "" }, /* arg */
				774	{ "synopfragmentref", 0, 0, 0, 1, 0, "" },
				775	{ "synopsis", 0, 0, 0, 2, 0, "" }, /* para */
				776	{ "systemitem", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				777	{ "table", 0, 0, 0, 9, 0, "" }, /* tbl.table.mdl */
				778	/* { "%tbl.table.name;", 0, 0, 0, 9, 0, "" },/ / tbl.table.mdl */
				779	{ "tbody", 0, 0, 0, 9, 0, "" }, /* row */
				780	{ "tbody", 0, 0, 0, 9, 0, "" }, /* row */
				781	{ "term", 0, 0, 0, 2, 0, "" }, /* para */
				782	{ "tertiaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				783	{ "tertiary ", 0, 0, 0, 4, 0, "" }, /* ndxterm */
				784	{ "textobject", 0, 0, 0, 9, 0, "" }, /* objectinfo */
				785	{ "tfoot", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */
				786	{ "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */
				787	{ "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */
				788	{ "thead", 0, 0, 0, 9, 0, "" }, /* row */
				789	{ "thead", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */
				790	{ "tip", 0, 0, 0, 9, 0, "" }, /* title */
				791	{ "titleabbrev",0, 0, 0, 8, 0, "" }, /* title */
				792	{ "title", 0, 0, 0, 8, 0, "" }, /* title */
				793	{ "tocback", 0, 0, 0, 2, 0, "" }, /* para */
				794	{ "toc", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */
				795	{ "tocchap", 0, 0, 0, 9, 0, "" }, /* tocentry */
				796	{ "tocentry", 0, 0, 0, 2, 0, "" }, /* para */
				797	{ "tocfront", 0, 0, 0, 2, 0, "" }, /* para */
				798	{ "toclevel1", 0, 0, 0, 9, 0, "" }, /* tocentry */
				799	{ "toclevel2", 0, 0, 0, 9, 0, "" }, /* tocentry */
				800	{ "toclevel3", 0, 0, 0, 9, 0, "" }, /* tocentry */
				801	{ "toclevel4", 0, 0, 0, 9, 0, "" }, /* tocentry */
				802	{ "toclevel5", 0, 0, 0, 9, 0, "" }, /* tocentry */
				803	{ "tocpart", 0, 0, 0, 9, 0, "" }, /* tocentry */
				804	{ "token", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				805	{ "trademark", 0, 0, 0, 1, 0, "" },
				806	{ "type", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				807	{ "ulink", 0, 0, 0, 2, 0, "" }, /* para */
				808	{ "userinput", 0, 0, 0, 9, 0, "" }, /* cptr */
				809	{ "varargs", 0, 2, 1, 0, 0, "" },
				810	{ "variablelist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
				811	{ "varlistentry",0, 0, 0, 9, 0, "" }, /* term */
				812	{ "varname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
				813	{ "videodata", 0, 2, 1, 0, 0, "" },
				814	{ "videoobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
				815	{ "void", 0, 2, 1, 0, 0, "" },
				816	{ "volumenum", 0, 0, 0, 4, 0, "" }, /* docinfo */
				817	{ "warning", 0, 0, 0, 9, 0, "" }, /* title */
				818	{ "wordasword", 0, 0, 0, 3, 0, "" }, /* word */
				819	{ "xref", 0, 2, 1, 0, 0, "" },
				820	{ "year", 0, 0, 0, 4, 0, "" }, /* docinfo */
				821	};
				822
				823	#if 0
				824	/*
				825	* start tags that imply the end of a current element
				826	* any tag of each line implies the end of the current element if the type of
				827	* that element is in the same line
				828	*/
				829	static const char *docbEquEnd[] = {
				830	"dt", "dd", "li", "option", NULL,
				831	"h1", "h2", "h3", "h4", "h5", "h6", NULL,
				832	"ol", "menu", "dir", "address", "pre", "listing", "xmp", NULL,
				833	NULL
				834	};
				835	#endif
				836
				837	/*
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	838	* according the SGML DTD, HR should be added to the 2nd line above, as it
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	839	* is not allowed within a H1, H2, H3, etc. But we should tolerate that case
				840	* because many documents contain rules in headings...
				841	*/
				842
				843	/*
				844	* start tags that imply the end of current element
				845	*/
				846	static const char *docbStartClose[] = {
				847	NULL
				848	};
				849
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	850	static const char** docbStartCloseIndex[100];
				851	static int docbStartCloseIndexinitialized = 0;
				852
				853	/************************************************************************
				854	* *
				855	* functions to handle SGML specific data *
				856	* *
				857	************************************************************************/
				858
				859	/**
				860	* docbInitAutoClose:
				861	*
				862	* Initialize the docbStartCloseIndex for fast lookup of closing tags names.
				863	*
				864	*/
				865	static void
				866	docbInitAutoClose(void) {
				867	int indx, i = 0;
				868
				869	if (docbStartCloseIndexinitialized) return;
				870
				871	for (indx = 0;indx < 100;indx ++) docbStartCloseIndex[indx] = NULL;
				872	indx = 0;
				873	while ((docbStartClose[i] != NULL) && (indx < 100 - 1)) {
				874	docbStartCloseIndex[indx++] = &docbStartClose[i];
				875	while (docbStartClose[i] != NULL) i++;
				876	i++;
				877	}
				878	}
				879
				880	/**
				881	* docbTagLookup:
				882	* @tag: The tag name
				883	*
				884	* Lookup the SGML tag in the ElementTable
				885	*
				886	* Returns the related docbElemDescPtr or NULL if not found.
				887	*/
				888	static docbElemDescPtr
				889	docbTagLookup(const xmlChar *tag) {
				890	unsigned int i;
				891
				892	for (i = 0; i < (sizeof(docbookElementTable) /
				893	sizeof(docbookElementTable[0]));i++) {
				894	if (xmlStrEqual(tag, BAD_CAST docbookElementTable[i].name))
				895	return(&docbookElementTable[i]);
				896	}
				897	return(NULL);
				898	}
				899
				900	/**
				901	* docbCheckAutoClose:
				902	* @newtag: The new tag name
				903	* @oldtag: The old tag name
				904	*
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	905	* Checks whether the new tag is one of the registered valid tags for
				906	* closing old.
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	907	* Initialize the docbStartCloseIndex for fast lookup of closing tags names.
				908	*
				909	* Returns 0 if no, 1 if yes.
				910	*/
				911	static int
				912	docbCheckAutoClose(const xmlChar newtag, const xmlChar oldtag) {
				913	int i, indx;
				914	const char **closed = NULL;
				915
				916	if (docbStartCloseIndexinitialized == 0) docbInitAutoClose();
				917
				918	/* inefficient, but not a big deal */
				919	for (indx = 0; indx < 100;indx++) {
				920	closed = docbStartCloseIndex[indx];
				921	if (closed == NULL) return(0);
				922	if (xmlStrEqual(BAD_CAST *closed, newtag)) break;
				923	}
				924
				925	i = closed - docbStartClose;
				926	i++;
				927	while (docbStartClose[i] != NULL) {
				928	if (xmlStrEqual(BAD_CAST docbStartClose[i], oldtag)) {
				929	return(1);
				930	}
				931	i++;
				932	}
				933	return(0);
				934	}
				935
				936	/**
				937	* docbAutoCloseOnClose:
				938	* @ctxt: an SGML parser context
				939	* @newtag: The new tag name
				940	*
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	941	* The DocBook DTD allows an ending tag to implicitly close other tags.
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	942	*/
				943	static void
				944	docbAutoCloseOnClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) {
				945	docbElemDescPtr info;
				946	xmlChar *oldname;
				947	int i;
				948
				949	if ((newtag[0] == '/') && (newtag[1] == 0))
				950	return;
				951
				952	#ifdef DEBUG
				953	xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
				954	for (i = 0;i < ctxt->nameNr;i++)
				955	xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]);
				956	#endif
				957
				958	for (i = (ctxt->nameNr - 1);i >= 0;i--) {
				959	if (xmlStrEqual(newtag, ctxt->nameTab[i])) break;
				960	}
				961	if (i < 0) return;
				962
				963	while (!xmlStrEqual(newtag, ctxt->name)) {
				964	info = docbTagLookup(ctxt->name);
				965	if ((info == NULL) \|\| (info->endTag == 1)) {
				966	#ifdef DEBUG
				967	xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
				968	#endif
				969	} else {
				970	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				971	ctxt->sax->error(ctxt->userData,
				972	"Opening and ending tag mismatch: %s and %s\n",
				973	newtag, ctxt->name);
				974	ctxt->wellFormed = 0;
				975	}
				976	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				977	ctxt->sax->endElement(ctxt->userData, ctxt->name);
				978	oldname = docbnamePop(ctxt);
				979	if (oldname != NULL) {
				980	#ifdef DEBUG
				981	xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: popped %s\n", oldname);
				982	#endif
				983	xmlFree(oldname);
				984	}
				985	}
				986	}
				987
				988	/**
				989	* docbAutoClose:
				990	* @ctxt: an SGML parser context
				991	* @newtag: The new tag name or NULL
				992	*
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	993	* The DocBook DTD allows a tag to implicitly close other tags.
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	994	* The list is kept in docbStartClose array. This function is
				995	* called when a new tag has been detected and generates the
				996	* appropriates closes if possible/needed.
				997	* If newtag is NULL this mean we are at the end of the resource
				998	* and we should check
				999	*/
				1000	static void
				1001	docbAutoClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) {
				1002	xmlChar *oldname;
				1003	while ((newtag != NULL) && (ctxt->name != NULL) &&
				1004	(docbCheckAutoClose(newtag, ctxt->name))) {
				1005	#ifdef DEBUG
				1006	xmlGenericError(xmlGenericErrorContext,"docbAutoClose: %s closes %s\n", newtag, ctxt->name);
				1007	#endif
				1008	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				1009	ctxt->sax->endElement(ctxt->userData, ctxt->name);
				1010	oldname = docbnamePop(ctxt);
				1011	if (oldname != NULL) {
				1012	#ifdef DEBUG
				1013	xmlGenericError(xmlGenericErrorContext,"docbAutoClose: popped %s\n", oldname);
				1014	#endif
				1015	xmlFree(oldname);
				1016	}
				1017	}
				1018	}
				1019
				1020	/**
				1021	* docbAutoCloseTag:
				1022	* @doc: the SGML document
				1023	* @name: The tag name
				1024	* @elem: the SGML element
				1025	*
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	1026	* The DocBook DTD allows a tag to implicitly close other tags.
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	1027	* The list is kept in docbStartClose array. This function checks
				1028	* if the element or one of it's children would autoclose the
				1029	* given tag.
				1030	*
				1031	* Returns 1 if autoclose, 0 otherwise
				1032	*/
				1033	static int
				1034	docbAutoCloseTag(docbDocPtr doc, const xmlChar *name, docbNodePtr elem) {
				1035	docbNodePtr child;
				1036
				1037	if (elem == NULL) return(1);
				1038	if (xmlStrEqual(name, elem->name)) return(0);
				1039	if (docbCheckAutoClose(elem->name, name)) return(1);
				1040	child = elem->children;
				1041	while (child != NULL) {
				1042	if (docbAutoCloseTag(doc, name, child)) return(1);
				1043	child = child->next;
				1044	}
				1045	return(0);
				1046	}
				1047
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	1048	/************************************************************************
				1049	* *
				1050	* The list of SGML predefined entities *
				1051	* *
				1052	************************************************************************/
				1053
				1054
				1055	static docbEntityDesc
				1056	docbookEntitiesTable[] = {
				1057	/*
				1058	* the 4 absolute ones, plus apostrophe.
				1059	*/
				1060	{ 0x0026, "amp", "AMPERSAND" },
				1061	{ 0x003C, "lt", "LESS-THAN SIGN" },
				1062
				1063	/*
				1064	* Converted with VI macros from docbook ent files
				1065	*/
				1066	{ 0x0021, "excl", "EXCLAMATION MARK" },
				1067	{ 0x0022, "quot", "QUOTATION MARK" },
				1068	{ 0x0023, "num", "NUMBER SIGN" },
				1069	{ 0x0024, "dollar", "DOLLAR SIGN" },
				1070	{ 0x0025, "percnt", "PERCENT SIGN" },
				1071	{ 0x0027, "apos", "APOSTROPHE" },
				1072	{ 0x0028, "lpar", "LEFT PARENTHESIS" },
				1073	{ 0x0029, "rpar", "RIGHT PARENTHESIS" },
				1074	{ 0x002A, "ast", "ASTERISK OPERATOR" },
				1075	{ 0x002B, "plus", "PLUS SIGN" },
				1076	{ 0x002C, "comma", "COMMA" },
				1077	{ 0x002D, "hyphen", "HYPHEN-MINUS" },
				1078	{ 0x002E, "period", "FULL STOP" },
				1079	{ 0x002F, "sol", "SOLIDUS" },
				1080	{ 0x003A, "colon", "COLON" },
				1081	{ 0x003B, "semi", "SEMICOLON" },
				1082	{ 0x003D, "equals", "EQUALS SIGN" },
				1083	{ 0x003E, "gt", "GREATER-THAN SIGN" },
				1084	{ 0x003F, "quest", "QUESTION MARK" },
				1085	{ 0x0040, "commat", "COMMERCIAL AT" },
				1086	{ 0x005B, "lsqb", "LEFT SQUARE BRACKET" },
				1087	{ 0x005C, "bsol", "REVERSE SOLIDUS" },
				1088	{ 0x005D, "rsqb", "RIGHT SQUARE BRACKET" },
				1089	{ 0x005E, "circ", "RING OPERATOR" },
				1090	{ 0x005F, "lowbar", "LOW LINE" },
				1091	{ 0x0060, "grave", "GRAVE ACCENT" },
				1092	{ 0x007B, "lcub", "LEFT CURLY BRACKET" },
				1093	{ 0x007C, "verbar", "VERTICAL LINE" },
				1094	{ 0x007D, "rcub", "RIGHT CURLY BRACKET" },
				1095	{ 0x00A0, "nbsp", "NO-BREAK SPACE" },
				1096	{ 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" },
				1097	{ 0x00A2, "cent", "CENT SIGN" },
				1098	{ 0x00A3, "pound", "POUND SIGN" },
				1099	{ 0x00A4, "curren", "CURRENCY SIGN" },
				1100	{ 0x00A5, "yen", "YEN SIGN" },
				1101	{ 0x00A6, "brvbar", "BROKEN BAR" },
				1102	{ 0x00A7, "sect", "SECTION SIGN" },
				1103	{ 0x00A8, "die", "" },
				1104	{ 0x00A8, "Dot", "" },
				1105	{ 0x00A8, "uml", "" },
				1106	{ 0x00A9, "copy", "COPYRIGHT SIGN" },
				1107	{ 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" },
				1108	{ 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" },
				1109	{ 0x00AC, "not", "NOT SIGN" },
				1110	{ 0x00AD, "shy", "SOFT HYPHEN" },
				1111	{ 0x00AE, "reg", "REG TRADE MARK SIGN" },
				1112	{ 0x00AF, "macr", "MACRON" },
				1113	{ 0x00B0, "deg", "DEGREE SIGN" },
				1114	{ 0x00B1, "plusmn", "PLUS-MINUS SIGN" },
				1115	{ 0x00B2, "sup2", "SUPERSCRIPT TWO" },
				1116	{ 0x00B3, "sup3", "SUPERSCRIPT THREE" },
				1117	{ 0x00B4, "acute", "ACUTE ACCENT" },
				1118	{ 0x00B5, "micro", "MICRO SIGN" },
				1119	{ 0x00B6, "para", "PILCROW SIGN" },
				1120	{ 0x00B7, "middot", "MIDDLE DOT" },
				1121	{ 0x00B8, "cedil", "CEDILLA" },
				1122	{ 0x00B9, "sup1", "SUPERSCRIPT ONE" },
				1123	{ 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" },
				1124	{ 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" },
				1125	{ 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" },
				1126	{ 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" },
				1127	{ 0x00BD, "half", "VULGAR FRACTION ONE HALF" },
				1128	{ 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" },
				1129	{ 0x00BF, "iquest", "INVERTED QUESTION MARK" },
				1130	{ 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" },
				1131	{ 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" },
				1132	{ 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" },
				1133	{ 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" },
				1134	{ 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" },
				1135	{ 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" },
				1136	{ 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" },
				1137	{ 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" },
				1138	{ 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" },
				1139	{ 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" },
				1140	{ 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" },
				1141	{ 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" },
				1142	{ 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" },
				1143	{ 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" },
				1144	{ 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" },
				1145	{ 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" },
				1146	{ 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" },
				1147	{ 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" },
				1148	{ 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" },
				1149	{ 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" },
				1150	{ 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" },
				1151	{ 0x00D5, "Otilde", "LATIN CAPITAL LETTER O WITH TILDE" },
				1152	{ 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" },
				1153	{ 0x00D7, "times", "MULTIPLICATION SIGN" },
				1154	{ 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" },
				1155	{ 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" },
				1156	{ 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" },
				1157	{ 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" },
				1158	{ 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" },
				1159	{ 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" },
				1160	{ 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" },
				1161	{ 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" },
				1162	{ 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" },
				1163	{ 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" },
				1164	{ 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" },
				1165	{ 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" },
				1166	{ 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" },
				1167	{ 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" },
				1168	{ 0x00E6, "aelig", "LATIN SMALL LETTER AE" },
				1169	{ 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" },
				1170	{ 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" },
				1171	{ 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" },
				1172	{ 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" },
				1173	{ 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" },
				1174	{ 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" },
				1175	{ 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" },
				1176	{ 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" },
				1177	{ 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" },
				1178	{ 0x00F0, "eth", "LATIN SMALL LETTER ETH" },
				1179	{ 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" },
				1180	{ 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" },
				1181	{ 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" },
				1182	{ 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" },
				1183	{ 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" },
				1184	{ 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" },
				1185	{ 0x00F7, "divide", "DIVISION SIGN" },
				1186	{ 0x00F8, "oslash", "CIRCLED DIVISION SLASH" },
				1187	{ 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" },
				1188	{ 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" },
				1189	{ 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" },
				1190	{ 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" },
				1191	{ 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" },
				1192	{ 0x00FE, "thorn", "LATIN SMALL LETTER THORN" },
				1193	{ 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" },
				1194	{ 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" },
				1195	{ 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" },
				1196	{ 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" },
				1197	{ 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" },
				1198	{ 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" },
				1199	{ 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" },
				1200	{ 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" },
				1201	{ 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" },
				1202	{ 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" },
				1203	{ 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" },
				1204	{ 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" },
				1205	{ 0x010B, "cdot", "DOT OPERATOR" },
				1206	{ 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" },
				1207	{ 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" },
				1208	{ 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" },
				1209	{ 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" },
				1210	{ 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" },
				1211	{ 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" },
				1212	{ 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" },
				1213	{ 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" },
				1214	{ 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" },
				1215	{ 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" },
				1216	{ 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" },
				1217	{ 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" },
				1218	{ 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" },
				1219	{ 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" },
				1220	{ 0x011C, "Gcirc", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX" },
				1221	{ 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" },
				1222	{ 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" },
				1223	{ 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" },
				1224	{ 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" },
				1225	{ 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" },
				1226	{ 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" },
				1227	{ 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" },
				1228	{ 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" },
				1229	{ 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" },
				1230	{ 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" },
				1231	{ 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" },
				1232	{ 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" },
				1233	{ 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" },
				1234	{ 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" },
				1235	{ 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" },
				1236	{ 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" },
				1237	{ 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" },
				1238	{ 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
				1239	{ 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
				1240	{ 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" },
				1241	{ 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" },
				1242	{ 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" },
				1243	{ 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" },
				1244	{ 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" },
				1245	{ 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" },
				1246	{ 0x0138, "kgreen", "LATIN SMALL LETTER KRA" },
				1247	{ 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" },
				1248	{ 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" },
				1249	{ 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" },
				1250	{ 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" },
				1251	{ 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" },
				1252	{ 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" },
				1253	{ 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" },
				1254	{ 0x0140, "lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" },
				1255	{ 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" },
				1256	{ 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" },
				1257	{ 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" },
				1258	{ 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" },
				1259	{ 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" },
				1260	{ 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" },
				1261	{ 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" },
				1262	{ 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" },
				1263	{ 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" },
				1264	{ 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" },
				1265	{ 0x014B, "eng", "LATIN SMALL LETTER ENG" },
				1266	{ 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" },
				1267	{ 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" },
				1268	{ 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" },
				1269	{ 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" },
				1270	{ 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" },
				1271	{ 0x0153, "oelig", "LATIN SMALL LIGATURE OE" },
				1272	{ 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" },
				1273	{ 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" },
				1274	{ 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" },
				1275	{ 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" },
				1276	{ 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" },
				1277	{ 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" },
				1278	{ 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" },
				1279	{ 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" },
				1280	{ 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" },
				1281	{ 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" },
				1282	{ 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" },
				1283	{ 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" },
				1284	{ 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" },
				1285	{ 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" },
				1286	{ 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" },
				1287	{ 0x0163, "tcedil", "LATIN SMALL LETTER T WITH CEDILLA" },
				1288	{ 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" },
				1289	{ 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" },
				1290	{ 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" },
				1291	{ 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" },
				1292	{ 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" },
				1293	{ 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" },
				1294	{ 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" },
				1295	{ 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" },
				1296	{ 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" },
				1297	{ 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" },
				1298	{ 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" },
				1299	{ 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" },
				1300	{ 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" },
				1301	{ 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" },
				1302	{ 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" },
				1303	{ 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" },
				1304	{ 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" },
				1305	{ 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" },
				1306	{ 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" },
				1307	{ 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" },
				1308	{ 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" },
				1309	{ 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" },
				1310	{ 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" },
				1311	{ 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" },
				1312	{ 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" },
				1313	{ 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" },
				1314	{ 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" },
				1315	{ 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" },
				1316	{ 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" },
				1317	{ 0x02C7, "caron", "CARON" },
				1318	{ 0x02D8, "breve", "BREVE" },
				1319	{ 0x02D9, "dot", "DOT ABOVE" },
				1320	{ 0x02DA, "ring", "RING ABOVE" },
				1321	{ 0x02DB, "ogon", "OGONEK" },
				1322	{ 0x02DC, "tilde", "TILDE" },
				1323	{ 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" },
				1324	{ 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" },
				1325	{ 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" },
				1326	{ 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" },
				1327	{ 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" },
				1328	{ 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" },
				1329	{ 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" },
				1330	{ 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" },
				1331	{ 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" },
				1332	{ 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" },
				1333	{ 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" },
				1334	{ 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" },
				1335	{ 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" },
				1336	{ 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" },
				1337	{ 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" },
				1338	{ 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" },
				1339	{ 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" },
				1340	{ 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" },
				1341	{ 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" },
				1342	{ 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" },
				1343	{ 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" },
				1344	{ 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" },
				1345	{ 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" },
				1346	{ 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" },
				1347	{ 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" },
				1348	{ 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" },
				1349	{ 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" },
				1350	{ 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" },
				1351	{ 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" },
				1352	{ 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" },
				1353	{ 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" },
				1354	{ 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" },
				1355	{ 0x039E, "Xi", "GREEK CAPITAL LETTER XI" },
				1356	{ 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" },
				1357	{ 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" },
				1358	{ 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" },
				1359	{ 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" },
				1360	{ 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" },
				1361	{ 0x03A3, "b.Sigma", "GREEK CAPITAL LETTER SIGMA" },
				1362	{ 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" },
				1363	{ 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" },
				1364	{ 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" },
				1365	{ 0x03A5, "Ugr", "" },
				1366	{ 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" },
				1367	{ 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" },
				1368	{ 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" },
				1369	{ 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" },
				1370	{ 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" },
				1371	{ 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" },
				1372	{ 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" },
				1373	{ 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" },
				1374	{ 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" },
				1375	{ 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" },
				1376	{ 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" },
				1377	{ 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" },
				1378	{ 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" },
				1379	{ 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" },
				1380	{ 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" },
				1381	{ 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" },
				1382	{ 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" },
				1383	{ 0x03B1, "agr", "" },
				1384	{ 0x03B1, "alpha", "" },
				1385	{ 0x03B1, "b.alpha", "" },
				1386	{ 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" },
				1387	{ 0x03B2, "beta", "GREEK SMALL LETTER BETA" },
				1388	{ 0x03B2, "bgr", "GREEK SMALL LETTER BETA" },
				1389	{ 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" },
				1390	{ 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" },
				1391	{ 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" },
				1392	{ 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" },
				1393	{ 0x03B4, "delta", "GREEK SMALL LETTER DELTA" },
				1394	{ 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" },
				1395	{ 0x03B5, "b.epsi", "" },
				1396	{ 0x03B5, "b.epsis", "" },
				1397	{ 0x03B5, "b.epsiv", "" },
				1398	{ 0x03B5, "egr", "" },
				1399	{ 0x03B5, "epsiv", "" },
				1400	{ 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" },
				1401	{ 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" },
				1402	{ 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" },
				1403	{ 0x03B7, "b.eta", "GREEK SMALL LETTER ETA" },
				1404	{ 0x03B7, "eegr", "GREEK SMALL LETTER ETA" },
				1405	{ 0x03B7, "eta", "GREEK SMALL LETTER ETA" },
				1406	{ 0x03B8, "b.thetas", "" },
				1407	{ 0x03B8, "thetas", "" },
				1408	{ 0x03B8, "thgr", "" },
				1409	{ 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" },
				1410	{ 0x03B9, "igr", "GREEK SMALL LETTER IOTA" },
				1411	{ 0x03B9, "iota", "GREEK SMALL LETTER IOTA" },
				1412	{ 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" },
				1413	{ 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" },
				1414	{ 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" },
				1415	{ 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" },
				1416	{ 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" },
				1417	{ 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" },
				1418	{ 0x03BC, "b.mu", "GREEK SMALL LETTER MU" },
				1419	{ 0x03BC, "mgr", "GREEK SMALL LETTER MU" },
				1420	{ 0x03BC, "mu", "GREEK SMALL LETTER MU" },
				1421	{ 0x03BD, "b.nu", "GREEK SMALL LETTER NU" },
				1422	{ 0x03BD, "ngr", "GREEK SMALL LETTER NU" },
				1423	{ 0x03BD, "nu", "GREEK SMALL LETTER NU" },
				1424	{ 0x03BE, "b.xi", "GREEK SMALL LETTER XI" },
				1425	{ 0x03BE, "xgr", "GREEK SMALL LETTER XI" },
				1426	{ 0x03BE, "xi", "GREEK SMALL LETTER XI" },
				1427	{ 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" },
				1428	{ 0x03C0, "b.pi", "GREEK SMALL LETTER PI" },
				1429	{ 0x03C0, "pgr", "GREEK SMALL LETTER PI" },
				1430	{ 0x03C0, "pi", "GREEK SMALL LETTER PI" },
				1431	{ 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" },
				1432	{ 0x03C1, "rgr", "GREEK SMALL LETTER RHO" },
				1433	{ 0x03C1, "rho", "GREEK SMALL LETTER RHO" },
				1434	{ 0x03C2, "b.sigmav", "" },
				1435	{ 0x03C2, "sfgr", "" },
				1436	{ 0x03C2, "sigmav", "" },
				1437	{ 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" },
				1438	{ 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" },
				1439	{ 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" },
				1440	{ 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" },
				1441	{ 0x03C4, "tau", "GREEK SMALL LETTER TAU" },
				1442	{ 0x03C4, "tgr", "GREEK SMALL LETTER TAU" },
				1443	{ 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" },
				1444	{ 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" },
				1445	{ 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" },
				1446	{ 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" },
				1447	{ 0x03C6, "phgr", "GREEK SMALL LETTER PHI" },
				1448	{ 0x03C6, "phis", "GREEK SMALL LETTER PHI" },
				1449	{ 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" },
				1450	{ 0x03C7, "chi", "GREEK SMALL LETTER CHI" },
				1451	{ 0x03C7, "khgr", "GREEK SMALL LETTER CHI" },
				1452	{ 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" },
				1453	{ 0x03C8, "psgr", "GREEK SMALL LETTER PSI" },
				1454	{ 0x03C8, "psi", "GREEK SMALL LETTER PSI" },
				1455	{ 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" },
				1456	{ 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" },
				1457	{ 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" },
				1458	{ 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" },
				1459	{ 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" },
				1460	{ 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" },
				1461	{ 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" },
				1462	{ 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" },
				1463	{ 0x03D1, "b.thetav", "" },
				1464	{ 0x03D1, "thetav", "" },
				1465	{ 0x03D2, "b.Upsi", "" },
				1466	{ 0x03D2, "Upsi", "" },
				1467	{ 0x03D5, "b.phiv", "GREEK PHI SYMBOL" },
				1468	{ 0x03D5, "phiv", "GREEK PHI SYMBOL" },
				1469	{ 0x03D6, "b.piv", "GREEK PI SYMBOL" },
				1470	{ 0x03D6, "piv", "GREEK PI SYMBOL" },
				1471	{ 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" },
				1472	{ 0x03DC, "gammad", "GREEK LETTER DIGAMMA" },
				1473	{ 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" },
				1474	{ 0x03F0, "kappav", "GREEK KAPPA SYMBOL" },
				1475	{ 0x03F1, "b.rhov", "GREEK RHO SYMBOL" },
				1476	{ 0x03F1, "rhov", "GREEK RHO SYMBOL" },
				1477	{ 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" },
				1478	{ 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" },
				1479	{ 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" },
				1480	{ 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" },
				1481	{ 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" },
				1482	{ 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" },
				1483	{ 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" },
				1484	{ 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" },
				1485	{ 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" },
				1486	{ 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" },
				1487	{ 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" },
				1488	{ 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" },
				1489	{ 0x040E, "Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" },
				1490	{ 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" },
				1491	{ 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" },
				1492	{ 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" },
				1493	{ 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" },
				1494	{ 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" },
				1495	{ 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" },
				1496	{ 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" },
				1497	{ 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" },
				1498	{ 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" },
				1499	{ 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" },
				1500	{ 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" },
				1501	{ 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" },
				1502	{ 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" },
				1503	{ 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" },
				1504	{ 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" },
				1505	{ 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" },
				1506	{ 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" },
				1507	{ 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" },
				1508	{ 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" },
				1509	{ 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" },
				1510	{ 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" },
				1511	{ 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" },
				1512	{ 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" },
				1513	{ 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" },
				1514	{ 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" },
				1515	{ 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" },
				1516	{ 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" },
				1517	{ 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" },
				1518	{ 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" },
				1519	{ 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" },
				1520	{ 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" },
				1521	{ 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" },
				1522	{ 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" },
				1523	{ 0x0430, "acy", "CYRILLIC SMALL LETTER A" },
				1524	{ 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" },
				1525	{ 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" },
				1526	{ 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" },
				1527	{ 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" },
				1528	{ 0x0435, "iecy", "CYRILLIC SMALL LETTER IE" },
				1529	{ 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" },
				1530	{ 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" },
				1531	{ 0x0438, "icy", "CYRILLIC SMALL LETTER I" },
				1532	{ 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" },
				1533	{ 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" },
				1534	{ 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" },
				1535	{ 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" },
				1536	{ 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" },
				1537	{ 0x043E, "ocy", "CYRILLIC SMALL LETTER O" },
				1538	{ 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" },
				1539	{ 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" },
				1540	{ 0x0441, "scy", "CYRILLIC SMALL LETTER ES" },
				1541	{ 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" },
				1542	{ 0x0443, "ucy", "CYRILLIC SMALL LETTER U" },
				1543	{ 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" },
				1544	{ 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" },
				1545	{ 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" },
				1546	{ 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" },
				1547	{ 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" },
				1548	{ 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" },
				1549	{ 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" },
				1550	{ 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" },
				1551	{ 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" },
				1552	{ 0x044D, "ecy", "CYRILLIC SMALL LETTER E" },
				1553	{ 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" },
				1554	{ 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" },
				1555	{ 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" },
				1556	{ 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" },
				1557	{ 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" },
				1558	{ 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" },
				1559	{ 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" },
				1560	{ 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" },
				1561	{ 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" },
				1562	{ 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" },
				1563	{ 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" },
				1564	{ 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" },
				1565	{ 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" },
				1566	{ 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" },
				1567	{ 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT U" },
				1568	{ 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" },
				1569	{ 0x2002, "ensp", "EN SPACE" },
				1570	{ 0x2003, "emsp", "EM SPACE" },
				1571	{ 0x2004, "emsp13", "THREE-PER-EM SPACE" },
				1572	{ 0x2005, "emsp14", "FOUR-PER-EM SPACE" },
				1573	{ 0x2007, "numsp", "FIGURE SPACE" },
				1574	{ 0x2008, "puncsp", "PUNCTUATION SPACE" },
				1575	{ 0x2009, "thinsp", "THIN SPACE" },
				1576	{ 0x200A, "hairsp", "HAIR SPACE" },
				1577	{ 0x2010, "dash", "HYPHEN" },
				1578	{ 0x2013, "ndash", "EN DASH" },
				1579	{ 0x2014, "mdash", "EM DASH" },
				1580	{ 0x2015, "horbar", "HORIZONTAL BAR" },
				1581	{ 0x2016, "Verbar", "DOUBLE VERTICAL LINE" },
				1582	{ 0x2018, "lsquo", "" },
				1583	{ 0x2018, "rsquor", "" },
				1584	{ 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" },
				1585	{ 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" },
				1586	{ 0x201C, "ldquo", "" },
				1587	{ 0x201C, "rdquor", "" },
				1588	{ 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" },
				1589	{ 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" },
				1590	{ 0x2020, "dagger", "DAGGER" },
				1591	{ 0x2021, "Dagger", "DOUBLE DAGGER" },
				1592	{ 0x2022, "bull", "BULLET" },
				1593	{ 0x2025, "nldr", "TWO DOT LEADER" },
				1594	{ 0x2026, "hellip", "HORIZONTAL ELLIPSIS" },
				1595	{ 0x2026, "mldr", "HORIZONTAL ELLIPSIS" },
				1596	{ 0x2030, "permil", "PER MILLE SIGN" },
				1597	{ 0x2032, "prime", "PRIME" },
				1598	{ 0x2032, "vprime", "PRIME" },
				1599	{ 0x2033, "Prime", "DOUBLE PRIME" },
				1600	{ 0x2034, "tprime", "TRIPLE PRIME" },
				1601	{ 0x2035, "bprime", "REVERSED PRIME" },
				1602	{ 0x2041, "caret", "CARET" },
				1603	{ 0x2043, "hybull", "HYPHEN BULLET" },
				1604	{ 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" },
				1605	{ 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" },
				1606	{ 0x2105, "incare", "CARE OF" },
				1607	{ 0x210B, "hamilt", "SCRIPT CAPITAL H" },
				1608	{ 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" },
				1609	{ 0x2111, "image", "BLACK-LETTER CAPITAL I" },
				1610	{ 0x2112, "lagran", "SCRIPT CAPITAL L" },
				1611	{ 0x2113, "ell", "SCRIPT SMALL L" },
				1612	{ 0x2116, "numero", "NUMERO SIGN" },
				1613	{ 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" },
				1614	{ 0x2118, "weierp", "SCRIPT CAPITAL P" },
				1615	{ 0x211C, "real", "BLACK-LETTER CAPITAL R" },
				1616	{ 0x211E, "rx", "PRESCRIPTION TAKE" },
				1617	{ 0x2122, "trade", "TRADE MARK SIGN" },
				1618	{ 0x2126, "ohm", "OHM SIGN" },
				1619	{ 0x212B, "angst", "ANGSTROM SIGN" },
				1620	{ 0x212C, "bernou", "SCRIPT CAPITAL B" },
				1621	{ 0x2133, "phmmat", "SCRIPT CAPITAL M" },
				1622	{ 0x2134, "order", "SCRIPT SMALL O" },
				1623	{ 0x2135, "aleph", "ALEF SYMBOL" },
				1624	{ 0x2136, "beth", "BET SYMBOL" },
				1625	{ 0x2137, "gimel", "GIMEL SYMBOL" },
				1626	{ 0x2138, "daleth", "DALET SYMBOL" },
				1627	{ 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" },
				1628	{ 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" },
				1629	{ 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" },
				1630	{ 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" },
				1631	{ 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" },
				1632	{ 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" },
				1633	{ 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" },
				1634	{ 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" },
				1635	{ 0x215B, "frac18", "" },
				1636	{ 0x215C, "frac38", "" },
				1637	{ 0x215D, "frac58", "" },
				1638	{ 0x215E, "frac78", "" },
				1639	{ 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" },
				1640	{ 0x2191, "uarr", "UPWARDS ARROW" },
				1641	{ 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" },
				1642	{ 0x2193, "darr", "DOWNWARDS ARROW" },
				1643	{ 0x2194, "harr", "LEFT RIGHT ARROW" },
				1644	{ 0x2194, "xhArr", "LEFT RIGHT ARROW" },
				1645	{ 0x2194, "xharr", "LEFT RIGHT ARROW" },
				1646	{ 0x2195, "varr", "UP DOWN ARROW" },
				1647	{ 0x2196, "nwarr", "NORTH WEST ARROW" },
				1648	{ 0x2197, "nearr", "NORTH EAST ARROW" },
				1649	{ 0x2198, "drarr", "SOUTH EAST ARROW" },
				1650	{ 0x2199, "dlarr", "SOUTH WEST ARROW" },
				1651	{ 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" },
				1652	{ 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" },
				1653	{ 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" },
				1654	{ 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" },
				1655	{ 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" },
				1656	{ 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" },
				1657	{ 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" },
				1658	{ 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" },
				1659	{ 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" },
				1660	{ 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" },
				1661	{ 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" },
				1662	{ 0x21AC, "rarrlp", "RIGHTWARDS ARROW WITH LOOP" },
				1663	{ 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" },
				1664	{ 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" },
				1665	{ 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" },
				1666	{ 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" },
				1667	{ 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" },
				1668	{ 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" },
				1669	{ 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" },
				1670	{ 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" },
				1671	{ 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" },
				1672	{ 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" },
				1673	{ 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" },
				1674	{ 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" },
				1675	{ 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" },
				1676	{ 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" },
				1677	{ 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" },
				1678	{ 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" },
				1679	{ 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" },
				1680	{ 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" },
				1681	{ 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" },
				1682	{ 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" },
				1683	{ 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" },
				1684	{ 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" },
				1685	{ 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" },
				1686	{ 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" },
				1687	{ 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" },
				1688	{ 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" },
				1689	{ 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" },
				1690	{ 0x21D0, "lArr", "LEFTWARDS ARROW" },
				1691	{ 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" },
				1692	{ 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" },
				1693	{ 0x21D2, "rArr", "RIGHTWARDS ARROW" },
				1694	{ 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" },
				1695	{ 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" },
				1696	{ 0x21D4, "hArr", "" },
				1697	{ 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" },
				1698	{ 0x21D5, "vArr", "UP DOWN DOUBLE ARROW" },
				1699	{ 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" },
				1700	{ 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" },
				1701	{ 0x2200, "forall", "" },
				1702	{ 0x2201, "comp", "COMPLEMENT" },
				1703	{ 0x2202, "part", "" },
				1704	{ 0x2203, "exist", "" },
				1705	{ 0x2204, "nexist", "THERE DOES NOT EXIST" },
				1706	{ 0x2205, "empty", "" },
				1707	{ 0x2207, "nabla", "NABLA" },
				1708	{ 0x2209, "notin", "" },
				1709	{ 0x220A, "epsi", "" },
				1710	{ 0x220A, "epsis", "" },
				1711	{ 0x220A, "isin", "" },
				1712	{ 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" },
				1713	{ 0x220D, "ni", "" },
				1714	{ 0x220F, "prod", "N-ARY PRODUCT" },
				1715	{ 0x2210, "amalg", "N-ARY COPRODUCT" },
				1716	{ 0x2210, "coprod", "N-ARY COPRODUCT" },
				1717	{ 0x2210, "samalg", "" },
				1718	{ 0x2211, "sum", "N-ARY SUMMATION" },
				1719	{ 0x2212, "minus", "MINUS SIGN" },
				1720	{ 0x2213, "mnplus", "" },
				1721	{ 0x2214, "plusdo", "DOT PLUS" },
				1722	{ 0x2216, "setmn", "SET MINUS" },
				1723	{ 0x2216, "ssetmn", "SET MINUS" },
				1724	{ 0x2217, "lowast", "ASTERISK OPERATOR" },
				1725	{ 0x2218, "compfn", "RING OPERATOR" },
				1726	{ 0x221A, "radic", "" },
				1727	{ 0x221D, "prop", "" },
				1728	{ 0x221D, "vprop", "" },
				1729	{ 0x221E, "infin", "" },
				1730	{ 0x221F, "ang90", "RIGHT ANGLE" },
				1731	{ 0x2220, "ang", "ANGLE" },
				1732	{ 0x2221, "angmsd", "MEASURED ANGLE" },
				1733	{ 0x2222, "angsph", "" },
				1734	{ 0x2223, "mid", "" },
				1735	{ 0x2224, "nmid", "DOES NOT DIVIDE" },
				1736	{ 0x2225, "par", "PARALLEL TO" },
				1737	{ 0x2225, "spar", "PARALLEL TO" },
				1738	{ 0x2226, "npar", "NOT PARALLEL TO" },
				1739	{ 0x2226, "nspar", "NOT PARALLEL TO" },
				1740	{ 0x2227, "and", "" },
				1741	{ 0x2228, "or", "" },
				1742	{ 0x2229, "cap", "" },
				1743	{ 0x222A, "cup", "" },
				1744	{ 0x222B, "int", "" },
				1745	{ 0x222E, "conint", "" },
				1746	{ 0x2234, "there4", "" },
				1747	{ 0x2235, "becaus", "BECAUSE" },
				1748	{ 0x223C, "sim", "" },
				1749	{ 0x223C, "thksim", "TILDE OPERATOR" },
				1750	{ 0x223D, "bsim", "" },
				1751	{ 0x2240, "wreath", "WREATH PRODUCT" },
				1752	{ 0x2241, "nsim", "" },
				1753	{ 0x2243, "sime", "" },
				1754	{ 0x2244, "nsime", "" },
				1755	{ 0x2245, "cong", "" },
				1756	{ 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" },
				1757	{ 0x2248, "ap", "" },
				1758	{ 0x2248, "thkap", "ALMOST EQUAL TO" },
				1759	{ 0x2249, "nap", "NOT ALMOST EQUAL TO" },
				1760	{ 0x224A, "ape", "" },
				1761	{ 0x224C, "bcong", "ALL EQUAL TO" },
				1762	{ 0x224D, "asymp", "EQUIVALENT TO" },
				1763	{ 0x224E, "bump", "" },
				1764	{ 0x224F, "bumpe", "" },
				1765	{ 0x2250, "esdot", "" },
				1766	{ 0x2251, "eDot", "" },
				1767	{ 0x2252, "efDot", "" },
				1768	{ 0x2253, "erDot", "" },
				1769	{ 0x2254, "colone", "" },
				1770	{ 0x2255, "ecolon", "" },
				1771	{ 0x2256, "ecir", "" },
				1772	{ 0x2257, "cire", "" },
				1773	{ 0x2259, "wedgeq", "ESTIMATES" },
				1774	{ 0x225C, "trie", "" },
				1775	{ 0x2260, "ne", "" },
				1776	{ 0x2261, "equiv", "" },
				1777	{ 0x2262, "nequiv", "NOT IDENTICAL TO" },
				1778	{ 0x2264, "le", "" },
				1779	{ 0x2264, "les", "LESS-THAN OR EQUAL TO" },
				1780	{ 0x2265, "ge", "GREATER-THAN OR EQUAL TO" },
				1781	{ 0x2265, "ges", "GREATER-THAN OR EQUAL TO" },
				1782	{ 0x2266, "lE", "" },
				1783	{ 0x2267, "gE", "" },
				1784	{ 0x2268, "lnE", "" },
				1785	{ 0x2268, "lne", "" },
				1786	{ 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" },
				1787	{ 0x2269, "gnE", "" },
				1788	{ 0x2269, "gne", "" },
				1789	{ 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" },
				1790	{ 0x226A, "Lt", "MUCH LESS-THAN" },
				1791	{ 0x226B, "Gt", "MUCH GREATER-THAN" },
				1792	{ 0x226C, "twixt", "BETWEEN" },
				1793	{ 0x226E, "nlt", "NOT LESS-THAN" },
				1794	{ 0x226F, "ngt", "NOT GREATER-THAN" },
				1795	{ 0x2270, "nlE", "" },
				1796	{ 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" },
				1797	{ 0x2270, "nles", "" },
				1798	{ 0x2271, "ngE", "" },
				1799	{ 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" },
				1800	{ 0x2271, "nges", "" },
				1801	{ 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" },
				1802	{ 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" },
				1803	{ 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" },
				1804	{ 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" },
				1805	{ 0x2276, "lg", "LESS-THAN OR GREATER-THAN" },
				1806	{ 0x2277, "gl", "" },
				1807	{ 0x227A, "pr", "" },
				1808	{ 0x227B, "sc", "" },
				1809	{ 0x227C, "cupre", "" },
				1810	{ 0x227C, "pre", "" },
				1811	{ 0x227D, "sccue", "" },
				1812	{ 0x227D, "sce", "" },
				1813	{ 0x227E, "prap", "" },
				1814	{ 0x227E, "prsim", "" },
				1815	{ 0x227F, "scap", "" },
				1816	{ 0x227F, "scsim", "" },
				1817	{ 0x2280, "npr", "DOES NOT PRECEDE" },
				1818	{ 0x2281, "nsc", "DOES NOT SUCCEED" },
				1819	{ 0x2282, "sub", "" },
				1820	{ 0x2283, "sup", "" },
				1821	{ 0x2284, "nsub", "NOT A SUBSET OF" },
				1822	{ 0x2285, "nsup", "NOT A SUPERSET OF" },
				1823	{ 0x2286, "subE", "" },
				1824	{ 0x2286, "sube", "" },
				1825	{ 0x2287, "supE", "" },
				1826	{ 0x2287, "supe", "" },
				1827	{ 0x2288, "nsubE", "" },
				1828	{ 0x2288, "nsube", "" },
				1829	{ 0x2289, "nsupE", "" },
				1830	{ 0x2289, "nsupe", "" },
				1831	{ 0x228A, "subne", "" },
				1832	{ 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" },
				1833	{ 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" },
				1834	{ 0x228B, "supnE", "" },
				1835	{ 0x228B, "supne", "" },
				1836	{ 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" },
				1837	{ 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" },
				1838	{ 0x228E, "uplus", "MULTISET UNION" },
				1839	{ 0x228F, "sqsub", "" },
				1840	{ 0x2290, "sqsup", "" },
				1841	{ 0x2291, "sqsube", "" },
				1842	{ 0x2292, "sqsupe", "" },
				1843	{ 0x2293, "sqcap", "SQUARE CAP" },
				1844	{ 0x2294, "sqcup", "SQUARE CUP" },
				1845	{ 0x2295, "oplus", "CIRCLED PLUS" },
				1846	{ 0x2296, "ominus", "CIRCLED MINUS" },
				1847	{ 0x2297, "otimes", "CIRCLED TIMES" },
				1848	{ 0x2298, "osol", "CIRCLED DIVISION SLASH" },
				1849	{ 0x2299, "odot", "CIRCLED DOT OPERATOR" },
				1850	{ 0x229A, "ocir", "CIRCLED RING OPERATOR" },
				1851	{ 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" },
				1852	{ 0x229D, "odash", "CIRCLED DASH" },
				1853	{ 0x229E, "plusb", "SQUARED PLUS" },
				1854	{ 0x229F, "minusb", "SQUARED MINUS" },
				1855	{ 0x22A0, "timesb", "SQUARED TIMES" },
				1856	{ 0x22A1, "sdotb", "SQUARED DOT OPERATOR" },
				1857	{ 0x22A2, "vdash", "" },
				1858	{ 0x22A3, "dashv", "" },
				1859	{ 0x22A4, "top", "DOWN TACK" },
				1860	{ 0x22A5, "bottom", "" },
				1861	{ 0x22A5, "perp", "" },
				1862	{ 0x22A7, "models", "MODELS" },
				1863	{ 0x22A8, "vDash", "" },
				1864	{ 0x22A9, "Vdash", "" },
				1865	{ 0x22AA, "Vvdash", "" },
				1866	{ 0x22AC, "nvdash", "DOES NOT PROVE" },
				1867	{ 0x22AD, "nvDash", "NOT TRUE" },
				1868	{ 0x22AE, "nVdash", "DOES NOT FORCE" },
				1869	{ 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" },
				1870	{ 0x22B2, "vltri", "" },
				1871	{ 0x22B3, "vrtri", "" },
				1872	{ 0x22B4, "ltrie", "" },
				1873	{ 0x22B5, "rtrie", "" },
				1874	{ 0x22B8, "mumap", "MULTIMAP" },
				1875	{ 0x22BA, "intcal", "INTERCALATE" },
				1876	{ 0x22BB, "veebar", "" },
				1877	{ 0x22BC, "barwed", "NAND" },
				1878	{ 0x22C4, "diam", "DIAMOND OPERATOR" },
				1879	{ 0x22C5, "sdot", "DOT OPERATOR" },
				1880	{ 0x22C6, "sstarf", "STAR OPERATOR" },
				1881	{ 0x22C6, "star", "STAR OPERATOR" },
				1882	{ 0x22C7, "divonx", "DIVISION TIMES" },
				1883	{ 0x22C8, "bowtie", "" },
				1884	{ 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" },
				1885	{ 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" },
				1886	{ 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" },
				1887	{ 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" },
				1888	{ 0x22CD, "bsime", "" },
				1889	{ 0x22CE, "cuvee", "CURLY LOGICAL OR" },
				1890	{ 0x22CF, "cuwed", "CURLY LOGICAL AND" },
				1891	{ 0x22D0, "Sub", "" },
				1892	{ 0x22D1, "Sup", "" },
				1893	{ 0x22D2, "Cap", "DOUBLE INTERSECTION" },
				1894	{ 0x22D3, "Cup", "DOUBLE UNION" },
				1895	{ 0x22D4, "fork", "" },
				1896	{ 0x22D6, "ldot", "" },
				1897	{ 0x22D7, "gsdot", "" },
				1898	{ 0x22D8, "Ll", "" },
				1899	{ 0x22D9, "Gg", "VERY MUCH GREATER-THAN" },
				1900	{ 0x22DA, "lEg", "" },
				1901	{ 0x22DA, "leg", "" },
				1902	{ 0x22DB, "gEl", "" },
				1903	{ 0x22DB, "gel", "" },
				1904	{ 0x22DC, "els", "" },
				1905	{ 0x22DD, "egs", "" },
				1906	{ 0x22DE, "cuepr", "" },
				1907	{ 0x22DF, "cuesc", "" },
				1908	{ 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" },
				1909	{ 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" },
				1910	{ 0x22E6, "lnsim", "" },
				1911	{ 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" },
				1912	{ 0x22E8, "prnap", "" },
				1913	{ 0x22E8, "prnsim", "" },
				1914	{ 0x22E9, "scnap", "" },
				1915	{ 0x22E9, "scnsim", "" },
				1916	{ 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" },
				1917	{ 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" },
				1918	{ 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" },
				1919	{ 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" },
				1920	{ 0x22EE, "vellip", "" },
				1921	{ 0x2306, "Barwed", "PERSPECTIVE" },
				1922	{ 0x2308, "lceil", "LEFT CEILING" },
				1923	{ 0x2309, "rceil", "RIGHT CEILING" },
				1924	{ 0x230A, "lfloor", "LEFT FLOOR" },
				1925	{ 0x230B, "rfloor", "RIGHT FLOOR" },
				1926	{ 0x230C, "drcrop", "BOTTOM RIGHT CROP" },
				1927	{ 0x230D, "dlcrop", "BOTTOM LEFT CROP" },
				1928	{ 0x230E, "urcrop", "TOP RIGHT CROP" },
				1929	{ 0x230F, "ulcrop", "TOP LEFT CROP" },
				1930	{ 0x2315, "telrec", "TELEPHONE RECORDER" },
				1931	{ 0x2316, "target", "POSITION INDICATOR" },
				1932	{ 0x231C, "ulcorn", "TOP LEFT CORNER" },
				1933	{ 0x231D, "urcorn", "TOP RIGHT CORNER" },
				1934	{ 0x231E, "dlcorn", "BOTTOM LEFT CORNER" },
				1935	{ 0x231F, "drcorn", "BOTTOM RIGHT CORNER" },
				1936	{ 0x2322, "frown", "" },
				1937	{ 0x2322, "sfrown", "FROWN" },
				1938	{ 0x2323, "smile", "" },
				1939	{ 0x2323, "ssmile", "SMILE" },
				1940	{ 0x2423, "blank", "OPEN BOX" },
				1941	{ 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" },
				1942	{ 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" },
				1943	{ 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" },
				1944	{ 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" },
				1945	{ 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" },
				1946	{ 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" },
				1947	{ 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" },
				1948	{ 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" },
				1949	{ 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" },
				1950	{ 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" },
				1951	{ 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" },
				1952	{ 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" },
				1953	{ 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" },
				1954	{ 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" },
				1955	{ 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" },
				1956	{ 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" },
				1957	{ 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" },
				1958	{ 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" },
				1959	{ 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" },
				1960	{ 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" },
				1961	{ 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" },
				1962	{ 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" },
				1963	{ 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" },
				1964	{ 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" },
				1965	{ 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" },
				1966	{ 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" },
				1967	{ 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE" },
				1968	{ 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" },
				1969	{ 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" },
				1970	{ 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" },
				1971	{ 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" },
				1972	{ 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" },
				1973	{ 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" },
				1974	{ 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" },
				1975	{ 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" },
				1976	{ 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" },
				1977	{ 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" },
				1978	{ 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" },
				1979	{ 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" },
				1980	{ 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" },
				1981	{ 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" },
				1982	{ 0x2580, "uhblk", "UPPER HALF BLOCK" },
				1983	{ 0x2584, "lhblk", "LOWER HALF BLOCK" },
				1984	{ 0x2588, "block", "FULL BLOCK" },
				1985	{ 0x2591, "blk14", "LIGHT SHADE" },
				1986	{ 0x2592, "blk12", "MEDIUM SHADE" },
				1987	{ 0x2593, "blk34", "DARK SHADE" },
				1988	{ 0x25A1, "square", "WHITE SQUARE" },
				1989	{ 0x25A1, "squ", "WHITE SQUARE" },
				1990	{ 0x25AA, "squf", "" },
				1991	{ 0x25AD, "rect", "WHITE RECTANGLE" },
				1992	{ 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" },
				1993	{ 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" },
				1994	{ 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" },
				1995	{ 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" },
				1996	{ 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" },
				1997	{ 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" },
				1998	{ 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" },
				1999	{ 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" },
				2000	{ 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" },
				2001	{ 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" },
				2002	{ 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" },
				2003	{ 0x25CA, "loz", "LOZENGE" },
				2004	{ 0x25CB, "cir", "WHITE CIRCLE" },
				2005	{ 0x25CB, "xcirc", "WHITE CIRCLE" },
				2006	{ 0x2605, "starf", "BLACK STAR" },
				2007	{ 0x260E, "phone", "TELEPHONE SIGN" },
				2008	{ 0x2640, "female", "" },
				2009	{ 0x2642, "male", "MALE SIGN" },
				2010	{ 0x2660, "spades", "BLACK SPADE SUIT" },
				2011	{ 0x2663, "clubs", "BLACK CLUB SUIT" },
				2012	{ 0x2665, "hearts", "BLACK HEART SUIT" },
				2013	{ 0x2666, "diams", "BLACK DIAMOND SUIT" },
				2014	{ 0x2669, "sung", "" },
				2015	{ 0x266D, "flat", "MUSIC FLAT SIGN" },
				2016	{ 0x266E, "natur", "MUSIC NATURAL SIGN" },
				2017	{ 0x266F, "sharp", "MUSIC SHARP SIGN" },
				2018	{ 0x2713, "check", "CHECK MARK" },
				2019	{ 0x2717, "cross", "BALLOT X" },
				2020	{ 0x2720, "malt", "MALTESE CROSS" },
				2021	{ 0x2726, "lozf", "" },
				2022	{ 0x2736, "sext", "SIX POINTED BLACK STAR" },
				2023	{ 0x3008, "lang", "" },
				2024	{ 0x3009, "rang", "" },
				2025	{ 0xE291, "rpargt", "" },
				2026	{ 0xE2A2, "lnap", "" },
				2027	{ 0xE2AA, "nsmid", "" },
				2028	{ 0xE2B3, "prnE", "" },
				2029	{ 0xE2B5, "scnE", "" },
				2030	{ 0xE2B8, "vsubnE", "" },
				2031	{ 0xE301, "smid", "" },
				2032	{ 0xE411, "gnap", "" },
				2033	{ 0xFB00, "fflig", "" },
				2034	{ 0xFB01, "filig", "" },
				2035	{ 0xFB02, "fllig", "" },
				2036	{ 0xFB03, "ffilig", "" },
				2037	{ 0xFB04, "ffllig", "" },
				2038	{ 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" },
				2039	};
				2040
				2041	/************************************************************************
				2042	* *
				2043	* Commodity functions to handle entities *
				2044	* *
				2045	************************************************************************/
				2046
				2047	/*
				2048	* Macro used to grow the current buffer.
				2049	*/
				2050	#define growBuffer(buffer) { \
				2051	buffer##_size *= 2; \
				2052	buffer = (xmlChar ) xmlRealloc(buffer, buffer##_size sizeof(xmlChar)); \
				2053	if (buffer == NULL) { \
				2054	perror("realloc failed"); \
				2055	return(NULL); \
				2056	} \
				2057	}
				2058
				2059	/**
				2060	* docbEntityLookup:
				2061	* @name: the entity name
				2062	*
				2063	* Lookup the given entity in EntitiesTable
				2064	*
				2065	* TODO: the linear scan is really ugly, an hash table is really needed.
				2066	*
				2067	* Returns the associated docbEntityDescPtr if found, NULL otherwise.
				2068	*/
				2069	static docbEntityDescPtr
				2070	docbEntityLookup(const xmlChar *name) {
				2071	unsigned int i;
				2072
				2073	for (i = 0;i < (sizeof(docbookEntitiesTable)/
				2074	sizeof(docbookEntitiesTable[0]));i++) {
				2075	if (xmlStrEqual(name, BAD_CAST docbookEntitiesTable[i].name)) {
				2076	#ifdef DEBUG
				2077	xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", name);
				2078	#endif
				2079	return(&docbookEntitiesTable[i]);
				2080	}
				2081	}
				2082	return(NULL);
				2083	}
				2084
				2085	/**
				2086	* docbEntityValueLookup:
				2087	* @value: the entity's unicode value
				2088	*
				2089	* Lookup the given entity in EntitiesTable
				2090	*
				2091	* TODO: the linear scan is really ugly, an hash table is really needed.
				2092	*
				2093	* Returns the associated docbEntityDescPtr if found, NULL otherwise.
				2094	*/
				2095	static docbEntityDescPtr
				2096	docbEntityValueLookup(int value) {
				2097	unsigned int i;
				2098	#ifdef DEBUG
				2099	int lv = 0;
				2100	#endif
				2101
				2102	for (i = 0;i < (sizeof(docbookEntitiesTable)/
				2103	sizeof(docbookEntitiesTable[0]));i++) {
				2104	if (docbookEntitiesTable[i].value >= value) {
				2105	if (docbookEntitiesTable[i].value > value)
				2106	break;
				2107	#ifdef DEBUG
				2108	xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", docbookEntitiesTable[i].name);
				2109	#endif
				2110	return(&docbookEntitiesTable[i]);
				2111	}
				2112	#ifdef DEBUG
				2113	if (lv > docbookEntitiesTable[i].value) {
				2114	xmlGenericError(xmlGenericErrorContext,
				2115	"docbookEntitiesTable[] is not sorted (%d > %d)!\n",
				2116	lv, docbookEntitiesTable[i].value);
				2117	}
				2118	lv = docbookEntitiesTable[i].value;
				2119	#endif
				2120	}
				2121	return(NULL);
				2122	}
				2123
				2124	#if 0
				2125	/**
				2126	* UTF8ToSgml:
				2127	* @out: a pointer to an array of bytes to store the result
				2128	* @outlen: the length of @out
				2129	* @in: a pointer to an array of UTF-8 chars
				2130	* @inlen: the length of @in
				2131	*
				2132	* Take a block of UTF-8 chars in and try to convert it to an ASCII
				2133	* plus SGML entities block of chars out.
				2134	*
				2135	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
				2136	* The value of @inlen after return is the number of octets consumed
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	2137	* as the return value is positive, else unpredictable.
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2138	* The value of @outlen after return is the number of octets consumed.
				2139	*/
				2140	int
				2141	UTF8ToSgml(unsigned char* out, int *outlen,
				2142	const unsigned char* in, int *inlen) {
				2143	const unsigned char* processed = in;
				2144	const unsigned char* outend;
				2145	const unsigned char* outstart = out;
				2146	const unsigned char* instart = in;
				2147	const unsigned char* inend;
				2148	unsigned int c, d;
				2149	int trailing;
				2150
				2151	if (in == NULL) {
				2152	/*
				2153	* initialization nothing to do
				2154	*/
				2155	*outlen = 0;
				2156	*inlen = 0;
				2157	return(0);
				2158	}
				2159	inend = in + (*inlen);
				2160	outend = out + (*outlen);
				2161	while (in < inend) {
				2162	d = *in++;
				2163	if (d < 0x80) { c= d; trailing= 0; }
				2164	else if (d < 0xC0) {
				2165	/* trailing byte in leading position */
				2166	*outlen = out - outstart;
				2167	*inlen = processed - instart;
				2168	return(-2);
				2169	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				2170	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				2171	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				2172	else {
				2173	/* no chance for this in Ascii */
				2174	*outlen = out - outstart;
				2175	*inlen = processed - instart;
				2176	return(-2);
				2177	}
				2178
				2179	if (inend - in < trailing) {
				2180	break;
				2181	}
				2182
				2183	for ( ; trailing; trailing--) {
				2184	if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80))
				2185	break;
				2186	c <<= 6;
				2187	c \|= d & 0x3F;
				2188	}
				2189
				2190	/* assertion: c is a single UTF-4 value */
				2191	if (c < 0x80) {
				2192	if (out + 1 >= outend)
				2193	break;
				2194	*out++ = c;
				2195	} else {
				2196	int len;
				2197	docbEntityDescPtr ent;
				2198
				2199	/*
				2200	* Try to lookup a predefined SGML entity for it
				2201	*/
				2202
				2203	ent = docbEntityValueLookup(c);
				2204	if (ent == NULL) {
				2205	/* no chance for this in Ascii */
				2206	*outlen = out - outstart;
				2207	*inlen = processed - instart;
				2208	return(-2);
				2209	}
				2210	len = strlen(ent->name);
				2211	if (out + 2 + len >= outend)
				2212	break;
				2213	*out++ = '&';
				2214	memcpy(out, ent->name, len);
				2215	out += len;
				2216	*out++ = ';';
				2217	}
				2218	processed = in;
				2219	}
				2220	*outlen = out - outstart;
				2221	*inlen = processed - instart;
				2222	return(0);
				2223	}
				2224	#endif
				2225
				2226	/**
				2227	* docbEncodeEntities:
				2228	* @out: a pointer to an array of bytes to store the result
				2229	* @outlen: the length of @out
				2230	* @in: a pointer to an array of UTF-8 chars
				2231	* @inlen: the length of @in
				2232	* @quoteChar: the quote character to escape (' or ") or zero.
				2233	*
				2234	* Take a block of UTF-8 chars in and try to convert it to an ASCII
				2235	* plus SGML entities block of chars out.
				2236	*
				2237	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
				2238	* The value of @inlen after return is the number of octets consumed
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	2239	* as the return value is positive, else unpredictable.
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2240	* The value of @outlen after return is the number of octets consumed.
				2241	*/
				2242	int
				2243	docbEncodeEntities(unsigned char* out, int *outlen,
				2244	const unsigned char* in, int *inlen, int quoteChar) {
				2245	const unsigned char* processed = in;
				2246	const unsigned char* outend = out + (*outlen);
				2247	const unsigned char* outstart = out;
				2248	const unsigned char* instart = in;
				2249	const unsigned char* inend = in + (*inlen);
				2250	unsigned int c, d;
				2251	int trailing;
				2252
				2253	while (in < inend) {
				2254	d = *in++;
				2255	if (d < 0x80) { c= d; trailing= 0; }
				2256	else if (d < 0xC0) {
				2257	/* trailing byte in leading position */
				2258	*outlen = out - outstart;
				2259	*inlen = processed - instart;
				2260	return(-2);
				2261	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				2262	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				2263	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				2264	else {
				2265	/* no chance for this in Ascii */
				2266	*outlen = out - outstart;
				2267	*inlen = processed - instart;
				2268	return(-2);
				2269	}
				2270
				2271	if (inend - in < trailing)
				2272	break;
				2273
				2274	while (trailing--) {
				2275	if (((d= *in++) & 0xC0) != 0x80) {
				2276	*outlen = out - outstart;
				2277	*inlen = processed - instart;
				2278	return(-2);
				2279	}
				2280	c <<= 6;
				2281	c \|= d & 0x3F;
				2282	}
				2283
				2284	/* assertion: c is a single UTF-4 value */
				2285	if (c < 0x80 && c != (unsigned int) quoteChar && c != '&' && c != '<' && c != '>') {
				2286	if (out >= outend)
				2287	break;
				2288	*out++ = c;
				2289	} else {
				2290	docbEntityDescPtr ent;
				2291	const char *cp;
				2292	char nbuf[16];
				2293	int len;
				2294
				2295	/*
				2296	* Try to lookup a predefined SGML entity for it
				2297	*/
				2298	ent = docbEntityValueLookup(c);
				2299	if (ent == NULL) {
				2300	sprintf(nbuf, "#%u", c);
				2301	cp = nbuf;
				2302	}
				2303	else
				2304	cp = ent->name;
				2305	len = strlen(cp);
				2306	if (out + 2 + len > outend)
				2307	break;
				2308	*out++ = '&';
				2309	memcpy(out, cp, len);
				2310	out += len;
				2311	*out++ = ';';
				2312	}
				2313	processed = in;
				2314	}
				2315	*outlen = out - outstart;
				2316	*inlen = processed - instart;
				2317	return(0);
				2318	}
				2319
				2320
				2321	/************************************************************************
				2322	* *
				2323	* Commodity functions to handle streams *
				2324	* *
				2325	************************************************************************/
				2326
				2327	/**
				2328	* docbNewInputStream:
				2329	* @ctxt: an SGML parser context
				2330	*
				2331	* Create a new input stream structure
				2332	* Returns the new input stream or NULL
				2333	*/
				2334	static docbParserInputPtr
				2335	docbNewInputStream(docbParserCtxtPtr ctxt) {
				2336	docbParserInputPtr input;
				2337
				2338	input = (xmlParserInputPtr) xmlMalloc(sizeof(docbParserInput));
				2339	if (input == NULL) {
				2340	ctxt->errNo = XML_ERR_NO_MEMORY;
				2341	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2342	ctxt->sax->error(ctxt->userData,
				2343	"malloc: couldn't allocate a new input stream\n");
				2344	return(NULL);
				2345	}
				2346	memset(input, 0, sizeof(docbParserInput));
				2347	input->filename = NULL;
				2348	input->directory = NULL;
				2349	input->base = NULL;
				2350	input->cur = NULL;
				2351	input->buf = NULL;
				2352	input->line = 1;
				2353	input->col = 1;
				2354	input->buf = NULL;
				2355	input->free = NULL;
				2356	input->version = NULL;
				2357	input->consumed = 0;
				2358	input->length = 0;
				2359	return(input);
				2360	}
				2361
				2362
				2363	/************************************************************************
				2364	* *
				2365	* Commodity functions, cleanup needed ? *
				2366	* *
				2367	************************************************************************/
				2368
				2369	/**
				2370	* areBlanks:
				2371	* @ctxt: an SGML parser context
				2372	* @str: a xmlChar *
				2373	* @len: the size of @str
				2374	*
				2375	* Is this a sequence of blank chars that one can ignore ?
				2376	*
				2377	* Returns 1 if ignorable 0 otherwise.
				2378	*/
				2379
				2380	static int areBlanks(docbParserCtxtPtr ctxt, const xmlChar *str, int len) {
				2381	int i;
				2382	xmlNodePtr lastChild;
				2383
				2384	for (i = 0;i < len;i++)
				2385	if (!(IS_BLANK(str[i]))) return(0);
				2386
				2387	if (CUR == 0) return(1);
				2388	if (CUR != '<') return(0);
				2389	if (ctxt->name == NULL)
				2390	return(1);
				2391	if (ctxt->node == NULL) return(0);
				2392	lastChild = xmlGetLastChild(ctxt->node);
				2393	if (lastChild == NULL) {
Daniel Veillard	7db3773	2001-07-12 01:20:08 +0000	[diff] [blame]	2394	if ((ctxt->node->type != XML_ELEMENT_NODE) &&
				2395	(ctxt->node->content != NULL)) return(0);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2396	} else if (xmlNodeIsText(lastChild))
				2397	return(0);
				2398	return(1);
				2399	}
				2400
				2401	/************************************************************************
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2402	* *
				2403	* External entities support *
				2404	* *
				2405	************************************************************************/
				2406
				2407	/**
				2408	* docbParseCtxtExternalEntity:
				2409	* @ctx: the existing parsing context
				2410	* @URL: the URL for the entity to load
				2411	* @ID: the System ID for the entity to load
				2412	* @list: the return value for the set of parsed nodes
				2413	*
				2414	* Parse an external general entity within an existing parsing context
				2415	*
				2416	* Returns 0 if the entity is well formed, -1 in case of args problem and
				2417	* the parser error code otherwise
				2418	*/
				2419
				2420	static int
				2421	docbParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
				2422	const xmlChar ID, xmlNodePtr list) {
				2423	xmlParserCtxtPtr ctxt;
				2424	xmlDocPtr newDoc;
				2425	xmlSAXHandlerPtr oldsax = NULL;
				2426	int ret = 0;
				2427
				2428	if (ctx->depth > 40) {
				2429	return(XML_ERR_ENTITY_LOOP);
				2430	}
				2431
				2432	if (list != NULL)
				2433	*list = NULL;
				2434	if ((URL == NULL) && (ID == NULL))
				2435	return(-1);
				2436	if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
				2437	return(-1);
				2438
				2439
				2440	ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL);
				2441	if (ctxt == NULL) return(-1);
				2442	ctxt->userData = ctxt;
				2443	oldsax = ctxt->sax;
				2444	ctxt->sax = ctx->sax;
				2445	newDoc = xmlNewDoc(BAD_CAST "1.0");
				2446	if (newDoc == NULL) {
				2447	xmlFreeParserCtxt(ctxt);
				2448	return(-1);
				2449	}
				2450	if (ctx->myDoc != NULL) {
				2451	newDoc->intSubset = ctx->myDoc->intSubset;
				2452	newDoc->extSubset = ctx->myDoc->extSubset;
				2453	}
				2454	if (ctx->myDoc->URL != NULL) {
				2455	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
				2456	}
				2457	newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
				2458	if (newDoc->children == NULL) {
				2459	ctxt->sax = oldsax;
				2460	xmlFreeParserCtxt(ctxt);
				2461	newDoc->intSubset = NULL;
				2462	newDoc->extSubset = NULL;
				2463	xmlFreeDoc(newDoc);
				2464	return(-1);
				2465	}
				2466	nodePush(ctxt, newDoc->children);
				2467	if (ctx->myDoc == NULL) {
				2468	ctxt->myDoc = newDoc;
				2469	} else {
				2470	ctxt->myDoc = ctx->myDoc;
				2471	newDoc->children->doc = ctx->myDoc;
				2472	}
				2473
				2474	/*
				2475	* Parse a possible text declaration first
				2476	*/
				2477	GROW;
				2478	if ((RAW == '<') && (NXT(1) == '?') &&
				2479	(NXT(2) == 'x') && (NXT(3) == 'm') &&
				2480	(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
				2481	xmlParseTextDecl(ctxt);
				2482	}
				2483
				2484	/*
				2485	* Doing validity checking on chunk doesn't make sense
				2486	*/
				2487	ctxt->instate = XML_PARSER_CONTENT;
				2488	ctxt->validate = ctx->validate;
				2489	ctxt->loadsubset = ctx->loadsubset;
				2490	ctxt->depth = ctx->depth + 1;
				2491	ctxt->replaceEntities = ctx->replaceEntities;
				2492	if (ctxt->validate) {
				2493	ctxt->vctxt.error = ctx->vctxt.error;
				2494	ctxt->vctxt.warning = ctx->vctxt.warning;
				2495	/* Allocate the Node stack */
				2496	ctxt->vctxt.nodeTab = (xmlNodePtr ) xmlMalloc(4 sizeof(xmlNodePtr));
				2497	if (ctxt->vctxt.nodeTab == NULL) {
				2498	xmlGenericError(xmlGenericErrorContext,
				2499	"docbParseCtxtExternalEntity: out of memory\n");
				2500	ctxt->validate = 0;
				2501	ctxt->vctxt.error = NULL;
				2502	ctxt->vctxt.warning = NULL;
				2503	} else {
				2504	ctxt->vctxt.nodeNr = 0;
				2505	ctxt->vctxt.nodeMax = 4;
				2506	ctxt->vctxt.node = NULL;
				2507	}
				2508	} else {
				2509	ctxt->vctxt.error = NULL;
				2510	ctxt->vctxt.warning = NULL;
				2511	}
				2512
				2513	docbParseContent(ctxt);
				2514
				2515	if ((RAW == '<') && (NXT(1) == '/')) {
				2516	ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
				2517	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2518	ctxt->sax->error(ctxt->userData,
				2519	"chunk is not well balanced\n");
				2520	ctxt->wellFormed = 0;
				2521	ctxt->disableSAX = 1;
				2522	} else if (RAW != 0) {
				2523	ctxt->errNo = XML_ERR_EXTRA_CONTENT;
				2524	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2525	ctxt->sax->error(ctxt->userData,
				2526	"extra content at the end of well balanced chunk\n");
				2527	ctxt->wellFormed = 0;
				2528	ctxt->disableSAX = 1;
				2529	}
				2530	if (ctxt->node != newDoc->children) {
				2531	ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
				2532	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2533	ctxt->sax->error(ctxt->userData,
				2534	"chunk is not well balanced\n");
				2535	ctxt->wellFormed = 0;
				2536	ctxt->disableSAX = 1;
				2537	}
				2538
				2539	if (!ctxt->wellFormed) {
				2540	if (ctxt->errNo == 0)
				2541	ret = 1;
				2542	else
				2543	ret = ctxt->errNo;
				2544	} else {
				2545	if (list != NULL) {
				2546	xmlNodePtr cur;
				2547
				2548	/*
				2549	* Return the newly created nodeset after unlinking it from
				2550	* they pseudo parent.
				2551	*/
				2552	cur = newDoc->children->children;
				2553	*list = cur;
				2554	while (cur != NULL) {
				2555	cur->parent = NULL;
				2556	cur = cur->next;
				2557	}
				2558	newDoc->children->children = NULL;
				2559	}
				2560	ret = 0;
				2561	}
				2562	ctxt->sax = oldsax;
				2563	xmlFreeParserCtxt(ctxt);
				2564	newDoc->intSubset = NULL;
				2565	newDoc->extSubset = NULL;
				2566	xmlFreeDoc(newDoc);
				2567
				2568	return(ret);
				2569	}
				2570
				2571	/************************************************************************
				2572	* *
				2573	* The parser itself *
				2574	* *
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2575	************************************************************************/
				2576
				2577	/**
				2578	* docbParseSGMLName:
				2579	* @ctxt: an SGML parser context
				2580	*
				2581	* parse an SGML tag or attribute name, note that we convert it to lowercase
				2582	* since SGML names are not case-sensitive.
				2583	*
				2584	* Returns the Tag Name parsed or NULL
				2585	*/
				2586
				2587	static xmlChar *
				2588	docbParseSGMLName(docbParserCtxtPtr ctxt) {
				2589	xmlChar *ret = NULL;
				2590	int i = 0;
				2591	xmlChar loc[DOCB_PARSER_BUFFER_SIZE];
				2592
				2593	if (!IS_LETTER(CUR) && (CUR != '_') &&
				2594	(CUR != ':')) return(NULL);
				2595
				2596	while ((i < DOCB_PARSER_BUFFER_SIZE) &&
				2597	((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				2598	(CUR == ':') \|\| (CUR == '_'))) {
				2599	if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
				2600	else loc[i] = CUR;
				2601	i++;
				2602
				2603	NEXT;
				2604	}
				2605
				2606	ret = xmlStrndup(loc, i);
				2607
				2608	return(ret);
				2609	}
				2610
				2611	/**
				2612	* docbParseName:
				2613	* @ctxt: an SGML parser context
				2614	*
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	2615	* parse an SGML name, this routine is case sensitive.
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2616	*
				2617	* Returns the Name parsed or NULL
				2618	*/
				2619
				2620	static xmlChar *
				2621	docbParseName(docbParserCtxtPtr ctxt) {
				2622	xmlChar buf[DOCB_MAX_NAMELEN];
				2623	int len = 0;
				2624
				2625	GROW;
				2626	if (!IS_LETTER(CUR) && (CUR != '_')) {
				2627	return(NULL);
				2628	}
				2629
				2630	while ((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				2631	(CUR == '.') \|\| (CUR == '-') \|\|
				2632	(CUR == '_') \|\| (CUR == ':') \|\|
				2633	(IS_COMBINING(CUR)) \|\|
				2634	(IS_EXTENDER(CUR))) {
				2635	buf[len++] = CUR;
				2636	NEXT;
				2637	if (len >= DOCB_MAX_NAMELEN) {
				2638	xmlGenericError(xmlGenericErrorContext,
				2639	"docbParseName: reached DOCB_MAX_NAMELEN limit\n");
				2640	while ((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				2641	(CUR == '.') \|\| (CUR == '-') \|\|
				2642	(CUR == '_') \|\| (CUR == ':') \|\|
				2643	(IS_COMBINING(CUR)) \|\|
				2644	(IS_EXTENDER(CUR)))
				2645	NEXT;
				2646	break;
				2647	}
				2648	}
				2649	return(xmlStrndup(buf, len));
				2650	}
				2651
				2652	/**
				2653	* docbParseSGMLAttribute:
				2654	* @ctxt: an SGML parser context
				2655	* @stop: a char stop value
				2656	*
				2657	* parse an SGML attribute value till the stop (quote), if
				2658	* stop is 0 then it stops at the first space
				2659	*
				2660	* Returns the attribute parsed or NULL
				2661	*/
				2662
				2663	static xmlChar *
				2664	docbParseSGMLAttribute(docbParserCtxtPtr ctxt, const xmlChar stop) {
				2665	xmlChar *buffer = NULL;
				2666	int buffer_size = 0;
				2667	xmlChar *out = NULL;
				2668	xmlChar *name = NULL;
				2669
				2670	xmlChar *cur = NULL;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2671	xmlEntityPtr xent;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2672	docbEntityDescPtr ent;
				2673
				2674	/*
				2675	* allocate a translation buffer.
				2676	*/
				2677	buffer_size = DOCB_PARSER_BIG_BUFFER_SIZE;
				2678	buffer = (xmlChar ) xmlMalloc(buffer_size sizeof(xmlChar));
				2679	if (buffer == NULL) {
				2680	perror("docbParseSGMLAttribute: malloc failed");
				2681	return(NULL);
				2682	}
				2683	out = buffer;
				2684
				2685	/*
				2686	* Ok loop until we reach one of the ending chars
				2687	*/
				2688	while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
				2689	if ((stop == 0) && (IS_BLANK(CUR))) break;
				2690	if (CUR == '&') {
				2691	if (NXT(1) == '#') {
				2692	unsigned int c;
				2693	int bits;
				2694
				2695	c = docbParseCharRef(ctxt);
				2696	if (c < 0x80)
				2697	{ *out++ = c; bits= -6; }
				2698	else if (c < 0x800)
				2699	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				2700	else if (c < 0x10000)
				2701	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				2702	else
				2703	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				2704
				2705	for ( ; bits >= 0; bits-= 6) {
				2706	*out++ = ((c >> bits) & 0x3F) \| 0x80;
				2707	}
				2708	} else {
Daniel Veillard	c057c5d	2001-05-02 12:41:24 +0000	[diff] [blame]	2709	xent = docbParseEntityRef(ctxt, &name);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2710	if (name == NULL) {
				2711	*out++ = '&';
				2712	if (out - buffer > buffer_size - 100) {
				2713	int indx = out - buffer;
				2714
				2715	growBuffer(buffer);
				2716	out = &buffer[indx];
				2717	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2718	*out++ = '&';
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2719	} else {
Daniel Veillard	c057c5d	2001-05-02 12:41:24 +0000	[diff] [blame]	2720	ent = docbEntityLookup(name);
				2721	if (ent == NULL) {
				2722	*out++ = '&';
				2723	cur = name;
				2724	while (*cur != 0) {
				2725	if (out - buffer > buffer_size - 100) {
				2726	int indx = out - buffer;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2727
Daniel Veillard	c057c5d	2001-05-02 12:41:24 +0000	[diff] [blame]	2728	growBuffer(buffer);
				2729	out = &buffer[indx];
				2730	}
				2731	out++ = cur++;
				2732	}
				2733	xmlFree(name);
				2734	} else {
				2735	unsigned int c;
				2736	int bits;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2737
Daniel Veillard	c057c5d	2001-05-02 12:41:24 +0000	[diff] [blame]	2738	if (out - buffer > buffer_size - 100) {
				2739	int indx = out - buffer;
				2740
				2741	growBuffer(buffer);
				2742	out = &buffer[indx];
				2743	}
				2744	c = (xmlChar)ent->value;
				2745	if (c < 0x80)
				2746	{ *out++ = c; bits= -6; }
				2747	else if (c < 0x800)
				2748	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				2749	else if (c < 0x10000)
				2750	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				2751	else
				2752	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				2753
				2754	for ( ; bits >= 0; bits-= 6) {
				2755	*out++ = ((c >> bits) & 0x3F) \| 0x80;
				2756	}
				2757	xmlFree(name);
				2758	}
				2759	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2760	}
				2761	} else {
				2762	unsigned int c;
				2763	int bits;
				2764
				2765	if (out - buffer > buffer_size - 100) {
				2766	int indx = out - buffer;
				2767
				2768	growBuffer(buffer);
				2769	out = &buffer[indx];
				2770	}
				2771	c = CUR;
				2772	if (c < 0x80)
				2773	{ *out++ = c; bits= -6; }
				2774	else if (c < 0x800)
				2775	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				2776	else if (c < 0x10000)
				2777	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				2778	else
				2779	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				2780
				2781	for ( ; bits >= 0; bits-= 6) {
				2782	*out++ = ((c >> bits) & 0x3F) \| 0x80;
				2783	}
				2784	NEXT;
				2785	}
				2786	}
				2787	*out++ = 0;
				2788	return(buffer);
				2789	}
				2790
				2791
				2792	/**
				2793	* docbParseEntityRef:
				2794	* @ctxt: an SGML parser context
				2795	* @str: location to store the entity name
				2796	*
				2797	* parse an SGML ENTITY references
				2798	*
				2799	* [68] EntityRef ::= '&' Name ';'
				2800	*
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2801	* Returns the associated xmlEntityPtr if found, or NULL otherwise,
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2802	* if non-NULL *str will have to be freed by the caller.
				2803	*/
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2804	static xmlEntityPtr
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2805	docbParseEntityRef(docbParserCtxtPtr ctxt, xmlChar **str) {
				2806	xmlChar *name;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2807	xmlEntityPtr ent = NULL;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2808	*str = NULL;
				2809
				2810	if (CUR == '&') {
				2811	NEXT;
				2812	name = docbParseName(ctxt);
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2813	if (name == NULL) {
				2814	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2815	ctxt->sax->error(ctxt->userData,
				2816	"docbParseEntityRef: no name\n");
				2817	ctxt->wellFormed = 0;
				2818	} else {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2819	GROW;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2820	if (CUR == ';') {
				2821	*str = name;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2822
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2823	/*
				2824	* Ask first SAX for entity resolution, otherwise try the
				2825	* predefined set.
				2826	*/
				2827	if (ctxt->sax != NULL) {
				2828	if (ctxt->sax->getEntity != NULL)
				2829	ent = ctxt->sax->getEntity(ctxt->userData, name);
				2830	if (ent == NULL)
				2831	ent = xmlGetPredefinedEntity(name);
				2832	}
				2833	NEXT;
				2834	} else {
				2835	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2836	ctxt->sax->error(ctxt->userData,
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2837	"docbParseEntityRef: expecting ';'\n");
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	2838	*str = name;
				2839	}
				2840	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	2841	}
				2842	return(ent);
				2843	}
				2844
				2845	/**
				2846	* docbParseAttValue:
				2847	* @ctxt: an SGML parser context
				2848	*
				2849	* parse a value for an attribute
				2850	* Note: the parser won't do substitution of entities here, this
				2851	* will be handled later in xmlStringGetNodeList, unless it was
				2852	* asked for ctxt->replaceEntities != 0
				2853	*
				2854	* Returns the AttValue parsed or NULL.
				2855	*/
				2856
				2857	static xmlChar *
				2858	docbParseAttValue(docbParserCtxtPtr ctxt) {
				2859	xmlChar *ret = NULL;
				2860
				2861	if (CUR == '"') {
				2862	NEXT;
				2863	ret = docbParseSGMLAttribute(ctxt, '"');
				2864	if (CUR != '"') {
				2865	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2866	ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
				2867	ctxt->wellFormed = 0;
				2868	} else
				2869	NEXT;
				2870	} else if (CUR == '\'') {
				2871	NEXT;
				2872	ret = docbParseSGMLAttribute(ctxt, '\'');
				2873	if (CUR != '\'') {
				2874	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2875	ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
				2876	ctxt->wellFormed = 0;
				2877	} else
				2878	NEXT;
				2879	} else {
				2880	/*
				2881	* That's an SGMLism, the attribute value may not be quoted
				2882	*/
				2883	ret = docbParseSGMLAttribute(ctxt, 0);
				2884	if (ret == NULL) {
				2885	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2886	ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");
				2887	ctxt->wellFormed = 0;
				2888	}
				2889	}
				2890	return(ret);
				2891	}
				2892
				2893	/**
				2894	* docbParseSystemLiteral:
				2895	* @ctxt: an SGML parser context
				2896	*
				2897	* parse an SGML Literal
				2898	*
				2899	* [11] SystemLiteral ::= ('"' [^"]* '"') \| ("'" [^']* "'")
				2900	*
				2901	* Returns the SystemLiteral parsed or NULL
				2902	*/
				2903
				2904	static xmlChar *
				2905	docbParseSystemLiteral(docbParserCtxtPtr ctxt) {
				2906	const xmlChar *q;
				2907	xmlChar *ret = NULL;
				2908
				2909	if (CUR == '"') {
				2910	NEXT;
				2911	q = CUR_PTR;
				2912	while ((IS_CHAR(CUR)) && (CUR != '"'))
				2913	NEXT;
				2914	if (!IS_CHAR(CUR)) {
				2915	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2916	ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
				2917	ctxt->wellFormed = 0;
				2918	} else {
				2919	ret = xmlStrndup(q, CUR_PTR - q);
				2920	NEXT;
				2921	}
				2922	} else if (CUR == '\'') {
				2923	NEXT;
				2924	q = CUR_PTR;
				2925	while ((IS_CHAR(CUR)) && (CUR != '\''))
				2926	NEXT;
				2927	if (!IS_CHAR(CUR)) {
				2928	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2929	ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
				2930	ctxt->wellFormed = 0;
				2931	} else {
				2932	ret = xmlStrndup(q, CUR_PTR - q);
				2933	NEXT;
				2934	}
				2935	} else {
				2936	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2937	ctxt->sax->error(ctxt->userData,
				2938	"SystemLiteral \" or ' expected\n");
				2939	ctxt->wellFormed = 0;
				2940	}
				2941
				2942	return(ret);
				2943	}
				2944
				2945	/**
				2946	* docbParsePubidLiteral:
				2947	* @ctxt: an SGML parser context
				2948	*
				2949	* parse an SGML public literal
				2950	*
				2951	* [12] PubidLiteral ::= '"' PubidChar* '"' \| "'" (PubidChar - "'")* "'"
				2952	*
				2953	* Returns the PubidLiteral parsed or NULL.
				2954	*/
				2955
				2956	static xmlChar *
				2957	docbParsePubidLiteral(docbParserCtxtPtr ctxt) {
				2958	const xmlChar *q;
				2959	xmlChar *ret = NULL;
				2960	/*
				2961	* Name ::= (Letter \| '_') (NameChar)*
				2962	*/
				2963	if (CUR == '"') {
				2964	NEXT;
				2965	q = CUR_PTR;
				2966	while (IS_PUBIDCHAR(CUR)) NEXT;
				2967	if (CUR != '"') {
				2968	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2969	ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
				2970	ctxt->wellFormed = 0;
				2971	} else {
				2972	ret = xmlStrndup(q, CUR_PTR - q);
				2973	NEXT;
				2974	}
				2975	} else if (CUR == '\'') {
				2976	NEXT;
				2977	q = CUR_PTR;
				2978	while ((IS_LETTER(CUR)) && (CUR != '\''))
				2979	NEXT;
				2980	if (!IS_LETTER(CUR)) {
				2981	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2982	ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
				2983	ctxt->wellFormed = 0;
				2984	} else {
				2985	ret = xmlStrndup(q, CUR_PTR - q);
				2986	NEXT;
				2987	}
				2988	} else {
				2989	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				2990	ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
				2991	ctxt->wellFormed = 0;
				2992	}
				2993
				2994	return(ret);
				2995	}
				2996
				2997	/**
				2998	* docbParseCharData:
				2999	* @ctxt: an SGML parser context
				3000	* @cdata: int indicating whether we are within a CDATA section
				3001	*
				3002	* parse a CharData section.
				3003	* if we are within a CDATA section ']]>' marks an end of section.
				3004	*
				3005	* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
				3006	*/
				3007
				3008	static void
				3009	docbParseCharData(docbParserCtxtPtr ctxt) {
				3010	xmlChar buf[DOCB_PARSER_BIG_BUFFER_SIZE + 5];
				3011	int nbchar = 0;
				3012	int cur, l;
				3013
				3014	SHRINK;
				3015	cur = CUR_CHAR(l);
				3016	while (((cur != '<') \|\| (ctxt->token == '<')) &&
				3017	((cur != '&') \|\| (ctxt->token == '&')) &&
				3018	(IS_CHAR(cur))) {
				3019	COPY_BUF(l,buf,nbchar,cur);
				3020	if (nbchar >= DOCB_PARSER_BIG_BUFFER_SIZE) {
				3021	/*
				3022	* Ok the segment is to be consumed as chars.
				3023	*/
				3024	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
				3025	if (areBlanks(ctxt, buf, nbchar)) {
				3026	if (ctxt->sax->ignorableWhitespace != NULL)
				3027	ctxt->sax->ignorableWhitespace(ctxt->userData,
				3028	buf, nbchar);
				3029	} else {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3030	if (ctxt->sax->characters != NULL)
				3031	ctxt->sax->characters(ctxt->userData, buf, nbchar);
				3032	}
				3033	}
				3034	nbchar = 0;
				3035	}
				3036	NEXTL(l);
				3037	cur = CUR_CHAR(l);
				3038	}
				3039	if (nbchar != 0) {
				3040	/*
				3041	* Ok the segment is to be consumed as chars.
				3042	*/
				3043	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
				3044	if (areBlanks(ctxt, buf, nbchar)) {
				3045	if (ctxt->sax->ignorableWhitespace != NULL)
				3046	ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
				3047	} else {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3048	if (ctxt->sax->characters != NULL)
				3049	ctxt->sax->characters(ctxt->userData, buf, nbchar);
				3050	}
				3051	}
				3052	}
				3053	}
				3054
				3055	/**
				3056	* docbParseExternalID:
				3057	* @ctxt: an SGML parser context
				3058	* @publicID: a xmlChar** receiving PubidLiteral
				3059	*
				3060	* Parse an External ID or a Public ID
				3061	*
				3062	* Returns the function returns SystemLiteral and in the second
				3063	* case publicID receives PubidLiteral,
				3064	* it is possible to return NULL and have publicID set.
				3065	*/
				3066
				3067	static xmlChar *
				3068	docbParseExternalID(docbParserCtxtPtr ctxt, xmlChar **publicID) {
				3069	xmlChar *URI = NULL;
				3070
				3071	if ((UPPER == 'S') && (UPP(1) == 'Y') &&
				3072	(UPP(2) == 'S') && (UPP(3) == 'T') &&
				3073	(UPP(4) == 'E') && (UPP(5) == 'M')) {
				3074	SKIP(6);
				3075	if (!IS_BLANK(CUR)) {
				3076	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3077	ctxt->sax->error(ctxt->userData,
				3078	"Space required after 'SYSTEM'\n");
				3079	ctxt->wellFormed = 0;
				3080	}
				3081	SKIP_BLANKS;
				3082	URI = docbParseSystemLiteral(ctxt);
				3083	if (URI == NULL) {
				3084	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3085	ctxt->sax->error(ctxt->userData,
				3086	"docbParseExternalID: SYSTEM, no URI\n");
				3087	ctxt->wellFormed = 0;
				3088	}
				3089	} else if ((UPPER == 'P') && (UPP(1) == 'U') &&
				3090	(UPP(2) == 'B') && (UPP(3) == 'L') &&
				3091	(UPP(4) == 'I') && (UPP(5) == 'C')) {
				3092	SKIP(6);
				3093	if (!IS_BLANK(CUR)) {
				3094	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3095	ctxt->sax->error(ctxt->userData,
				3096	"Space required after 'PUBLIC'\n");
				3097	ctxt->wellFormed = 0;
				3098	}
				3099	SKIP_BLANKS;
				3100	*publicID = docbParsePubidLiteral(ctxt);
				3101	if (*publicID == NULL) {
				3102	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3103	ctxt->sax->error(ctxt->userData,
				3104	"docbParseExternalID: PUBLIC, no Public Identifier\n");
				3105	ctxt->wellFormed = 0;
				3106	}
				3107	SKIP_BLANKS;
				3108	if ((CUR == '"') \|\| (CUR == '\'')) {
				3109	URI = docbParseSystemLiteral(ctxt);
				3110	}
				3111	}
				3112	return(URI);
				3113	}
				3114
				3115	/**
Daniel Veillard	e95e239	2001-06-06 10:46:28 +0000	[diff] [blame]	3116	* docbParsePI:
				3117	* @ctxt: an XML parser context
				3118	*
				3119	* parse an XML Processing Instruction.
				3120	*
				3121	* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
				3122	*
				3123	* The processing is transfered to SAX once parsed.
				3124	*/
				3125
				3126	static void
				3127	docbParsePI(xmlParserCtxtPtr ctxt) {
				3128	xmlChar *buf = NULL;
				3129	int len = 0;
				3130	int size = DOCB_PARSER_BUFFER_SIZE;
				3131	int cur, l;
				3132	xmlChar *target;
				3133	xmlParserInputState state;
				3134	int count = 0;
				3135
				3136	if ((RAW == '<') && (NXT(1) == '?')) {
				3137	xmlParserInputPtr input = ctxt->input;
				3138	state = ctxt->instate;
				3139	ctxt->instate = XML_PARSER_PI;
				3140	/*
				3141	* this is a Processing Instruction.
				3142	*/
				3143	SKIP(2);
				3144	SHRINK;
				3145
				3146	/*
				3147	* Parse the target name and check for special support like
				3148	* namespace.
				3149	*/
				3150	target = xmlParseName(ctxt);
				3151	if (target != NULL) {
				3152	xmlChar *encoding = NULL;
				3153
				3154	if ((RAW == '?') && (NXT(1) == '>')) {
				3155	if (input != ctxt->input) {
				3156	ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
				3157	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3158	ctxt->sax->error(ctxt->userData,
				3159	"PI declaration doesn't start and stop in the same entity\n");
				3160	ctxt->wellFormed = 0;
				3161	ctxt->disableSAX = 1;
				3162	}
				3163	SKIP(2);
				3164
				3165	/*
				3166	* SAX: PI detected.
				3167	*/
				3168	if ((ctxt->sax) && (!ctxt->disableSAX) &&
				3169	(ctxt->sax->processingInstruction != NULL))
				3170	ctxt->sax->processingInstruction(ctxt->userData,
				3171	target, NULL);
				3172	ctxt->instate = state;
				3173	xmlFree(target);
				3174	return;
				3175	}
				3176	if (xmlStrEqual(target, BAD_CAST "sgml-declaration")) {
				3177
				3178	encoding = xmlParseEncodingDecl(ctxt);
				3179	if (encoding == NULL) {
				3180	xmlGenericError(xmlGenericErrorContext,
				3181	"sgml-declaration: failed to find/handle encoding\n");
				3182	#ifdef DEBUG
				3183	} else {
				3184	xmlGenericError(xmlGenericErrorContext,
				3185	"switched to encoding %s\n", encoding);
				3186	#endif
				3187	}
				3188
				3189	}
				3190	buf = (xmlChar ) xmlMalloc(size sizeof(xmlChar));
				3191	if (buf == NULL) {
				3192	xmlGenericError(xmlGenericErrorContext,
				3193	"malloc of %d byte failed\n", size);
				3194	ctxt->instate = state;
				3195	return;
				3196	}
				3197	cur = CUR;
				3198	if (encoding != NULL) {
				3199	len = snprintf((char *) buf, size - 1,
				3200	" encoding = \"%s\"", encoding);
				3201	if (len < 0)
				3202	len = size;
				3203	} else {
				3204	if (!IS_BLANK(cur)) {
				3205	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				3206	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3207	ctxt->sax->error(ctxt->userData,
				3208	"docbParsePI: PI %s space expected\n", target);
				3209	ctxt->wellFormed = 0;
				3210	ctxt->disableSAX = 1;
				3211	}
				3212	SKIP_BLANKS;
				3213	}
				3214	cur = CUR_CHAR(l);
				3215	while (IS_CHAR(cur) && /* checked */
				3216	((cur != '?') \|\| (NXT(1) != '>'))) {
				3217	if (len + 5 >= size) {
				3218	size *= 2;
				3219	buf = (xmlChar ) xmlRealloc(buf, size sizeof(xmlChar));
				3220	if (buf == NULL) {
				3221	xmlGenericError(xmlGenericErrorContext,
				3222	"realloc of %d byte failed\n", size);
				3223	ctxt->instate = state;
				3224	return;
				3225	}
				3226	}
				3227	count++;
				3228	if (count > 50) {
				3229	GROW;
				3230	count = 0;
				3231	}
				3232	COPY_BUF(l,buf,len,cur);
				3233	NEXTL(l);
				3234	cur = CUR_CHAR(l);
				3235	if (cur == 0) {
				3236	SHRINK;
				3237	GROW;
				3238	cur = CUR_CHAR(l);
				3239	}
				3240	}
				3241	buf[len] = 0;
				3242	if (cur != '?') {
				3243	ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
				3244	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3245	ctxt->sax->error(ctxt->userData,
				3246	"docbParsePI: PI %s never end ...\n", target);
				3247	ctxt->wellFormed = 0;
				3248	ctxt->disableSAX = 1;
				3249	} else {
				3250	if (input != ctxt->input) {
				3251	ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
				3252	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3253	ctxt->sax->error(ctxt->userData,
				3254	"PI declaration doesn't start and stop in the same entity\n");
				3255	ctxt->wellFormed = 0;
				3256	ctxt->disableSAX = 1;
				3257	}
				3258	SKIP(2);
				3259
				3260	/*
				3261	* SAX: PI detected.
				3262	*/
				3263	if ((ctxt->sax) && (!ctxt->disableSAX) &&
				3264	(ctxt->sax->processingInstruction != NULL))
				3265	ctxt->sax->processingInstruction(ctxt->userData,
				3266	target, buf);
				3267	}
				3268	xmlFree(buf);
				3269	xmlFree(target);
				3270	} else {
				3271	ctxt->errNo = XML_ERR_PI_NOT_STARTED;
				3272	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3273	ctxt->sax->error(ctxt->userData,
				3274	"docbParsePI : no target name\n");
				3275	ctxt->wellFormed = 0;
				3276	ctxt->disableSAX = 1;
				3277	}
				3278	ctxt->instate = state;
				3279	}
				3280	}
				3281
				3282	/**
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3283	* docbParseComment:
				3284	* @ctxt: an SGML parser context
				3285	*
				3286	* Parse an XML (SGML) comment <!-- .... -->
				3287	*
				3288	* [15] Comment ::= '<!--' ((Char - '-') \| ('-' (Char - '-')))* '-->'
				3289	*/
				3290	static void
				3291	docbParseComment(docbParserCtxtPtr ctxt) {
				3292	xmlChar *buf = NULL;
				3293	int len;
				3294	int size = DOCB_PARSER_BUFFER_SIZE;
				3295	int q, ql;
				3296	int r, rl;
				3297	int cur, l;
				3298	xmlParserInputState state;
				3299
				3300	/*
				3301	* Check that there is a comment right here.
				3302	*/
				3303	if ((RAW != '<') \|\| (NXT(1) != '!') \|\|
				3304	(NXT(2) != '-') \|\| (NXT(3) != '-')) return;
				3305
				3306	state = ctxt->instate;
				3307	ctxt->instate = XML_PARSER_COMMENT;
				3308	SHRINK;
				3309	SKIP(4);
				3310	buf = (xmlChar ) xmlMalloc(size sizeof(xmlChar));
				3311	if (buf == NULL) {
				3312	xmlGenericError(xmlGenericErrorContext,
				3313	"malloc of %d byte failed\n", size);
				3314	ctxt->instate = state;
				3315	return;
				3316	}
				3317	q = CUR_CHAR(ql);
				3318	NEXTL(ql);
				3319	r = CUR_CHAR(rl);
				3320	NEXTL(rl);
				3321	cur = CUR_CHAR(l);
				3322	len = 0;
				3323	while (IS_CHAR(cur) &&
				3324	((cur != '>') \|\|
				3325	(r != '-') \|\| (q != '-'))) {
				3326	if (len + 5 >= size) {
				3327	size *= 2;
				3328	buf = (xmlChar ) xmlRealloc(buf, size sizeof(xmlChar));
				3329	if (buf == NULL) {
				3330	xmlGenericError(xmlGenericErrorContext,
				3331	"realloc of %d byte failed\n", size);
				3332	ctxt->instate = state;
				3333	return;
				3334	}
				3335	}
				3336	COPY_BUF(ql,buf,len,q);
				3337	q = r;
				3338	ql = rl;
				3339	r = cur;
				3340	rl = l;
				3341	NEXTL(l);
				3342	cur = CUR_CHAR(l);
				3343	if (cur == 0) {
				3344	SHRINK;
				3345	GROW;
				3346	cur = CUR_CHAR(l);
				3347	}
				3348	}
				3349	buf[len] = 0;
				3350	if (!IS_CHAR(cur)) {
				3351	ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
				3352	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3353	ctxt->sax->error(ctxt->userData,
				3354	"Comment not terminated \n<!--%.50s\n", buf);
				3355	ctxt->wellFormed = 0;
				3356	xmlFree(buf);
				3357	} else {
				3358	NEXT;
				3359	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
				3360	(!ctxt->disableSAX))
				3361	ctxt->sax->comment(ctxt->userData, buf);
				3362	xmlFree(buf);
				3363	}
				3364	ctxt->instate = state;
				3365	}
				3366
				3367	/**
				3368	* docbParseCharRef:
				3369	* @ctxt: an SGML parser context
				3370	*
				3371	* parse Reference declarations
				3372	*
				3373	* [66] CharRef ::= '&#' [0-9]+ ';' \|
				3374	* '&#x' [0-9a-fA-F]+ ';'
				3375	*
				3376	* Returns the value parsed (as an int)
				3377	*/
				3378	static int
				3379	docbParseCharRef(docbParserCtxtPtr ctxt) {
				3380	int val = 0;
				3381
				3382	if ((CUR == '&') && (NXT(1) == '#') &&
				3383	(NXT(2) == 'x')) {
				3384	SKIP(3);
				3385	while (CUR != ';') {
				3386	if ((CUR >= '0') && (CUR <= '9'))
				3387	val = val * 16 + (CUR - '0');
				3388	else if ((CUR >= 'a') && (CUR <= 'f'))
				3389	val = val * 16 + (CUR - 'a') + 10;
				3390	else if ((CUR >= 'A') && (CUR <= 'F'))
				3391	val = val * 16 + (CUR - 'A') + 10;
				3392	else {
				3393	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3394	ctxt->sax->error(ctxt->userData,
				3395	"docbParseCharRef: invalid hexadecimal value\n");
				3396	ctxt->wellFormed = 0;
				3397	val = 0;
				3398	break;
				3399	}
				3400	NEXT;
				3401	}
				3402	if (CUR == ';')
				3403	NEXT;
				3404	} else if ((CUR == '&') && (NXT(1) == '#')) {
				3405	SKIP(2);
				3406	while (CUR != ';') {
				3407	if ((CUR >= '0') && (CUR <= '9'))
				3408	val = val * 10 + (CUR - '0');
				3409	else {
				3410	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3411	ctxt->sax->error(ctxt->userData,
				3412	"docbParseCharRef: invalid decimal value\n");
				3413	ctxt->wellFormed = 0;
				3414	val = 0;
				3415	break;
				3416	}
				3417	NEXT;
				3418	}
				3419	if (CUR == ';')
				3420	NEXT;
				3421	} else {
				3422	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3423	ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid value\n");
				3424	ctxt->wellFormed = 0;
				3425	}
				3426	/*
				3427	* Check the value IS_CHAR ...
				3428	*/
				3429	if (IS_CHAR(val)) {
				3430	return(val);
				3431	} else {
				3432	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3433	ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid xmlChar value %d\n",
				3434	val);
				3435	ctxt->wellFormed = 0;
				3436	}
				3437	return(0);
				3438	}
				3439
				3440
				3441	/**
				3442	* docbParseDocTypeDecl :
				3443	* @ctxt: an SGML parser context
				3444	*
				3445	* parse a DOCTYPE declaration
				3446	*
				3447	* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
				3448	* ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'
				3449	*/
				3450
				3451	static void
				3452	docbParseDocTypeDecl(docbParserCtxtPtr ctxt) {
				3453	xmlChar *name;
				3454	xmlChar *ExternalID = NULL;
				3455	xmlChar *URI = NULL;
				3456
				3457	/*
				3458	* We know that '<!DOCTYPE' has been detected.
				3459	*/
				3460	SKIP(9);
				3461
				3462	SKIP_BLANKS;
				3463
				3464	/*
				3465	* Parse the DOCTYPE name.
				3466	*/
				3467	name = docbParseName(ctxt);
				3468	if (name == NULL) {
				3469	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3470	ctxt->sax->error(ctxt->userData, "docbParseDocTypeDecl : no DOCTYPE name !\n");
				3471	ctxt->wellFormed = 0;
				3472	}
				3473	/*
				3474	* Check that upper(name) == "SGML" !!!!!!!!!!!!!
				3475	*/
				3476
				3477	SKIP_BLANKS;
				3478
				3479	/*
				3480	* Check for SystemID and ExternalID
				3481	*/
				3482	URI = docbParseExternalID(ctxt, &ExternalID);
				3483	SKIP_BLANKS;
				3484
				3485	/*
				3486	* Create or update the document accordingly to the DOCTYPE
Daniel Veillard	89cad53	2001-10-22 09:46:13 +0000	[diff] [blame]	3487	* But use the predefined PUBLIC and SYSTEM ID of DocBook XML
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3488	*/
				3489	if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
				3490	(!ctxt->disableSAX))
Daniel Veillard	89cad53	2001-10-22 09:46:13 +0000	[diff] [blame]	3491	ctxt->sax->internalSubset(ctxt->userData, name,
				3492	XML_DOCBOOK_XML_PUBLIC,
				3493	XML_DOCBOOK_XML_SYSTEM);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3494
Daniel Veillard	89cad53	2001-10-22 09:46:13 +0000	[diff] [blame]	3495	if (RAW != '>') {
				3496	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3497	ctxt->sax->error(ctxt->userData,
				3498	"docbParseDocTypeDecl : internal subset not handled\n");
				3499	} else {
				3500	NEXT;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3501	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3502
				3503	/*
				3504	* Cleanup, since we don't use all those identifiers
				3505	*/
				3506	if (URI != NULL) xmlFree(URI);
				3507	if (ExternalID != NULL) xmlFree(ExternalID);
				3508	if (name != NULL) xmlFree(name);
				3509	}
				3510
				3511	/**
				3512	* docbParseAttribute:
				3513	* @ctxt: an SGML parser context
				3514	* @value: a xmlChar ** used to store the value of the attribute
				3515	*
				3516	* parse an attribute
				3517	*
				3518	* [41] Attribute ::= Name Eq AttValue
				3519	*
				3520	* [25] Eq ::= S? '=' S?
				3521	*
				3522	* With namespace:
				3523	*
				3524	* [NS 11] Attribute ::= QName Eq AttValue
				3525	*
				3526	* Also the case QName == xmlns:??? is handled independently as a namespace
				3527	* definition.
				3528	*
				3529	* Returns the attribute name, and the value in *value.
				3530	*/
				3531
				3532	static xmlChar *
				3533	docbParseAttribute(docbParserCtxtPtr ctxt, xmlChar **value) {
				3534	xmlChar name, val = NULL;
				3535
				3536	*value = NULL;
				3537	name = docbParseName(ctxt);
				3538	if (name == NULL) {
				3539	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3540	ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
				3541	ctxt->wellFormed = 0;
				3542	return(NULL);
				3543	}
				3544
				3545	/*
				3546	* read the value
				3547	*/
				3548	SKIP_BLANKS;
				3549	if (CUR == '=') {
				3550	NEXT;
				3551	SKIP_BLANKS;
				3552	val = docbParseAttValue(ctxt);
				3553	/******
				3554	} else {
				3555	* TODO : some attribute must have values, some may not
				3556	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3557	ctxt->sax->warning(ctxt->userData,
				3558	"No value for attribute %s\n", name); */
				3559	}
				3560
				3561	*value = val;
				3562	return(name);
				3563	}
				3564
				3565	/**
				3566	* docbCheckEncoding:
				3567	* @ctxt: an SGML parser context
				3568	* @attvalue: the attribute value
				3569	*
				3570	* Checks an http-equiv attribute from a Meta tag to detect
				3571	* the encoding
				3572	* If a new encoding is detected the parser is switched to decode
				3573	* it and pass UTF8
				3574	*/
				3575	static void
				3576	docbCheckEncoding(docbParserCtxtPtr ctxt, const xmlChar *attvalue) {
				3577	const xmlChar *encoding;
				3578
				3579	if ((ctxt == NULL) \|\| (attvalue == NULL))
				3580	return;
				3581
				3582	encoding = xmlStrstr(attvalue, BAD_CAST"charset=");
				3583	if (encoding == NULL)
				3584	encoding = xmlStrstr(attvalue, BAD_CAST"Charset=");
				3585	if (encoding == NULL)
				3586	encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET=");
				3587	if (encoding != NULL) {
				3588	encoding += 8;
				3589	} else {
				3590	encoding = xmlStrstr(attvalue, BAD_CAST"charset =");
				3591	if (encoding == NULL)
				3592	encoding = xmlStrstr(attvalue, BAD_CAST"Charset =");
				3593	if (encoding == NULL)
				3594	encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET =");
				3595	if (encoding != NULL)
				3596	encoding += 9;
				3597	}
				3598	/*
				3599	* Restricted from 2.3.5 */
				3600	if (encoding != NULL) {
				3601	xmlCharEncoding enc;
				3602
				3603	if (ctxt->input->encoding != NULL)
				3604	xmlFree((xmlChar *) ctxt->input->encoding);
				3605	ctxt->input->encoding = encoding;
				3606
				3607	enc = xmlParseCharEncoding((const char *) encoding);
				3608	if (enc == XML_CHAR_ENCODING_8859_1) {
				3609	ctxt->charset = XML_CHAR_ENCODING_8859_1;
				3610	} else if (enc != XML_CHAR_ENCODING_UTF8) {
				3611	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3612	ctxt->sax->error(ctxt->userData,
				3613	"Unsupported encoding %s\n", encoding);
				3614	/* xmlFree(encoding); */
				3615	ctxt->wellFormed = 0;
				3616	ctxt->disableSAX = 1;
				3617	ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
				3618	}
				3619	}
				3620	}
				3621
				3622	/**
				3623	* docbCheckMeta:
				3624	* @ctxt: an SGML parser context
				3625	* @atts: the attributes values
				3626	*
				3627	* Checks an attributes from a Meta tag
				3628	*/
				3629	static void
				3630	docbCheckMeta(docbParserCtxtPtr ctxt, const xmlChar **atts) {
				3631	int i;
				3632	const xmlChar att, value;
				3633	int http = 0;
				3634	const xmlChar *content = NULL;
				3635
				3636	if ((ctxt == NULL) \|\| (atts == NULL))
				3637	return;
				3638
				3639	i = 0;
				3640	att = atts[i++];
				3641	while (att != NULL) {
				3642	value = atts[i++];
				3643	if ((value != NULL) &&
				3644	((xmlStrEqual(att, BAD_CAST"http-equiv")) \|\|
				3645	(xmlStrEqual(att, BAD_CAST"Http-Equiv")) \|\|
				3646	(xmlStrEqual(att, BAD_CAST"HTTP-EQUIV"))) &&
				3647	((xmlStrEqual(value, BAD_CAST"Content-Type")) \|\|
				3648	(xmlStrEqual(value, BAD_CAST"content-type")) \|\|
				3649	(xmlStrEqual(value, BAD_CAST"CONTENT-TYPE"))))
				3650	http = 1;
				3651	else if ((value != NULL) &&
				3652	((xmlStrEqual(att, BAD_CAST"content")) \|\|
				3653	(xmlStrEqual(att, BAD_CAST"Content")) \|\|
				3654	(xmlStrEqual(att, BAD_CAST"CONTENT"))))
				3655	content = value;
				3656	att = atts[i++];
				3657	}
				3658	if ((http) && (content != NULL))
				3659	docbCheckEncoding(ctxt, content);
				3660
				3661	}
				3662
				3663	/**
				3664	* docbParseStartTag:
				3665	* @ctxt: an SGML parser context
				3666	*
				3667	* parse a start of tag either for rule element or
				3668	* EmptyElement. In both case we don't parse the tag closing chars.
				3669	*
				3670	* [40] STag ::= '<' Name (S Attribute)* S? '>'
				3671	*
				3672	* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
				3673	*
				3674	* With namespace:
				3675	*
				3676	* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
				3677	*
				3678	* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
				3679	*
				3680	*/
				3681
				3682	static void
				3683	docbParseStartTag(docbParserCtxtPtr ctxt) {
				3684	xmlChar *name;
				3685	xmlChar *attname;
				3686	xmlChar *attvalue;
				3687	const xmlChar **atts = NULL;
				3688	int nbatts = 0;
				3689	int maxatts = 0;
				3690	int meta = 0;
				3691	int i;
				3692
				3693	if (CUR != '<') return;
				3694	NEXT;
				3695
				3696	GROW;
				3697	name = docbParseSGMLName(ctxt);
				3698	if (name == NULL) {
				3699	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3700	ctxt->sax->error(ctxt->userData,
				3701	"docbParseStartTag: invalid element name\n");
				3702	ctxt->wellFormed = 0;
				3703	return;
				3704	}
				3705	if (xmlStrEqual(name, BAD_CAST"meta"))
				3706	meta = 1;
				3707
				3708	/*
				3709	* Check for auto-closure of SGML elements.
				3710	*/
				3711	docbAutoClose(ctxt, name);
				3712
				3713	/*
				3714	* Now parse the attributes, it ends up with the ending
				3715	*
				3716	* (S Attribute)* S?
				3717	*/
				3718	SKIP_BLANKS;
				3719	while ((IS_CHAR(CUR)) &&
				3720	(CUR != '>') &&
				3721	((CUR != '/') \|\| (NXT(1) != '>'))) {
				3722	long cons = ctxt->nbChars;
				3723
				3724	GROW;
				3725	attname = docbParseAttribute(ctxt, &attvalue);
				3726	if (attname != NULL) {
				3727
				3728	/*
				3729	* Well formedness requires at most one declaration of an attribute
				3730	*/
				3731	for (i = 0; i < nbatts;i += 2) {
				3732	if (xmlStrEqual(atts[i], attname)) {
				3733	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3734	ctxt->sax->error(ctxt->userData,
				3735	"Attribute %s redefined\n",
				3736	attname);
				3737	ctxt->wellFormed = 0;
				3738	xmlFree(attname);
				3739	if (attvalue != NULL)
				3740	xmlFree(attvalue);
				3741	goto failed;
				3742	}
				3743	}
				3744
				3745	/*
				3746	* Add the pair to atts
				3747	*/
				3748	if (atts == NULL) {
				3749	maxatts = 10;
				3750	atts = (const xmlChar *) xmlMalloc(maxatts sizeof(xmlChar *));
				3751	if (atts == NULL) {
				3752	xmlGenericError(xmlGenericErrorContext,
				3753	"malloc of %ld byte failed\n",
				3754	maxatts * (long)sizeof(xmlChar *));
				3755	if (name != NULL) xmlFree(name);
				3756	return;
				3757	}
				3758	} else if (nbatts + 4 > maxatts) {
				3759	maxatts *= 2;
Daniel Veillard	50f3437	2001-08-03 12:06:36 +0000	[diff] [blame]	3760	atts = (const xmlChar *) xmlRealloc((void )atts, maxatts * sizeof(xmlChar *));
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3761	if (atts == NULL) {
				3762	xmlGenericError(xmlGenericErrorContext,
				3763	"realloc of %ld byte failed\n",
				3764	maxatts * (long)sizeof(xmlChar *));
				3765	if (name != NULL) xmlFree(name);
				3766	return;
				3767	}
				3768	}
				3769	atts[nbatts++] = attname;
				3770	atts[nbatts++] = attvalue;
				3771	atts[nbatts] = NULL;
				3772	atts[nbatts + 1] = NULL;
				3773	}
				3774
				3775	failed:
				3776	SKIP_BLANKS;
				3777	if (cons == ctxt->nbChars) {
				3778	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3779	ctxt->sax->error(ctxt->userData,
				3780	"docbParseStartTag: problem parsing attributes\n");
				3781	ctxt->wellFormed = 0;
				3782	break;
				3783	}
				3784	}
				3785
				3786	/*
				3787	* Handle specific association to the META tag
				3788	*/
				3789	if (meta)
				3790	docbCheckMeta(ctxt, atts);
				3791
				3792	/*
				3793	* SAX: Start of Element !
				3794	*/
				3795	docbnamePush(ctxt, xmlStrdup(name));
				3796	#ifdef DEBUG
				3797	xmlGenericError(xmlGenericErrorContext,"Start of element %s: pushed %s\n", name, ctxt->name);
				3798	#endif
				3799	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
				3800	ctxt->sax->startElement(ctxt->userData, name, atts);
				3801
				3802	if (atts != NULL) {
				3803	for (i = 0;i < nbatts;i++) {
				3804	if (atts[i] != NULL)
				3805	xmlFree((xmlChar *) atts[i]);
				3806	}
				3807	xmlFree((void *) atts);
				3808	}
				3809	if (name != NULL) xmlFree(name);
				3810	}
				3811
				3812	/**
				3813	* docbParseEndTag:
				3814	* @ctxt: an SGML parser context
				3815	*
				3816	* parse an end of tag
				3817	*
				3818	* [42] ETag ::= '</' Name S? '>'
				3819	*
				3820	* With namespace
				3821	*
				3822	* [NS 9] ETag ::= '</' QName S? '>'
				3823	*/
				3824
				3825	static void
				3826	docbParseEndTag(docbParserCtxtPtr ctxt) {
				3827	xmlChar *name;
				3828	xmlChar *oldname;
				3829	int i;
				3830
				3831	if ((CUR != '<') \|\| (NXT(1) != '/')) {
				3832	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3833	ctxt->sax->error(ctxt->userData, "docbParseEndTag: '</' not found\n");
				3834	ctxt->wellFormed = 0;
				3835	return;
				3836	}
				3837	SKIP(2);
				3838
				3839	name = docbParseSGMLName(ctxt);
				3840	if (name == NULL) {
				3841	if (CUR == '>') {
				3842	NEXT;
				3843	oldname = docbnamePop(ctxt);
				3844	if (oldname != NULL) {
				3845	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				3846	ctxt->sax->endElement(ctxt->userData, name);
				3847	#ifdef DEBUG
				3848	xmlGenericError(xmlGenericErrorContext,"End of tag </>: popping out %s\n", oldname);
				3849	#endif
				3850	xmlFree(oldname);
				3851	#ifdef DEBUG
				3852	} else {
				3853	xmlGenericError(xmlGenericErrorContext,"End of tag </>: stack empty !!!\n");
				3854	#endif
				3855	}
				3856	return;
				3857	} else
				3858	return;
				3859	}
				3860
				3861	/*
				3862	* We should definitely be at the ending "S? '>'" part
				3863	*/
				3864	SKIP_BLANKS;
				3865	if ((!IS_CHAR(CUR)) \|\| (CUR != '>')) {
				3866	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3867	ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
				3868	ctxt->wellFormed = 0;
				3869	} else
				3870	NEXT;
				3871
				3872	/*
				3873	* If the name read is not one of the element in the parsing stack
				3874	* then return, it's just an error.
				3875	*/
				3876	for (i = (ctxt->nameNr - 1);i >= 0;i--) {
				3877	if (xmlStrEqual(name, ctxt->nameTab[i])) break;
				3878	}
				3879	if (i < 0) {
				3880	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3881	ctxt->sax->error(ctxt->userData,
				3882	"Unexpected end tag : %s\n", name);
				3883	xmlFree(name);
				3884	ctxt->wellFormed = 0;
				3885	return;
				3886	}
				3887
				3888
				3889	/*
				3890	* Check for auto-closure of SGML elements.
				3891	*/
				3892
				3893	docbAutoCloseOnClose(ctxt, name);
				3894
				3895	/*
				3896	* Well formedness constraints, opening and closing must match.
				3897	* With the exception that the autoclose may have popped stuff out
				3898	* of the stack.
				3899	*/
				3900	if (((name[0] != '/') \|\| (name[1] != 0)) &&
				3901	(!xmlStrEqual(name, ctxt->name))) {
				3902	#ifdef DEBUG
				3903	xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting %s\n", name, ctxt->name);
				3904	#endif
				3905	if ((ctxt->name != NULL) &&
				3906	(!xmlStrEqual(ctxt->name, name))) {
				3907	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				3908	ctxt->sax->error(ctxt->userData,
				3909	"Opening and ending tag mismatch: %s and %s\n",
				3910	name, ctxt->name);
				3911	ctxt->wellFormed = 0;
				3912	}
				3913	}
				3914
				3915	/*
				3916	* SAX: End of Tag
				3917	*/
				3918	oldname = ctxt->name;
				3919	if (((name[0] == '/') && (name[1] == 0)) \|\|
				3920	((oldname != NULL) && (xmlStrEqual(oldname, name)))) {
				3921	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				3922	ctxt->sax->endElement(ctxt->userData, name);
				3923	oldname = docbnamePop(ctxt);
				3924	if (oldname != NULL) {
				3925	#ifdef DEBUG
				3926	xmlGenericError(xmlGenericErrorContext,"End of tag %s: popping out %s\n", name, oldname);
				3927	#endif
				3928	xmlFree(oldname);
				3929	#ifdef DEBUG
				3930	} else {
				3931	xmlGenericError(xmlGenericErrorContext,"End of tag %s: stack empty !!!\n", name);
				3932	#endif
				3933	}
				3934	}
				3935
				3936	if (name != NULL)
				3937	xmlFree(name);
				3938
				3939	return;
				3940	}
				3941
				3942
				3943	/**
				3944	* docbParseReference:
				3945	* @ctxt: an SGML parser context
				3946	*
				3947	* parse and handle entity references in content,
				3948	* this will end-up in a call to character() since this is either a
				3949	* CharRef, or a predefined entity.
				3950	*/
				3951	static void
				3952	docbParseReference(docbParserCtxtPtr ctxt) {
				3953	docbEntityDescPtr ent;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3954	xmlEntityPtr xent;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3955	xmlChar out[6];
				3956	xmlChar *name;
				3957	if (CUR != '&') return;
				3958
				3959	if (NXT(1) == '#') {
				3960	unsigned int c;
				3961	int bits, i = 0;
				3962
				3963	c = docbParseCharRef(ctxt);
				3964	if (c < 0x80) { out[i++]= c; bits= -6; }
				3965	else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				3966	else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				3967	else { out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				3968
				3969	for ( ; bits >= 0; bits-= 6) {
				3970	out[i++]= ((c >> bits) & 0x3F) \| 0x80;
				3971	}
				3972	out[i] = 0;
				3973
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	3974	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				3975	ctxt->sax->characters(ctxt->userData, out, i);
				3976	} else {
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3977	/*
				3978	* Lookup the entity in the table.
				3979	*/
				3980	xent = docbParseEntityRef(ctxt, &name);
				3981	if (xent != NULL) {
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame]	3982	if (((ctxt->replaceEntities) \|\| (ctxt->loadsubset)) &&
				3983	((xent->children == NULL) &&
				3984	(xent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))) {
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3985	/*
				3986	* we really need to fetch and parse the external entity
				3987	*/
				3988	int parse;
				3989	xmlNodePtr children = NULL;
				3990
				3991	parse = docbParseCtxtExternalEntity(ctxt,
				3992	xent->SystemID, xent->ExternalID, &children);
				3993	xmlAddChildList((xmlNodePtr) xent, children);
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame]	3994	}
				3995	if (ctxt->replaceEntities) {
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	3996	if ((ctxt->node != NULL) && (xent->children != NULL)) {
				3997	/*
				3998	* Seems we are generating the DOM content, do
				3999	* a simple tree copy
				4000	*/
				4001	xmlNodePtr new;
				4002	new = xmlCopyNodeList(xent->children);
				4003
				4004	xmlAddChildList(ctxt->node, new);
				4005	/*
				4006	* This is to avoid a nasty side effect, see
				4007	* characters() in SAX.c
				4008	*/
				4009	ctxt->nodemem = 0;
				4010	ctxt->nodelen = 0;
				4011	}
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame]	4012	} else {
				4013	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
				4014	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
				4015	/*
				4016	* Create a node.
				4017	*/
				4018	ctxt->sax->reference(ctxt->userData, xent->name);
				4019	}
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4020	}
				4021	} else if (name != NULL) {
				4022	ent = docbEntityLookup(name);
				4023	if ((ent == NULL) \|\| (ent->value <= 0)) {
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4024	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
				4025	ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
				4026	ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
				4027	/* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
				4028	}
				4029	} else {
				4030	unsigned int c;
				4031	int bits, i = 0;
				4032
				4033	c = ent->value;
				4034	if (c < 0x80)
				4035	{ out[i++]= c; bits= -6; }
				4036	else if (c < 0x800)
				4037	{ out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				4038	else if (c < 0x10000)
				4039	{ out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				4040	else
				4041	{ out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				4042
				4043	for ( ; bits >= 0; bits-= 6) {
				4044	out[i++]= ((c >> bits) & 0x3F) \| 0x80;
				4045	}
				4046	out[i] = 0;
				4047
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4048	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				4049	ctxt->sax->characters(ctxt->userData, out, i);
				4050	}
				4051	} else {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4052	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				4053	ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
				4054	return;
				4055	}
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4056	if (name != NULL)
				4057	xmlFree(name);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4058	}
				4059	}
				4060
				4061	/**
				4062	* docbParseContent:
				4063	* @ctxt: an SGML parser context
				4064	* @name: the node name
				4065	*
				4066	* Parse a content: comment, sub-element, reference or text.
				4067	*
				4068	*/
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4069	static void
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4070	docbParseContent(docbParserCtxtPtr ctxt)
				4071	{
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4072	xmlChar *currentNode;
				4073	int depth;
				4074
				4075	currentNode = xmlStrdup(ctxt->name);
				4076	depth = ctxt->nameNr;
				4077	while (1) {
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4078	long cons = ctxt->nbChars;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4079
				4080	GROW;
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4081	/*
				4082	* Our tag or one of it's parent or children is ending.
				4083	*/
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4084	if ((CUR == '<') && (NXT(1) == '/')) {
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4085	docbParseEndTag(ctxt);
				4086	if (currentNode != NULL)
				4087	xmlFree(currentNode);
				4088	return;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4089	}
				4090
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4091	/*
				4092	* Has this node been popped out during parsing of
				4093	* the next element
				4094	*/
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4095	if ((!xmlStrEqual(currentNode, ctxt->name)) &&
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4096	(depth >= ctxt->nameNr)) {
				4097	if (currentNode != NULL)
				4098	xmlFree(currentNode);
				4099	return;
				4100	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4101
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4102	/*
				4103	* Sometimes DOCTYPE arrives in the middle of the document
				4104	*/
				4105	if ((CUR == '<') && (NXT(1) == '!') &&
				4106	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				4107	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				4108	(UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) {
				4109	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4110	ctxt->sax->error(ctxt->userData,
				4111	"Misplaced DOCTYPE declaration\n");
				4112	ctxt->wellFormed = 0;
				4113	docbParseDocTypeDecl(ctxt);
				4114	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4115
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4116	/*
				4117	* First case : a comment
				4118	*/
				4119	if ((CUR == '<') && (NXT(1) == '!') &&
				4120	(NXT(2) == '-') && (NXT(3) == '-')) {
				4121	docbParseComment(ctxt);
				4122	}
				4123
				4124	/*
				4125	* Second case : a PI
				4126	*/
				4127	else if ((RAW == '<') && (NXT(1) == '?')) {
				4128	docbParsePI(ctxt);
				4129	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4130
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4131	/*
				4132	* Third case : a sub-element.
				4133	*/
				4134	else if (CUR == '<') {
				4135	docbParseElement(ctxt);
				4136	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4137
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4138	/*
				4139	* Fourth case : a reference. If if has not been resolved,
				4140	* parsing returns it's Name, create the node
				4141	*/
				4142	else if (CUR == '&') {
				4143	docbParseReference(ctxt);
				4144	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4145
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4146	/*
				4147	* Fifth : end of the resource
				4148	*/
				4149	else if (CUR == 0) {
				4150	docbAutoClose(ctxt, NULL);
				4151	if (ctxt->nameNr == 0)
				4152	break;
				4153	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4154
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4155	/*
				4156	* Last case, text. Note that References are handled directly.
				4157	*/
				4158	else {
				4159	docbParseCharData(ctxt);
				4160	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4161
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4162	if (cons == ctxt->nbChars) {
				4163	if (ctxt->node != NULL) {
				4164	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4165	ctxt->sax->error(ctxt->userData,
				4166	"detected an error in element content\n");
				4167	ctxt->wellFormed = 0;
				4168	}
				4169	break;
				4170	}
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4171
				4172	GROW;
				4173	}
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4174	if (currentNode != NULL)
				4175	xmlFree(currentNode);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4176	}
				4177
				4178	/**
				4179	* docbParseElement:
				4180	* @ctxt: an SGML parser context
				4181	*
				4182	* parse an SGML element, this is highly recursive
				4183	*
				4184	* [39] element ::= EmptyElemTag \| STag content ETag
				4185	*
				4186	* [41] Attribute ::= Name Eq AttValue
				4187	*/
				4188
				4189	static void
				4190	docbParseElement(docbParserCtxtPtr ctxt) {
				4191	xmlChar *name;
				4192	xmlChar *currentNode = NULL;
				4193	docbElemDescPtr info;
				4194	docbParserNodeInfo node_info;
				4195	xmlChar *oldname;
				4196	int depth = ctxt->nameNr;
				4197
				4198	/* Capture start position */
				4199	if (ctxt->record_info) {
				4200	node_info.begin_pos = ctxt->input->consumed +
				4201	(CUR_PTR - ctxt->input->base);
				4202	node_info.begin_line = ctxt->input->line;
				4203	}
				4204
				4205	oldname = xmlStrdup(ctxt->name);
				4206	docbParseStartTag(ctxt);
				4207	name = ctxt->name;
				4208	#ifdef DEBUG
				4209	if (oldname == NULL)
				4210	xmlGenericError(xmlGenericErrorContext,
				4211	"Start of element %s\n", name);
				4212	else if (name == NULL)
				4213	xmlGenericError(xmlGenericErrorContext,
				4214	"Start of element failed, was %s\n", oldname);
				4215	else
				4216	xmlGenericError(xmlGenericErrorContext,
				4217	"Start of element %s, was %s\n", name, oldname);
				4218	#endif
				4219	if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) \|\|
				4220	(name == NULL)) {
				4221	if (CUR == '>')
				4222	NEXT;
				4223	if (oldname != NULL)
				4224	xmlFree(oldname);
				4225	return;
				4226	}
				4227	if (oldname != NULL)
				4228	xmlFree(oldname);
				4229
				4230	/*
				4231	* Lookup the info for that element.
				4232	*/
				4233	info = docbTagLookup(name);
				4234	if (info == NULL) {
				4235	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4236	ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
				4237	name);
				4238	ctxt->wellFormed = 0;
				4239	} else if (info->depr) {
				4240	/***************************
				4241	if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
				4242	ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n",
				4243	name);
				4244	***************************/
				4245	}
				4246
				4247	/*
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	4248	* Check for an Empty Element labeled the XML/SGML way
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4249	*/
				4250	if ((CUR == '/') && (NXT(1) == '>')) {
				4251	SKIP(2);
				4252	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				4253	ctxt->sax->endElement(ctxt->userData, name);
				4254	oldname = docbnamePop(ctxt);
				4255	#ifdef DEBUG
				4256	xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", oldname);
				4257	#endif
				4258	if (oldname != NULL)
				4259	xmlFree(oldname);
				4260	return;
				4261	}
				4262
				4263	if (CUR == '>') {
				4264	NEXT;
				4265	} else {
				4266	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4267	ctxt->sax->error(ctxt->userData,
				4268	"Couldn't find end of Start Tag %s\n",
				4269	name);
				4270	ctxt->wellFormed = 0;
				4271
				4272	/*
				4273	* end of parsing of this node.
				4274	*/
				4275	if (xmlStrEqual(name, ctxt->name)) {
				4276	nodePop(ctxt);
				4277	oldname = docbnamePop(ctxt);
				4278	#ifdef DEBUG
				4279	xmlGenericError(xmlGenericErrorContext,"End of start tag problem: popping out %s\n", oldname);
				4280	#endif
				4281	if (oldname != NULL)
				4282	xmlFree(oldname);
				4283	}
				4284
				4285	/*
				4286	* Capture end position and add node
				4287	*/
				4288	if ( currentNode != NULL && ctxt->record_info ) {
				4289	node_info.end_pos = ctxt->input->consumed +
				4290	(CUR_PTR - ctxt->input->base);
				4291	node_info.end_line = ctxt->input->line;
				4292	node_info.node = ctxt->node;
				4293	xmlParserAddNodeInfo(ctxt, &node_info);
				4294	}
				4295	return;
				4296	}
				4297
				4298	/*
				4299	* Check for an Empty Element from DTD definition
				4300	*/
				4301	if ((info != NULL) && (info->empty)) {
				4302	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				4303	ctxt->sax->endElement(ctxt->userData, name);
				4304	oldname = docbnamePop(ctxt);
				4305	#ifdef DEBUG
				4306	xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
				4307	#endif
				4308	if (oldname != NULL)
				4309	xmlFree(oldname);
				4310	return;
				4311	}
				4312
				4313	/*
				4314	* Parse the content of the element:
				4315	*/
				4316	currentNode = xmlStrdup(ctxt->name);
				4317	depth = ctxt->nameNr;
				4318	while (IS_CHAR(CUR)) {
				4319	docbParseContent(ctxt);
				4320	if (ctxt->nameNr < depth) break;
				4321	}
				4322
				4323	if (!IS_CHAR(CUR)) {
				4324	/************
				4325	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4326	ctxt->sax->error(ctxt->userData,
				4327	"Premature end of data in tag %s\n", currentNode);
				4328	ctxt->wellFormed = 0;
				4329	*************/
				4330
				4331	/*
				4332	* end of parsing of this node.
				4333	*/
				4334	nodePop(ctxt);
				4335	oldname = docbnamePop(ctxt);
				4336	#ifdef DEBUG
				4337	xmlGenericError(xmlGenericErrorContext,"Premature end of tag %s : popping out %s\n", name, oldname);
				4338	#endif
				4339	if (oldname != NULL)
				4340	xmlFree(oldname);
				4341	if (currentNode != NULL)
				4342	xmlFree(currentNode);
				4343	return;
				4344	}
				4345
				4346	/*
				4347	* Capture end position and add node
				4348	*/
				4349	if ( currentNode != NULL && ctxt->record_info ) {
				4350	node_info.end_pos = ctxt->input->consumed +
				4351	(CUR_PTR - ctxt->input->base);
				4352	node_info.end_line = ctxt->input->line;
				4353	node_info.node = ctxt->node;
				4354	xmlParserAddNodeInfo(ctxt, &node_info);
				4355	}
				4356	if (currentNode != NULL)
				4357	xmlFree(currentNode);
				4358	}
				4359
				4360	/**
				4361	* docbParseEntityDecl:
				4362	* @ctxt: an SGML parser context
				4363	*
				4364	* parse <!ENTITY declarations
				4365	*
				4366	*/
				4367
				4368	static void
				4369	docbParseEntityDecl(xmlParserCtxtPtr ctxt) {
				4370	xmlChar *name = NULL;
				4371	xmlChar *value = NULL;
				4372	xmlChar URI = NULL, literal = NULL;
				4373	xmlChar *ndata = NULL;
				4374	int isParameter = 0;
				4375	xmlChar *orig = NULL;
				4376
				4377	GROW;
				4378	if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4379	(UPP(2) == 'E') && (UPP(3) == 'N') &&
				4380	(UPP(4) == 'T') && (UPP(5) == 'I') &&
				4381	(UPP(6) == 'T') && (UPP(7) == 'Y')) {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4382	xmlParserInputPtr input = ctxt->input;
				4383	ctxt->instate = XML_PARSER_ENTITY_DECL;
				4384	SHRINK;
				4385	SKIP(8);
				4386	if (!IS_BLANK(CUR)) {
				4387	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4388	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4389	ctxt->sax->error(ctxt->userData,
				4390	"Space required after '<!ENTITY'\n");
				4391	ctxt->wellFormed = 0;
				4392	ctxt->disableSAX = 1;
				4393	}
				4394	SKIP_BLANKS;
				4395
				4396	if (RAW == '%') {
				4397	NEXT;
				4398	if (!IS_BLANK(CUR)) {
				4399	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4400	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4401	ctxt->sax->error(ctxt->userData,
				4402	"Space required after '%'\n");
				4403	ctxt->wellFormed = 0;
				4404	ctxt->disableSAX = 1;
				4405	}
				4406	SKIP_BLANKS;
				4407	isParameter = 1;
				4408	}
				4409
				4410	name = xmlParseName(ctxt);
				4411	if (name == NULL) {
				4412	ctxt->errNo = XML_ERR_NAME_REQUIRED;
				4413	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4414	ctxt->sax->error(ctxt->userData, "sgmlarseEntityDecl: no name\n");
				4415	ctxt->wellFormed = 0;
				4416	ctxt->disableSAX = 1;
				4417	return;
				4418	}
				4419	if (!IS_BLANK(CUR)) {
				4420	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4421	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4422	ctxt->sax->error(ctxt->userData,
				4423	"Space required after the entity name\n");
				4424	ctxt->wellFormed = 0;
				4425	ctxt->disableSAX = 1;
				4426	}
				4427	SKIP_BLANKS;
				4428
				4429	/*
				4430	* handle the various case of definitions...
				4431	*/
				4432	if (isParameter) {
				4433	if ((RAW == '"') \|\| (RAW == '\'')) {
				4434	value = xmlParseEntityValue(ctxt, &orig);
				4435	if (value) {
				4436	if ((ctxt->sax != NULL) &&
				4437	(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
				4438	ctxt->sax->entityDecl(ctxt->userData, name,
				4439	XML_INTERNAL_PARAMETER_ENTITY,
				4440	NULL, NULL, value);
				4441	}
				4442	} else {
				4443	URI = xmlParseExternalID(ctxt, &literal, 1);
				4444	if ((URI == NULL) && (literal == NULL)) {
				4445	ctxt->errNo = XML_ERR_VALUE_REQUIRED;
				4446	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4447	ctxt->sax->error(ctxt->userData,
				4448	"Entity value required\n");
				4449	ctxt->wellFormed = 0;
				4450	ctxt->disableSAX = 1;
				4451	}
				4452	if (URI) {
				4453	xmlURIPtr uri;
				4454
				4455	uri = xmlParseURI((const char *) URI);
				4456	if (uri == NULL) {
				4457	ctxt->errNo = XML_ERR_INVALID_URI;
				4458	if ((ctxt->sax != NULL) &&
				4459	(!ctxt->disableSAX) &&
				4460	(ctxt->sax->error != NULL))
				4461	ctxt->sax->error(ctxt->userData,
				4462	"Invalid URI: %s\n", URI);
				4463	ctxt->wellFormed = 0;
				4464	} else {
				4465	if (uri->fragment != NULL) {
				4466	ctxt->errNo = XML_ERR_URI_FRAGMENT;
				4467	if ((ctxt->sax != NULL) &&
				4468	(!ctxt->disableSAX) &&
				4469	(ctxt->sax->error != NULL))
				4470	ctxt->sax->error(ctxt->userData,
				4471	"Fragment not allowed: %s\n", URI);
				4472	ctxt->wellFormed = 0;
				4473	} else {
				4474	if ((ctxt->sax != NULL) &&
				4475	(!ctxt->disableSAX) &&
				4476	(ctxt->sax->entityDecl != NULL))
				4477	ctxt->sax->entityDecl(ctxt->userData, name,
				4478	XML_EXTERNAL_PARAMETER_ENTITY,
				4479	literal, URI, NULL);
				4480	}
				4481	xmlFreeURI(uri);
				4482	}
				4483	}
				4484	}
				4485	} else {
				4486	if ((RAW == '"') \|\| (RAW == '\'')) {
				4487	value = xmlParseEntityValue(ctxt, &orig);
				4488	if ((ctxt->sax != NULL) &&
				4489	(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
				4490	ctxt->sax->entityDecl(ctxt->userData, name,
				4491	XML_INTERNAL_GENERAL_ENTITY,
				4492	NULL, NULL, value);
				4493	} else {
				4494	URI = xmlParseExternalID(ctxt, &literal, 1);
				4495	if ((URI == NULL) && (literal == NULL)) {
				4496	ctxt->errNo = XML_ERR_VALUE_REQUIRED;
				4497	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4498	ctxt->sax->error(ctxt->userData,
				4499	"Entity value required\n");
				4500	ctxt->wellFormed = 0;
				4501	ctxt->disableSAX = 1;
				4502	}
				4503	if (URI) {
				4504	xmlURIPtr uri;
				4505
				4506	uri = xmlParseURI((const char *)URI);
				4507	if (uri == NULL) {
				4508	ctxt->errNo = XML_ERR_INVALID_URI;
				4509	if ((ctxt->sax != NULL) &&
				4510	(!ctxt->disableSAX) &&
				4511	(ctxt->sax->error != NULL))
				4512	ctxt->sax->error(ctxt->userData,
				4513	"Invalid URI: %s\n", URI);
				4514	ctxt->wellFormed = 0;
				4515	} else {
				4516	if (uri->fragment != NULL) {
				4517	ctxt->errNo = XML_ERR_URI_FRAGMENT;
				4518	if ((ctxt->sax != NULL) &&
				4519	(!ctxt->disableSAX) &&
				4520	(ctxt->sax->error != NULL))
				4521	ctxt->sax->error(ctxt->userData,
				4522	"Fragment not allowed: %s\n", URI);
				4523	ctxt->wellFormed = 0;
				4524	}
				4525	xmlFreeURI(uri);
				4526	}
				4527	}
				4528	if ((RAW != '>') && (!IS_BLANK(CUR))) {
				4529	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4530	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4531	ctxt->sax->error(ctxt->userData,
				4532	"Space required before content model\n");
				4533	ctxt->wellFormed = 0;
				4534	ctxt->disableSAX = 1;
				4535	}
				4536	SKIP_BLANKS;
				4537
				4538	/*
				4539	* SGML specific: here we can get the content model
				4540	*/
				4541	if (RAW != '>') {
				4542	xmlChar *contmod;
				4543
				4544	contmod = xmlParseName(ctxt);
				4545
				4546	if (contmod == NULL) {
				4547	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4548	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4549	ctxt->sax->error(ctxt->userData,
				4550	"Could not parse entity content model\n");
				4551	ctxt->wellFormed = 0;
				4552	ctxt->disableSAX = 1;
				4553	} else {
				4554	if (xmlStrEqual(contmod, BAD_CAST"NDATA")) {
				4555	if (!IS_BLANK(CUR)) {
				4556	ctxt->errNo = XML_ERR_SPACE_REQUIRED;
				4557	if ((ctxt->sax != NULL) &&
				4558	(ctxt->sax->error != NULL))
				4559	ctxt->sax->error(ctxt->userData,
				4560	"Space required after 'NDATA'\n");
				4561	ctxt->wellFormed = 0;
				4562	ctxt->disableSAX = 1;
				4563	}
				4564	SKIP_BLANKS;
				4565	ndata = xmlParseName(ctxt);
				4566	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
				4567	(ctxt->sax->unparsedEntityDecl != NULL)) {
				4568	ctxt->sax->unparsedEntityDecl(ctxt->userData,
				4569	name, literal, URI, ndata);
				4570	}
				4571	} else if (xmlStrEqual(contmod, BAD_CAST"SUBDOC")) {
				4572	if ((ctxt->sax != NULL) &&
				4573	(ctxt->sax->warning != NULL))
				4574	ctxt->sax->warning(ctxt->userData,
				4575	"SUBDOC entities are not supported\n");
				4576	SKIP_BLANKS;
				4577	ndata = xmlParseName(ctxt);
				4578	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
				4579	(ctxt->sax->unparsedEntityDecl != NULL)) {
				4580	ctxt->sax->unparsedEntityDecl(ctxt->userData,
				4581	name, literal, URI, ndata);
				4582	}
				4583	} else if (xmlStrEqual(contmod, BAD_CAST"CDATA")) {
				4584	if ((ctxt->sax != NULL) &&
				4585	(ctxt->sax->warning != NULL))
				4586	ctxt->sax->warning(ctxt->userData,
				4587	"CDATA entities are not supported\n");
				4588	SKIP_BLANKS;
				4589	ndata = xmlParseName(ctxt);
				4590	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
				4591	(ctxt->sax->unparsedEntityDecl != NULL)) {
				4592	ctxt->sax->unparsedEntityDecl(ctxt->userData,
				4593	name, literal, URI, ndata);
				4594	}
				4595	}
				4596	xmlFree(contmod);
				4597	}
				4598	} else {
				4599	if ((ctxt->sax != NULL) &&
				4600	(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
				4601	ctxt->sax->entityDecl(ctxt->userData, name,
				4602	XML_EXTERNAL_GENERAL_PARSED_ENTITY,
				4603	literal, URI, NULL);
				4604	}
				4605	}
				4606	}
				4607	SKIP_BLANKS;
				4608	if (RAW != '>') {
				4609	ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
				4610	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4611	ctxt->sax->error(ctxt->userData,
				4612	"docbParseEntityDecl: entity %s not terminated\n", name);
				4613	ctxt->wellFormed = 0;
				4614	ctxt->disableSAX = 1;
				4615	} else {
				4616	if (input != ctxt->input) {
				4617	ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
				4618	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4619	ctxt->sax->error(ctxt->userData,
				4620	"Entity declaration doesn't start and stop in the same entity\n");
				4621	ctxt->wellFormed = 0;
				4622	ctxt->disableSAX = 1;
				4623	}
				4624	NEXT;
				4625	}
				4626	if (orig != NULL) {
				4627	/*
				4628	* Ugly mechanism to save the raw entity value.
				4629	*/
				4630	xmlEntityPtr cur = NULL;
				4631
				4632	if (isParameter) {
				4633	if ((ctxt->sax != NULL) &&
				4634	(ctxt->sax->getParameterEntity != NULL))
				4635	cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
				4636	} else {
				4637	if ((ctxt->sax != NULL) &&
				4638	(ctxt->sax->getEntity != NULL))
				4639	cur = ctxt->sax->getEntity(ctxt->userData, name);
				4640	}
				4641	if (cur != NULL) {
				4642	if (cur->orig != NULL)
				4643	xmlFree(orig);
				4644	else
				4645	cur->orig = orig;
				4646	} else
				4647	xmlFree(orig);
				4648	}
				4649	if (name != NULL) xmlFree(name);
				4650	if (value != NULL) xmlFree(value);
				4651	if (URI != NULL) xmlFree(URI);
				4652	if (literal != NULL) xmlFree(literal);
				4653	if (ndata != NULL) xmlFree(ndata);
				4654	}
				4655	}
				4656
				4657	/**
				4658	* docbParseMarkupDecl:
				4659	* @ctxt: an SGML parser context
				4660	*
				4661	* parse Markup declarations
				4662	*
				4663	* [29] markupdecl ::= elementdecl \| AttlistDecl \| EntityDecl \|
				4664	* NotationDecl \| PI \| Comment
				4665	*/
				4666	static void
				4667	docbParseMarkupDecl(xmlParserCtxtPtr ctxt) {
				4668	GROW;
				4669	xmlParseElementDecl(ctxt);
				4670	xmlParseAttributeListDecl(ctxt);
				4671	docbParseEntityDecl(ctxt);
				4672	xmlParseNotationDecl(ctxt);
Daniel Veillard	e95e239	2001-06-06 10:46:28 +0000	[diff] [blame]	4673	docbParsePI(ctxt);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4674	xmlParseComment(ctxt);
				4675	/*
				4676	* This is only for internal subset. On external entities,
				4677	* the replacement is done before parsing stage
				4678	*/
				4679	if ((ctxt->external == 0) && (ctxt->inputNr == 1))
				4680	xmlParsePEReference(ctxt);
				4681	ctxt->instate = XML_PARSER_DTD;
				4682	}
				4683
				4684	/**
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	4685	* docbParseInternalSubset:
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4686	* @ctxt: an SGML parser context
				4687	*
				4688	* parse the internal subset declaration
				4689	*
				4690	* [28 end] ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'
				4691	*/
				4692
				4693	static void
				4694	docbParseInternalSubset(xmlParserCtxtPtr ctxt) {
				4695	/*
				4696	* Is there any DTD definition ?
				4697	*/
				4698	if (RAW == '[') {
				4699	ctxt->instate = XML_PARSER_DTD;
				4700	NEXT;
				4701	/*
				4702	* Parse the succession of Markup declarations and
				4703	* PEReferences.
				4704	* Subsequence (markupdecl \| PEReference \| S)*
				4705	*/
				4706	while (RAW != ']') {
				4707	const xmlChar *check = CUR_PTR;
				4708	int cons = ctxt->input->consumed;
				4709
				4710	SKIP_BLANKS;
				4711	docbParseMarkupDecl(ctxt);
				4712	xmlParsePEReference(ctxt);
				4713
				4714	/*
				4715	* Pop-up of finished entities.
				4716	*/
				4717	while ((RAW == 0) && (ctxt->inputNr > 1))
				4718	xmlPopInput(ctxt);
				4719
				4720	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
				4721	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
				4722	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4723	ctxt->sax->error(ctxt->userData,
				4724	"docbParseInternalSubset: error detected in Markup declaration\n");
				4725	ctxt->wellFormed = 0;
				4726	ctxt->disableSAX = 1;
				4727	break;
				4728	}
				4729	}
				4730	if (RAW == ']') {
				4731	NEXT;
				4732	SKIP_BLANKS;
				4733	}
				4734	}
				4735
				4736	/*
				4737	* We should be at the end of the DOCTYPE declaration.
				4738	*/
				4739	if (RAW != '>') {
				4740	ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
				4741	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard	f6ed8bc	2001-10-02 09:22:47 +0000	[diff] [blame]	4742	ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4743	ctxt->wellFormed = 0;
				4744	ctxt->disableSAX = 1;
				4745	}
				4746	NEXT;
				4747	}
				4748
				4749	/**
				4750	* docbParseMisc:
				4751	* @ctxt: an XML parser context
				4752	*
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	4753	* parse an XML Misc* optional field.
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4754	*
				4755	* [27] Misc ::= Comment \| PI \| S
				4756	*/
				4757
				4758	static void
				4759	docbParseMisc(xmlParserCtxtPtr ctxt) {
				4760	while (((RAW == '<') && (NXT(1) == '?')) \|\|
				4761	((RAW == '<') && (NXT(1) == '!') &&
				4762	(NXT(2) == '-') && (NXT(3) == '-')) \|\|
				4763	IS_BLANK(CUR)) {
				4764	if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard	84666b3	2001-06-11 17:31:08 +0000	[diff] [blame]	4765	docbParsePI(ctxt);
				4766	} else if (IS_BLANK(CUR)) {
				4767	NEXT;
				4768	} else
				4769	xmlParseComment(ctxt);
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4770	}
				4771	}
				4772
				4773	/**
				4774	* docbParseDocument :
				4775	* @ctxt: an SGML parser context
				4776	*
				4777	* parse an SGML document (and build a tree if using the standard SAX
				4778	* interface).
				4779	*
				4780	* Returns 0, -1 in case of error. the parser context is augmented
				4781	* as a result of the parsing.
				4782	*/
				4783
				4784	int
				4785	docbParseDocument(docbParserCtxtPtr ctxt) {
				4786	xmlChar start[4];
				4787	xmlCharEncoding enc;
				4788	xmlDtdPtr dtd;
				4789
				4790	docbDefaultSAXHandlerInit();
				4791	ctxt->html = 2;
				4792
				4793	GROW;
				4794	/*
				4795	* SAX: beginning of the document processing.
				4796	*/
				4797	if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
				4798	ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
				4799
				4800	/*
				4801	* Get the 4 first bytes and decode the charset
				4802	* if enc != XML_CHAR_ENCODING_NONE
				4803	* plug some encoding conversion routines.
				4804	*/
				4805	start[0] = RAW;
				4806	start[1] = NXT(1);
				4807	start[2] = NXT(2);
				4808	start[3] = NXT(3);
				4809	enc = xmlDetectCharEncoding(start, 4);
				4810	if (enc != XML_CHAR_ENCODING_NONE) {
				4811	xmlSwitchEncoding(ctxt, enc);
				4812	}
				4813
				4814	/*
				4815	* Wipe out everything which is before the first '<'
				4816	*/
				4817	SKIP_BLANKS;
				4818	if (CUR == 0) {
				4819	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				4820	ctxt->sax->error(ctxt->userData, "Document is empty\n");
				4821	ctxt->wellFormed = 0;
				4822	}
				4823
				4824	if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
				4825	ctxt->sax->startDocument(ctxt->userData);
				4826
				4827
				4828	/*
				4829	* The Misc part of the Prolog
				4830	*/
				4831	GROW;
				4832	docbParseMisc(ctxt);
				4833
				4834	/*
				4835	* Then possibly doc type declaration(s) and more Misc
				4836	* (doctypedecl Misc*)?
				4837	*/
				4838	GROW;
				4839	if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4840	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				4841	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				4842	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				4843	(UPP(8) == 'E')) {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4844
				4845	ctxt->inSubset = 1;
				4846	docbParseDocTypeDecl(ctxt);
				4847	if (RAW == '[') {
				4848	ctxt->instate = XML_PARSER_DTD;
				4849	docbParseInternalSubset(ctxt);
				4850	}
				4851
				4852	/*
				4853	* Create and update the external subset.
				4854	*/
				4855	ctxt->inSubset = 2;
				4856	if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
				4857	(!ctxt->disableSAX))
				4858	ctxt->sax->internalSubset(ctxt->userData, ctxt->intSubName,
				4859	ctxt->extSubSystem, ctxt->extSubURI);
				4860	ctxt->inSubset = 0;
				4861
				4862
				4863	ctxt->instate = XML_PARSER_PROLOG;
				4864	docbParseMisc(ctxt);
				4865	}
				4866
				4867	/*
				4868	* Time to start parsing the tree itself
				4869	*/
				4870	docbParseContent(ctxt);
				4871
				4872	/*
				4873	* autoclose
				4874	*/
				4875	if (CUR == 0)
				4876	docbAutoClose(ctxt, NULL);
				4877
				4878
				4879	/*
				4880	* SAX: end of the document processing.
				4881	*/
				4882	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				4883	ctxt->sax->endDocument(ctxt->userData);
				4884
				4885	if (ctxt->myDoc != NULL) {
				4886	dtd = ctxt->myDoc->intSubset;
Daniel Veillard	e95e239	2001-06-06 10:46:28 +0000	[diff] [blame]	4887	ctxt->myDoc->standalone = -1;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4888	if (dtd == NULL)
				4889	ctxt->myDoc->intSubset =
				4890	xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
				4891	BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
				4892	BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
				4893	}
				4894	if (! ctxt->wellFormed) return(-1);
				4895	return(0);
				4896	}
				4897
				4898
				4899	/************************************************************************
				4900	* *
				4901	* Parser contexts handling *
				4902	* *
				4903	************************************************************************/
				4904
				4905	/**
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame]	4906	* docbInitParserCtxt:
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4907	* @ctxt: an SGML parser context
				4908	*
				4909	* Initialize a parser context
				4910	*/
				4911
				4912	static void
				4913	docbInitParserCtxt(docbParserCtxtPtr ctxt)
				4914	{
				4915	docbSAXHandler *sax;
				4916
				4917	if (ctxt == NULL) return;
				4918	memset(ctxt, 0, sizeof(docbParserCtxt));
				4919
				4920	sax = (docbSAXHandler *) xmlMalloc(sizeof(docbSAXHandler));
				4921	if (sax == NULL) {
				4922	xmlGenericError(xmlGenericErrorContext,
				4923	"docbInitParserCtxt: out of memory\n");
				4924	}
				4925	memset(sax, 0, sizeof(docbSAXHandler));
				4926
				4927	/* Allocate the Input stack */
				4928	ctxt->inputTab = (docbParserInputPtr *)
				4929	xmlMalloc(5 * sizeof(docbParserInputPtr));
				4930	if (ctxt->inputTab == NULL) {
				4931	xmlGenericError(xmlGenericErrorContext,
				4932	"docbInitParserCtxt: out of memory\n");
				4933	}
				4934	ctxt->inputNr = 0;
				4935	ctxt->inputMax = 5;
				4936	ctxt->input = NULL;
				4937	ctxt->version = NULL;
				4938	ctxt->encoding = NULL;
				4939	ctxt->standalone = -1;
				4940	ctxt->instate = XML_PARSER_START;
				4941
				4942	/* Allocate the Node stack */
				4943	ctxt->nodeTab = (docbNodePtr ) xmlMalloc(10 sizeof(docbNodePtr));
				4944	ctxt->nodeNr = 0;
				4945	ctxt->nodeMax = 10;
				4946	ctxt->node = NULL;
				4947
				4948	/* Allocate the Name stack */
				4949	ctxt->nameTab = (xmlChar *) xmlMalloc(10 sizeof(xmlChar *));
				4950	ctxt->nameNr = 0;
				4951	ctxt->nameMax = 10;
				4952	ctxt->name = NULL;
				4953
				4954	if (sax == NULL) ctxt->sax = &docbDefaultSAXHandler;
				4955	else {
				4956	ctxt->sax = sax;
				4957	memcpy(sax, &docbDefaultSAXHandler, sizeof(docbSAXHandler));
				4958	}
				4959	ctxt->userData = ctxt;
				4960	ctxt->myDoc = NULL;
				4961	ctxt->wellFormed = 1;
Daniel Veillard	635ef72	2001-10-29 11:48:19 +0000	[diff] [blame]	4962	ctxt->linenumbers = xmlLineNumbersDefaultValue;
Daniel Veillard	61b33d5	2001-04-24 13:55:12 +0000	[diff] [blame]	4963	ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4964	ctxt->html = 2;
				4965	ctxt->record_info = 0;
				4966	ctxt->validate = 0;
				4967	ctxt->nbChars = 0;
				4968	ctxt->checkIndex = 0;
				4969	xmlInitNodeInfoSeq(&ctxt->node_seq);
				4970	}
				4971
				4972	/**
				4973	* docbFreeParserCtxt:
				4974	* @ctxt: an SGML parser context
				4975	*
				4976	* Free all the memory used by a parser context. However the parsed
				4977	* document in ctxt->myDoc is not freed.
				4978	*/
				4979
				4980	void
				4981	docbFreeParserCtxt(docbParserCtxtPtr ctxt)
				4982	{
				4983	xmlFreeParserCtxt(ctxt);
				4984	}
				4985
				4986	/**
				4987	* docbCreateDocParserCtxt :
				4988	* @cur: a pointer to an array of xmlChar
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame]	4989	* @encoding: the SGML document encoding, or NULL
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4990	*
				4991	* Create a parser context for an SGML document.
				4992	*
				4993	* Returns the new parser context or NULL
				4994	*/
				4995	static docbParserCtxtPtr
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame]	4996	docbCreateDocParserCtxt(xmlChar cur, const char encoding ATTRIBUTE_UNUSED) {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	4997	docbParserCtxtPtr ctxt;
				4998	docbParserInputPtr input;
				4999	/* sgmlCharEncoding enc; */
				5000
				5001	ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
				5002	if (ctxt == NULL) {
				5003	perror("malloc");
				5004	return(NULL);
				5005	}
				5006	docbInitParserCtxt(ctxt);
				5007	input = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput));
				5008	if (input == NULL) {
				5009	perror("malloc");
				5010	xmlFree(ctxt);
				5011	return(NULL);
				5012	}
				5013	memset(input, 0, sizeof(docbParserInput));
				5014
				5015	input->line = 1;
				5016	input->col = 1;
				5017	input->base = cur;
				5018	input->cur = cur;
				5019
				5020	inputPush(ctxt, input);
				5021	return(ctxt);
				5022	}
				5023
				5024	/************************************************************************
				5025	* *
				5026	* Progressive parsing interfaces *
				5027	* *
				5028	************************************************************************/
				5029
				5030	/**
				5031	* docbParseLookupSequence:
				5032	* @ctxt: an SGML parser context
				5033	* @first: the first char to lookup
				5034	* @next: the next char to lookup or zero
				5035	* @third: the next char to lookup or zero
				5036	*
				5037	* Try to find if a sequence (first, next, third) or just (first next) or
				5038	* (first) is available in the input stream.
				5039	* This function has a side effect of (possibly) incrementing ctxt->checkIndex
				5040	* to avoid rescanning sequences of bytes, it DOES change the state of the
				5041	* parser, do not use liberally.
				5042	* This is basically similar to xmlParseLookupSequence()
				5043	*
				5044	* Returns the index to the current parsing point if the full sequence
				5045	* is available, -1 otherwise.
				5046	*/
				5047	static int
				5048	docbParseLookupSequence(docbParserCtxtPtr ctxt, xmlChar first,
				5049	xmlChar next, xmlChar third) {
				5050	int base, len;
				5051	docbParserInputPtr in;
				5052	const xmlChar *buf;
				5053
				5054	in = ctxt->input;
				5055	if (in == NULL) return(-1);
				5056	base = in->cur - in->base;
				5057	if (base < 0) return(-1);
				5058	if (ctxt->checkIndex > base)
				5059	base = ctxt->checkIndex;
				5060	if (in->buf == NULL) {
				5061	buf = in->base;
				5062	len = in->length;
				5063	} else {
				5064	buf = in->buf->buffer->content;
				5065	len = in->buf->buffer->use;
				5066	}
				5067	/* take into account the sequence length */
				5068	if (third) len -= 2;
				5069	else if (next) len --;
				5070	for (;base < len;base++) {
				5071	if (buf[base] == first) {
				5072	if (third != 0) {
				5073	if ((buf[base + 1] != next) \|\|
				5074	(buf[base + 2] != third)) continue;
				5075	} else if (next != 0) {
				5076	if (buf[base + 1] != next) continue;
				5077	}
				5078	ctxt->checkIndex = 0;
				5079	#ifdef DEBUG_PUSH
				5080	if (next == 0)
				5081	xmlGenericError(xmlGenericErrorContext,
				5082	"HPP: lookup '%c' found at %d\n",
				5083	first, base);
				5084	else if (third == 0)
				5085	xmlGenericError(xmlGenericErrorContext,
				5086	"HPP: lookup '%c%c' found at %d\n",
				5087	first, next, base);
				5088	else
				5089	xmlGenericError(xmlGenericErrorContext,
				5090	"HPP: lookup '%c%c%c' found at %d\n",
				5091	first, next, third, base);
				5092	#endif
				5093	return(base - (in->cur - in->base));
				5094	}
				5095	}
				5096	ctxt->checkIndex = base;
				5097	#ifdef DEBUG_PUSH
				5098	if (next == 0)
				5099	xmlGenericError(xmlGenericErrorContext,
				5100	"HPP: lookup '%c' failed\n", first);
				5101	else if (third == 0)
				5102	xmlGenericError(xmlGenericErrorContext,
				5103	"HPP: lookup '%c%c' failed\n", first, next);
				5104	else
				5105	xmlGenericError(xmlGenericErrorContext,
				5106	"HPP: lookup '%c%c%c' failed\n", first, next, third);
				5107	#endif
				5108	return(-1);
				5109	}
				5110
				5111	/**
				5112	* docbParseTryOrFinish:
				5113	* @ctxt: an SGML parser context
				5114	* @terminate: last chunk indicator
				5115	*
				5116	* Try to progress on parsing
				5117	*
				5118	* Returns zero if no parsing was possible
				5119	*/
				5120	static int
				5121	docbParseTryOrFinish(docbParserCtxtPtr ctxt, int terminate) {
				5122	int ret = 0;
				5123	docbParserInputPtr in;
				5124	int avail = 0;
				5125	xmlChar cur, next;
				5126
				5127	#ifdef DEBUG_PUSH
				5128	switch (ctxt->instate) {
				5129	case XML_PARSER_EOF:
				5130	xmlGenericError(xmlGenericErrorContext,
				5131	"HPP: try EOF\n"); break;
				5132	case XML_PARSER_START:
				5133	xmlGenericError(xmlGenericErrorContext,
				5134	"HPP: try START\n"); break;
				5135	case XML_PARSER_MISC:
				5136	xmlGenericError(xmlGenericErrorContext,
				5137	"HPP: try MISC\n");break;
				5138	case XML_PARSER_COMMENT:
				5139	xmlGenericError(xmlGenericErrorContext,
				5140	"HPP: try COMMENT\n");break;
				5141	case XML_PARSER_PROLOG:
				5142	xmlGenericError(xmlGenericErrorContext,
				5143	"HPP: try PROLOG\n");break;
				5144	case XML_PARSER_START_TAG:
				5145	xmlGenericError(xmlGenericErrorContext,
				5146	"HPP: try START_TAG\n");break;
				5147	case XML_PARSER_CONTENT:
				5148	xmlGenericError(xmlGenericErrorContext,
				5149	"HPP: try CONTENT\n");break;
				5150	case XML_PARSER_CDATA_SECTION:
				5151	xmlGenericError(xmlGenericErrorContext,
				5152	"HPP: try CDATA_SECTION\n");break;
				5153	case XML_PARSER_END_TAG:
				5154	xmlGenericError(xmlGenericErrorContext,
				5155	"HPP: try END_TAG\n");break;
				5156	case XML_PARSER_ENTITY_DECL:
				5157	xmlGenericError(xmlGenericErrorContext,
				5158	"HPP: try ENTITY_DECL\n");break;
				5159	case XML_PARSER_ENTITY_VALUE:
				5160	xmlGenericError(xmlGenericErrorContext,
				5161	"HPP: try ENTITY_VALUE\n");break;
				5162	case XML_PARSER_ATTRIBUTE_VALUE:
				5163	xmlGenericError(xmlGenericErrorContext,
				5164	"HPP: try ATTRIBUTE_VALUE\n");break;
				5165	case XML_PARSER_DTD:
				5166	xmlGenericError(xmlGenericErrorContext,
				5167	"HPP: try DTD\n");break;
				5168	case XML_PARSER_EPILOG:
				5169	xmlGenericError(xmlGenericErrorContext,
				5170	"HPP: try EPILOG\n");break;
				5171	case XML_PARSER_PI:
				5172	xmlGenericError(xmlGenericErrorContext,
				5173	"HPP: try PI\n");break;
				5174	}
				5175	#endif
				5176
				5177	while (1) {
				5178
				5179	in = ctxt->input;
				5180	if (in == NULL) break;
				5181	if (in->buf == NULL)
				5182	avail = in->length - (in->cur - in->base);
				5183	else
				5184	avail = in->buf->buffer->use - (in->cur - in->base);
				5185	if ((avail == 0) && (terminate)) {
				5186	docbAutoClose(ctxt, NULL);
				5187	if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
				5188	/*
				5189	* SAX: end of the document processing.
				5190	*/
				5191	ctxt->instate = XML_PARSER_EOF;
				5192	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5193	ctxt->sax->endDocument(ctxt->userData);
				5194	}
				5195	}
				5196	if (avail < 1)
				5197	goto done;
				5198	switch (ctxt->instate) {
				5199	case XML_PARSER_EOF:
				5200	/*
				5201	* Document parsing is done !
				5202	*/
				5203	goto done;
				5204	case XML_PARSER_START:
				5205	/*
				5206	* Very first chars read from the document flow.
				5207	*/
				5208	cur = in->cur[0];
				5209	if (IS_BLANK(cur)) {
				5210	SKIP_BLANKS;
				5211	if (in->buf == NULL)
				5212	avail = in->length - (in->cur - in->base);
				5213	else
				5214	avail = in->buf->buffer->use - (in->cur - in->base);
				5215	}
				5216	if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
				5217	ctxt->sax->setDocumentLocator(ctxt->userData,
				5218	&xmlDefaultSAXLocator);
				5219	if ((ctxt->sax) && (ctxt->sax->startDocument) &&
				5220	(!ctxt->disableSAX))
				5221	ctxt->sax->startDocument(ctxt->userData);
				5222
				5223	cur = in->cur[0];
				5224	next = in->cur[1];
				5225	if ((cur == '<') && (next == '!') &&
				5226	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				5227	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				5228	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				5229	(UPP(8) == 'E')) {
				5230	if ((!terminate) &&
				5231	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5232	goto done;
				5233	#ifdef DEBUG_PUSH
				5234	xmlGenericError(xmlGenericErrorContext,
				5235	"HPP: Parsing internal subset\n");
				5236	#endif
				5237	docbParseDocTypeDecl(ctxt);
				5238	ctxt->instate = XML_PARSER_PROLOG;
				5239	#ifdef DEBUG_PUSH
				5240	xmlGenericError(xmlGenericErrorContext,
				5241	"HPP: entering PROLOG\n");
				5242	#endif
				5243	} else {
				5244	ctxt->instate = XML_PARSER_MISC;
				5245	}
				5246	#ifdef DEBUG_PUSH
				5247	xmlGenericError(xmlGenericErrorContext,
				5248	"HPP: entering MISC\n");
				5249	#endif
				5250	break;
				5251	case XML_PARSER_MISC:
				5252	SKIP_BLANKS;
				5253	if (in->buf == NULL)
				5254	avail = in->length - (in->cur - in->base);
				5255	else
				5256	avail = in->buf->buffer->use - (in->cur - in->base);
				5257	if (avail < 2)
				5258	goto done;
				5259	cur = in->cur[0];
				5260	next = in->cur[1];
				5261	if ((cur == '<') && (next == '!') &&
				5262	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5263	if ((!terminate) &&
				5264	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5265	goto done;
				5266	#ifdef DEBUG_PUSH
				5267	xmlGenericError(xmlGenericErrorContext,
				5268	"HPP: Parsing Comment\n");
				5269	#endif
				5270	docbParseComment(ctxt);
				5271	ctxt->instate = XML_PARSER_MISC;
				5272	} else if ((cur == '<') && (next == '!') &&
				5273	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				5274	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				5275	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				5276	(UPP(8) == 'E')) {
				5277	if ((!terminate) &&
				5278	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5279	goto done;
				5280	#ifdef DEBUG_PUSH
				5281	xmlGenericError(xmlGenericErrorContext,
				5282	"HPP: Parsing internal subset\n");
				5283	#endif
				5284	docbParseDocTypeDecl(ctxt);
				5285	ctxt->instate = XML_PARSER_PROLOG;
				5286	#ifdef DEBUG_PUSH
				5287	xmlGenericError(xmlGenericErrorContext,
				5288	"HPP: entering PROLOG\n");
				5289	#endif
				5290	} else if ((cur == '<') && (next == '!') &&
				5291	(avail < 9)) {
				5292	goto done;
				5293	} else {
				5294	ctxt->instate = XML_PARSER_START_TAG;
				5295	#ifdef DEBUG_PUSH
				5296	xmlGenericError(xmlGenericErrorContext,
				5297	"HPP: entering START_TAG\n");
				5298	#endif
				5299	}
				5300	break;
				5301	case XML_PARSER_PROLOG:
				5302	SKIP_BLANKS;
				5303	if (in->buf == NULL)
				5304	avail = in->length - (in->cur - in->base);
				5305	else
				5306	avail = in->buf->buffer->use - (in->cur - in->base);
				5307	if (avail < 2)
				5308	goto done;
				5309	cur = in->cur[0];
				5310	next = in->cur[1];
				5311	if ((cur == '<') && (next == '!') &&
				5312	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5313	if ((!terminate) &&
				5314	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5315	goto done;
				5316	#ifdef DEBUG_PUSH
				5317	xmlGenericError(xmlGenericErrorContext,
				5318	"HPP: Parsing Comment\n");
				5319	#endif
				5320	docbParseComment(ctxt);
				5321	ctxt->instate = XML_PARSER_PROLOG;
				5322	} else if ((cur == '<') && (next == '!') &&
				5323	(avail < 4)) {
				5324	goto done;
				5325	} else {
				5326	ctxt->instate = XML_PARSER_START_TAG;
				5327	#ifdef DEBUG_PUSH
				5328	xmlGenericError(xmlGenericErrorContext,
				5329	"HPP: entering START_TAG\n");
				5330	#endif
				5331	}
				5332	break;
				5333	case XML_PARSER_EPILOG:
				5334	if (in->buf == NULL)
				5335	avail = in->length - (in->cur - in->base);
				5336	else
				5337	avail = in->buf->buffer->use - (in->cur - in->base);
				5338	if (avail < 1)
				5339	goto done;
				5340	cur = in->cur[0];
				5341	if (IS_BLANK(cur)) {
				5342	docbParseCharData(ctxt);
				5343	goto done;
				5344	}
				5345	if (avail < 2)
				5346	goto done;
				5347	next = in->cur[1];
				5348	if ((cur == '<') && (next == '!') &&
				5349	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5350	if ((!terminate) &&
				5351	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5352	goto done;
				5353	#ifdef DEBUG_PUSH
				5354	xmlGenericError(xmlGenericErrorContext,
				5355	"HPP: Parsing Comment\n");
				5356	#endif
				5357	docbParseComment(ctxt);
				5358	ctxt->instate = XML_PARSER_EPILOG;
				5359	} else if ((cur == '<') && (next == '!') &&
				5360	(avail < 4)) {
				5361	goto done;
				5362	} else {
				5363	ctxt->errNo = XML_ERR_DOCUMENT_END;
				5364	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5365	ctxt->sax->error(ctxt->userData,
				5366	"Extra content at the end of the document\n");
				5367	ctxt->wellFormed = 0;
				5368	ctxt->instate = XML_PARSER_EOF;
				5369	#ifdef DEBUG_PUSH
				5370	xmlGenericError(xmlGenericErrorContext,
				5371	"HPP: entering EOF\n");
				5372	#endif
				5373	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5374	ctxt->sax->endDocument(ctxt->userData);
				5375	goto done;
				5376	}
				5377	break;
				5378	case XML_PARSER_START_TAG: {
				5379	xmlChar name, oldname;
				5380	int depth = ctxt->nameNr;
				5381	docbElemDescPtr info;
				5382
				5383	if (avail < 2)
				5384	goto done;
				5385	cur = in->cur[0];
				5386	if (cur != '<') {
				5387	ctxt->instate = XML_PARSER_CONTENT;
				5388	#ifdef DEBUG_PUSH
				5389	xmlGenericError(xmlGenericErrorContext,
				5390	"HPP: entering CONTENT\n");
				5391	#endif
				5392	break;
				5393	}
				5394	if ((!terminate) &&
				5395	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5396	goto done;
				5397
				5398	oldname = xmlStrdup(ctxt->name);
				5399	docbParseStartTag(ctxt);
				5400	name = ctxt->name;
				5401	#ifdef DEBUG
				5402	if (oldname == NULL)
				5403	xmlGenericError(xmlGenericErrorContext,
				5404	"Start of element %s\n", name);
				5405	else if (name == NULL)
				5406	xmlGenericError(xmlGenericErrorContext,
				5407	"Start of element failed, was %s\n",
				5408	oldname);
				5409	else
				5410	xmlGenericError(xmlGenericErrorContext,
				5411	"Start of element %s, was %s\n",
				5412	name, oldname);
				5413	#endif
				5414	if (((depth == ctxt->nameNr) &&
				5415	(xmlStrEqual(oldname, ctxt->name))) \|\|
				5416	(name == NULL)) {
				5417	if (CUR == '>')
				5418	NEXT;
				5419	if (oldname != NULL)
				5420	xmlFree(oldname);
				5421	break;
				5422	}
				5423	if (oldname != NULL)
				5424	xmlFree(oldname);
				5425
				5426	/*
				5427	* Lookup the info for that element.
				5428	*/
				5429	info = docbTagLookup(name);
				5430	if (info == NULL) {
				5431	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5432	ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
				5433	name);
				5434	ctxt->wellFormed = 0;
				5435	} else if (info->depr) {
				5436	/***************************
				5437	if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
				5438	ctxt->sax->warning(ctxt->userData,
				5439	"Tag %s is deprecated\n",
				5440	name);
				5441	***************************/
				5442	}
				5443
				5444	/*
Daniel Veillard	cbaf399	2001-12-31 16:16:02 +0000	[diff] [blame]	5445	* Check for an Empty Element labeled the XML/SGML way
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	5446	*/
				5447	if ((CUR == '/') && (NXT(1) == '>')) {
				5448	SKIP(2);
				5449	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				5450	ctxt->sax->endElement(ctxt->userData, name);
				5451	oldname = docbnamePop(ctxt);
				5452	#ifdef DEBUG
				5453	xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n",
				5454	oldname);
				5455	#endif
				5456	if (oldname != NULL)
				5457	xmlFree(oldname);
				5458	ctxt->instate = XML_PARSER_CONTENT;
				5459	#ifdef DEBUG_PUSH
				5460	xmlGenericError(xmlGenericErrorContext,
				5461	"HPP: entering CONTENT\n");
				5462	#endif
				5463	break;
				5464	}
				5465
				5466	if (CUR == '>') {
				5467	NEXT;
				5468	} else {
				5469	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5470	ctxt->sax->error(ctxt->userData,
				5471	"Couldn't find end of Start Tag %s\n",
				5472	name);
				5473	ctxt->wellFormed = 0;
				5474
				5475	/*
				5476	* end of parsing of this node.
				5477	*/
				5478	if (xmlStrEqual(name, ctxt->name)) {
				5479	nodePop(ctxt);
				5480	oldname = docbnamePop(ctxt);
				5481	#ifdef DEBUG
				5482	xmlGenericError(xmlGenericErrorContext,
				5483	"End of start tag problem: popping out %s\n", oldname);
				5484	#endif
				5485	if (oldname != NULL)
				5486	xmlFree(oldname);
				5487	}
				5488
				5489	ctxt->instate = XML_PARSER_CONTENT;
				5490	#ifdef DEBUG_PUSH
				5491	xmlGenericError(xmlGenericErrorContext,
				5492	"HPP: entering CONTENT\n");
				5493	#endif
				5494	break;
				5495	}
				5496
				5497	/*
				5498	* Check for an Empty Element from DTD definition
				5499	*/
				5500	if ((info != NULL) && (info->empty)) {
				5501	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
				5502	ctxt->sax->endElement(ctxt->userData, name);
				5503	oldname = docbnamePop(ctxt);
				5504	#ifdef DEBUG
				5505	xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
				5506	#endif
				5507	if (oldname != NULL)
				5508	xmlFree(oldname);
				5509	}
				5510	ctxt->instate = XML_PARSER_CONTENT;
				5511	#ifdef DEBUG_PUSH
				5512	xmlGenericError(xmlGenericErrorContext,
				5513	"HPP: entering CONTENT\n");
				5514	#endif
				5515	break;
				5516	}
				5517	case XML_PARSER_CONTENT: {
				5518	long cons;
				5519	/*
				5520	* Handle preparsed entities and charRef
				5521	*/
				5522	if (ctxt->token != 0) {
				5523	xmlChar chr[2] = { 0 , 0 } ;
				5524
				5525	chr[0] = (xmlChar) ctxt->token;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	5526	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
				5527	ctxt->sax->characters(ctxt->userData, chr, 1);
				5528	ctxt->token = 0;
				5529	ctxt->checkIndex = 0;
				5530	}
				5531	if ((avail == 1) && (terminate)) {
				5532	cur = in->cur[0];
				5533	if ((cur != '<') && (cur != '&')) {
				5534	if (ctxt->sax != NULL) {
				5535	if (IS_BLANK(cur)) {
				5536	if (ctxt->sax->ignorableWhitespace != NULL)
				5537	ctxt->sax->ignorableWhitespace(
				5538	ctxt->userData, &cur, 1);
				5539	} else {
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	5540	if (ctxt->sax->characters != NULL)
				5541	ctxt->sax->characters(
				5542	ctxt->userData, &cur, 1);
				5543	}
				5544	}
				5545	ctxt->token = 0;
				5546	ctxt->checkIndex = 0;
				5547	NEXT;
				5548	}
				5549	break;
				5550	}
				5551	if (avail < 2)
				5552	goto done;
				5553	cur = in->cur[0];
				5554	next = in->cur[1];
				5555	cons = ctxt->nbChars;
				5556	/*
				5557	* Sometimes DOCTYPE arrives in the middle of the document
				5558	*/
				5559	if ((cur == '<') && (next == '!') &&
				5560	(UPP(2) == 'D') && (UPP(3) == 'O') &&
				5561	(UPP(4) == 'C') && (UPP(5) == 'T') &&
				5562	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
				5563	(UPP(8) == 'E')) {
				5564	if ((!terminate) &&
				5565	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5566	goto done;
				5567	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5568	ctxt->sax->error(ctxt->userData,
				5569	"Misplaced DOCTYPE declaration\n");
				5570	ctxt->wellFormed = 0;
				5571	docbParseDocTypeDecl(ctxt);
				5572	} else if ((cur == '<') && (next == '!') &&
				5573	(in->cur[2] == '-') && (in->cur[3] == '-')) {
				5574	if ((!terminate) &&
				5575	(docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
				5576	goto done;
				5577	#ifdef DEBUG_PUSH
				5578	xmlGenericError(xmlGenericErrorContext,
				5579	"HPP: Parsing Comment\n");
				5580	#endif
				5581	docbParseComment(ctxt);
				5582	ctxt->instate = XML_PARSER_CONTENT;
				5583	} else if ((cur == '<') && (next == '!') && (avail < 4)) {
				5584	goto done;
				5585	} else if ((cur == '<') && (next == '/')) {
				5586	ctxt->instate = XML_PARSER_END_TAG;
				5587	ctxt->checkIndex = 0;
				5588	#ifdef DEBUG_PUSH
				5589	xmlGenericError(xmlGenericErrorContext,
				5590	"HPP: entering END_TAG\n");
				5591	#endif
				5592	break;
				5593	} else if (cur == '<') {
				5594	ctxt->instate = XML_PARSER_START_TAG;
				5595	ctxt->checkIndex = 0;
				5596	#ifdef DEBUG_PUSH
				5597	xmlGenericError(xmlGenericErrorContext,
				5598	"HPP: entering START_TAG\n");
				5599	#endif
				5600	break;
				5601	} else if (cur == '&') {
				5602	if ((!terminate) &&
				5603	(docbParseLookupSequence(ctxt, ';', 0, 0) < 0))
				5604	goto done;
				5605	#ifdef DEBUG_PUSH
				5606	xmlGenericError(xmlGenericErrorContext,
				5607	"HPP: Parsing Reference\n");
				5608	#endif
				5609	/* TODO: check generation of subtrees if noent !!! */
				5610	docbParseReference(ctxt);
				5611	} else {
				5612	/* TODO Avoid the extra copy, handle directly !!!!!! */
				5613	/*
				5614	* Goal of the following test is :
				5615	* - minimize calls to the SAX 'character' callback
				5616	* when they are mergeable
				5617	*/
				5618	if ((ctxt->inputNr == 1) &&
				5619	(avail < DOCB_PARSER_BIG_BUFFER_SIZE)) {
				5620	if ((!terminate) &&
				5621	(docbParseLookupSequence(ctxt, '<', 0, 0) < 0))
				5622	goto done;
				5623	}
				5624	ctxt->checkIndex = 0;
				5625	#ifdef DEBUG_PUSH
				5626	xmlGenericError(xmlGenericErrorContext,
				5627	"HPP: Parsing char data\n");
				5628	#endif
				5629	docbParseCharData(ctxt);
				5630	}
				5631	if (cons == ctxt->nbChars) {
				5632	if (ctxt->node != NULL) {
				5633	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5634	ctxt->sax->error(ctxt->userData,
				5635	"detected an error in element content\n");
				5636	ctxt->wellFormed = 0;
				5637	NEXT;
				5638	}
				5639	break;
				5640	}
				5641
				5642	break;
				5643	}
				5644	case XML_PARSER_END_TAG:
				5645	if (avail < 2)
				5646	goto done;
				5647	if ((!terminate) &&
				5648	(docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
				5649	goto done;
				5650	docbParseEndTag(ctxt);
				5651	if (ctxt->nameNr == 0) {
				5652	ctxt->instate = XML_PARSER_EPILOG;
				5653	} else {
				5654	ctxt->instate = XML_PARSER_CONTENT;
				5655	}
				5656	ctxt->checkIndex = 0;
				5657	#ifdef DEBUG_PUSH
				5658	xmlGenericError(xmlGenericErrorContext,
				5659	"HPP: entering CONTENT\n");
				5660	#endif
				5661	break;
				5662	case XML_PARSER_CDATA_SECTION:
				5663	xmlGenericError(xmlGenericErrorContext,
				5664	"HPP: internal error, state == CDATA\n");
				5665	ctxt->instate = XML_PARSER_CONTENT;
				5666	ctxt->checkIndex = 0;
				5667	#ifdef DEBUG_PUSH
				5668	xmlGenericError(xmlGenericErrorContext,
				5669	"HPP: entering CONTENT\n");
				5670	#endif
				5671	break;
				5672	case XML_PARSER_DTD:
				5673	xmlGenericError(xmlGenericErrorContext,
				5674	"HPP: internal error, state == DTD\n");
				5675	ctxt->instate = XML_PARSER_CONTENT;
				5676	ctxt->checkIndex = 0;
				5677	#ifdef DEBUG_PUSH
				5678	xmlGenericError(xmlGenericErrorContext,
				5679	"HPP: entering CONTENT\n");
				5680	#endif
				5681	break;
				5682	case XML_PARSER_COMMENT:
				5683	xmlGenericError(xmlGenericErrorContext,
				5684	"HPP: internal error, state == COMMENT\n");
				5685	ctxt->instate = XML_PARSER_CONTENT;
				5686	ctxt->checkIndex = 0;
				5687	#ifdef DEBUG_PUSH
				5688	xmlGenericError(xmlGenericErrorContext,
				5689	"HPP: entering CONTENT\n");
				5690	#endif
				5691	break;
				5692	case XML_PARSER_PI:
				5693	xmlGenericError(xmlGenericErrorContext,
				5694	"HPP: internal error, state == PI\n");
				5695	ctxt->instate = XML_PARSER_CONTENT;
				5696	ctxt->checkIndex = 0;
				5697	#ifdef DEBUG_PUSH
				5698	xmlGenericError(xmlGenericErrorContext,
				5699	"HPP: entering CONTENT\n");
				5700	#endif
				5701	break;
				5702	case XML_PARSER_ENTITY_DECL:
				5703	xmlGenericError(xmlGenericErrorContext,
				5704	"HPP: internal error, state == ENTITY_DECL\n");
				5705	ctxt->instate = XML_PARSER_CONTENT;
				5706	ctxt->checkIndex = 0;
				5707	#ifdef DEBUG_PUSH
				5708	xmlGenericError(xmlGenericErrorContext,
				5709	"HPP: entering CONTENT\n");
				5710	#endif
				5711	break;
				5712	case XML_PARSER_ENTITY_VALUE:
				5713	xmlGenericError(xmlGenericErrorContext,
				5714	"HPP: internal error, state == ENTITY_VALUE\n");
				5715	ctxt->instate = XML_PARSER_CONTENT;
				5716	ctxt->checkIndex = 0;
				5717	#ifdef DEBUG_PUSH
				5718	xmlGenericError(xmlGenericErrorContext,
				5719	"HPP: entering DTD\n");
				5720	#endif
				5721	break;
				5722	case XML_PARSER_ATTRIBUTE_VALUE:
				5723	xmlGenericError(xmlGenericErrorContext,
				5724	"HPP: internal error, state == ATTRIBUTE_VALUE\n");
				5725	ctxt->instate = XML_PARSER_START_TAG;
				5726	ctxt->checkIndex = 0;
				5727	#ifdef DEBUG_PUSH
				5728	xmlGenericError(xmlGenericErrorContext,
				5729	"HPP: entering START_TAG\n");
				5730	#endif
				5731	break;
				5732	case XML_PARSER_SYSTEM_LITERAL:
				5733	xmlGenericError(xmlGenericErrorContext,
				5734	"HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n");
				5735	ctxt->instate = XML_PARSER_CONTENT;
				5736	ctxt->checkIndex = 0;
				5737	#ifdef DEBUG_PUSH
				5738	xmlGenericError(xmlGenericErrorContext,
				5739	"HPP: entering CONTENT\n");
				5740	#endif
				5741	break;
				5742
				5743	case XML_PARSER_IGNORE:
				5744	xmlGenericError(xmlGenericErrorContext,
				5745	"HPP: internal error, state == XML_PARSER_IGNORE\n");
				5746	ctxt->instate = XML_PARSER_CONTENT;
				5747	ctxt->checkIndex = 0;
				5748	#ifdef DEBUG_PUSH
				5749	xmlGenericError(xmlGenericErrorContext,
				5750	"HPP: entering CONTENT\n");
				5751	#endif
				5752	break;
Daniel Veillard	044fc6b	2002-03-04 17:09:44 +0000	[diff] [blame]	5753	case XML_PARSER_PUBLIC_LITERAL:
				5754	xmlGenericError(xmlGenericErrorContext,
				5755	"HPP: internal error, state == XML_PARSER_LITERAL\n");
				5756	ctxt->instate = XML_PARSER_CONTENT;
				5757	ctxt->checkIndex = 0;
				5758	#ifdef DEBUG_PUSH
				5759	xmlGenericError(xmlGenericErrorContext,
				5760	"HPP: entering CONTENT\n");
				5761	#endif
				5762	break;
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	5763	}
				5764	}
				5765	done:
				5766	if ((avail == 0) && (terminate)) {
				5767	docbAutoClose(ctxt, NULL);
				5768	if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
				5769	/*
				5770	* SAX: end of the document processing.
				5771	*/
				5772	ctxt->instate = XML_PARSER_EOF;
				5773	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5774	ctxt->sax->endDocument(ctxt->userData);
				5775	}
				5776	}
				5777	if ((ctxt->myDoc != NULL) &&
				5778	((terminate) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|
				5779	(ctxt->instate == XML_PARSER_EPILOG))) {
				5780	xmlDtdPtr dtd;
				5781	dtd = ctxt->myDoc->intSubset;
				5782	if (dtd == NULL)
				5783	ctxt->myDoc->intSubset =
				5784	xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
				5785	BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
				5786	BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
				5787	}
				5788	#ifdef DEBUG_PUSH
				5789	xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
				5790	#endif
				5791	return(ret);
				5792	}
				5793
				5794	/**
				5795	* docbParseChunk:
				5796	* @ctxt: an XML parser context
				5797	* @chunk: an char array
				5798	* @size: the size in byte of the chunk
				5799	* @terminate: last chunk indicator
				5800	*
				5801	* Parse a Chunk of memory
				5802	*
				5803	* Returns zero if no error, the xmlParserErrors otherwise.
				5804	*/
				5805	int
				5806	docbParseChunk(docbParserCtxtPtr ctxt, const char *chunk, int size,
				5807	int terminate) {
				5808	if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
				5809	(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
				5810	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
				5811	int cur = ctxt->input->cur - ctxt->input->base;
				5812
				5813	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
				5814	ctxt->input->base = ctxt->input->buf->buffer->content + base;
				5815	ctxt->input->cur = ctxt->input->base + cur;
				5816	#ifdef DEBUG_PUSH
				5817	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
				5818	#endif
				5819
				5820	if ((terminate) \|\| (ctxt->input->buf->buffer->use > 80))
				5821	docbParseTryOrFinish(ctxt, terminate);
				5822	} else if (ctxt->instate != XML_PARSER_EOF) {
				5823	xmlParserInputBufferPush(ctxt->input->buf, 0, "");
				5824	docbParseTryOrFinish(ctxt, terminate);
				5825	}
				5826	if (terminate) {
				5827	if ((ctxt->instate != XML_PARSER_EOF) &&
				5828	(ctxt->instate != XML_PARSER_EPILOG) &&
				5829	(ctxt->instate != XML_PARSER_MISC)) {
				5830	ctxt->errNo = XML_ERR_DOCUMENT_END;
				5831	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
				5832	ctxt->sax->error(ctxt->userData,
				5833	"Extra content at the end of the document\n");
				5834	ctxt->wellFormed = 0;
				5835	}
				5836	if (ctxt->instate != XML_PARSER_EOF) {
				5837	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
				5838	ctxt->sax->endDocument(ctxt->userData);
				5839	}
				5840	ctxt->instate = XML_PARSER_EOF;
				5841	}
				5842	return((xmlParserErrors) ctxt->errNo);
				5843	}
				5844
				/************************************************************************
				 *									*
				 *			User entry points				*
				 *									*
				 ************************************************************************/
				5850
				5851	/**
				5852	* docbCreatePushParserCtxt :
				5853	* @sax: a SAX handler
				5854	* @user_data: The user data returned on SAX callbacks
				5855	* @chunk: a pointer to an array of chars
				5856	* @size: number of chars in the array
				5857	* @filename: an optional file name or URI
				5858	* @enc: an optional encoding
				5859	*
				5860	* Create a parser context for using the DocBook SGML parser in push mode
				5861	* To allow content encoding detection, @size should be >= 4
				5862	* The value of @filename is used for fetching external entities
				5863	* and error/warning reports.
				5864	*
				5865	* Returns the new parser context or NULL
				5866	*/
				5867	docbParserCtxtPtr
				5868	docbCreatePushParserCtxt(docbSAXHandlerPtr sax, void *user_data,
				5869	const char chunk, int size, const char filename,
				5870	xmlCharEncoding enc) {
				5871	docbParserCtxtPtr ctxt;
				5872	docbParserInputPtr inputStream;
				5873	xmlParserInputBufferPtr buf;
				5874
				5875	buf = xmlAllocParserInputBuffer(enc);
				5876	if (buf == NULL) return(NULL);
				5877
				5878	ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
				5879	if (ctxt == NULL) {
				5880	xmlFree(buf);
				5881	return(NULL);
				5882	}
				5883	memset(ctxt, 0, sizeof(docbParserCtxt));
				5884	docbInitParserCtxt(ctxt);
				5885	if (sax != NULL) {
				5886	if (ctxt->sax != &docbDefaultSAXHandler)
				5887	xmlFree(ctxt->sax);
				5888	ctxt->sax = (docbSAXHandlerPtr) xmlMalloc(sizeof(docbSAXHandler));
				5889	if (ctxt->sax == NULL) {
				5890	xmlFree(buf);
				5891	xmlFree(ctxt);
				5892	return(NULL);
				5893	}
				5894	memcpy(ctxt->sax, sax, sizeof(docbSAXHandler));
				5895	if (user_data != NULL)
				5896	ctxt->userData = user_data;
				5897	}
				5898	if (filename == NULL) {
				5899	ctxt->directory = NULL;
				5900	} else {
				5901	ctxt->directory = xmlParserGetDirectory(filename);
				5902	}
				5903
				5904	inputStream = docbNewInputStream(ctxt);
				5905	if (inputStream == NULL) {
				5906	xmlFreeParserCtxt(ctxt);
				5907	return(NULL);
				5908	}
				5909
				5910	if (filename == NULL)
				5911	inputStream->filename = NULL;
				5912	else
				5913	inputStream->filename = xmlMemStrdup(filename);
				5914	inputStream->buf = buf;
				5915	inputStream->base = inputStream->buf->buffer->content;
				5916	inputStream->cur = inputStream->buf->buffer->content;
				5917
				5918	inputPush(ctxt, inputStream);
				5919
				5920	if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
				5921	(ctxt->input->buf != NULL)) {
				5922	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
				5923	#ifdef DEBUG_PUSH
				5924	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
				5925	#endif
				5926	}
				5927
				5928	return(ctxt);
				5929	}
				5930
				5931	/**
				5932	* docbSAXParseDoc :
				5933	* @cur: a pointer to an array of xmlChar
				5934	* @encoding: a free form C string describing the SGML document encoding, or NULL
				5935	* @sax: the SAX handler block
				5936	* @userData: if using SAX, this pointer will be provided on callbacks.
				5937	*
				5938	* parse an SGML in-memory document and build a tree.
				5939	* It use the given SAX function block to handle the parsing callback.
				5940	* If sax is NULL, fallback to the default DOM tree building routines.
				5941	*
				5942	* Returns the resulting document tree
				5943	*/
				5944
				5945	docbDocPtr
				5946	docbSAXParseDoc(xmlChar cur, const char encoding, docbSAXHandlerPtr sax, void *userData) {
				5947	docbDocPtr ret;
				5948	docbParserCtxtPtr ctxt;
				5949
				5950	if (cur == NULL) return(NULL);
				5951
				5952
				5953	ctxt = docbCreateDocParserCtxt(cur, encoding);
				5954	if (ctxt == NULL) return(NULL);
				5955	if (sax != NULL) {
				5956	ctxt->sax = sax;
				5957	ctxt->userData = userData;
				5958	}
				5959
				5960	docbParseDocument(ctxt);
				5961	ret = ctxt->myDoc;
				5962	if (sax != NULL) {
				5963	ctxt->sax = NULL;
				5964	ctxt->userData = NULL;
				5965	}
				5966	docbFreeParserCtxt(ctxt);
				5967
				5968	return(ret);
				5969	}
				5970
				5971	/**
				5972	* docbParseDoc :
				5973	* @cur: a pointer to an array of xmlChar
				5974	* @encoding: a free form C string describing the SGML document encoding, or NULL
				5975	*
				5976	* parse an SGML in-memory document and build a tree.
				5977	*
				5978	* Returns the resulting document tree
				5979	*/
				5980
				5981	docbDocPtr
				5982	docbParseDoc(xmlChar cur, const char encoding) {
				5983	return(docbSAXParseDoc(cur, encoding, NULL, NULL));
				5984	}
				5985
				5986
				5987	/**
				5988	* docbCreateFileParserCtxt :
				5989	* @filename: the filename
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame]	5990	* @encoding: the SGML document encoding, or NULL
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	5991	*
				5992	* Create a parser context for a file content.
				5993	* Automatic support for ZLIB/Compress compressed document is provided
				5994	* by default if found at compile-time.
				5995	*
				5996	* Returns the new parser context or NULL
				5997	*/
				5998	docbParserCtxtPtr
Daniel Veillard	1034da2	2001-04-25 19:06:28 +0000	[diff] [blame]	5999	docbCreateFileParserCtxt(const char *filename,
				6000	const char *encoding ATTRIBUTE_UNUSED)
Daniel Veillard	eae522a	2001-04-23 13:41:34 +0000	[diff] [blame]	6001	{
				6002	docbParserCtxtPtr ctxt;
				6003	docbParserInputPtr inputStream;
				6004	xmlParserInputBufferPtr buf;
				6005	/* sgmlCharEncoding enc; */
				6006
				6007	buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
				6008	if (buf == NULL) return(NULL);
				6009
				6010	ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
				6011	if (ctxt == NULL) {
				6012	perror("malloc");
				6013	return(NULL);
				6014	}
				6015	memset(ctxt, 0, sizeof(docbParserCtxt));
				6016	docbInitParserCtxt(ctxt);
				6017	inputStream = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput));
				6018	if (inputStream == NULL) {
				6019	perror("malloc");
				6020	xmlFree(ctxt);
				6021	return(NULL);
				6022	}
				6023	memset(inputStream, 0, sizeof(docbParserInput));
				6024
				6025	inputStream->filename = xmlMemStrdup(filename);
				6026	inputStream->line = 1;
				6027	inputStream->col = 1;
				6028	inputStream->buf = buf;
				6029	inputStream->directory = NULL;
				6030
				6031	inputStream->base = inputStream->buf->buffer->content;
				6032	inputStream->cur = inputStream->buf->buffer->content;
				6033	inputStream->free = NULL;
				6034
				6035	inputPush(ctxt, inputStream);
				6036	return(ctxt);
				6037	}
				6038
				6039	/**
				6040	* docbSAXParseFile :
				6041	* @filename: the filename
				6042	* @encoding: a free form C string describing the SGML document encoding, or NULL
				6043	* @sax: the SAX handler block
				6044	* @userData: if using SAX, this pointer will be provided on callbacks.
				6045	*
				6046	* parse an SGML file and build a tree. Automatic support for ZLIB/Compress
				6047	* compressed document is provided by default if found at compile-time.
				6048	* It use the given SAX function block to handle the parsing callback.
				6049	* If sax is NULL, fallback to the default DOM tree building routines.
				6050	*
				6051	* Returns the resulting document tree
				6052	*/
				6053
				6054	docbDocPtr
				6055	docbSAXParseFile(const char filename, const char encoding, docbSAXHandlerPtr sax,
				6056	void *userData) {
				6057	docbDocPtr ret;
				6058	docbParserCtxtPtr ctxt;
				6059	docbSAXHandlerPtr oldsax = NULL;
				6060
				6061	ctxt = docbCreateFileParserCtxt(filename, encoding);
				6062	if (ctxt == NULL) return(NULL);
				6063	if (sax != NULL) {
				6064	oldsax = ctxt->sax;
				6065	ctxt->sax = sax;
				6066	ctxt->userData = userData;
				6067	}
				6068
				6069	docbParseDocument(ctxt);
				6070
				6071	ret = ctxt->myDoc;
				6072	if (sax != NULL) {
				6073	ctxt->sax = oldsax;
				6074	ctxt->userData = NULL;
				6075	}
				6076	docbFreeParserCtxt(ctxt);
				6077
				6078	return(ret);
				6079	}
				6080
				6081	/**
				6082	* docbParseFile :
				6083	* @filename: the filename
				6084	* @encoding: a free form C string describing document encoding, or NULL
				6085	*
				6086	* parse a Docbook SGML file and build a tree. Automatic support for
				6087	* ZLIB/Compress compressed document is provided by default if found
				6088	* at compile-time.
				6089	*
				6090	* Returns the resulting document tree
				6091	*/
				6092
				6093	docbDocPtr
				6094	docbParseFile(const char filename, const char encoding) {
				6095	return(docbSAXParseFile(filename, encoding, NULL, NULL));
				6096	}
				6097
				6098	#endif /* LIBXML_DOCB_ENABLED */