| /* |
| * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| package com.sun.xml.internal.dtdparser; |
| |
| import org.xml.sax.EntityResolver; |
| import org.xml.sax.InputSource; |
| import org.xml.sax.Locator; |
| import org.xml.sax.SAXException; |
| import org.xml.sax.SAXParseException; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Enumeration; |
| import java.util.Hashtable; |
| import java.util.Locale; |
| import java.util.Set; |
| import java.util.Vector; |
| import java.util.logging.Level; |
| import java.util.logging.Logger; |
| |
| /** |
| * This implements parsing of XML 1.0 DTDs. |
| * <p> |
| * This conforms to the portion of the XML 1.0 specification related to the |
| * external DTD subset. |
| * <p> |
| * For multi-language applications (such as web servers using XML processing to |
| * create dynamic content), a method supports choosing a locale for parser |
| * diagnostics which is both understood by the message recipient and supported |
| * by the parser. |
| * <p> |
| * This parser produces a stream of parse events. It supports some features |
| * (exposing comments, CDATA sections, and entity references) which are not |
| * required to be reported by conformant XML processors. |
| * |
| * @author David Brownell |
| * @author Janet Koenig |
| * @author Kohsuke KAWAGUCHI |
| * @version $Id: DTDParser.java,v 1.2 2009-04-16 15:25:49 snajper Exp $ |
| */ |
| public class DTDParser { |
| |
| public final static String TYPE_CDATA = "CDATA"; |
| public final static String TYPE_ID = "ID"; |
| public final static String TYPE_IDREF = "IDREF"; |
| public final static String TYPE_IDREFS = "IDREFS"; |
| public final static String TYPE_ENTITY = "ENTITY"; |
| public final static String TYPE_ENTITIES = "ENTITIES"; |
| public final static String TYPE_NMTOKEN = "NMTOKEN"; |
| public final static String TYPE_NMTOKENS = "NMTOKENS"; |
| public final static String TYPE_NOTATION = "NOTATION"; |
| public final static String TYPE_ENUMERATION = "ENUMERATION"; |
| // stack of input entities being merged |
| private InputEntity in; |
| // temporaries reused during parsing |
| private StringBuffer strTmp; |
| private char nameTmp[]; |
| private NameCache nameCache; |
| private char charTmp[] = new char[2]; |
| // temporary DTD parsing state |
| private boolean doLexicalPE; |
| // DTD state, used during parsing |
| // private SimpleHashtable elements = new SimpleHashtable (47); |
| protected final Set declaredElements = new java.util.HashSet(); |
| private SimpleHashtable params = new SimpleHashtable(7); |
| // exposed to package-private subclass |
| Hashtable notations = new Hashtable(7); |
| SimpleHashtable entities = new SimpleHashtable(17); |
| private SimpleHashtable ids = new SimpleHashtable(); |
| // listeners for DTD parsing events |
| private DTDEventListener dtdHandler; |
| private EntityResolver resolver; |
| private Locale locale; |
| // string constants -- use these copies so "==" works |
| // package private |
| static final String strANY = "ANY"; |
| static final String strEMPTY = "EMPTY"; |
| |
| private static final Logger LOGGER = Logger.getLogger(DTDParser.class.getName()); |
| |
| /** |
| * Used by applications to request locale for diagnostics. |
| * |
| * @param l The locale to use, or null to use system defaults (which may |
| * include only message IDs). |
| */ |
| public void setLocale(Locale l) throws SAXException { |
| |
| if (l != null && !messages.isLocaleSupported(l.toString())) { |
| throw new SAXException(messages.getMessage(locale, |
| "P-078", new Object[]{l})); |
| } |
| locale = l; |
| } |
| |
| /** |
| * Returns the diagnostic locale. |
| */ |
| public Locale getLocale() { |
| return locale; |
| } |
| |
| /** |
| * Chooses a client locale to use for diagnostics, using the first language |
| * specified in the list that is supported by this parser. That locale is |
| * then set using <a href="#setLocale(java.util.Locale)"> setLocale()</a>. |
| * Such a list could be provided by a variety of user preference mechanisms, |
| * including the HTTP <em>Accept-Language</em> header field. |
| * |
| * @param languages Array of language specifiers, ordered with the most |
| * preferable one at the front. For example, "en-ca" then "fr-ca", followed |
| * by "zh_CN". Both RFC 1766 and Java styles are supported. |
| * @return The chosen locale, or null. |
| * @see MessageCatalog |
| */ |
| public Locale chooseLocale(String languages[]) |
| throws SAXException { |
| |
| Locale l = messages.chooseLocale(languages); |
| |
| if (l != null) { |
| setLocale(l); |
| } |
| return l; |
| } |
| |
| /** |
| * Lets applications control entity resolution. |
| */ |
| public void setEntityResolver(EntityResolver r) { |
| |
| resolver = r; |
| } |
| |
| /** |
| * Returns the object used to resolve entities |
| */ |
| public EntityResolver getEntityResolver() { |
| |
| return resolver; |
| } |
| |
| /** |
| * Used by applications to set handling of DTD parsing events. |
| */ |
| public void setDtdHandler(DTDEventListener handler) { |
| dtdHandler = handler; |
| if (handler != null) { |
| handler.setDocumentLocator(new Locator() { |
| @Override |
| public String getPublicId() { |
| return DTDParser.this.getPublicId(); |
| } |
| |
| @Override |
| public String getSystemId() { |
| return DTDParser.this.getSystemId(); |
| } |
| |
| @Override |
| public int getLineNumber() { |
| return DTDParser.this.getLineNumber(); |
| } |
| |
| @Override |
| public int getColumnNumber() { |
| return DTDParser.this.getColumnNumber(); |
| } |
| }); |
| } |
| } |
| |
| /** |
| * Returns the handler used to for DTD parsing events. |
| */ |
| public DTDEventListener getDtdHandler() { |
| return dtdHandler; |
| } |
| |
| /** |
| * Parse a DTD. |
| */ |
| public void parse(InputSource in) |
| throws IOException, SAXException { |
| init(); |
| parseInternal(in); |
| } |
| |
| /** |
| * Parse a DTD. |
| */ |
| public void parse(String uri) |
| throws IOException, SAXException { |
| InputSource inSource; |
| |
| init(); |
| // System.out.println ("parse (\"" + uri + "\")"); |
| inSource = resolver.resolveEntity(null, uri); |
| |
| // If custom resolver punts resolution to parser, handle it ... |
| if (inSource == null) { |
| inSource = Resolver.createInputSource(new java.net.URL(uri), false); |
| |
| // ... or if custom resolver doesn't correctly construct the |
| // input entity, patch it up enough so relative URIs work, and |
| // issue a warning to minimize later confusion. |
| } else if (inSource.getSystemId() == null) { |
| warning("P-065", null); |
| inSource.setSystemId(uri); |
| } |
| |
| parseInternal(inSource); |
| } |
| |
| // makes sure the parser is reset to "before a document" |
| private void init() { |
| in = null; |
| |
| // alloc temporary data used in parsing |
| strTmp = new StringBuffer(); |
| nameTmp = new char[20]; |
| nameCache = new NameCache(); |
| |
| // reset doc info |
| // isInAttribute = false; |
| |
| doLexicalPE = false; |
| |
| entities.clear(); |
| notations.clear(); |
| params.clear(); |
| // elements.clear (); |
| declaredElements.clear(); |
| |
| // initialize predefined references ... re-interpreted later |
| builtin("amp", "&"); |
| builtin("lt", "<"); |
| builtin("gt", ">"); |
| builtin("quot", "\""); |
| builtin("apos", "'"); |
| |
| if (locale == null) { |
| locale = Locale.getDefault(); |
| } |
| if (resolver == null) { |
| resolver = new Resolver(); |
| } |
| if (dtdHandler == null) { |
| dtdHandler = new DTDHandlerBase(); |
| } |
| } |
| |
| private void builtin(String entityName, String entityValue) { |
| InternalEntity entity; |
| entity = new InternalEntity(entityName, entityValue.toCharArray()); |
| entities.put(entityName, entity); |
| } |
| |
| //////////////////////////////////////////////////////////////// |
| // |
| // parsing is by recursive descent, code roughly |
| // following the BNF rules except tweaked for simple |
| // lookahead. rules are more or less in numeric order, |
| // except where code sharing suggests other structures. |
| // |
| // a classic benefit of recursive descent parsers: it's |
| // relatively easy to get diagnostics that make sense. |
| // |
| //////////////////////////////////////////////////////////////// |
| @SuppressWarnings("CallToThreadDumpStack") |
| private void parseInternal(InputSource input) |
| throws IOException, SAXException { |
| |
| if (input == null) { |
| fatal("P-000"); |
| } |
| |
| try { |
| in = InputEntity.getInputEntity(dtdHandler, locale); |
| in.init(input, null, null, false); |
| |
| dtdHandler.startDTD(in); |
| |
| // [30] extSubset ::= TextDecl? extSubsetDecl |
| // [31] extSubsetDecl ::= ( markupdecl | conditionalSect |
| // | PEReference | S )* |
| // ... same as [79] extPE, which is where the code is |
| |
| ExternalEntity externalSubset = new ExternalEntity(in); |
| externalParameterEntity(externalSubset); |
| |
| if (!in.isEOF()) { |
| fatal("P-001", new Object[]{Integer.toHexString(((int) getc()))}); |
| } |
| afterRoot(); |
| dtdHandler.endDTD(); |
| |
| } catch (EndOfInputException e) { |
| if (!in.isDocument()) { |
| String name = in.getName(); |
| do { // force a relevant URI and line number |
| in = in.pop(); |
| } while (in.isInternal()); |
| fatal("P-002", new Object[]{name}); |
| } else { |
| fatal("P-003", null); |
| } |
| } catch (RuntimeException e) { |
| LOGGER.log(Level.SEVERE, "Internal DTD parser error.", e); |
| throw new SAXParseException(e.getMessage() != null |
| ? e.getMessage() : e.getClass().getName(), |
| getPublicId(), getSystemId(), |
| getLineNumber(), getColumnNumber()); |
| |
| } finally { |
| // recycle temporary data used during parsing |
| strTmp = null; |
| nameTmp = null; |
| nameCache = null; |
| |
| // ditto input sources etc |
| if (in != null) { |
| in.close(); |
| in = null; |
| } |
| |
| // get rid of all DTD info ... some of it would be |
| // useful for editors etc, investigate later. |
| |
| params.clear(); |
| entities.clear(); |
| notations.clear(); |
| declaredElements.clear(); |
| // elements.clear(); |
| ids.clear(); |
| } |
| } |
| |
| void afterRoot() throws SAXException { |
| // Make sure all IDREFs match declared ID attributes. We scan |
| // after the document element is parsed, since XML allows forward |
| // references, and only now can we know if they're all resolved. |
| |
| for (Enumeration e = ids.keys(); |
| e.hasMoreElements();) { |
| String id = (String) e.nextElement(); |
| Boolean value = (Boolean) ids.get(id); |
| if (Boolean.FALSE.equals(value)) { |
| error("V-024", new Object[]{id}); |
| } |
| } |
| } |
| |
| // role is for diagnostics |
| private void whitespace(String roleId) |
| throws IOException, SAXException { |
| |
| // [3] S ::= (#x20 | #x9 | #xd | #xa)+ |
| if (!maybeWhitespace()) { |
| fatal("P-004", new Object[]{messages.getMessage(locale, roleId)}); |
| } |
| } |
| |
| // S? |
| private boolean maybeWhitespace() |
| throws IOException, SAXException { |
| |
| if (!doLexicalPE) { |
| return in.maybeWhitespace(); |
| } |
| |
| // see getc() for the PE logic -- this lets us splice |
| // expansions of PEs in "anywhere". getc() has smarts, |
| // so for external PEs we don't bypass it. |
| |
| // XXX we can marginally speed PE handling, and certainly |
| // be cleaner (hence potentially more correct), by using |
| // the observations that expanded PEs only start and stop |
| // where whitespace is allowed. getc wouldn't need any |
| // "lexical" PE expansion logic, and no other method needs |
| // to handle termination of PEs. (parsing of literals would |
| // still need to pop entities, but not parsing of references |
| // in content.) |
| |
| char c = getc(); |
| boolean saw = false; |
| |
| while (c == ' ' || c == '\t' || c == '\n' || c == '\r') { |
| saw = true; |
| |
| // this gracefully ends things when we stop playing |
| // with internal parameters. caller should have a |
| // grammar rule allowing whitespace at end of entity. |
| if (in.isEOF() && !in.isInternal()) { |
| return saw; |
| } |
| c = getc(); |
| } |
| ungetc(); |
| return saw; |
| } |
| |
| private String maybeGetName() |
| throws IOException, SAXException { |
| |
| NameCacheEntry entry = maybeGetNameCacheEntry(); |
| return (entry == null) ? null : entry.name; |
| } |
| |
| private NameCacheEntry maybeGetNameCacheEntry() |
| throws IOException, SAXException { |
| |
| // [5] Name ::= (Letter|'_'|':') (Namechar)* |
| char c = getc(); |
| |
| if (!XmlChars.isLetter(c) && c != ':' && c != '_') { |
| ungetc(); |
| return null; |
| } |
| return nameCharString(c); |
| } |
| |
| // Used when parsing enumerations |
| private String getNmtoken() |
| throws IOException, SAXException { |
| |
| // [7] Nmtoken ::= (Namechar)+ |
| char c = getc(); |
| if (!XmlChars.isNameChar(c)) { |
| fatal("P-006", new Object[]{Character.valueOf(c)}); |
| } |
| return nameCharString(c).name; |
| } |
| |
| // n.b. this gets used when parsing attribute values (for |
| // internal references) so we can't use strTmp; it's also |
| // a hotspot for CPU and memory in the parser (called at least |
| // once for each element) so this has been optimized a bit. |
| private NameCacheEntry nameCharString(char c) |
| throws IOException, SAXException { |
| |
| int i = 1; |
| |
| nameTmp[0] = c; |
| for (;;) { |
| if ((c = in.getNameChar()) == 0) { |
| break; |
| } |
| if (i >= nameTmp.length) { |
| char tmp[] = new char[nameTmp.length + 10]; |
| System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length); |
| nameTmp = tmp; |
| } |
| nameTmp[i++] = c; |
| } |
| return nameCache.lookupEntry(nameTmp, i); |
| } |
| |
| // |
| // much similarity between parsing entity values in DTD |
| // and attribute values (in DTD or content) ... both follow |
| // literal parsing rules, newline canonicalization, etc |
| // |
| // leaves value in 'strTmp' ... either a "replacement text" (4.5), |
| // or else partially normalized attribute value (the first bit |
| // of 3.3.3's spec, without the "if not CDATA" bits). |
| // |
| @SuppressWarnings("UnusedAssignment") |
| private void parseLiteral(boolean isEntityValue) |
| throws IOException, SAXException { |
| |
| // [9] EntityValue ::= |
| // '"' ([^"&%] | Reference | PEReference)* '"' |
| // | "'" ([^'&%] | Reference | PEReference)* "'" |
| // [10] AttValue ::= |
| // '"' ([^"&] | Reference )* '"' |
| // | "'" ([^'&] | Reference )* "'" |
| char quote = getc(); |
| char c; |
| InputEntity source = in; |
| |
| if (quote != '\'' && quote != '"') { |
| fatal("P-007"); |
| } |
| |
| // don't report entity expansions within attributes, |
| // they're reported "fully expanded" via SAX |
| // isInAttribute = !isEntityValue; |
| |
| // get value into strTmp |
| strTmp = new StringBuffer(); |
| |
| // scan, allowing entity push/pop wherever ... |
| // expanded entities can't terminate the literal! |
| for (;;) { |
| if (in != source && in.isEOF()) { |
| // we don't report end of parsed entities |
| // within attributes (no SAX hooks) |
| in = in.pop(); |
| continue; |
| } |
| if ((c = getc()) == quote && in == source) { |
| break; |
| } |
| |
| // |
| // Basically the "reference in attribute value" |
| // row of the chart in section 4.4 of the spec |
| // |
| if (c == '&') { |
| String entityName = maybeGetName(); |
| |
| if (entityName != null) { |
| nextChar(';', "F-020", entityName); |
| |
| // 4.4 says: bypass these here ... we'll catch |
| // forbidden refs to unparsed entities on use |
| if (isEntityValue) { |
| strTmp.append('&'); |
| strTmp.append(entityName); |
| strTmp.append(';'); |
| continue; |
| } |
| expandEntityInLiteral(entityName, entities, isEntityValue); |
| |
| // character references are always included immediately |
| } else if ((getc()) == '#') { |
| int tmp = parseCharNumber(); |
| |
| if (tmp > 0xffff) { |
| tmp = surrogatesToCharTmp(tmp); |
| strTmp.append(charTmp[0]); |
| if (tmp == 2) { |
| strTmp.append(charTmp[1]); |
| } |
| } else { |
| strTmp.append((char) tmp); |
| } |
| } else { |
| fatal("P-009"); |
| } |
| continue; |
| |
| } |
| |
| // expand parameter entities only within entity value literals |
| if (c == '%' && isEntityValue) { |
| String entityName = maybeGetName(); |
| |
| if (entityName != null) { |
| nextChar(';', "F-021", entityName); |
| expandEntityInLiteral(entityName, params, isEntityValue); |
| continue; |
| } else { |
| fatal("P-011"); |
| } |
| } |
| |
| // For attribute values ... |
| if (!isEntityValue) { |
| // 3.3.3 says whitespace normalizes to space... |
| if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { |
| strTmp.append(' '); |
| continue; |
| } |
| |
| // "<" not legal in parsed literals ... |
| if (c == '<') { |
| fatal("P-012"); |
| } |
| } |
| |
| strTmp.append(c); |
| } |
| // isInAttribute = false; |
| } |
| |
| // does a SINGLE expansion of the entity (often reparsed later) |
| private void expandEntityInLiteral(String name, SimpleHashtable table, |
| boolean isEntityValue) |
| throws IOException, SAXException { |
| |
| Object entity = table.get(name); |
| |
| if (entity instanceof InternalEntity) { |
| InternalEntity value = (InternalEntity) entity; |
| pushReader(value.buf, name, !value.isPE); |
| |
| } else if (entity instanceof ExternalEntity) { |
| if (!isEntityValue) // must be a PE ... |
| { |
| fatal("P-013", new Object[]{name}); |
| } |
| // XXX if this returns false ... |
| pushReader((ExternalEntity) entity); |
| |
| } else if (entity == null) { |
| // |
| // Note: much confusion about whether spec requires such |
| // errors to be fatal in many cases, but none about whether |
| // it allows "normal" errors to be unrecoverable! |
| // |
| fatal((table == params) ? "V-022" : "P-014", |
| new Object[]{name}); |
| } |
| } |
| |
| // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
| // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>' |
| // NOTE: XML spec should explicitly say that PE ref syntax is |
| // ignored in PIs, comments, SystemLiterals, and Pubid Literal |
| // values ... can't process the XML spec's own DTD without doing |
| // that for comments. |
| private String getQuotedString(String type, String extra) |
| throws IOException, SAXException { |
| |
| // use in.getc to bypass PE processing |
| char quote = in.getc(); |
| |
| if (quote != '\'' && quote != '"') { |
| fatal("P-015", new Object[]{ |
| messages.getMessage(locale, type, new Object[]{extra}) |
| }); |
| } |
| |
| char c; |
| |
| strTmp = new StringBuffer(); |
| while ((c = in.getc()) != quote) { |
| strTmp.append((char) c); |
| } |
| return strTmp.toString(); |
| } |
| |
| private String parsePublicId() throws IOException, SAXException { |
| |
| // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'") |
| // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%] |
| String retval = getQuotedString("F-033", null); |
| for (int i = 0; i < retval.length(); i++) { |
| char c = retval.charAt(i); |
| if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1 |
| && !(c >= 'A' && c <= 'Z') |
| && !(c >= 'a' && c <= 'z')) { |
| fatal("P-016", new Object[]{Character.valueOf(c)}); |
| } |
| } |
| strTmp = new StringBuffer(); |
| strTmp.append(retval); |
| return normalize(false); |
| } |
| |
| // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) |
| // handled by: InputEntity.parsedContent() |
| private boolean maybeComment(boolean skipStart) |
| throws IOException, SAXException { |
| |
| // [15] Comment ::= '<!--' |
| // ( (Char - '-') | ('-' (Char - '-'))* |
| // '-->' |
| if (!in.peek(skipStart ? "!--" : "<!--", null)) { |
| return false; |
| } |
| |
| boolean savedLexicalPE = doLexicalPE; |
| boolean saveCommentText; |
| |
| doLexicalPE = false; |
| saveCommentText = false; |
| if (saveCommentText) { |
| strTmp = new StringBuffer(); |
| } |
| |
| oneComment: |
| for (;;) { |
| try { |
| // bypass PE expansion, but permit PEs |
| // to complete ... valid docs won't care. |
| for (;;) { |
| int c = getc(); |
| if (c == '-') { |
| c = getc(); |
| if (c != '-') { |
| if (saveCommentText) { |
| strTmp.append('-'); |
| } |
| ungetc(); |
| continue; |
| } |
| nextChar('>', "F-022", null); |
| break oneComment; |
| } |
| if (saveCommentText) { |
| strTmp.append((char) c); |
| } |
| } |
| } catch (EndOfInputException e) { |
| // |
| // This is fatal EXCEPT when we're processing a PE... |
| // in which case a validating processor reports an error. |
| // External PEs are easy to detect; internal ones we |
| // infer by being an internal entity outside an element. |
| // |
| if (in.isInternal()) { |
| error("V-021", null); |
| } |
| fatal("P-017"); |
| } |
| } |
| doLexicalPE = savedLexicalPE; |
| if (saveCommentText) { |
| dtdHandler.comment(strTmp.toString()); |
| } |
| return true; |
| } |
| |
| private boolean maybePI(boolean skipStart) |
| throws IOException, SAXException { |
| |
| // [16] PI ::= '<?' PITarget |
| // (S (Char* - (Char* '?>' Char*)))? |
| // '?>' |
| // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l') |
| boolean savedLexicalPE = doLexicalPE; |
| |
| if (!in.peek(skipStart ? "?" : "<?", null)) { |
| return false; |
| } |
| doLexicalPE = false; |
| |
| String target = maybeGetName(); |
| |
| if (target == null) { |
| fatal("P-018"); |
| } |
| if ("xml".equals(target)) { |
| fatal("P-019"); |
| } |
| if ("xml".equalsIgnoreCase(target)) { |
| fatal("P-020", new Object[]{target}); |
| } |
| |
| if (maybeWhitespace()) { |
| strTmp = new StringBuffer(); |
| try { |
| for (;;) { |
| // use in.getc to bypass PE processing |
| char c = in.getc(); |
| //Reached the end of PI. |
| if (c == '?' && in.peekc('>')) { |
| break; |
| } |
| strTmp.append(c); |
| } |
| } catch (EndOfInputException e) { |
| fatal("P-021"); |
| } |
| dtdHandler.processingInstruction(target, strTmp.toString()); |
| } else { |
| if (!in.peek("?>", null)) { |
| fatal("P-022"); |
| } |
| dtdHandler.processingInstruction(target, ""); |
| } |
| |
| doLexicalPE = savedLexicalPE; |
| return true; |
| } |
| |
| // [18] CDSect ::= CDStart CData CDEnd |
| // [19] CDStart ::= '<![CDATA[' |
| // [20] CData ::= (Char* - (Char* ']]>' Char*)) |
| // [21] CDEnd ::= ']]>' |
| // |
| // ... handled by InputEntity.unparsedContent() |
| // collapsing several rules together ... |
| // simpler than attribute literals -- no reference parsing! |
| private String maybeReadAttribute(String name, boolean must) |
| throws IOException, SAXException { |
| |
| // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\" |
| // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\" |
| // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\" |
| if (!maybeWhitespace()) { |
| if (!must) { |
| return null; |
| } |
| fatal("P-024", new Object[]{name}); |
| // NOTREACHED |
| } |
| |
| if (!peek(name)) { |
| if (must) { |
| fatal("P-024", new Object[]{name}); |
| } else { |
| // To ensure that the whitespace is there so that when we |
| // check for the next attribute we assure that the |
| // whitespace still exists. |
| ungetc(); |
| return null; |
| } |
| } |
| |
| // [25] Eq ::= S? '=' S? |
| maybeWhitespace(); |
| nextChar('=', "F-023", null); |
| maybeWhitespace(); |
| |
| return getQuotedString("F-035", name); |
| } |
| |
| private void readVersion(boolean must, String versionNum) |
| throws IOException, SAXException { |
| |
| String value = maybeReadAttribute("version", must); |
| |
| // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+ |
| |
| if (must && value == null) { |
| fatal("P-025", new Object[]{versionNum}); |
| } |
| if (value != null) { |
| int length = value.length(); |
| for (int i = 0; i < length; i++) { |
| char c = value.charAt(i); |
| if (!((c >= '0' && c <= '9') |
| || c == '_' || c == '.' |
| || (c >= 'a' && c <= 'z') |
| || (c >= 'A' && c <= 'Z') |
| || c == ':' || c == '-')) { |
| fatal("P-026", new Object[]{value}); |
| } |
| } |
| } |
| if (value != null && !value.equals(versionNum)) { |
| error("P-027", new Object[]{versionNum, value}); |
| } |
| } |
| |
| // common code used by most markup declarations |
| // ... S (Q)Name ... |
| private String getMarkupDeclname(String roleId, boolean qname) |
| throws IOException, SAXException { |
| |
| String name; |
| |
| whitespace(roleId); |
| name = maybeGetName(); |
| if (name == null) { |
| fatal("P-005", new Object[]{messages.getMessage(locale, roleId)}); |
| } |
| return name; |
| } |
| |
| private boolean maybeMarkupDecl() |
| throws IOException, SAXException { |
| |
| // [29] markupdecl ::= elementdecl | Attlistdecl |
| // | EntityDecl | NotationDecl | PI | Comment |
| return maybeElementDecl() |
| || maybeAttlistDecl() |
| || maybeEntityDecl() |
| || maybeNotationDecl() |
| || maybePI(false) |
| || maybeComment(false); |
| } |
| private static final String XmlLang = "xml:lang"; |
| |
| private boolean isXmlLang(String value) { |
| |
| // [33] LanguageId ::= Langcode ('-' Subcode)* |
| // [34] Langcode ::= ISO639Code | IanaCode | UserCode |
| // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z] |
| // [36] IanaCode ::= [iI] '-' SubCode |
| // [37] UserCode ::= [xX] '-' SubCode |
| // [38] SubCode ::= [a-zA-Z]+ |
| |
| // the ISO and IANA codes (and subcodes) are registered, |
| // but that's neither a WF nor a validity constraint. |
| |
| int nextSuffix; |
| char c; |
| |
| if (value.length() < 2) { |
| return false; |
| } |
| c = value.charAt(1); |
| if (c == '-') { // IANA, or user, code |
| c = value.charAt(0); |
| if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X')) { |
| return false; |
| } |
| nextSuffix = 1; |
| } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { |
| // 2 letter ISO code, or error |
| c = value.charAt(0); |
| if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) { |
| return false; |
| } |
| nextSuffix = 2; |
| } else { |
| return false; |
| } |
| |
| // here "suffix" ::= '-' [a-zA-Z]+ suffix* |
| while (nextSuffix < value.length()) { |
| c = value.charAt(nextSuffix); |
| if (c != '-') { |
| break; |
| } |
| while (++nextSuffix < value.length()) { |
| c = value.charAt(nextSuffix); |
| if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) { |
| break; |
| } |
| } |
| } |
| return value.length() == nextSuffix && c != '-'; |
| } |
| |
| // |
| // CHAPTER 3: Logical Structures |
| // |
| /** |
| * To validate, subclassers should at this time make sure that values are of |
| * the declared types:<UL> <LI> ID and IDREF(S) values are Names <LI> |
| * NMTOKEN(S) are Nmtokens <LI> ENUMERATION values match one of the tokens |
| * <LI> NOTATION values match a notation name <LI> ENTITY(IES) values match |
| * an unparsed external entity </UL> |
| * <p> |
| * <P> Separately, make sure IDREF values match some ID provided in the |
| * document (in the afterRoot method). |
| */ |
| /* void validateAttributeSyntax (Attribute attr, String value) |
| throws DTDParseException { |
| // ID, IDREF(S) ... values are Names |
| if (Attribute.ID == attr.type()) { |
| if (!XmlNames.isName (value)) |
| error ("V-025", new Object [] { value }); |
| |
| Boolean b = (Boolean) ids.getNonInterned (value); |
| if (b == null || b.equals (Boolean.FALSE)) |
| ids.put (value.intern (), Boolean.TRUE); |
| else |
| error ("V-026", new Object [] { value }); |
| |
| } else if (Attribute.IDREF == attr.type()) { |
| if (!XmlNames.isName (value)) |
| error ("V-027", new Object [] { value }); |
| |
| Boolean b = (Boolean) ids.getNonInterned (value); |
| if (b == null) |
| ids.put (value.intern (), Boolean.FALSE); |
| |
| } else if (Attribute.IDREFS == attr.type()) { |
| StringTokenizer tokenizer = new StringTokenizer (value); |
| Boolean b; |
| boolean sawValue = false; |
| |
| while (tokenizer.hasMoreTokens ()) { |
| value = tokenizer.nextToken (); |
| if (!XmlNames.isName (value)) |
| error ("V-027", new Object [] { value }); |
| b = (Boolean) ids.getNonInterned (value); |
| if (b == null) |
| ids.put (value.intern (), Boolean.FALSE); |
| sawValue = true; |
| } |
| if (!sawValue) |
| error ("V-039", null); |
| |
| |
| // NMTOKEN(S) ... values are Nmtoken(s) |
| } else if (Attribute.NMTOKEN == attr.type()) { |
| if (!XmlNames.isNmtoken (value)) |
| error ("V-028", new Object [] { value }); |
| |
| } else if (Attribute.NMTOKENS == attr.type()) { |
| StringTokenizer tokenizer = new StringTokenizer (value); |
| boolean sawValue = false; |
| |
| while (tokenizer.hasMoreTokens ()) { |
| value = tokenizer.nextToken (); |
| if (!XmlNames.isNmtoken (value)) |
| error ("V-028", new Object [] { value }); |
| sawValue = true; |
| } |
| if (!sawValue) |
| error ("V-032", null); |
| |
| // ENUMERATION ... values match one of the tokens |
| } else if (Attribute.ENUMERATION == attr.type()) { |
| for (int i = 0; i < attr.values().length; i++) |
| if (value.equals (attr.values()[i])) |
| return; |
| error ("V-029", new Object [] { value }); |
| |
| // NOTATION values match a notation name |
| } else if (Attribute.NOTATION == attr.type()) { |
| // |
| // XXX XML 1.0 spec should probably list references to |
| // externally defined notations in standalone docs as |
| // validity errors. Ditto externally defined unparsed |
| // entities; neither should show up in attributes, else |
| // one needs to read the external declarations in order |
| // to make sense of the document (exactly what tagging |
| // a doc as "standalone" intends you won't need to do). |
| // |
| for (int i = 0; i < attr.values().length; i++) |
| if (value.equals (attr.values()[i])) |
| return; |
| error ("V-030", new Object [] { value }); |
| |
| // ENTITY(IES) values match an unparsed entity(ies) |
| } else if (Attribute.ENTITY == attr.type()) { |
| // see note above re standalone |
| if (!isUnparsedEntity (value)) |
| error ("V-031", new Object [] { value }); |
| |
| } else if (Attribute.ENTITIES == attr.type()) { |
| StringTokenizer tokenizer = new StringTokenizer (value); |
| boolean sawValue = false; |
| |
| while (tokenizer.hasMoreTokens ()) { |
| value = tokenizer.nextToken (); |
| // see note above re standalone |
| if (!isUnparsedEntity (value)) |
| error ("V-031", new Object [] { value }); |
| sawValue = true; |
| } |
| if (!sawValue) |
| error ("V-040", null); |
| |
| } else if (Attribute.CDATA != attr.type()) |
| throw new InternalError (attr.type()); |
| } |
| */ |
| /* |
| private boolean isUnparsedEntity (String name) |
| { |
| Object e = entities.getNonInterned (name); |
| if (e == null || !(e instanceof ExternalEntity)) |
| return false; |
| return ((ExternalEntity)e).notation != null; |
| } |
| */ |
| private boolean maybeElementDecl() |
| throws IOException, SAXException { |
| |
| // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>' |
| // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children |
| InputEntity start = peekDeclaration("!ELEMENT"); |
| |
| if (start == null) { |
| return false; |
| } |
| |
| // n.b. for content models where inter-element whitespace is |
| // ignorable, we mark that fact here. |
| String name = getMarkupDeclname("F-015", true); |
| // Element element = (Element) elements.get (name); |
| // boolean declEffective = false; |
| |
| /* |
| if (element != null) { |
| if (element.contentModel() != null) { |
| error ("V-012", new Object [] { name }); |
| } // else <!ATTLIST name ...> came first |
| } else { |
| element = new Element(name); |
| elements.put (element.name(), element); |
| declEffective = true; |
| } |
| */ |
| if (declaredElements.contains(name)) { |
| error("V-012", new Object[]{name}); |
| } else { |
| declaredElements.add(name); |
| // declEffective = true; |
| } |
| |
| short modelType; |
| whitespace("F-000"); |
| if (peek(strEMPTY)) { |
| /// // leave element.contentModel as null for this case. |
| dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY); |
| } else if (peek(strANY)) { |
| /// element.setContentModel(new StringModel(StringModelType.ANY)); |
| dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY); |
| } else { |
| modelType = getMixedOrChildren(name); |
| } |
| |
| dtdHandler.endContentModel(name, modelType); |
| |
| maybeWhitespace(); |
| char c = getc(); |
| if (c != '>') { |
| fatal("P-036", new Object[]{name, Character.valueOf(c)}); |
| } |
| if (start != in) { |
| error("V-013", null); |
| } |
| |
| /// dtdHandler.elementDecl(element); |
| |
| return true; |
| } |
| |
| // We're leaving the content model as a regular expression; |
| // it's an efficient natural way to express such things, and |
| // libraries often interpret them. No whitespace in the |
| // model we store, though! |
| /** |
| * returns content model type. |
| */ |
| private short getMixedOrChildren(String elementName/*Element element*/) |
| throws IOException, SAXException { |
| |
| InputEntity start; |
| |
| // [47] children ::= (choice|seq) ('?'|'*'|'+')? |
| strTmp = new StringBuffer(); |
| |
| nextChar('(', "F-028", elementName); |
| start = in; |
| maybeWhitespace(); |
| strTmp.append('('); |
| |
| short modelType; |
| if (peek("#PCDATA")) { |
| strTmp.append("#PCDATA"); |
| dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED); |
| getMixed(elementName, start); |
| } else { |
| dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN); |
| getcps(elementName, start); |
| } |
| |
| return modelType; |
| } |
| |
| // '(' S? already consumed |
| // matching ')' must be in "start" entity if validating |
| private void getcps(/*Element element,*/String elementName, InputEntity start) |
| throws IOException, SAXException { |
| |
| // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')? |
| // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')' |
| // [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')' |
| boolean decided = false; |
| char type = 0; |
| // ContentModel retval, temp, current; |
| |
| // retval = temp = current = null; |
| |
| dtdHandler.startModelGroup(); |
| |
| do { |
| String tag; |
| |
| tag = maybeGetName(); |
| if (tag != null) { |
| strTmp.append(tag); |
| // temp = new ElementModel(tag); |
| // getFrequency((RepeatableContent)temp); |
| ///-> |
| dtdHandler.childElement(tag, getFrequency()); |
| ///<- |
| } else if (peek("(")) { |
| InputEntity next = in; |
| strTmp.append('('); |
| maybeWhitespace(); |
| // temp = getcps(element, next); |
| // getFrequency(temp); |
| ///-> |
| getcps(elementName, next); |
| /// getFrequency(); <- this looks like a bug |
| ///<- |
| } else { |
| fatal((type == 0) ? "P-039" |
| : ((type == ',') ? "P-037" : "P-038"), |
| new Object[]{Character.valueOf(getc())}); |
| } |
| |
| maybeWhitespace(); |
| if (decided) { |
| char c = getc(); |
| |
| // if (current != null) { |
| // current.addChild(temp); |
| // } |
| if (c == type) { |
| strTmp.append(type); |
| maybeWhitespace(); |
| reportConnector(type); |
| continue; |
| } else if (c == '\u0029') { // rparen |
| ungetc(); |
| continue; |
| } else { |
| fatal((type == 0) ? "P-041" : "P-040", |
| new Object[]{ |
| Character.valueOf(c), |
| Character.valueOf(type) |
| }); |
| } |
| } else { |
| type = getc(); |
| switch (type) { |
| case '|': |
| case ',': |
| reportConnector(type); |
| break; |
| default: |
| // retval = temp; |
| ungetc(); |
| continue; |
| } |
| // retval = (ContentModel)current; |
| decided = true; |
| // current.addChild(temp); |
| strTmp.append(type); |
| } |
| maybeWhitespace(); |
| } while (!peek(")")); |
| |
| if (in != start) { |
| error("V-014", new Object[]{elementName}); |
| } |
| strTmp.append(')'); |
| |
| dtdHandler.endModelGroup(getFrequency()); |
| // return retval; |
| } |
| |
| private void reportConnector(char type) throws SAXException { |
| switch (type) { |
| case '|': |
| dtdHandler.connector(DTDEventListener.CHOICE); ///<- |
| return; |
| case ',': |
| dtdHandler.connector(DTDEventListener.SEQUENCE); ///<- |
| return; |
| default: |
| throw new Error(); //assertion failed. |
| } |
| } |
| |
| private short getFrequency() |
| throws IOException, SAXException { |
| |
| final char c = getc(); |
| |
| if (c == '?') { |
| strTmp.append(c); |
| return DTDEventListener.OCCURENCE_ZERO_OR_ONE; |
| // original.setRepeat(Repeat.ZERO_OR_ONE); |
| } else if (c == '+') { |
| strTmp.append(c); |
| return DTDEventListener.OCCURENCE_ONE_OR_MORE; |
| // original.setRepeat(Repeat.ONE_OR_MORE); |
| } else if (c == '*') { |
| strTmp.append(c); |
| return DTDEventListener.OCCURENCE_ZERO_OR_MORE; |
| // original.setRepeat(Repeat.ZERO_OR_MORE); |
| } else { |
| ungetc(); |
| return DTDEventListener.OCCURENCE_ONCE; |
| } |
| } |
| |
| // '(' S? '#PCDATA' already consumed |
| // matching ')' must be in "start" entity if validating |
| private void getMixed(String elementName, /*Element element,*/ InputEntity start) |
| throws IOException, SAXException { |
| |
| // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
| // | '(' S? '#PCDATA' S? ')' |
| maybeWhitespace(); |
| if (peek("\u0029*") || peek("\u0029")) { |
| if (in != start) { |
| error("V-014", new Object[]{elementName}); |
| } |
| strTmp.append(')'); |
| // element.setContentModel(new StringModel(StringModelType.PCDATA)); |
| return; |
| } |
| |
| ArrayList l = new ArrayList(); |
| // l.add(new StringModel(StringModelType.PCDATA)); |
| |
| |
| while (peek("|")) { |
| String name; |
| |
| strTmp.append('|'); |
| maybeWhitespace(); |
| |
| doLexicalPE = true; |
| name = maybeGetName(); |
| if (name == null) { |
| fatal("P-042", new Object[]{elementName, Integer.toHexString(getc())}); |
| } |
| if (l.contains(name)) { |
| error("V-015", new Object[]{name}); |
| } else { |
| l.add(name); |
| dtdHandler.mixedElement(name); |
| } |
| strTmp.append(name); |
| maybeWhitespace(); |
| } |
| |
| if (!peek("\u0029*")) // right paren |
| { |
| fatal("P-043", new Object[]{elementName, Character.valueOf(getc())}); |
| } |
| if (in != start) { |
| error("V-014", new Object[]{elementName}); |
| } |
| strTmp.append(')'); |
| // ChoiceModel cm = new ChoiceModel((Collection)l); |
| // cm.setRepeat(Repeat.ZERO_OR_MORE); |
| // element.setContentModel(cm); |
| } |
| |
| private boolean maybeAttlistDecl() |
| throws IOException, SAXException { |
| |
| // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' |
| InputEntity start = peekDeclaration("!ATTLIST"); |
| |
| if (start == null) { |
| return false; |
| } |
| |
| String elementName = getMarkupDeclname("F-016", true); |
| // Element element = (Element) elements.get (name); |
| |
| // if (element == null) { |
| // // not yet declared -- no problem. |
| // element = new Element(name); |
| // elements.put(name, element); |
| // } |
| |
| while (!peek(">")) { |
| |
| // [53] AttDef ::= S Name S AttType S DefaultDecl |
| // [54] AttType ::= StringType | TokenizedType | EnumeratedType |
| |
| // look for global attribute definitions, don't expand for now... |
| maybeWhitespace(); |
| char c = getc(); |
| if (c == '%') { |
| String entityName = maybeGetName(); |
| if (entityName != null) { |
| nextChar(';', "F-021", entityName); |
| whitespace("F-021"); |
| continue; |
| } else { |
| fatal("P-011"); |
| } |
| } |
| |
| ungetc(); |
| // look for attribute name otherwise |
| String attName = maybeGetName(); |
| if (attName == null) { |
| fatal("P-044", new Object[]{Character.valueOf(getc())}); |
| } |
| whitespace("F-001"); |
| |
| /// Attribute a = new Attribute (name); |
| |
| String typeName; |
| Vector values = null; // notation/enumeration values |
| |
| // Note: use the type constants from Attribute |
| // so that "==" may be used (faster) |
| |
| // [55] StringType ::= 'CDATA' |
| if (peek(TYPE_CDATA)) /// a.setType(Attribute.CDATA); |
| { |
| typeName = TYPE_CDATA; |
| } // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' |
| // | 'ENTITY' | 'ENTITIES' |
| // | 'NMTOKEN' | 'NMTOKENS' |
| // n.b. if "IDREFS" is there, both "ID" and "IDREF" |
| // match peekahead ... so this order matters! |
| else if (peek(TYPE_IDREFS)) { |
| typeName = TYPE_IDREFS; |
| } else if (peek(TYPE_IDREF)) { |
| typeName = TYPE_IDREF; |
| } else if (peek(TYPE_ID)) { |
| typeName = TYPE_ID; |
| // TODO: should implement this error check? |
| /// if (element.id() != null) { |
| /// error ("V-016", new Object [] { element.id() }); |
| /// } else |
| /// element.setId(name); |
| } else if (peek(TYPE_ENTITY)) { |
| typeName = TYPE_ENTITY; |
| } else if (peek(TYPE_ENTITIES)) { |
| typeName = TYPE_ENTITIES; |
| } else if (peek(TYPE_NMTOKENS)) { |
| typeName = TYPE_NMTOKENS; |
| } else if (peek(TYPE_NMTOKEN)) { |
| typeName = TYPE_NMTOKEN; |
| } // [57] EnumeratedType ::= NotationType | Enumeration |
| // [58] NotationType ::= 'NOTATION' S '(' S? Name |
| // (S? '|' S? Name)* S? ')' |
| else if (peek(TYPE_NOTATION)) { |
| typeName = TYPE_NOTATION; |
| whitespace("F-002"); |
| nextChar('(', "F-029", null); |
| maybeWhitespace(); |
| |
| values = new Vector(); |
| do { |
| String name; |
| if ((name = maybeGetName()) == null) { |
| fatal("P-068"); |
| } |
| // permit deferred declarations |
| if (notations.get(name) == null) { |
| notations.put(name, name); |
| } |
| values.addElement(name); |
| maybeWhitespace(); |
| if (peek("|")) { |
| maybeWhitespace(); |
| } |
| } while (!peek(")")); |
| /// a.setValues(new String [v.size ()]); |
| /// for (int i = 0; i < v.size (); i++) |
| /// a.setValue(i, (String)v.elementAt(i)); |
| |
| // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')' |
| } else if (peek("(")) { |
| /// a.setType(Attribute.ENUMERATION); |
| typeName = TYPE_ENUMERATION; |
| |
| maybeWhitespace(); |
| |
| /// Vector v = new Vector (); |
| values = new Vector(); |
| do { |
| String name = getNmtoken(); |
| /// v.addElement (name); |
| values.addElement(name); |
| maybeWhitespace(); |
| if (peek("|")) { |
| maybeWhitespace(); |
| } |
| } while (!peek(")")); |
| /// a.setValues(new String [v.size ()]); |
| /// for (int i = 0; i < v.size (); i++) |
| /// a.setValue(i, (String)v.elementAt(i)); |
| } else { |
| fatal("P-045", |
| new Object[]{attName, Character.valueOf(getc())}); |
| typeName = null; |
| } |
| |
| short attributeUse; |
| String defaultValue = null; |
| |
| // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' |
| // | (('#FIXED' S)? AttValue) |
| whitespace("F-003"); |
| if (peek("#REQUIRED")) { |
| attributeUse = DTDEventListener.USE_REQUIRED; |
| } /// a.setIsRequired(true); |
| else if (peek("#FIXED")) { |
| /// if (a.type() == Attribute.ID) |
| if (typeName == TYPE_ID) { |
| error("V-017", new Object[]{attName}); |
| } |
| /// a.setIsFixed(true); |
| attributeUse = DTDEventListener.USE_FIXED; |
| whitespace("F-004"); |
| parseLiteral(false); |
| /// if (a.type() != Attribute.CDATA) |
| /// a.setDefaultValue(normalize(false)); |
| /// else |
| /// a.setDefaultValue(strTmp.toString()); |
| |
| if (typeName == TYPE_CDATA) { |
| defaultValue = normalize(false); |
| } else { |
| defaultValue = strTmp.toString(); |
| } |
| |
| // TODO: implement this check |
| /// if (a.type() != Attribute.CDATA) |
| /// validateAttributeSyntax (a, a.defaultValue()); |
| } else if (!peek("#IMPLIED")) { |
| attributeUse = DTDEventListener.USE_IMPLIED; |
| |
| /// if (a.type() == Attribute.ID) |
| if (typeName == TYPE_ID) { |
| error("V-018", new Object[]{attName}); |
| } |
| parseLiteral(false); |
| /// if (a.type() != Attribute.CDATA) |
| /// a.setDefaultValue(normalize(false)); |
| /// else |
| /// a.setDefaultValue(strTmp.toString()); |
| if (typeName == TYPE_CDATA) { |
| defaultValue = normalize(false); |
| } else { |
| defaultValue = strTmp.toString(); |
| } |
| |
| // TODO: implement this check |
| /// if (a.type() != Attribute.CDATA) |
| /// validateAttributeSyntax (a, a.defaultValue()); |
| } else { |
| // TODO: this looks like an fatal error. |
| attributeUse = DTDEventListener.USE_NORMAL; |
| } |
| |
| if (XmlLang.equals(attName) |
| && defaultValue/* a.defaultValue()*/ != null |
| && !isXmlLang(defaultValue/*a.defaultValue()*/)) { |
| error("P-033", new Object[]{defaultValue /*a.defaultValue()*/}); |
| } |
| |
| // TODO: isn't it an error to specify the same attribute twice? |
| /// if (!element.attributes().contains(a)) { |
| /// element.addAttribute(a); |
| /// dtdHandler.attributeDecl(a); |
| /// } |
| |
| String[] v = (values != null) ? (String[]) values.toArray(new String[values.size()]) : null; |
| dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue); |
| maybeWhitespace(); |
| } |
| if (start != in) { |
| error("V-013", null); |
| } |
| return true; |
| } |
| |
| // used when parsing literal attribute values, |
| // or public identifiers. |
| // |
| // input in strTmp |
| private String normalize(boolean invalidIfNeeded) { |
| |
| // this can allocate an extra string... |
| |
| String s = strTmp.toString(); |
| String s2 = s.trim(); |
| boolean didStrip = false; |
| |
| if (s != s2) { |
| s = s2; |
| didStrip = true; |
| } |
| strTmp = new StringBuffer(); |
| for (int i = 0; i < s.length(); i++) { |
| char c = s.charAt(i); |
| if (!XmlChars.isSpace(c)) { |
| strTmp.append(c); |
| continue; |
| } |
| strTmp.append(' '); |
| while (++i < s.length() && XmlChars.isSpace(s.charAt(i))) { |
| didStrip = true; |
| } |
| i--; |
| } |
| if (didStrip) { |
| return strTmp.toString(); |
| } else { |
| return s; |
| } |
| } |
| |
| private boolean maybeConditionalSect() |
| throws IOException, SAXException { |
| |
| // [61] conditionalSect ::= includeSect | ignoreSect |
| |
| if (!peek("<![")) { |
| return false; |
| } |
| |
| String keyword; |
| InputEntity start = in; |
| |
| maybeWhitespace(); |
| |
| if ((keyword = maybeGetName()) == null) { |
| fatal("P-046"); |
| } |
| maybeWhitespace(); |
| nextChar('[', "F-030", null); |
| |
| // [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' |
| // extSubsetDecl ']]>' |
| if ("INCLUDE".equals(keyword)) { |
| for (;;) { |
| while (in.isEOF() && in != start) { |
| in = in.pop(); |
| } |
| if (in.isEOF()) { |
| error("V-020", null); |
| } |
| if (peek("]]>")) { |
| break; |
| } |
| |
| doLexicalPE = false; |
| if (maybeWhitespace()) { |
| continue; |
| } |
| if (maybePEReference()) { |
| continue; |
| } |
| doLexicalPE = true; |
| if (maybeMarkupDecl() || maybeConditionalSect()) { |
| continue; |
| } |
| |
| fatal("P-047"); |
| } |
| |
| // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' |
| // ignoreSectcontents ']]>' |
| // [64] ignoreSectcontents ::= Ignore ('<![' |
| // ignoreSectcontents ']]>' Ignore)* |
| // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) |
| } else if ("IGNORE".equals(keyword)) { |
| int nestlevel = 1; |
| // ignoreSectcontents |
| doLexicalPE = false; |
| while (nestlevel > 0) { |
| char c = getc(); // will pop input entities |
| if (c == '<') { |
| if (peek("![")) { |
| nestlevel++; |
| } |
| } else if (c == ']') { |
| if (peek("]>")) { |
| nestlevel--; |
| } |
| } else { |
| continue; |
| } |
| } |
| } else { |
| fatal("P-048", new Object[]{keyword}); |
| } |
| return true; |
| } |
| |
| // |
| // CHAPTER 4: Physical Structures |
| // |
| // parse decimal or hex numeric character reference |
| private int parseCharNumber() |
| throws IOException, SAXException { |
| |
| char c; |
| int retval = 0; |
| |
| // n.b. we ignore overflow ... |
| if (getc() != 'x') { |
| ungetc(); |
| for (;;) { |
| c = getc(); |
| if (c >= '0' && c <= '9') { |
| retval *= 10; |
| retval += (c - '0'); |
| continue; |
| } |
| if (c == ';') { |
| return retval; |
| } |
| fatal("P-049"); |
| } |
| } else { |
| for (;;) { |
| c = getc(); |
| if (c >= '0' && c <= '9') { |
| retval <<= 4; |
| retval += (c - '0'); |
| continue; |
| } |
| if (c >= 'a' && c <= 'f') { |
| retval <<= 4; |
| retval += 10 + (c - 'a'); |
| continue; |
| } |
| if (c >= 'A' && c <= 'F') { |
| retval <<= 4; |
| retval += 10 + (c - 'A'); |
| continue; |
| } |
| if (c == ';') { |
| return retval; |
| } |
| fatal("P-050"); |
| } |
| } |
| } |
| |
| // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE, |
| // though still subject to the 'Char' construct in XML |
| private int surrogatesToCharTmp(int ucs4) |
| throws SAXException { |
| |
| if (ucs4 <= 0xffff) { |
| if (XmlChars.isChar(ucs4)) { |
| charTmp[0] = (char) ucs4; |
| return 1; |
| } |
| } else if (ucs4 <= 0x0010ffff) { |
| // we represent these as UNICODE surrogate pairs |
| ucs4 -= 0x10000; |
| charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff)); |
| charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff)); |
| return 2; |
| } |
| fatal("P-051", new Object[]{Integer.toHexString(ucs4)}); |
| // NOTREACHED |
| return -1; |
| } |
| |
| private boolean maybePEReference() |
| throws IOException, SAXException { |
| |
| // This is the SYNTACTIC version of this construct. |
| // When processing external entities, there is also |
| // a LEXICAL version; see getc() and doLexicalPE. |
| |
| // [69] PEReference ::= '%' Name ';' |
| if (!in.peekc('%')) { |
| return false; |
| } |
| |
| String name = maybeGetName(); |
| Object entity; |
| |
| if (name == null) { |
| fatal("P-011"); |
| } |
| nextChar(';', "F-021", name); |
| entity = params.get(name); |
| |
| if (entity instanceof InternalEntity) { |
| InternalEntity value = (InternalEntity) entity; |
| pushReader(value.buf, name, false); |
| |
| } else if (entity instanceof ExternalEntity) { |
| pushReader((ExternalEntity) entity); |
| externalParameterEntity((ExternalEntity) entity); |
| |
| } else if (entity == null) { |
| error("V-022", new Object[]{name}); |
| } |
| return true; |
| } |
| |
| private boolean maybeEntityDecl() |
| throws IOException, SAXException { |
| |
| // [70] EntityDecl ::= GEDecl | PEDecl |
| // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' |
| // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>' |
| // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) |
| // [74] PEDef ::= EntityValue | ExternalID |
| // |
| InputEntity start = peekDeclaration("!ENTITY"); |
| |
| if (start == null) { |
| return false; |
| } |
| |
| String entityName; |
| SimpleHashtable defns; |
| ExternalEntity externalId; |
| boolean doStore; |
| |
| // PE expansion gets selectively turned off several places: |
| // in ENTITY declarations (here), in comments, in PIs. |
| |
| // Here, we allow PE entities to be declared, and allows |
| // literals to include PE refs without the added spaces |
| // required with their expansion in markup decls. |
| |
| doLexicalPE = false; |
| whitespace("F-005"); |
| if (in.peekc('%')) { |
| whitespace("F-006"); |
| defns = params; |
| } else { |
| defns = entities; |
| } |
| |
| ungetc(); // leave some whitespace |
| doLexicalPE = true; |
| entityName = getMarkupDeclname("F-017", false); |
| whitespace("F-007"); |
| externalId = maybeExternalID(); |
| |
| // |
| // first definition sticks ... e.g. internal subset PEs are used |
| // to override DTD defaults. It's also an "error" to incorrectly |
| // redefine builtin internal entities, but since reporting such |
| // errors is optional we only give warnings ("just in case") for |
| // non-parameter entities. |
| // |
| doStore = (defns.get(entityName) == null); |
| if (!doStore && defns == entities) { |
| warning("P-054", new Object[]{entityName}); |
| } |
| |
| // internal entities |
| if (externalId == null) { |
| char value[]; |
| InternalEntity entity; |
| |
| doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd" |
| parseLiteral(true); |
| doLexicalPE = true; |
| if (doStore) { |
| value = new char[strTmp.length()]; |
| if (value.length != 0) { |
| strTmp.getChars(0, value.length, value, 0); |
| } |
| entity = new InternalEntity(entityName, value); |
| entity.isPE = (defns == params); |
| defns.put(entityName, entity); |
| if (defns == entities) { |
| dtdHandler.internalGeneralEntityDecl(entityName, |
| new String(value)); |
| } |
| } |
| |
| // external entities (including unparsed) |
| } else { |
| // [76] NDataDecl ::= S 'NDATA' S Name |
| if (defns == entities && maybeWhitespace() |
| && peek("NDATA")) { |
| externalId.notation = getMarkupDeclname("F-018", false); |
| |
| // flag undeclared notation for checking after |
| // the DTD is fully processed |
| if (notations.get(externalId.notation) == null) { |
| notations.put(externalId.notation, Boolean.TRUE); |
| } |
| } |
| externalId.name = entityName; |
| externalId.isPE = (defns == params); |
| if (doStore) { |
| defns.put(entityName, externalId); |
| if (externalId.notation != null) { |
| dtdHandler.unparsedEntityDecl(entityName, |
| externalId.publicId, externalId.systemId, |
| externalId.notation); |
| } else if (defns == entities) { |
| dtdHandler.externalGeneralEntityDecl(entityName, |
| externalId.publicId, externalId.systemId); |
| } |
| } |
| } |
| maybeWhitespace(); |
| nextChar('>', "F-031", entityName); |
| if (start != in) { |
| error("V-013", null); |
| } |
| return true; |
| } |
| |
| private ExternalEntity maybeExternalID() |
| throws IOException, SAXException { |
| |
| // [75] ExternalID ::= 'SYSTEM' S SystemLiteral |
| // | 'PUBLIC' S' PubidLiteral S Systemliteral |
| String temp = null; |
| ExternalEntity retval; |
| |
| if (peek("PUBLIC")) { |
| whitespace("F-009"); |
| temp = parsePublicId(); |
| } else if (!peek("SYSTEM")) { |
| return null; |
| } |
| |
| retval = new ExternalEntity(in); |
| retval.publicId = temp; |
| whitespace("F-008"); |
| retval.systemId = parseSystemId(); |
| return retval; |
| } |
| |
| private String parseSystemId() |
| throws IOException, SAXException { |
| |
| String uri = getQuotedString("F-034", null); |
| int temp = uri.indexOf(':'); |
| |
| // resolve relative URIs ... must do it here since |
| // it's relative to the source file holding the URI! |
| |
| // "new java.net.URL (URL, string)" conforms to RFC 1630, |
| // but we can't use that except when the URI is a URL. |
| // The entity resolver is allowed to handle URIs that are |
| // not URLs, so we pass URIs through with scheme intact |
| if (temp == -1 || uri.indexOf('/') < temp) { |
| String baseURI; |
| |
| baseURI = in.getSystemId(); |
| if (baseURI == null) { |
| fatal("P-055", new Object[]{uri}); |
| } |
| if (uri.length() == 0) { |
| uri = "."; |
| } |
| baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1); |
| if (uri.charAt(0) != '/') { |
| uri = baseURI + uri; |
| } else { |
| // XXX slashes at the beginning of a relative URI are |
| // a special case we don't handle. |
| throw new InternalError(); |
| } |
| |
| // letting other code map any "/xxx/../" or "/./" to "/", |
| // since all URIs must handle it the same. |
| } |
| // check for fragment ID in URI |
| if (uri.indexOf('#') != -1) { |
| error("P-056", new Object[]{uri}); |
| } |
| return uri; |
| } |
| |
| private void maybeTextDecl() |
| throws IOException, SAXException { |
| |
| // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' |
| if (peek("<?xml")) { |
| readVersion(false, "1.0"); |
| readEncoding(true); |
| maybeWhitespace(); |
| if (!peek("?>")) { |
| fatal("P-057"); |
| } |
| } |
| } |
| |
| private void externalParameterEntity(ExternalEntity next) |
| throws IOException, SAXException { |
| |
| // |
| // Reap the intended benefits of standalone declarations: |
| // don't deal with external parameter entities, except to |
| // validate the standalone declaration. |
| // |
| |
| // n.b. "in external parameter entities" (and external |
| // DTD subset, same grammar) parameter references can |
| // occur "within" markup declarations ... expansions can |
| // cross syntax rules. Flagged here; affects getc(). |
| |
| // [79] ExtPE ::= TextDecl? extSubsetDecl |
| // [31] extSubsetDecl ::= ( markupdecl | conditionalSect |
| // | PEReference | S )* |
| InputEntity pe; |
| |
| // XXX if this returns false ... |
| |
| pe = in; |
| maybeTextDecl(); |
| while (!pe.isEOF()) { |
| // pop internal PEs (and whitespace before/after) |
| if (in.isEOF()) { |
| in = in.pop(); |
| continue; |
| } |
| doLexicalPE = false; |
| if (maybeWhitespace()) { |
| continue; |
| } |
| if (maybePEReference()) { |
| continue; |
| } |
| doLexicalPE = true; |
| if (maybeMarkupDecl() || maybeConditionalSect()) { |
| continue; |
| } |
| break; |
| } |
| // if (in != pe) throw new InternalError("who popped my PE?"); |
| if (!pe.isEOF()) { |
| fatal("P-059", new Object[]{in.getName()}); |
| } |
| } |
| |
| private void readEncoding(boolean must) |
| throws IOException, SAXException { |
| |
| // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* |
| String name = maybeReadAttribute("encoding", must); |
| |
| if (name == null) { |
| return; |
| } |
| for (int i = 0; i < name.length(); i++) { |
| char c = name.charAt(i); |
| if ((c >= 'A' && c <= 'Z') |
| || (c >= 'a' && c <= 'z')) { |
| continue; |
| } |
| if (i != 0 |
| && ((c >= '0' && c <= '9') |
| || c == '-' |
| || c == '_' |
| || c == '.')) { |
| continue; |
| } |
| fatal("P-060", new Object[]{Character.valueOf(c)}); |
| } |
| |
| // |
| // This should be the encoding in use, and it's even an error for |
| // it to be anything else (in certain cases that are impractical to |
| // to test, and may even be insufficient). So, we do the best we |
| // can, and warn if things look suspicious. Note that Java doesn't |
| // uniformly expose the encodings, and that the names it uses |
| // internally are nonstandard. Also, that the XML spec allows |
| // such "errors" not to be reported at all. |
| // |
| String currentEncoding = in.getEncoding(); |
| |
| if (currentEncoding != null |
| && !name.equalsIgnoreCase(currentEncoding)) { |
| warning("P-061", new Object[]{name, currentEncoding}); |
| } |
| } |
| |
| private boolean maybeNotationDecl() |
| throws IOException, SAXException { |
| |
| // [82] NotationDecl ::= '<!NOTATION' S Name S |
| // (ExternalID | PublicID) S? '>' |
| // [83] PublicID ::= 'PUBLIC' S PubidLiteral |
| InputEntity start = peekDeclaration("!NOTATION"); |
| |
| if (start == null) { |
| return false; |
| } |
| |
| String name = getMarkupDeclname("F-019", false); |
| ExternalEntity entity = new ExternalEntity(in); |
| |
| whitespace("F-011"); |
| if (peek("PUBLIC")) { |
| whitespace("F-009"); |
| entity.publicId = parsePublicId(); |
| if (maybeWhitespace()) { |
| if (!peek(">")) { |
| entity.systemId = parseSystemId(); |
| } else { |
| ungetc(); |
| } |
| } |
| } else if (peek("SYSTEM")) { |
| whitespace("F-008"); |
| entity.systemId = parseSystemId(); |
| } else { |
| fatal("P-062"); |
| } |
| maybeWhitespace(); |
| nextChar('>', "F-032", name); |
| if (start != in) { |
| error("V-013", null); |
| } |
| if (entity.systemId != null && entity.systemId.indexOf('#') != -1) { |
| error("P-056", new Object[]{entity.systemId}); |
| } |
| |
| Object value = notations.get(name); |
| if (value != null && value instanceof ExternalEntity) { |
| warning("P-063", new Object[]{name}); |
| } else { |
| notations.put(name, entity); |
| dtdHandler.notationDecl(name, entity.publicId, |
| entity.systemId); |
| } |
| return true; |
| } |
| |
| //////////////////////////////////////////////////////////////// |
| // |
| // UTILITIES |
| // |
| //////////////////////////////////////////////////////////////// |
| private char getc() throws IOException, SAXException { |
| |
| if (!doLexicalPE) { |
| char c = in.getc(); |
| return c; |
| } |
| |
| // |
| // External parameter entities get funky processing of '%param;' |
| // references. It's not clearly defined in the XML spec; but it |
| // boils down to having those refs be _lexical_ in most cases to |
| // include partial syntax productions. It also needs selective |
| // enabling; "<!ENTITY % foo ...>" must work, for example, and |
| // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd" |
| // if it's expanded in a literal, else "ab cd". PEs also do |
| // not expand within comments or PIs, and external PEs are only |
| // allowed to have markup decls (and so aren't handled lexically). |
| // |
| // This PE handling should be merged into maybeWhitespace, where |
| // it can be dealt with more consistently. |
| // |
| // Also, there are some validity constraints in this area. |
| // |
| char c; |
| |
| while (in.isEOF()) { |
| if (in.isInternal() || (doLexicalPE && !in.isDocument())) { |
| in = in.pop(); |
| } else { |
| fatal("P-064", new Object[]{in.getName()}); |
| } |
| } |
| if ((c = in.getc()) == '%' && doLexicalPE) { |
| // PE ref ::= '%' name ';' |
| String name = maybeGetName(); |
| Object entity; |
| |
| if (name == null) { |
| fatal("P-011"); |
| } |
| nextChar(';', "F-021", name); |
| entity = params.get(name); |
| |
| // push a magic "entity" before and after the |
| // real one, so ungetc() behaves uniformly |
| pushReader(" ".toCharArray(), null, false); |
| if (entity instanceof InternalEntity) { |
| pushReader(((InternalEntity) entity).buf, name, false); |
| } else if (entity instanceof ExternalEntity) // PEs can't be unparsed! |
| // XXX if this returns false ... |
| { |
| pushReader((ExternalEntity) entity); |
| } else if (entity == null) // see note in maybePEReference re making this be nonfatal. |
| { |
| fatal("V-022"); |
| } else { |
| throw new InternalError(); |
| } |
| pushReader(" ".toCharArray(), null, false); |
| return in.getc(); |
| } |
| return c; |
| } |
| |
| private void ungetc() { |
| |
| in.ungetc(); |
| } |
| |
| private boolean peek(String s) |
| throws IOException, SAXException { |
| |
| return in.peek(s, null); |
| } |
| |
| // Return the entity starting the specified declaration |
| // (for validating declaration nesting) else null. |
| private InputEntity peekDeclaration(String s) |
| throws IOException, SAXException { |
| |
| InputEntity start; |
| |
| if (!in.peekc('<')) { |
| return null; |
| } |
| start = in; |
| if (in.peek(s, null)) { |
| return start; |
| } |
| in.ungetc(); |
| return null; |
| } |
| |
| private void nextChar(char c, String location, String near) |
| throws IOException, SAXException { |
| |
| while (in.isEOF() && !in.isDocument()) { |
| in = in.pop(); |
| } |
| if (!in.peekc(c)) { |
| fatal("P-008", new Object[]{Character.valueOf(c), |
| messages.getMessage(locale, location), |
| (near == null ? "" : ('"' + near + '"'))}); |
| } |
| } |
| |
| private void pushReader(char buf[], String name, boolean isGeneral) |
| throws SAXException { |
| |
| InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); |
| r.init(buf, name, in, !isGeneral); |
| in = r; |
| } |
| |
| private boolean pushReader(ExternalEntity next) |
| throws IOException, SAXException { |
| |
| InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); |
| InputSource s; |
| try { |
| s = next.getInputSource(resolver); |
| } catch (IOException e) { |
| String msg = |
| "unable to open the external entity from :" + next.systemId; |
| if (next.publicId != null) { |
| msg += " (public id:" + next.publicId + ")"; |
| } |
| |
| SAXParseException spe = new SAXParseException(msg, |
| getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e); |
| dtdHandler.fatalError(spe); |
| throw e; |
| } |
| |
| r.init(s, next.name, in, next.isPE); |
| in = r; |
| return true; |
| } |
| |
| /** |
| * Returns the public identifier reported by the current input entity. |
| * |
| * @return that entity's public id, or {@code null} when there is no |
| * current input entity |
| */ |
| public String getPublicId() { |
| |
| if (in == null) { |
| return null; |
| } |
| return in.getPublicId(); |
| } |
| |
| /** |
| * Returns the system identifier reported by the current input entity. |
| * |
| * @return that entity's system id, or {@code null} when there is no |
| * current input entity |
| */ |
| public String getSystemId() { |
| |
| if (in == null) { |
| return null; |
| } |
| return in.getSystemId(); |
| } |
| |
| /** |
| * Returns the line number reported by the current input entity. |
| * |
| * @return that entity's line number, or {@code -1} when there is no |
| * current input entity |
| */ |
| public int getLineNumber() { |
| |
| if (in == null) { |
| return -1; |
| } |
| return in.getLineNumber(); |
| } |
| |
| /** |
| * Returns the column number reported by the current input entity. |
| * |
| * @return that entity's column number, or {@code -1} when there is no |
| * current input entity |
| */ |
| public int getColumnNumber() { |
| |
| if (in == null) { |
| return -1; |
| } |
| return in.getColumnNumber(); |
| } |
| |
| // error handling convenience routines |
| // Reports a warning to the DTD handler. The text is fetched with |
| // messages.getMessage(locale, messageId, parameters) and wrapped in a |
| // SAXParseException carrying the current public/system ids and the |
| // current line/column. |
| private void warning(String messageId, Object parameters[]) |
| throws SAXException { |
| |
| String text = messages.getMessage(locale, messageId, parameters); |
| |
| dtdHandler.warning(new SAXParseException(text, |
| getPublicId(), getSystemId(), getLineNumber(), getColumnNumber())); |
| } |
| |
| void error(String messageId, Object parameters[]) |
| throws SAXException { |
| |
| SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), |
| getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); |
| |
| dtdHandler.error(e); |
| } |
| |
| // Shorthand for fatal(messageId, parameters) with a null parameter array. |
| private void fatal(String messageId) throws SAXException { |
| fatal(messageId, (Object[]) null); |
| } |
| |
| private void fatal(String messageId, Object parameters[]) |
| throws SAXException { |
| |
| SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), |
| getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); |
| |
| dtdHandler.fatalError(e); |
| |
| throw e; |
| } |
| |
| // |
| // Cache mapping character sequences to shared String instances. |
| // Element, attribute and other names recur constantly within a |
| // document, so handing out one String per distinct name saves both |
| // memory and CPU. Because some names also recur across documents |
| // (e.g. stylesheets), each cached name is additionally interned |
| // with String.intern(), i.e. shared globally. |
| // |
| static class NameCache { |
| // |
| // Fixed number of buckets; the table never grows. The size is a |
| // prime chosen to comfortably exceed what most XML files need. |
| // An undersized table only costs longer collision chains. |
| // |
| |
| NameCacheEntry hashtable[] = new NameCacheEntry[541]; |
| |
| // |
| // Common case: only the shared String ("symbol") is wanted. |
| // |
| String lookup(char value[], int len) { |
| |
| NameCacheEntry hit = lookupEntry(value, len); |
| return hit.name; |
| } |
| |
| // |
| // Returns the whole entry for the first len chars of value, |
| // creating and chaining a new one on a miss. The entry exposes |
| // the characters as well as the String, for callers that need |
| // to scan them (mostly element end tags). |
| // |
| NameCacheEntry lookupEntry(char value[], int len) { |
| |
| // bucket selection: 31-based polynomial hash over the chars, |
| // sign bit cleared before reducing modulo the table size |
| int hash = 0; |
| for (int pos = 0; pos < len; pos++) { |
| hash = 31 * hash + value[pos]; |
| } |
| final int bucket = (hash & 0x7fffffff) % hashtable.length; |
| |
| // walk the chain looking for an existing entry |
| NameCacheEntry cursor = hashtable[bucket]; |
| while (cursor != null) { |
| if (cursor.matches(value, len)) { |
| return cursor; |
| } |
| cursor = cursor.next; |
| } |
| |
| // miss: build an entry around a private copy of the chars |
| final NameCacheEntry created = new NameCacheEntry(); |
| created.chars = new char[len]; |
| System.arraycopy(value, 0, created.chars, 0, len); |
| // |
| // The name is interned "globally". NOTE: JDK 1.1 has a fixed |
| // size string intern table with non-GC'd entries and can panic |
| // here; that's a JDK problem, use 1.2 or later with many |
| // identifiers. |
| // |
| created.name = new String(created.chars).intern(); |
| |
| // link in at the head of the chain |
| created.next = hashtable[bucket]; |
| hashtable[bucket] = created; |
| return created; |
| } |
| } |
| |
| static class NameCacheEntry { |
| |
| String name; |
| char chars[]; |
| NameCacheEntry next; |
| |
| boolean matches(char value[], int len) { |
| if (chars == null || chars.length != len) { |
| return false; |
| } |
| for (int i = 0; i < len; i++) { |
| if (value[i] != chars[i]) { |
| return false; |
| } |
| } |
| return true; |
| } |
| } |
| |
| // |
| // Message catalog for diagnostics. Shared by all parser instances; |
| // nextChar() and the warning/error/fatal helpers fetch their message |
| // text from it via getMessage(). |
| // |
| static final Catalog messages = new Catalog(); |
| |
| // |
| // MessageCatalog subclass tied to DTDParser: the constructor passes |
| // DTDParser.class to the superclass. NOTE(review): presumably the |
| // class is used to locate this parser's message resources -- confirm |
| // against MessageCatalog. |
| // |
| static final class Catalog extends MessageCatalog { |
| |
| Catalog() { |
| super(DTDParser.class); |
| } |
| } |
| } |