src/share/jaxws_classes/com/sun/xml/internal/dtdparser/DTDParser.java

Thu, 31 Aug 2017 15:18:52 +0800

author
aoqi
date
Thu, 31 Aug 2017 15:18:52 +0800
changeset 637
9c07ef4934dd
parent 397
b99d7e355d4b
parent 0
373ffda63c9a
permissions
-rw-r--r--

merge

     1 /*
     2  * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.xml.internal.dtdparser;
    28 import org.xml.sax.EntityResolver;
    29 import org.xml.sax.InputSource;
    30 import org.xml.sax.Locator;
    31 import org.xml.sax.SAXException;
    32 import org.xml.sax.SAXParseException;
    34 import java.io.IOException;
    35 import java.util.ArrayList;
    36 import java.util.Enumeration;
    37 import java.util.Hashtable;
    38 import java.util.Locale;
    39 import java.util.Set;
    40 import java.util.Vector;
    42 /**
    43  * This implements parsing of XML 1.0 DTDs.
    44  * <p/>
    45  * This conforms to the portion of the XML 1.0 specification related
    46  * to the external DTD subset.
    47  * <p/>
    48  * For multi-language applications (such as web servers using XML
    49  * processing to create dynamic content), a method supports choosing
    50  * a locale for parser diagnostics which is both understood by the
    51  * message recipient and supported by the parser.
    52  * <p/>
    53  * This parser produces a stream of parse events.  It supports some
    54  * features (exposing comments, CDATA sections, and entity references)
    55  * which are not required to be reported by conformant XML processors.
    56  *
    57  * @author David Brownell
    58  * @author Janet Koenig
    59  * @author Kohsuke KAWAGUCHI
    60  * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $
    61  */
    62 public class DTDParser {
    63     public final static String TYPE_CDATA = "CDATA";
    64     public final static String TYPE_ID = "ID";
    65     public final static String TYPE_IDREF = "IDREF";
    66     public final static String TYPE_IDREFS = "IDREFS";
    67     public final static String TYPE_ENTITY = "ENTITY";
    68     public final static String TYPE_ENTITIES = "ENTITIES";
    69     public final static String TYPE_NMTOKEN = "NMTOKEN";
    70     public final static String TYPE_NMTOKENS = "NMTOKENS";
    71     public final static String TYPE_NOTATION = "NOTATION";
    72     public final static String TYPE_ENUMERATION = "ENUMERATION";
    75     // stack of input entities being merged
    76     private InputEntity in;
    78     // temporaries reused during parsing
    79     private StringBuffer strTmp;
    80     private char nameTmp [];
    81     private NameCache nameCache;
    82     private char charTmp [] = new char[2];
    84     // temporary DTD parsing state
    85     private boolean doLexicalPE;
    87     // DTD state, used during parsing
    88 //    private SimpleHashtable    elements = new SimpleHashtable (47);
    89     protected final Set declaredElements = new java.util.HashSet();
    90     private SimpleHashtable params = new SimpleHashtable(7);
    92     // exposed to package-private subclass
    93     Hashtable notations = new Hashtable(7);
    94     SimpleHashtable entities = new SimpleHashtable(17);
    96     private SimpleHashtable ids = new SimpleHashtable();
    98     // listeners for DTD parsing events
    99     private DTDEventListener dtdHandler;
   101     private EntityResolver resolver;
   102     private Locale locale;
   104     // string constants -- use these copies so "==" works
   105     // package private
   106     static final String strANY = "ANY";
   107     static final String strEMPTY = "EMPTY";
   109     /**
   110      * Used by applications to request locale for diagnostics.
   111      *
   112      * @param l The locale to use, or null to use system defaults
   113      *          (which may include only message IDs).
   114      */
   115     public void setLocale(Locale l) throws SAXException {
   117         if (l != null && !messages.isLocaleSupported(l.toString())) {
   118             throw new SAXException(messages.getMessage(locale,
   119                     "P-078", new Object[]{l}));
   120         }
   121         locale = l;
   122     }
   124     /**
   125      * Returns the diagnostic locale.
   126      */
   127     public Locale getLocale() {
   128         return locale;
   129     }
   131     /**
   132      * Chooses a client locale to use for diagnostics, using the first
   133      * language specified in the list that is supported by this parser.
   134      * That locale is then set using <a href="#setLocale(java.util.Locale)">
   135      * setLocale()</a>.  Such a list could be provided by a variety of user
   136      * preference mechanisms, including the HTTP <em>Accept-Language</em>
   137      * header field.
   138      *
   139      * @param languages Array of language specifiers, ordered with the most
   140      *                  preferable one at the front.  For example, "en-ca" then "fr-ca",
   141      *                  followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
   142      * @return The chosen locale, or null.
   143      * @see MessageCatalog
   144      */
   145     public Locale chooseLocale(String languages [])
   146             throws SAXException {
   148         Locale l = messages.chooseLocale(languages);
   150         if (l != null) {
   151             setLocale(l);
   152         }
   153         return l;
   154     }
   156     /**
   157      * Lets applications control entity resolution.
   158      */
   159     public void setEntityResolver(EntityResolver r) {
   161         resolver = r;
   162     }
   164     /**
   165      * Returns the object used to resolve entities
   166      */
   167     public EntityResolver getEntityResolver() {
   169         return resolver;
   170     }
   172     /**
   173      * Used by applications to set handling of DTD parsing events.
   174      */
   175     public void setDtdHandler(DTDEventListener handler) {
   176         dtdHandler = handler;
   177         if (handler != null)
   178             handler.setDocumentLocator(new Locator() {
   179                 public String getPublicId() {
   180                     return DTDParser.this.getPublicId();
   181                 }
   183                 public String getSystemId() {
   184                     return DTDParser.this.getSystemId();
   185                 }
   187                 public int getLineNumber() {
   188                     return DTDParser.this.getLineNumber();
   189                 }
   191                 public int getColumnNumber() {
   192                     return DTDParser.this.getColumnNumber();
   193                 }
   194             });
   195     }
   197     /**
   198      * Returns the handler used to for DTD parsing events.
   199      */
   200     public DTDEventListener getDtdHandler() {
   201         return dtdHandler;
   202     }
   204     /**
   205      * Parse a DTD.
   206      */
   207     public void parse(InputSource in)
   208             throws IOException, SAXException {
   209         init();
   210         parseInternal(in);
   211     }
   213     /**
   214      * Parse a DTD.
   215      */
   216     public void parse(String uri)
   217             throws IOException, SAXException {
   218         InputSource in;
   220         init();
   221         // System.out.println ("parse (\"" + uri + "\")");
   222         in = resolver.resolveEntity(null, uri);
   224         // If custom resolver punts resolution to parser, handle it ...
   225         if (in == null) {
   226             in = Resolver.createInputSource(new java.net.URL(uri), false);
   228             // ... or if custom resolver doesn't correctly construct the
   229             // input entity, patch it up enough so relative URIs work, and
   230             // issue a warning to minimize later confusion.
   231         } else if (in.getSystemId() == null) {
   232             warning("P-065", null);
   233             in.setSystemId(uri);
   234         }
   236         parseInternal(in);
   237     }
   239     // makes sure the parser is reset to "before a document"
   240     private void init() {
   241         in = null;
   243         // alloc temporary data used in parsing
   244         strTmp = new StringBuffer();
   245         nameTmp = new char[20];
   246         nameCache = new NameCache();
   248         // reset doc info
   249 //        isInAttribute = false;
   251         doLexicalPE = false;
   253         entities.clear();
   254         notations.clear();
   255         params.clear();
   256         //    elements.clear ();
   257         declaredElements.clear();
   259         // initialize predefined references ... re-interpreted later
   260         builtin("amp", "&#38;");
   261         builtin("lt", "&#60;");
   262         builtin("gt", ">");
   263         builtin("quot", "\"");
   264         builtin("apos", "'");
   266         if (locale == null)
   267             locale = Locale.getDefault();
   268         if (resolver == null)
   269             resolver = new Resolver();
   270         if (dtdHandler == null)
   271             dtdHandler = new DTDHandlerBase();
   272     }
   274     private void builtin(String entityName, String entityValue) {
   275         InternalEntity entity;
   276         entity = new InternalEntity(entityName, entityValue.toCharArray());
   277         entities.put(entityName, entity);
   278     }
   281     ////////////////////////////////////////////////////////////////
   282     //
   283     // parsing is by recursive descent, code roughly
   284     // following the BNF rules except tweaked for simple
   285     // lookahead.  rules are more or less in numeric order,
   286     // except where code sharing suggests other structures.
   287     //
   288     // a classic benefit of recursive descent parsers:  it's
   289     // relatively easy to get diagnostics that make sense.
   290     //
   291     ////////////////////////////////////////////////////////////////
   294     private void parseInternal(InputSource input)
   295             throws IOException, SAXException {
   297         if (input == null)
   298             fatal("P-000");
   300         try {
   301             in = InputEntity.getInputEntity(dtdHandler, locale);
   302             in.init(input, null, null, false);
   304             dtdHandler.startDTD(in);
   306             // [30] extSubset ::= TextDecl? extSubsetDecl
   307             // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
   308             //        | PEReference | S )*
   309             //    ... same as [79] extPE, which is where the code is
   311             ExternalEntity externalSubset = new ExternalEntity(in);
   312             externalParameterEntity(externalSubset);
   314             if (!in.isEOF()) {
   315                 fatal("P-001", new Object[]
   316                 {Integer.toHexString(((int) getc()))});
   317             }
   318             afterRoot();
   319             dtdHandler.endDTD();
   321         } catch (EndOfInputException e) {
   322             if (!in.isDocument()) {
   323                 String name = in.getName();
   324                 do {    // force a relevant URI and line number
   325                     in = in.pop();
   326                 } while (in.isInternal());
   327                 fatal("P-002", new Object[]{name});
   328             } else {
   329                 fatal("P-003", null);
   330             }
   331         } catch (RuntimeException e) {
   332             // Don't discard location that triggered the exception
   333             // ## Should properly wrap exception
   334             System.err.print("Internal DTD parser error: "); // ##
   335             e.printStackTrace();
   336             throw new SAXParseException(e.getMessage() != null
   337                     ? e.getMessage() : e.getClass().getName(),
   338                     getPublicId(), getSystemId(),
   339                     getLineNumber(), getColumnNumber());
   341         } finally {
   342             // recycle temporary data used during parsing
   343             strTmp = null;
   344             nameTmp = null;
   345             nameCache = null;
   347             // ditto input sources etc
   348             if (in != null) {
   349                 in.close();
   350                 in = null;
   351             }
   353             // get rid of all DTD info ... some of it would be
   354             // useful for editors etc, investigate later.
   356             params.clear();
   357             entities.clear();
   358             notations.clear();
   359             declaredElements.clear();
   360 //        elements.clear();
   361             ids.clear();
   362         }
   363     }
   365     void afterRoot() throws SAXException {
   366         // Make sure all IDREFs match declared ID attributes.  We scan
   367         // after the document element is parsed, since XML allows forward
   368         // references, and only now can we know if they're all resolved.
   370         for (Enumeration e = ids.keys();
   371              e.hasMoreElements();
   372                 ) {
   373             String id = (String) e.nextElement();
   374             Boolean value = (Boolean) ids.get(id);
   375             if (Boolean.FALSE == value)
   376                 error("V-024", new Object[]{id});
   377         }
   378     }
   381     // role is for diagnostics
   382     private void whitespace(String roleId)
   383             throws IOException, SAXException {
   385         // [3] S ::= (#x20 | #x9 | #xd | #xa)+
   386         if (!maybeWhitespace()) {
   387             fatal("P-004", new Object[]
   388             {messages.getMessage(locale, roleId)});
   389         }
   390     }
   392     // S?
   393     private boolean maybeWhitespace()
   394             throws IOException, SAXException {
   396         if (!doLexicalPE)
   397             return in.maybeWhitespace();
   399         // see getc() for the PE logic -- this lets us splice
   400         // expansions of PEs in "anywhere".  getc() has smarts,
   401         // so for external PEs we don't bypass it.
   403         // XXX we can marginally speed PE handling, and certainly
   404         // be cleaner (hence potentially more correct), by using
   405         // the observations that expanded PEs only start and stop
   406         // where whitespace is allowed.  getc wouldn't need any
   407         // "lexical" PE expansion logic, and no other method needs
   408         // to handle termination of PEs.  (parsing of literals would
   409         // still need to pop entities, but not parsing of references
   410         // in content.)
   412         char c = getc();
   413         boolean saw = false;
   415         while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
   416             saw = true;
   418             // this gracefully ends things when we stop playing
   419             // with internal parameters.  caller should have a
   420             // grammar rule allowing whitespace at end of entity.
   421             if (in.isEOF() && !in.isInternal())
   422                 return saw;
   423             c = getc();
   424         }
   425         ungetc();
   426         return saw;
   427     }
   429     private String maybeGetName()
   430             throws IOException, SAXException {
   432         NameCacheEntry entry = maybeGetNameCacheEntry();
   433         return (entry == null) ? null : entry.name;
   434     }
   436     private NameCacheEntry maybeGetNameCacheEntry()
   437             throws IOException, SAXException {
   439         // [5] Name ::= (Letter|'_'|':') (Namechar)*
   440         char c = getc();
   442         if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
   443             ungetc();
   444             return null;
   445         }
   446         return nameCharString(c);
   447     }
   449     // Used when parsing enumerations
   450     private String getNmtoken()
   451             throws IOException, SAXException {
   453         // [7] Nmtoken ::= (Namechar)+
   454         char c = getc();
   455         if (!XmlChars.isNameChar(c))
   456             fatal("P-006", new Object[]{new Character(c)});
   457         return nameCharString(c).name;
   458     }
   460     // n.b. this gets used when parsing attribute values (for
   461     // internal references) so we can't use strTmp; it's also
   462     // a hotspot for CPU and memory in the parser (called at least
   463     // once for each element) so this has been optimized a bit.
   465     private NameCacheEntry nameCharString(char c)
   466             throws IOException, SAXException {
   468         int i = 1;
   470         nameTmp[0] = c;
   471         for (; ;) {
   472             if ((c = in.getNameChar()) == 0)
   473                 break;
   474             if (i >= nameTmp.length) {
   475                 char tmp [] = new char[nameTmp.length + 10];
   476                 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
   477                 nameTmp = tmp;
   478             }
   479             nameTmp[i++] = c;
   480         }
   481         return nameCache.lookupEntry(nameTmp, i);
   482     }
   484     //
   485     // much similarity between parsing entity values in DTD
   486     // and attribute values (in DTD or content) ... both follow
   487     // literal parsing rules, newline canonicalization, etc
   488     //
   489     // leaves value in 'strTmp' ... either a "replacement text" (4.5),
   490     // or else partially normalized attribute value (the first bit
   491     // of 3.3.3's spec, without the "if not CDATA" bits).
   492     //
   /**
    * Parses a quoted literal and leaves its value in {@code strTmp}.
    * <p/>
    * For entity values, general entity references are copied through
    * unexpanded ("&amp;name;") and parameter entity references are
    * expanded.  For attribute values, general entity references are
    * expanded, '%' is ordinary text, each whitespace character becomes a
    * single space and '&lt;' is reported as fatal "P-012".  Character
    * references are resolved immediately in both modes.
    *
    * @param isEntityValue true to apply the EntityValue rules [9],
    *                      false to apply the AttValue rules [10]
    */
   private void parseLiteral(boolean isEntityValue)
           throws IOException, SAXException {

       // [9] EntityValue ::=
       //    '"' ([^"&%] | Reference | PEReference)* '"'
       //    |    "'" ([^'&%] | Reference | PEReference)* "'"
       // [10] AttValue ::=
       //    '"' ([^"&]  | Reference             )* '"'
       //    |    "'" ([^'&]  | Reference             )* "'"
       char quote = getc();
       char c;
       // remember the entity the opening quote was read from; only a
       // matching quote read while this is again the current entity
       // closes the literal
       InputEntity source = in;

       if (quote != '\'' && quote != '"') {
           fatal("P-007");
       }

       // don't report entity expansions within attributes,
       // they're reported "fully expanded" via SAX
//    isInAttribute = !isEntityValue;

       // get value into strTmp
       strTmp = new StringBuffer();

       // scan, allowing entity push/pop wherever ...
       // expanded entities can't terminate the literal!
       for (; ;) {
           if (in != source && in.isEOF()) {
               // we don't report end of parsed entities
               // within attributes (no SAX hooks)
               in = in.pop();
               continue;
           }
           if ((c = getc()) == quote && in == source) {
               break;
           }

           //
           // Basically the "reference in attribute value"
           // row of the chart in section 4.4 of the spec
           //
           if (c == '&') {
               String entityName = maybeGetName();

               if (entityName != null) {
                   nextChar(';', "F-020", entityName);

                   // 4.4 says:  bypass these here ... we'll catch
                   // forbidden refs to unparsed entities on use
                   if (isEntityValue) {
                       strTmp.append('&');
                       strTmp.append(entityName);
                       strTmp.append(';');
                       continue;
                   }
                   // only reached for attribute values
                   expandEntityInLiteral(entityName, entities, isEntityValue);


                   // character references are always included immediately
               } else if ((c = getc()) == '#') {
                   int tmp = parseCharNumber();

                   // code points above the 16-bit range go through
                   // charTmp; tmp is then reused for the helper's result
                   // (presumably the number of chars stored -- confirm
                   // against surrogatesToCharTmp)
                   if (tmp > 0xffff) {
                       tmp = surrogatesToCharTmp(tmp);
                       strTmp.append(charTmp[0]);
                       if (tmp == 2)
                           strTmp.append(charTmp[1]);
                   } else
                       strTmp.append((char) tmp);
               } else
                   fatal("P-009");
               continue;

           }

           // expand parameter entities only within entity value literals
           if (c == '%' && isEntityValue) {
               String entityName = maybeGetName();

               if (entityName != null) {
                   nextChar(';', "F-021", entityName);
                   expandEntityInLiteral(entityName, params, isEntityValue);
                   continue;
               } else
                   fatal("P-011");
           }

           // For attribute values ...
           if (!isEntityValue) {
               // 3.3.3 says whitespace normalizes to space...
               if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                   strTmp.append(' ');
                   continue;
               }

               // "<" not legal in parsed literals ...
               if (c == '<')
                   fatal("P-012");
           }

           // ordinary character: copy it through unchanged
           strTmp.append(c);
       }
//    isInAttribute = false;
   }
   598     // does a SINGLE expansion of the entity (often reparsed later)
   599     private void expandEntityInLiteral(String name, SimpleHashtable table,
   600                                        boolean isEntityValue)
   601             throws IOException, SAXException {
   603         Object entity = table.get(name);
   605         if (entity instanceof InternalEntity) {
   606             InternalEntity value = (InternalEntity) entity;
   607             pushReader(value.buf, name, !value.isPE);
   609         } else if (entity instanceof ExternalEntity) {
   610             if (!isEntityValue)    // must be a PE ...
   611                 fatal("P-013", new Object[]{name});
   612             // XXX if this returns false ...
   613             pushReader((ExternalEntity) entity);
   615         } else if (entity == null) {
   616             //
   617             // Note:  much confusion about whether spec requires such
   618             // errors to be fatal in many cases, but none about whether
   619             // it allows "normal" errors to be unrecoverable!
   620             //
   621             fatal((table == params) ? "V-022" : "P-014",
   622                     new Object[]{name});
   623         }
   624     }
   626     // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
   627     // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
   629     // NOTE:  XML spec should explicitly say that PE ref syntax is
   630     // ignored in PIs, comments, SystemLiterals, and Pubid Literal
   631     // values ... can't process the XML spec's own DTD without doing
   632     // that for comments.
   634     private String getQuotedString(String type, String extra)
   635             throws IOException, SAXException {
   637         // use in.getc to bypass PE processing
   638         char quote = in.getc();
   640         if (quote != '\'' && quote != '"')
   641             fatal("P-015", new Object[]{
   642                 messages.getMessage(locale, type, new Object[]{extra})
   643             });
   645         char c;
   647         strTmp = new StringBuffer();
   648         while ((c = in.getc()) != quote)
   649             strTmp.append((char) c);
   650         return strTmp.toString();
   651     }
   654     private String parsePublicId() throws IOException, SAXException {
   656         // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
   657         // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
   658         String retval = getQuotedString("F-033", null);
   659         for (int i = 0; i < retval.length(); i++) {
   660             char c = retval.charAt(i);
   661             if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
   662                     && !(c >= 'A' && c <= 'Z')
   663                     && !(c >= 'a' && c <= 'z'))
   664                 fatal("P-016", new Object[]{new Character(c)});
   665         }
   666         strTmp = new StringBuffer();
   667         strTmp.append(retval);
   668         return normalize(false);
   669     }
   671     // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
   672     // handled by:  InputEntity.parsedContent()
   /**
    * Consumes a comment if one starts at the current position.
    * <p/>
    * Lexical parameter-entity expansion is switched off while the
    * comment body is scanned and restored once the closing delimiter
    * has been read.  Note that {@code saveCommentText} is hard-wired to
    * false, so the comment text is not collected and the DTD handler's
    * comment callback is not invoked from here.
    *
    * @param skipStart true if the leading '&lt;' has already been
    *                  consumed, so only "!--" is looked for
    * @return true if a comment was consumed, false if none starts here
    */
   private boolean maybeComment(boolean skipStart)
           throws IOException, SAXException {

       // [15] Comment ::= '<!--'
       //        ( (Char - '-') | ('-' (Char - '-'))*
       //        '-->'
       if (!in.peek(skipStart ? "!--" : "<!--", null))
           return false;

       boolean savedLexicalPE = doLexicalPE;
       boolean saveCommentText;

       doLexicalPE = false;
       // comment text capture is disabled; flip this to collect the text
       // in strTmp and pass it to the handler at the end
       saveCommentText = false;
       if (saveCommentText)
           strTmp = new StringBuffer();

       oneComment:
       for (; ;) {
           try {
               // bypass PE expansion, but permit PEs
               // to complete ... valid docs won't care.
               for (; ;) {
                   int c = getc();
                   if (c == '-') {
                       c = getc();
                       if (c != '-') {
                           // lone '-': part of the text; re-read the
                           // following character on the next pass
                           if (saveCommentText)
                               strTmp.append('-');
                           ungetc();
                           continue;
                       }
                       // "--" must be followed directly by '>'
                       nextChar('>', "F-022", null);
                       break oneComment;
                   }
                   if (saveCommentText)
                       strTmp.append((char) c);
               }
           } catch (EndOfInputException e) {
               //
               // Input ran out inside the comment.  For an internal
               // entity the validity error V-021 is reported first; in
               // every case fatal P-017 is then reported.
               //
               if (in.isInternal()) {
                   error("V-021", null);
               }
               fatal("P-017");
           }
       }
       doLexicalPE = savedLexicalPE;
       if (saveCommentText)
           dtdHandler.comment(strTmp.toString());
       return true;
   }
   731     private boolean maybePI(boolean skipStart)
   732             throws IOException, SAXException {
   734         // [16] PI ::= '<?' PITarget
   735         //        (S (Char* - (Char* '?>' Char*)))?
   736         //        '?>'
   737         // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
   738         boolean savedLexicalPE = doLexicalPE;
   740         if (!in.peek(skipStart ? "?" : "<?", null))
   741             return false;
   742         doLexicalPE = false;
   744         String target = maybeGetName();
   746         if (target == null) {
   747             fatal("P-018");
   748         }
   749         if ("xml".equals(target)) {
   750             fatal("P-019");
   751         }
   752         if ("xml".equalsIgnoreCase(target)) {
   753             fatal("P-020", new Object[]{target});
   754         }
   756         if (maybeWhitespace()) {
   757             strTmp = new StringBuffer();
   758             try {
   759                 for (; ;) {
   760                     // use in.getc to bypass PE processing
   761                     char c = in.getc();
   762                     //Reached the end of PI.
   763                     if (c == '?' && in.peekc('>'))
   764                         break;
   765                     strTmp.append(c);
   766                 }
   767             } catch (EndOfInputException e) {
   768                 fatal("P-021");
   769             }
   770             dtdHandler.processingInstruction(target, strTmp.toString());
   771         } else {
   772             if (!in.peek("?>", null)) {
   773                 fatal("P-022");
   774             }
   775             dtdHandler.processingInstruction(target, "");
   776         }
   778         doLexicalPE = savedLexicalPE;
   779         return true;
   780     }
   782     // [18] CDSect ::= CDStart CData CDEnd
   783     // [19] CDStart ::= '<![CDATA['
   784     // [20] CData ::= (Char* - (Char* ']]>' Char*))
   785     // [21] CDEnd ::= ']]>'
   786     //
   787     //    ... handled by InputEntity.unparsedContent()
   789     // collapsing several rules together ...
   790     // simpler than attribute literals -- no reference parsing!
   791     private String maybeReadAttribute(String name, boolean must)
   792             throws IOException, SAXException {
   794         // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
   795         // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
   796         // [32] SDDecl ::=  S 'standalone' Eq \'|\" ... \'|\"
   797         if (!maybeWhitespace()) {
   798             if (!must) {
   799                 return null;
   800             }
   801             fatal("P-024", new Object[]{name});
   802             // NOTREACHED
   803         }
   805         if (!peek(name)) {
   806             if (must) {
   807                 fatal("P-024", new Object[]{name});
   808             } else {
   809                 // To ensure that the whitespace is there so that when we
   810                 // check for the next attribute we assure that the
   811                 // whitespace still exists.
   812                 ungetc();
   813                 return null;
   814             }
   815         }
   817         // [25] Eq ::= S? '=' S?
   818         maybeWhitespace();
   819         nextChar('=', "F-023", null);
   820         maybeWhitespace();
   822         return getQuotedString("F-035", name);
   823     }
   825     private void readVersion(boolean must, String versionNum)
   826             throws IOException, SAXException {
   828         String value = maybeReadAttribute("version", must);
   830         // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
   832         if (must && value == null)
   833             fatal("P-025", new Object[]{versionNum});
   834         if (value != null) {
   835             int length = value.length();
   836             for (int i = 0; i < length; i++) {
   837                 char c = value.charAt(i);
   838                 if (!((c >= '0' && c <= '9')
   839                         || c == '_' || c == '.'
   840                         || (c >= 'a' && c <= 'z')
   841                         || (c >= 'A' && c <= 'Z')
   842                         || c == ':' || c == '-')
   843                 )
   844                     fatal("P-026", new Object[]{value});
   845             }
   846         }
   847         if (value != null && !value.equals(versionNum))
   848             error("P-027", new Object[]{versionNum, value});
   849     }
   851     // common code used by most markup declarations
   852     // ... S (Q)Name ...
   853     private String getMarkupDeclname(String roleId, boolean qname)
   854             throws IOException, SAXException {
   856         String name;
   858         whitespace(roleId);
   859         name = maybeGetName();
   860         if (name == null)
   861             fatal("P-005", new Object[]
   862             {messages.getMessage(locale, roleId)});
   863         return name;
   864     }
   866     private boolean maybeMarkupDecl()
   867             throws IOException, SAXException {
   869         // [29] markupdecl ::= elementdecl | Attlistdecl
   870         //           | EntityDecl | NotationDecl | PI | Comment
   871         return maybeElementDecl()
   872                 || maybeAttlistDecl()
   873                 || maybeEntityDecl()
   874                 || maybeNotationDecl()
   875                 || maybePI(false)
   876                 || maybeComment(false);
   877     }
   879     private static final String XmlLang = "xml:lang";
   881     private boolean isXmlLang(String value) {
   883         // [33] LanguageId ::= Langcode ('-' Subcode)*
   884         // [34] Langcode ::= ISO639Code | IanaCode | UserCode
   885         // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
   886         // [36] IanaCode ::= [iI] '-' SubCode
   887         // [37] UserCode ::= [xX] '-' SubCode
   888         // [38] SubCode ::= [a-zA-Z]+
   890         // the ISO and IANA codes (and subcodes) are registered,
   891         // but that's neither a WF nor a validity constraint.
   893         int nextSuffix;
   894         char c;
   896         if (value.length() < 2)
   897             return false;
   898         c = value.charAt(1);
   899         if (c == '-') {        // IANA, or user, code
   900             c = value.charAt(0);
   901             if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
   902                 return false;
   903             nextSuffix = 1;
   904         } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
   905             // 2 letter ISO code, or error
   906             c = value.charAt(0);
   907             if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
   908                 return false;
   909             nextSuffix = 2;
   910         } else
   911             return false;
   913         // here "suffix" ::= '-' [a-zA-Z]+ suffix*
   914         while (nextSuffix < value.length()) {
   915             c = value.charAt(nextSuffix);
   916             if (c != '-')
   917                 break;
   918             while (++nextSuffix < value.length()) {
   919                 c = value.charAt(nextSuffix);
   920                 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
   921                     break;
   922             }
   923         }
   924         return value.length() == nextSuffix && c != '-';
   925     }
   928     //
   929     // CHAPTER 3:  Logical Structures
   930     //
   932     /**
   933      * To validate, subclassers should at this time make sure that
   934      * values are of the declared types:<UL>
   935      * <LI> ID and IDREF(S) values are Names
   936      * <LI> NMTOKEN(S) are Nmtokens
   937      * <LI> ENUMERATION values match one of the tokens
   938      * <LI> NOTATION values match a notation name
   939      * <LI> ENTITY(IES) values match an unparsed external entity
   940      * </UL>
   941      * <p/>
   942      * <P> Separately, make sure IDREF values match some ID
   943      * provided in the document (in the afterRoot method).
   944      */
   945 /*    void validateAttributeSyntax (Attribute attr, String value)
   946          throws DTDParseException {
   947         // ID, IDREF(S) ... values are Names
   948         if (Attribute.ID == attr.type()) {
   949             if (!XmlNames.isName (value))
   950                 error ("V-025", new Object [] { value });
   952             Boolean             b = (Boolean) ids.getNonInterned (value);
   953             if (b == null || b.equals (Boolean.FALSE))
   954                 ids.put (value.intern (), Boolean.TRUE);
   955             else
   956                 error ("V-026", new Object [] { value });
   958         } else if (Attribute.IDREF == attr.type()) {
   959             if (!XmlNames.isName (value))
   960                 error ("V-027", new Object [] { value });
   962             Boolean             b = (Boolean) ids.getNonInterned (value);
   963             if (b == null)
   964                 ids.put (value.intern (), Boolean.FALSE);
   966         } else if (Attribute.IDREFS == attr.type()) {
   967             StringTokenizer     tokenizer = new StringTokenizer (value);
   968             Boolean             b;
   969             boolean             sawValue = false;
   971             while (tokenizer.hasMoreTokens ()) {
   972                 value = tokenizer.nextToken ();
   973                 if (!XmlNames.isName (value))
   974                     error ("V-027", new Object [] { value });
   975                 b = (Boolean) ids.getNonInterned (value);
   976                 if (b == null)
   977                     ids.put (value.intern (), Boolean.FALSE);
   978                 sawValue = true;
   979             }
   980             if (!sawValue)
   981                 error ("V-039", null);
   984         // NMTOKEN(S) ... values are Nmtoken(s)
   985         } else if (Attribute.NMTOKEN == attr.type()) {
   986             if (!XmlNames.isNmtoken (value))
   987                 error ("V-028", new Object [] { value });
   989         } else if (Attribute.NMTOKENS == attr.type()) {
   990             StringTokenizer     tokenizer = new StringTokenizer (value);
   991             boolean             sawValue = false;
   993             while (tokenizer.hasMoreTokens ()) {
   994                 value = tokenizer.nextToken ();
   995                 if (!XmlNames.isNmtoken (value))
   996                     error ("V-028", new Object [] { value });
   997                 sawValue = true;
   998             }
   999             if (!sawValue)
  1000                 error ("V-032", null);
  1002         // ENUMERATION ... values match one of the tokens
  1003         } else if (Attribute.ENUMERATION == attr.type()) {
  1004             for (int i = 0; i < attr.values().length; i++)
  1005                 if (value.equals (attr.values()[i]))
  1006                     return;
  1007             error ("V-029", new Object [] { value });
  1009         // NOTATION values match a notation name
  1010         } else if (Attribute.NOTATION == attr.type()) {
  1011             //
  1012             // XXX XML 1.0 spec should probably list references to
  1013             // externally defined notations in standalone docs as
  1014             // validity errors.  Ditto externally defined unparsed
  1015             // entities; neither should show up in attributes, else
  1016             // one needs to read the external declarations in order
  1017             // to make sense of the document (exactly what tagging
  1018             // a doc as "standalone" intends you won't need to do).
  1019             //
  1020             for (int i = 0; i < attr.values().length; i++)
  1021                 if (value.equals (attr.values()[i]))
  1022                     return;
  1023             error ("V-030", new Object [] { value });
  1025         // ENTITY(IES) values match an unparsed entity(ies)
  1026         } else if (Attribute.ENTITY == attr.type()) {
  1027             // see note above re standalone
  1028             if (!isUnparsedEntity (value))
  1029                 error ("V-031", new Object [] { value });
  1031         } else if (Attribute.ENTITIES == attr.type()) {
  1032             StringTokenizer     tokenizer = new StringTokenizer (value);
  1033             boolean             sawValue = false;
  1035             while (tokenizer.hasMoreTokens ()) {
  1036                 value = tokenizer.nextToken ();
  1037                 // see note above re standalone
  1038                 if (!isUnparsedEntity (value))
  1039                     error ("V-031", new Object [] { value });
  1040                 sawValue = true;
  1042             if (!sawValue)
  1043                 error ("V-040", null);
  1045         } else if (Attribute.CDATA != attr.type())
  1046             throw new InternalError (attr.type());
  1048 */
  1049 /*
  1050     private boolean isUnparsedEntity (String name)
  1052         Object e = entities.getNonInterned (name);
  1053         if (e == null || !(e instanceof ExternalEntity))
  1054             return false;
  1055         return ((ExternalEntity)e).notation != null;
  1057 */
  1058     private boolean maybeElementDecl()
  1059             throws IOException, SAXException {
  1061         // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  1062         // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  1063         InputEntity start = peekDeclaration("!ELEMENT");
  1065         if (start == null)
  1066             return false;
  1068         // n.b. for content models where inter-element whitespace is
  1069         // ignorable, we mark that fact here.
  1070         String name = getMarkupDeclname("F-015", true);
  1071 //    Element        element = (Element) elements.get (name);
  1072 //    boolean        declEffective = false;
  1074 /*
  1075     if (element != null) {
  1076         if (element.contentModel() != null) {
  1077             error ("V-012", new Object [] { name });
  1078         } // else <!ATTLIST name ...> came first
  1079     } else {
  1080         element = new Element(name);
  1081         elements.put (element.name(), element);
  1082         declEffective = true;
  1084 */
  1085         if (declaredElements.contains(name))
  1086             error("V-012", new Object[]{name});
  1087         else {
  1088             declaredElements.add(name);
  1089 //        declEffective = true;
  1092         short modelType;
  1093         whitespace("F-000");
  1094         if (peek(strEMPTY)) {
  1095 ///        // leave element.contentModel as null for this case.
  1096             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
  1097         } else if (peek(strANY)) {
  1098 ///        element.setContentModel(new StringModel(StringModelType.ANY));
  1099             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
  1100         } else {
  1101             modelType = getMixedOrChildren(name);
  1104         dtdHandler.endContentModel(name, modelType);
  1106         maybeWhitespace();
  1107         char c = getc();
  1108         if (c != '>')
  1109             fatal("P-036", new Object[]{name, new Character(c)});
  1110         if (start != in)
  1111             error("V-013", null);
  1113 ///        dtdHandler.elementDecl(element);
  1115         return true;
  1118     // We're leaving the content model as a regular expression;
  1119     // it's an efficient natural way to express such things, and
  1120     // libraries often interpret them.  No whitespace in the
  1121     // model we store, though!
  1123     /**
  1124      * returns content model type.
  1125      */
  1126     private short getMixedOrChildren(String elementName/*Element element*/)
  1127             throws IOException, SAXException {
  1129         InputEntity start;
  1131         // [47] children ::= (choice|seq) ('?'|'*'|'+')?
  1132         strTmp = new StringBuffer();
  1134         nextChar('(', "F-028", elementName);
  1135         start = in;
  1136         maybeWhitespace();
  1137         strTmp.append('(');
  1139         short modelType;
  1140         if (peek("#PCDATA")) {
  1141             strTmp.append("#PCDATA");
  1142             dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);
  1143             getMixed(elementName, start);
  1144         } else {
  1145             dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
  1146             getcps(elementName, start);
  1149         return modelType;
  1152     // '(' S? already consumed
  1153     // matching ')' must be in "start" entity if validating
  1154     private void getcps(/*Element element,*/String elementName, InputEntity start)
  1155             throws IOException, SAXException {
  1157         // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
  1158         // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
  1159         // [50] seq    ::= '(' S? cp (S? ',' S? cp)* S? ')'
  1160         boolean decided = false;
  1161         char type = 0;
  1162 //        ContentModel       retval, temp, current;
  1164 //        retval = temp = current = null;
  1166         dtdHandler.startModelGroup();
  1168         do {
  1169             String tag;
  1171             tag = maybeGetName();
  1172             if (tag != null) {
  1173                 strTmp.append(tag);
  1174 //                temp = new ElementModel(tag);
  1175 //                getFrequency((RepeatableContent)temp);
  1176 ///->
  1177                 dtdHandler.childElement(tag, getFrequency());
  1178 ///<-
  1179             } else if (peek("(")) {
  1180                 InputEntity next = in;
  1181                 strTmp.append('(');
  1182                 maybeWhitespace();
  1183 //                temp = getcps(element, next);
  1184 //                getFrequency(temp);
  1185 ///->
  1186                 getcps(elementName, next);
  1187 ///                getFrequency();        <- this looks like a bug
  1188 ///<-
  1189             } else
  1190                 fatal((type == 0) ? "P-039" :
  1191                         ((type == ',') ? "P-037" : "P-038"),
  1192                         new Object[]{new Character(getc())});
  1194             maybeWhitespace();
  1195             if (decided) {
  1196                 char c = getc();
  1198 //                if (current != null) {
  1199 //                    current.addChild(temp);
  1200 //                }
  1201                 if (c == type) {
  1202                     strTmp.append(type);
  1203                     maybeWhitespace();
  1204                     reportConnector(type);
  1205                     continue;
  1206                 } else if (c == '\u0029') {    // rparen
  1207                     ungetc();
  1208                     continue;
  1209                 } else {
  1210                     fatal((type == 0) ? "P-041" : "P-040",
  1211                             new Object[]{
  1212                                 new Character(c),
  1213                                 new Character(type)
  1214                             });
  1216             } else {
  1217                 type = getc();
  1218                 switch (type) {
  1219                 case '|':
  1220                 case ',':
  1221                     reportConnector(type);
  1222                     break;
  1223                 default:
  1224 //                        retval = temp;
  1225                     ungetc();
  1226                     continue;
  1228 //                retval = (ContentModel)current;
  1229                 decided = true;
  1230 //                current.addChild(temp);
  1231                 strTmp.append(type);
  1233             maybeWhitespace();
  1234         } while (!peek(")"));
  1236         if (in != start)
  1237             error("V-014", new Object[]{elementName});
  1238         strTmp.append(')');
  1240         dtdHandler.endModelGroup(getFrequency());
  1241 //        return retval;
  1244     private void reportConnector(char type) throws SAXException {
  1245         switch (type) {
  1246         case '|':
  1247             dtdHandler.connector(DTDEventListener.CHOICE);    ///<-
  1248             return;
  1249         case ',':
  1250             dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
  1251             return;
  1252         default:
  1253             throw new Error();    //assertion failed.
  1257     private short getFrequency()
  1258             throws IOException, SAXException {
  1260         final char c = getc();
  1262         if (c == '?') {
  1263             strTmp.append(c);
  1264             return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
  1265             //        original.setRepeat(Repeat.ZERO_OR_ONE);
  1266         } else if (c == '+') {
  1267             strTmp.append(c);
  1268             return DTDEventListener.OCCURENCE_ONE_OR_MORE;
  1269             //        original.setRepeat(Repeat.ONE_OR_MORE);
  1270         } else if (c == '*') {
  1271             strTmp.append(c);
  1272             return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
  1273             //        original.setRepeat(Repeat.ZERO_OR_MORE);
  1274         } else {
  1275             ungetc();
  1276             return DTDEventListener.OCCURENCE_ONCE;
  1280     // '(' S? '#PCDATA' already consumed
  1281     // matching ')' must be in "start" entity if validating
  1282     private void getMixed(String elementName, /*Element element,*/ InputEntity start)
  1283             throws IOException, SAXException {
  1285         // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
  1286         //        | '(' S? '#PCDATA'                   S? ')'
  1287         maybeWhitespace();
  1288         if (peek("\u0029*") || peek("\u0029")) {
  1289             if (in != start)
  1290                 error("V-014", new Object[]{elementName});
  1291             strTmp.append(')');
  1292 //            element.setContentModel(new StringModel(StringModelType.PCDATA));
  1293             return;
  1296         ArrayList l = new ArrayList();
  1297 //    l.add(new StringModel(StringModelType.PCDATA));
  1300         while (peek("|")) {
  1301             String name;
  1303             strTmp.append('|');
  1304             maybeWhitespace();
  1306             doLexicalPE = true;
  1307             name = maybeGetName();
  1308             if (name == null)
  1309                 fatal("P-042", new Object[]
  1310                 {elementName, Integer.toHexString(getc())});
  1311             if (l.contains(name)) {
  1312                 error("V-015", new Object[]{name});
  1313             } else {
  1314                 l.add(name);
  1315                 dtdHandler.mixedElement(name);
  1317             strTmp.append(name);
  1318             maybeWhitespace();
  1321         if (!peek("\u0029*"))    // right paren
  1322             fatal("P-043", new Object[]
  1323             {elementName, new Character(getc())});
  1324         if (in != start)
  1325             error("V-014", new Object[]{elementName});
  1326         strTmp.append(')');
  1327 //        ChoiceModel cm = new ChoiceModel((Collection)l);
  1328 //    cm.setRepeat(Repeat.ZERO_OR_MORE);
  1329 //       element.setContentModel(cm);
  1332     private boolean maybeAttlistDecl()
  1333             throws IOException, SAXException {
  1335         // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  1336         InputEntity start = peekDeclaration("!ATTLIST");
  1338         if (start == null)
  1339             return false;
  1341         String elementName = getMarkupDeclname("F-016", true);
  1342 //    Element    element = (Element) elements.get (name);
  1344 //    if (element == null) {
  1345 //        // not yet declared -- no problem.
  1346 //        element = new Element(name);
  1347 //        elements.put(name, element);
  1348 //    }
  1350         while (!peek(">")) {
  1352             // [53] AttDef ::= S Name S AttType S DefaultDecl
  1353             // [54] AttType ::= StringType | TokenizedType | EnumeratedType
  1355             // look for global attribute definitions, don't expand for now...
  1356             maybeWhitespace();
  1357             char c = getc();
  1358             if (c == '%') {
  1359                 String entityName = maybeGetName();
  1360                 if (entityName != null) {
  1361                     nextChar(';', "F-021", entityName);
  1362                     whitespace("F-021");
  1363                     continue;
  1364                 } else
  1365                     fatal("P-011");
  1368             ungetc();
  1369             // look for attribute name otherwise
  1370             String attName = maybeGetName();
  1371             if (attName == null) {
  1372                 fatal("P-044", new Object[]{new Character(getc())});
  1374             whitespace("F-001");
  1376 ///        Attribute    a = new Attribute (name);
  1378             String typeName;
  1379             Vector values = null;    // notation/enumeration values
  1381             // Note:  use the type constants from Attribute
  1382             // so that "==" may be used (faster)
  1384             // [55] StringType ::= 'CDATA'
  1385             if (peek(TYPE_CDATA))
  1386 ///            a.setType(Attribute.CDATA);
  1387                 typeName = TYPE_CDATA;
  1389             // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
  1390             //        | 'ENTITY' | 'ENTITIES'
  1391             //        | 'NMTOKEN' | 'NMTOKENS'
  1392             // n.b. if "IDREFS" is there, both "ID" and "IDREF"
  1393             // match peekahead ... so this order matters!
  1394             else if (peek(TYPE_IDREFS))
  1395                 typeName = TYPE_IDREFS;
  1396             else if (peek(TYPE_IDREF))
  1397                 typeName = TYPE_IDREF;
  1398             else if (peek(TYPE_ID)) {
  1399                 typeName = TYPE_ID;
  1400 // TODO: should implement this error check?
  1401 ///        if (element.id() != null) {
  1402 ///                    error ("V-016", new Object [] { element.id() });
  1403 ///        } else
  1404 ///            element.setId(name);
  1405             } else if (peek(TYPE_ENTITY))
  1406                 typeName = TYPE_ENTITY;
  1407             else if (peek(TYPE_ENTITIES))
  1408                 typeName = TYPE_ENTITIES;
  1409             else if (peek(TYPE_NMTOKENS))
  1410                 typeName = TYPE_NMTOKENS;
  1411             else if (peek(TYPE_NMTOKEN))
  1412                 typeName = TYPE_NMTOKEN;
  1414             // [57] EnumeratedType ::= NotationType | Enumeration
  1415             // [58] NotationType ::= 'NOTATION' S '(' S? Name
  1416             //        (S? '|' S? Name)* S? ')'
  1417             else if (peek(TYPE_NOTATION)) {
  1418                 typeName = TYPE_NOTATION;
  1419                 whitespace("F-002");
  1420                 nextChar('(', "F-029", null);
  1421                 maybeWhitespace();
  1423                 values = new Vector();
  1424                 do {
  1425                     String name;
  1426                     if ((name = maybeGetName()) == null)
  1427                         fatal("P-068");
  1428                     // permit deferred declarations
  1429                     if (notations.get(name) == null)
  1430                         notations.put(name, name);
  1431                     values.addElement(name);
  1432                     maybeWhitespace();
  1433                     if (peek("|"))
  1434                         maybeWhitespace();
  1435                 } while (!peek(")"));
  1436 ///            a.setValues(new String [v.size ()]);
  1437 ///            for (int i = 0; i < v.size (); i++)
  1438 ///                a.setValue(i, (String)v.elementAt(i));
  1440                 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
  1441             } else if (peek("(")) {
  1442 ///            a.setType(Attribute.ENUMERATION);
  1443                 typeName = TYPE_ENUMERATION;
  1445                 maybeWhitespace();
  1447 ///            Vector v = new Vector ();
  1448                 values = new Vector();
  1449                 do {
  1450                     String name = getNmtoken();
  1451 ///                v.addElement (name);
  1452                     values.addElement(name);
  1453                     maybeWhitespace();
  1454                     if (peek("|"))
  1455                         maybeWhitespace();
  1456                 } while (!peek(")"));
  1457 ///            a.setValues(new String [v.size ()]);
  1458 ///            for (int i = 0; i < v.size (); i++)
  1459 ///                a.setValue(i, (String)v.elementAt(i));
  1460             } else {
  1461                 fatal("P-045",
  1462                         new Object[]{attName, new Character(getc())});
  1463                 typeName = null;
  1466             short attributeUse;
  1467             String defaultValue = null;
  1469             // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
  1470             //        | (('#FIXED' S)? AttValue)
  1471             whitespace("F-003");
  1472             if (peek("#REQUIRED"))
  1473                 attributeUse = DTDEventListener.USE_REQUIRED;
  1474 ///            a.setIsRequired(true);
  1475             else if (peek("#FIXED")) {
  1476 ///            if (a.type() == Attribute.ID)
  1477                 if (typeName == TYPE_ID)
  1478                     error("V-017", new Object[]{attName});
  1479 ///            a.setIsFixed(true);
  1480                 attributeUse = DTDEventListener.USE_FIXED;
  1481                 whitespace("F-004");
  1482                 parseLiteral(false);
  1483 ///            if (a.type() != Attribute.CDATA)
  1484 ///                a.setDefaultValue(normalize(false));
  1485 ///            else
  1486 ///                a.setDefaultValue(strTmp.toString());
  1488                 if (typeName == TYPE_CDATA)
  1489                     defaultValue = normalize(false);
  1490                 else
  1491                     defaultValue = strTmp.toString();
  1493 // TODO: implement this check
  1494 ///            if (a.type() != Attribute.CDATA)
  1495 ///                validateAttributeSyntax (a, a.defaultValue());
  1496             } else if (!peek("#IMPLIED")) {
  1497                 attributeUse = DTDEventListener.USE_IMPLIED;
  1499 ///            if (a.type() == Attribute.ID)
  1500                 if (typeName == TYPE_ID)
  1501                     error("V-018", new Object[]{attName});
  1502                 parseLiteral(false);
  1503 ///            if (a.type() != Attribute.CDATA)
  1504 ///                a.setDefaultValue(normalize(false));
  1505 ///            else
  1506 ///                a.setDefaultValue(strTmp.toString());
  1507                 if (typeName == TYPE_CDATA)
  1508                     defaultValue = normalize(false);
  1509                 else
  1510                     defaultValue = strTmp.toString();
  1512 // TODO: implement this check
  1513 ///            if (a.type() != Attribute.CDATA)
  1514 ///                validateAttributeSyntax (a, a.defaultValue());
  1515             } else {
  1516                 // TODO: this looks like an fatal error.
  1517                 attributeUse = DTDEventListener.USE_NORMAL;
  1520             if (XmlLang.equals(attName)
  1521                     && defaultValue/* a.defaultValue()*/ != null
  1522                     && !isXmlLang(defaultValue/*a.defaultValue()*/))
  1523                 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});
  1525 // TODO: isn't it an error to specify the same attribute twice?
  1526 ///        if (!element.attributes().contains(a)) {
  1527 ///            element.addAttribute(a);
  1528 ///            dtdHandler.attributeDecl(a);
  1529 ///        }
  1531             String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null;
  1532             dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
  1533             maybeWhitespace();
  1535         if (start != in)
  1536             error("V-013", null);
  1537         return true;
  1540     // used when parsing literal attribute values,
  1541     // or public identifiers.
  1542     //
  1543     // input in strTmp
  1544     private String normalize(boolean invalidIfNeeded) {
  1546         // this can allocate an extra string...
  1548         String s = strTmp.toString();
  1549         String s2 = s.trim();
  1550         boolean didStrip = false;
  1552         if (s != s2) {
  1553             s = s2;
  1554             s2 = null;
  1555             didStrip = true;
  1557         strTmp = new StringBuffer();
  1558         for (int i = 0; i < s.length(); i++) {
  1559             char c = s.charAt(i);
  1560             if (!XmlChars.isSpace(c)) {
  1561                 strTmp.append(c);
  1562                 continue;
  1564             strTmp.append(' ');
  1565             while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
  1566                 didStrip = true;
  1567             i--;
  1569         if (didStrip)
  1570             return strTmp.toString();
  1571         else
  1572             return s;
  1575     private boolean maybeConditionalSect()
  1576             throws IOException, SAXException {
  1578         // [61] conditionalSect ::= includeSect | ignoreSect
  1580         if (!peek("<!["))
  1581             return false;
  1583         String keyword;
  1584         InputEntity start = in;
  1586         maybeWhitespace();
  1588         if ((keyword = maybeGetName()) == null)
  1589             fatal("P-046");
  1590         maybeWhitespace();
  1591         nextChar('[', "F-030", null);
  1593         // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
  1594         //                extSubsetDecl ']]>'
  1595         if ("INCLUDE".equals(keyword)) {
  1596             for (; ;) {
  1597                 while (in.isEOF() && in != start)
  1598                     in = in.pop();
  1599                 if (in.isEOF()) {
  1600                     error("V-020", null);
  1602                 if (peek("]]>"))
  1603                     break;
  1605                 doLexicalPE = false;
  1606                 if (maybeWhitespace())
  1607                     continue;
  1608                 if (maybePEReference())
  1609                     continue;
  1610                 doLexicalPE = true;
  1611                 if (maybeMarkupDecl() || maybeConditionalSect())
  1612                     continue;
  1614                 fatal("P-047");
  1617             // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
  1618             //            ignoreSectcontents ']]>'
  1619             // [64] ignoreSectcontents ::= Ignore ('<!['
  1620             //            ignoreSectcontents ']]>' Ignore)*
  1621             // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
  1622         } else if ("IGNORE".equals(keyword)) {
  1623             int nestlevel = 1;
  1624             // ignoreSectcontents
  1625             doLexicalPE = false;
  1626             while (nestlevel > 0) {
  1627                 char c = getc();    // will pop input entities
  1628                 if (c == '<') {
  1629                     if (peek("!["))
  1630                         nestlevel++;
  1631                 } else if (c == ']') {
  1632                     if (peek("]>"))
  1633                         nestlevel--;
  1634                 } else
  1635                     continue;
  1637         } else
  1638             fatal("P-048", new Object[]{keyword});
  1639         return true;
  1643     //
  1644     // CHAPTER 4:  Physical Structures
  1645     //
  1647     // parse decimal or hex numeric character reference
  1648     private int parseCharNumber()
  1649             throws IOException, SAXException {
  1651         char c;
  1652         int retval = 0;
  1654         // n.b. we ignore overflow ...
  1655         if (getc() != 'x') {
  1656             ungetc();
  1657             for (; ;) {
  1658                 c = getc();
  1659                 if (c >= '0' && c <= '9') {
  1660                     retval *= 10;
  1661                     retval += (c - '0');
  1662                     continue;
  1664                 if (c == ';')
  1665                     return retval;
  1666                 fatal("P-049");
  1668         } else
  1669             for (; ;) {
  1670                 c = getc();
  1671                 if (c >= '0' && c <= '9') {
  1672                     retval <<= 4;
  1673                     retval += (c - '0');
  1674                     continue;
  1676                 if (c >= 'a' && c <= 'f') {
  1677                     retval <<= 4;
  1678                     retval += 10 + (c - 'a');
  1679                     continue;
  1681                 if (c >= 'A' && c <= 'F') {
  1682                     retval <<= 4;
  1683                     retval += 10 + (c - 'A');
  1684                     continue;
  1686                 if (c == ';')
  1687                     return retval;
  1688                 fatal("P-050");
  1692     // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
  1693     // though still subject to the 'Char' construct in XML
  1694     private int surrogatesToCharTmp(int ucs4)
  1695             throws SAXException {
  1697         if (ucs4 <= 0xffff) {
  1698             if (XmlChars.isChar(ucs4)) {
  1699                 charTmp[0] = (char) ucs4;
  1700                 return 1;
  1702         } else if (ucs4 <= 0x0010ffff) {
  1703             // we represent these as UNICODE surrogate pairs
  1704             ucs4 -= 0x10000;
  1705             charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
  1706             charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
  1707             return 2;
  1709         fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
  1710         // NOTREACHED
  1711         return -1;
  1714     private boolean maybePEReference()
  1715             throws IOException, SAXException {
  1717         // This is the SYNTACTIC version of this construct.
  1718         // When processing external entities, there is also
  1719         // a LEXICAL version; see getc() and doLexicalPE.
  1721         // [69] PEReference ::= '%' Name ';'
  1722         if (!in.peekc('%'))
  1723             return false;
  1725         String name = maybeGetName();
  1726         Object entity;
  1728         if (name == null)
  1729             fatal("P-011");
  1730         nextChar(';', "F-021", name);
  1731         entity = params.get(name);
  1733         if (entity instanceof InternalEntity) {
  1734             InternalEntity value = (InternalEntity) entity;
  1735             pushReader(value.buf, name, false);
  1737         } else if (entity instanceof ExternalEntity) {
  1738             pushReader((ExternalEntity) entity);
  1739             externalParameterEntity((ExternalEntity) entity);
  1741         } else if (entity == null) {
  1742             error("V-022", new Object[]{name});
  1744         return true;
  1747     private boolean maybeEntityDecl()
  1748             throws IOException, SAXException {
  1750         // [70] EntityDecl ::= GEDecl | PEDecl
  1751         // [71] GEDecl ::= '<!ENTITY' S       Name S EntityDef S? '>'
  1752         // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF     S? '>'
  1753         // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
  1754         // [74] PEDef     ::= EntityValue |  ExternalID
  1755         //
  1756         InputEntity start = peekDeclaration("!ENTITY");
  1758         if (start == null)
  1759             return false;
  1761         String entityName;
  1762         SimpleHashtable defns;
  1763         ExternalEntity externalId;
  1764         boolean doStore;
  1766         // PE expansion gets selectively turned off several places:
  1767         // in ENTITY declarations (here), in comments, in PIs.
  1769         // Here, we allow PE entities to be declared, and allows
  1770         // literals to include PE refs without the added spaces
  1771         // required with their expansion in markup decls.
  1773         doLexicalPE = false;
  1774         whitespace("F-005");
  1775         if (in.peekc('%')) {
  1776             whitespace("F-006");
  1777             defns = params;
  1778         } else
  1779             defns = entities;
  1781         ungetc();    // leave some whitespace
  1782         doLexicalPE = true;
  1783         entityName = getMarkupDeclname("F-017", false);
  1784         whitespace("F-007");
  1785         externalId = maybeExternalID();
  1787         //
  1788         // first definition sticks ... e.g. internal subset PEs are used
  1789         // to override DTD defaults.  It's also an "error" to incorrectly
  1790         // redefine builtin internal entities, but since reporting such
  1791         // errors is optional we only give warnings ("just in case") for
  1792         // non-parameter entities.
  1793         //
  1794         doStore = (defns.get(entityName) == null);
  1795         if (!doStore && defns == entities)
  1796             warning("P-054", new Object[]{entityName});
  1798         // internal entities
  1799         if (externalId == null) {
  1800             char value [];
  1801             InternalEntity entity;
  1803             doLexicalPE = false;        // "ab%bar;cd" -maybe-> "abcd"
  1804             parseLiteral(true);
  1805             doLexicalPE = true;
  1806             if (doStore) {
  1807                 value = new char[strTmp.length()];
  1808                 if (value.length != 0)
  1809                     strTmp.getChars(0, value.length, value, 0);
  1810                 entity = new InternalEntity(entityName, value);
  1811                 entity.isPE = (defns == params);
  1812                 entity.isFromInternalSubset = false;
  1813                 defns.put(entityName, entity);
  1814                 if (defns == entities)
  1815                     dtdHandler.internalGeneralEntityDecl(entityName,
  1816                             new String(value));
  1819             // external entities (including unparsed)
  1820         } else {
  1821             // [76] NDataDecl ::= S 'NDATA' S Name
  1822             if (defns == entities && maybeWhitespace()
  1823                     && peek("NDATA")) {
  1824                 externalId.notation = getMarkupDeclname("F-018", false);
  1826                 // flag undeclared notation for checking after
  1827                 // the DTD is fully processed
  1828                 if (notations.get(externalId.notation) == null)
  1829                     notations.put(externalId.notation, Boolean.TRUE);
  1831             externalId.name = entityName;
  1832             externalId.isPE = (defns == params);
  1833             externalId.isFromInternalSubset = false;
  1834             if (doStore) {
  1835                 defns.put(entityName, externalId);
  1836                 if (externalId.notation != null)
  1837                     dtdHandler.unparsedEntityDecl(entityName,
  1838                             externalId.publicId, externalId.systemId,
  1839                             externalId.notation);
  1840                 else if (defns == entities)
  1841                     dtdHandler.externalGeneralEntityDecl(entityName,
  1842                             externalId.publicId, externalId.systemId);
  1845         maybeWhitespace();
  1846         nextChar('>', "F-031", entityName);
  1847         if (start != in)
  1848             error("V-013", null);
  1849         return true;
  1852     private ExternalEntity maybeExternalID()
  1853             throws IOException, SAXException {
  1855         // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
  1856         //        | 'PUBLIC' S' PubidLiteral S Systemliteral
  1857         String temp = null;
  1858         ExternalEntity retval;
  1860         if (peek("PUBLIC")) {
  1861             whitespace("F-009");
  1862             temp = parsePublicId();
  1863         } else if (!peek("SYSTEM"))
  1864             return null;
  1866         retval = new ExternalEntity(in);
  1867         retval.publicId = temp;
  1868         whitespace("F-008");
  1869         retval.systemId = parseSystemId();
  1870         return retval;
  1873     private String parseSystemId()
  1874             throws IOException, SAXException {
  1876         String uri = getQuotedString("F-034", null);
  1877         int temp = uri.indexOf(':');
  1879         // resolve relative URIs ... must do it here since
  1880         // it's relative to the source file holding the URI!
  1882         // "new java.net.URL (URL, string)" conforms to RFC 1630,
  1883         // but we can't use that except when the URI is a URL.
  1884         // The entity resolver is allowed to handle URIs that are
  1885         // not URLs, so we pass URIs through with scheme intact
  1886         if (temp == -1 || uri.indexOf('/') < temp) {
  1887             String baseURI;
  1889             baseURI = in.getSystemId();
  1890             if (baseURI == null)
  1891                 fatal("P-055", new Object[]{uri});
  1892             if (uri.length() == 0)
  1893                 uri = ".";
  1894             baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
  1895             if (uri.charAt(0) != '/')
  1896                 uri = baseURI + uri;
  1897             else {
  1898                 // XXX slashes at the beginning of a relative URI are
  1899                 // a special case we don't handle.
  1900                 throw new InternalError();
  1903             // letting other code map any "/xxx/../" or "/./" to "/",
  1904             // since all URIs must handle it the same.
  1906         // check for fragment ID in URI
  1907         if (uri.indexOf('#') != -1)
  1908             error("P-056", new Object[]{uri});
  1909         return uri;
  1912     private void maybeTextDecl()
  1913             throws IOException, SAXException {
  1915         // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  1916         if (peek("<?xml")) {
  1917             readVersion(false, "1.0");
  1918             readEncoding(true);
  1919             maybeWhitespace();
  1920             if (!peek("?>"))
  1921                 fatal("P-057");
  1925     private void externalParameterEntity(ExternalEntity next)
  1926             throws IOException, SAXException {
  1928         //
  1929         // Reap the intended benefits of standalone declarations:
  1930         // don't deal with external parameter entities, except to
  1931         // validate the standalone declaration.
  1932         //
  1934         // n.b. "in external parameter entities" (and external
  1935         // DTD subset, same grammar) parameter references can
  1936         // occur "within" markup declarations ... expansions can
  1937         // cross syntax rules.  Flagged here; affects getc().
  1939         // [79] ExtPE ::= TextDecl? extSubsetDecl
  1940         // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
  1941         //        | PEReference | S )*
  1942         InputEntity pe;
  1944         // XXX if this returns false ...
  1946         pe = in;
  1947         maybeTextDecl();
  1948         while (!pe.isEOF()) {
  1949             // pop internal PEs (and whitespace before/after)
  1950             if (in.isEOF()) {
  1951                 in = in.pop();
  1952                 continue;
  1954             doLexicalPE = false;
  1955             if (maybeWhitespace())
  1956                 continue;
  1957             if (maybePEReference())
  1958                 continue;
  1959             doLexicalPE = true;
  1960             if (maybeMarkupDecl() || maybeConditionalSect())
  1961                 continue;
  1962             break;
  1964         // if (in != pe) throw new InternalError("who popped my PE?");
  1965         if (!pe.isEOF())
  1966             fatal("P-059", new Object[]{in.getName()});
  1969     private void readEncoding(boolean must)
  1970             throws IOException, SAXException {
  1972         // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  1973         String name = maybeReadAttribute("encoding", must);
  1975         if (name == null)
  1976             return;
  1977         for (int i = 0; i < name.length(); i++) {
  1978             char c = name.charAt(i);
  1979             if ((c >= 'A' && c <= 'Z')
  1980                     || (c >= 'a' && c <= 'z'))
  1981                 continue;
  1982             if (i != 0
  1983                     && ((c >= '0' && c <= '9')
  1984                     || c == '-'
  1985                     || c == '_'
  1986                     || c == '.'
  1987                     ))
  1988                 continue;
  1989             fatal("P-060", new Object[]{new Character(c)});
  1992         //
  1993         // This should be the encoding in use, and it's even an error for
  1994         // it to be anything else (in certain cases that are impractical to
  1995         // to test, and may even be insufficient).  So, we do the best we
  1996         // can, and warn if things look suspicious.  Note that Java doesn't
  1997         // uniformly expose the encodings, and that the names it uses
  1998         // internally are nonstandard.  Also, that the XML spec allows
  1999         // such "errors" not to be reported at all.
  2000         //
  2001         String currentEncoding = in.getEncoding();
  2003         if (currentEncoding != null
  2004                 && !name.equalsIgnoreCase(currentEncoding))
  2005             warning("P-061", new Object[]{name, currentEncoding});
  2008     private boolean maybeNotationDecl()
  2009             throws IOException, SAXException {
  2011         // [82] NotationDecl ::= '<!NOTATION' S Name S
  2012         //        (ExternalID | PublicID) S? '>'
  2013         // [83] PublicID ::= 'PUBLIC' S PubidLiteral
  2014         InputEntity start = peekDeclaration("!NOTATION");
  2016         if (start == null)
  2017             return false;
  2019         String name = getMarkupDeclname("F-019", false);
  2020         ExternalEntity entity = new ExternalEntity(in);
  2022         whitespace("F-011");
  2023         if (peek("PUBLIC")) {
  2024             whitespace("F-009");
  2025             entity.publicId = parsePublicId();
  2026             if (maybeWhitespace()) {
  2027                 if (!peek(">"))
  2028                     entity.systemId = parseSystemId();
  2029                 else
  2030                     ungetc();
  2032         } else if (peek("SYSTEM")) {
  2033             whitespace("F-008");
  2034             entity.systemId = parseSystemId();
  2035         } else
  2036             fatal("P-062");
  2037         maybeWhitespace();
  2038         nextChar('>', "F-032", name);
  2039         if (start != in)
  2040             error("V-013", null);
  2041         if (entity.systemId != null && entity.systemId.indexOf('#') != -1)
  2042             error("P-056", new Object[]{entity.systemId});
  2044         Object value = notations.get(name);
  2045         if (value != null && value instanceof ExternalEntity)
  2046             warning("P-063", new Object[]{name});
  2048         else {
  2049             notations.put(name, entity);
  2050             dtdHandler.notationDecl(name, entity.publicId,
  2051                     entity.systemId);
  2053         return true;
  2057     ////////////////////////////////////////////////////////////////
  2058     //
  2059     //    UTILITIES
  2060     //
  2061     ////////////////////////////////////////////////////////////////
  2063     private char getc() throws IOException, SAXException {
  2065         if (!doLexicalPE) {
  2066             char c = in.getc();
  2067             return c;
  2070         //
  2071         // External parameter entities get funky processing of '%param;'
  2072         // references.  It's not clearly defined in the XML spec; but it
  2073         // boils down to having those refs be _lexical_ in most cases to
  2074         // include partial syntax productions.  It also needs selective
  2075         // enabling; "<!ENTITY % foo ...>" must work, for example, and
  2076         // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
  2077         // if it's expanded in a literal, else "ab  cd".  PEs also do
  2078         // not expand within comments or PIs, and external PEs are only
  2079         // allowed to have markup decls (and so aren't handled lexically).
  2080         //
  2081         // This PE handling should be merged into maybeWhitespace, where
  2082         // it can be dealt with more consistently.
  2083         //
  2084         // Also, there are some validity constraints in this area.
  2085         //
  2086         char c;
  2088         while (in.isEOF()) {
  2089             if (in.isInternal() || (doLexicalPE && !in.isDocument()))
  2090                 in = in.pop();
  2091             else {
  2092                 fatal("P-064", new Object[]{in.getName()});
  2095         if ((c = in.getc()) == '%' && doLexicalPE) {
  2096             // PE ref ::= '%' name ';'
  2097             String name = maybeGetName();
  2098             Object entity;
  2100             if (name == null)
  2101                 fatal("P-011");
  2102             nextChar(';', "F-021", name);
  2103             entity = params.get(name);
  2105             // push a magic "entity" before and after the
  2106             // real one, so ungetc() behaves uniformly
  2107             pushReader(" ".toCharArray(), null, false);
  2108             if (entity instanceof InternalEntity)
  2109                 pushReader(((InternalEntity) entity).buf, name, false);
  2110             else if (entity instanceof ExternalEntity)
  2111             // PEs can't be unparsed!
  2112             // XXX if this returns false ...
  2113                 pushReader((ExternalEntity) entity);
  2114             else if (entity == null)
  2115             // see note in maybePEReference re making this be nonfatal.
  2116                 fatal("V-022");
  2117             else
  2118                 throw new InternalError();
  2119             pushReader(" ".toCharArray(), null, false);
  2120             return in.getc();
  2122         return c;
  2125     private void ungetc() {
  2127         in.ungetc();
  2130     private boolean peek(String s)
  2131             throws IOException, SAXException {
  2133         return in.peek(s, null);
  2136     // Return the entity starting the specified declaration
  2137     // (for validating declaration nesting) else null.
  2139     private InputEntity peekDeclaration(String s)
  2140             throws IOException, SAXException {
  2142         InputEntity start;
  2144         if (!in.peekc('<'))
  2145             return null;
  2146         start = in;
  2147         if (in.peek(s, null))
  2148             return start;
  2149         in.ungetc();
  2150         return null;
  2153     private void nextChar(char c, String location, String near)
  2154             throws IOException, SAXException {
  2156         while (in.isEOF() && !in.isDocument())
  2157             in = in.pop();
  2158         if (!in.peekc(c))
  2159             fatal("P-008", new Object[]
  2160             {new Character(c),
  2161              messages.getMessage(locale, location),
  2162              (near == null ? "" : ('"' + near + '"'))});
  2166     private void pushReader(char buf [], String name, boolean isGeneral)
  2167             throws SAXException {
  2169         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
  2170         r.init(buf, name, in, !isGeneral);
  2171         in = r;
  2174     private boolean pushReader(ExternalEntity next)
  2175             throws IOException, SAXException {
  2177         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
  2178         InputSource s;
  2179         try {
  2180             s = next.getInputSource(resolver);
  2181         } catch (IOException e) {
  2182             String msg =
  2183                     "unable to open the external entity from :" + next.systemId;
  2184             if (next.publicId != null)
  2185                 msg += " (public id:" + next.publicId + ")";
  2187             SAXParseException spe = new SAXParseException(msg,
  2188                     getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
  2189             dtdHandler.fatalError(spe);
  2190             throw e;
  2193         r.init(s, next.name, in, next.isPE);
  2194         in = r;
  2195         return true;
  2198     public String getPublicId() {
  2200         return (in == null) ? null : in.getPublicId();
  2203     public String getSystemId() {
  2205         return (in == null) ? null : in.getSystemId();
  2208     public int getLineNumber() {
  2210         return (in == null) ? -1 : in.getLineNumber();
  2213     public int getColumnNumber() {
  2215         return (in == null) ? -1 : in.getColumnNumber();
  2218     // error handling convenience routines
  2220     private void warning(String messageId, Object parameters [])
  2221             throws SAXException {
  2223         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
  2224                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
  2226         dtdHandler.warning(e);
  2229     void error(String messageId, Object parameters [])
  2230             throws SAXException {
  2232         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
  2233                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
  2235         dtdHandler.error(e);
  2238     private void fatal(String messageId) throws SAXException {
  2240         fatal(messageId, null);
  2243     private void fatal(String messageId, Object parameters [])
  2244             throws SAXException {
  2246         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
  2247                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
  2249         dtdHandler.fatalError(e);
  2251         throw e;
  2254     //
  2255     // Map char arrays to strings ... cuts down both on memory and
  2256     // CPU usage for element/attribute/other names that are reused.
  2257     //
  2258     // Documents typically repeat names a lot, so we more or less
  2259     // intern all the strings within the document; since some strings
  2260     // are repeated in multiple documents (e.g. stylesheets) we go
  2261     // a bit further, and intern globally.
  2262     //
  2263     static class NameCache {
  2264         //
  2265         // Unless we auto-grow this, the default size should be a
  2266         // reasonable bit larger than needed for most XML files
  2267         // we've yet seen (and be prime).  If it's too small, the
  2268         // penalty is just excess cache collisions.
  2269         //
  2270         NameCacheEntry hashtable [] = new NameCacheEntry[541];
  2272         //
  2273         // Usually we just want to get the 'symbol' for these chars
  2274         //
  2275         String lookup(char value [], int len) {
  2277             return lookupEntry(value, len).name;
  2280         //
  2281         // Sometimes we need to scan the chars in the resulting
  2282         // string, so there's an accessor which exposes them.
  2283         // (Mostly for element end tags.)
  2284         //
  2285         NameCacheEntry lookupEntry(char value [], int len) {
  2287             int index = 0;
  2288             NameCacheEntry entry;
  2290             // hashing to get index
  2291             for (int i = 0; i < len; i++)
  2292                 index = index * 31 + value[i];
  2293             index &= 0x7fffffff;
  2294             index %= hashtable.length;
  2296             // return entry if one's there ...
  2297             for (entry = hashtable[index];
  2298                  entry != null;
  2299                  entry = entry.next) {
  2300                 if (entry.matches(value, len))
  2301                     return entry;
  2304             // else create new one
  2305             entry = new NameCacheEntry();
  2306             entry.chars = new char[len];
  2307             System.arraycopy(value, 0, entry.chars, 0, len);
  2308             entry.name = new String(entry.chars);
  2309             //
  2310             // NOTE:  JDK 1.1 has a fixed size string intern table,
  2311             // with non-GC'd entries.  It can panic here; that's a
  2312             // JDK problem, use 1.2 or later with many identifiers.
  2313             //
  2314             entry.name = entry.name.intern();        // "global" intern
  2315             entry.next = hashtable[index];
  2316             hashtable[index] = entry;
  2317             return entry;
  2321     static class NameCacheEntry {
  2323         String name;
  2324         char chars [];
  2325         NameCacheEntry next;
  2327         boolean matches(char value [], int len) {
  2329             if (chars.length != len)
  2330                 return false;
  2331             for (int i = 0; i < len; i++)
  2332                 if (value[i] != chars[i])
  2333                     return false;
  2334             return true;
  2338     //
  2339     // Message catalog for diagnostics.
  2340     //
  2341     static final Catalog messages = new Catalog();
  2343     static final class Catalog extends MessageCatalog {
  2345         Catalog() {
  2346             super(DTDParser.class);

mercurial