src/share/jaxws_classes/com/sun/xml/internal/dtdparser/DTDParser.java

changeset 0
373ffda63c9a
child 637
9c07ef4934dd
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/jaxws_classes/com/sun/xml/internal/dtdparser/DTDParser.java	Wed Apr 27 01:27:09 2016 +0800
     1.3 @@ -0,0 +1,2350 @@
     1.4 +/*
     1.5 + * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +package com.sun.xml.internal.dtdparser;
    1.30 +
    1.31 +import org.xml.sax.EntityResolver;
    1.32 +import org.xml.sax.InputSource;
    1.33 +import org.xml.sax.Locator;
    1.34 +import org.xml.sax.SAXException;
    1.35 +import org.xml.sax.SAXParseException;
    1.36 +
    1.37 +import java.io.IOException;
    1.38 +import java.util.ArrayList;
    1.39 +import java.util.Enumeration;
    1.40 +import java.util.Hashtable;
    1.41 +import java.util.Locale;
    1.42 +import java.util.Set;
    1.43 +import java.util.Vector;
    1.44 +
/**
 * This implements parsing of XML 1.0 DTDs.
 * <p>
 * This conforms to the portion of the XML 1.0 specification related
 * to the external DTD subset.
 * <p>
 * For multi-language applications (such as web servers using XML
 * processing to create dynamic content), a method supports choosing
 * a locale for parser diagnostics which is both understood by the
 * message recipient and supported by the parser.
 * <p>
 * This parser produces a stream of parse events.  It supports some
 * features (exposing comments, CDATA sections, and entity references)
 * which are not required to be reported by conformant XML processors.
 *
 * @author David Brownell
 * @author Janet Koenig
 * @author Kohsuke KAWAGUCHI
 * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $
 */
    1.65 +public class DTDParser {
    1.66 +    public final static String TYPE_CDATA = "CDATA";
    1.67 +    public final static String TYPE_ID = "ID";
    1.68 +    public final static String TYPE_IDREF = "IDREF";
    1.69 +    public final static String TYPE_IDREFS = "IDREFS";
    1.70 +    public final static String TYPE_ENTITY = "ENTITY";
    1.71 +    public final static String TYPE_ENTITIES = "ENTITIES";
    1.72 +    public final static String TYPE_NMTOKEN = "NMTOKEN";
    1.73 +    public final static String TYPE_NMTOKENS = "NMTOKENS";
    1.74 +    public final static String TYPE_NOTATION = "NOTATION";
    1.75 +    public final static String TYPE_ENUMERATION = "ENUMERATION";
    1.76 +
    1.77 +
    1.78 +    // stack of input entities being merged
    1.79 +    private InputEntity in;
    1.80 +
    1.81 +    // temporaries reused during parsing
    1.82 +    private StringBuffer strTmp;
    1.83 +    private char nameTmp [];
    1.84 +    private NameCache nameCache;
    1.85 +    private char charTmp [] = new char[2];
    1.86 +
    1.87 +    // temporary DTD parsing state
    1.88 +    private boolean doLexicalPE;
    1.89 +
    1.90 +    // DTD state, used during parsing
    1.91 +//    private SimpleHashtable    elements = new SimpleHashtable (47);
    1.92 +    protected final Set declaredElements = new java.util.HashSet();
    1.93 +    private SimpleHashtable params = new SimpleHashtable(7);
    1.94 +
    1.95 +    // exposed to package-private subclass
    1.96 +    Hashtable notations = new Hashtable(7);
    1.97 +    SimpleHashtable entities = new SimpleHashtable(17);
    1.98 +
    1.99 +    private SimpleHashtable ids = new SimpleHashtable();
   1.100 +
   1.101 +    // listeners for DTD parsing events
   1.102 +    private DTDEventListener dtdHandler;
   1.103 +
   1.104 +    private EntityResolver resolver;
   1.105 +    private Locale locale;
   1.106 +
   1.107 +    // string constants -- use these copies so "==" works
   1.108 +    // package private
   1.109 +    static final String strANY = "ANY";
   1.110 +    static final String strEMPTY = "EMPTY";
   1.111 +
   1.112 +    /**
   1.113 +     * Used by applications to request locale for diagnostics.
   1.114 +     *
   1.115 +     * @param l The locale to use, or null to use system defaults
   1.116 +     *          (which may include only message IDs).
   1.117 +     */
   1.118 +    public void setLocale(Locale l) throws SAXException {
   1.119 +
   1.120 +        if (l != null && !messages.isLocaleSupported(l.toString())) {
   1.121 +            throw new SAXException(messages.getMessage(locale,
   1.122 +                    "P-078", new Object[]{l}));
   1.123 +        }
   1.124 +        locale = l;
   1.125 +    }
   1.126 +
   1.127 +    /**
   1.128 +     * Returns the diagnostic locale.
   1.129 +     */
   1.130 +    public Locale getLocale() {
   1.131 +        return locale;
   1.132 +    }
   1.133 +
   1.134 +    /**
   1.135 +     * Chooses a client locale to use for diagnostics, using the first
   1.136 +     * language specified in the list that is supported by this parser.
   1.137 +     * That locale is then set using <a href="#setLocale(java.util.Locale)">
   1.138 +     * setLocale()</a>.  Such a list could be provided by a variety of user
   1.139 +     * preference mechanisms, including the HTTP <em>Accept-Language</em>
   1.140 +     * header field.
   1.141 +     *
   1.142 +     * @param languages Array of language specifiers, ordered with the most
   1.143 +     *                  preferable one at the front.  For example, "en-ca" then "fr-ca",
   1.144 +     *                  followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
   1.145 +     * @return The chosen locale, or null.
   1.146 +     * @see MessageCatalog
   1.147 +     */
   1.148 +    public Locale chooseLocale(String languages [])
   1.149 +            throws SAXException {
   1.150 +
   1.151 +        Locale l = messages.chooseLocale(languages);
   1.152 +
   1.153 +        if (l != null) {
   1.154 +            setLocale(l);
   1.155 +        }
   1.156 +        return l;
   1.157 +    }
   1.158 +
   1.159 +    /**
   1.160 +     * Lets applications control entity resolution.
   1.161 +     */
   1.162 +    public void setEntityResolver(EntityResolver r) {
   1.163 +
   1.164 +        resolver = r;
   1.165 +    }
   1.166 +
   1.167 +    /**
   1.168 +     * Returns the object used to resolve entities
   1.169 +     */
   1.170 +    public EntityResolver getEntityResolver() {
   1.171 +
   1.172 +        return resolver;
   1.173 +    }
   1.174 +
   1.175 +    /**
   1.176 +     * Used by applications to set handling of DTD parsing events.
   1.177 +     */
   1.178 +    public void setDtdHandler(DTDEventListener handler) {
   1.179 +        dtdHandler = handler;
   1.180 +        if (handler != null)
   1.181 +            handler.setDocumentLocator(new Locator() {
   1.182 +                public String getPublicId() {
   1.183 +                    return DTDParser.this.getPublicId();
   1.184 +                }
   1.185 +
   1.186 +                public String getSystemId() {
   1.187 +                    return DTDParser.this.getSystemId();
   1.188 +                }
   1.189 +
   1.190 +                public int getLineNumber() {
   1.191 +                    return DTDParser.this.getLineNumber();
   1.192 +                }
   1.193 +
   1.194 +                public int getColumnNumber() {
   1.195 +                    return DTDParser.this.getColumnNumber();
   1.196 +                }
   1.197 +            });
   1.198 +    }
   1.199 +
   1.200 +    /**
   1.201 +     * Returns the handler used to for DTD parsing events.
   1.202 +     */
   1.203 +    public DTDEventListener getDtdHandler() {
   1.204 +        return dtdHandler;
   1.205 +    }
   1.206 +
   1.207 +    /**
   1.208 +     * Parse a DTD.
   1.209 +     */
   1.210 +    public void parse(InputSource in)
   1.211 +            throws IOException, SAXException {
   1.212 +        init();
   1.213 +        parseInternal(in);
   1.214 +    }
   1.215 +
   1.216 +    /**
   1.217 +     * Parse a DTD.
   1.218 +     */
   1.219 +    public void parse(String uri)
   1.220 +            throws IOException, SAXException {
   1.221 +        InputSource in;
   1.222 +
   1.223 +        init();
   1.224 +        // System.out.println ("parse (\"" + uri + "\")");
   1.225 +        in = resolver.resolveEntity(null, uri);
   1.226 +
   1.227 +        // If custom resolver punts resolution to parser, handle it ...
   1.228 +        if (in == null) {
   1.229 +            in = Resolver.createInputSource(new java.net.URL(uri), false);
   1.230 +
   1.231 +            // ... or if custom resolver doesn't correctly construct the
   1.232 +            // input entity, patch it up enough so relative URIs work, and
   1.233 +            // issue a warning to minimize later confusion.
   1.234 +        } else if (in.getSystemId() == null) {
   1.235 +            warning("P-065", null);
   1.236 +            in.setSystemId(uri);
   1.237 +        }
   1.238 +
   1.239 +        parseInternal(in);
   1.240 +    }
   1.241 +
   1.242 +    // makes sure the parser is reset to "before a document"
   1.243 +    private void init() {
   1.244 +        in = null;
   1.245 +
   1.246 +        // alloc temporary data used in parsing
   1.247 +        strTmp = new StringBuffer();
   1.248 +        nameTmp = new char[20];
   1.249 +        nameCache = new NameCache();
   1.250 +
   1.251 +        // reset doc info
   1.252 +//        isInAttribute = false;
   1.253 +
   1.254 +        doLexicalPE = false;
   1.255 +
   1.256 +        entities.clear();
   1.257 +        notations.clear();
   1.258 +        params.clear();
   1.259 +        //    elements.clear ();
   1.260 +        declaredElements.clear();
   1.261 +
   1.262 +        // initialize predefined references ... re-interpreted later
   1.263 +        builtin("amp", "&#38;");
   1.264 +        builtin("lt", "&#60;");
   1.265 +        builtin("gt", ">");
   1.266 +        builtin("quot", "\"");
   1.267 +        builtin("apos", "'");
   1.268 +
   1.269 +        if (locale == null)
   1.270 +            locale = Locale.getDefault();
   1.271 +        if (resolver == null)
   1.272 +            resolver = new Resolver();
   1.273 +        if (dtdHandler == null)
   1.274 +            dtdHandler = new DTDHandlerBase();
   1.275 +    }
   1.276 +
   1.277 +    private void builtin(String entityName, String entityValue) {
   1.278 +        InternalEntity entity;
   1.279 +        entity = new InternalEntity(entityName, entityValue.toCharArray());
   1.280 +        entities.put(entityName, entity);
   1.281 +    }
   1.282 +
   1.283 +
   1.284 +    ////////////////////////////////////////////////////////////////
   1.285 +    //
   1.286 +    // parsing is by recursive descent, code roughly
   1.287 +    // following the BNF rules except tweaked for simple
   1.288 +    // lookahead.  rules are more or less in numeric order,
   1.289 +    // except where code sharing suggests other structures.
   1.290 +    //
   1.291 +    // a classic benefit of recursive descent parsers:  it's
   1.292 +    // relatively easy to get diagnostics that make sense.
   1.293 +    //
   1.294 +    ////////////////////////////////////////////////////////////////
   1.295 +
   1.296 +
   1.297 +    private void parseInternal(InputSource input)
   1.298 +            throws IOException, SAXException {
   1.299 +
   1.300 +        if (input == null)
   1.301 +            fatal("P-000");
   1.302 +
   1.303 +        try {
   1.304 +            in = InputEntity.getInputEntity(dtdHandler, locale);
   1.305 +            in.init(input, null, null, false);
   1.306 +
   1.307 +            dtdHandler.startDTD(in);
   1.308 +
   1.309 +            // [30] extSubset ::= TextDecl? extSubsetDecl
   1.310 +            // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
   1.311 +            //        | PEReference | S )*
   1.312 +            //    ... same as [79] extPE, which is where the code is
   1.313 +
   1.314 +            ExternalEntity externalSubset = new ExternalEntity(in);
   1.315 +            externalParameterEntity(externalSubset);
   1.316 +
   1.317 +            if (!in.isEOF()) {
   1.318 +                fatal("P-001", new Object[]
   1.319 +                {Integer.toHexString(((int) getc()))});
   1.320 +            }
   1.321 +            afterRoot();
   1.322 +            dtdHandler.endDTD();
   1.323 +
   1.324 +        } catch (EndOfInputException e) {
   1.325 +            if (!in.isDocument()) {
   1.326 +                String name = in.getName();
   1.327 +                do {    // force a relevant URI and line number
   1.328 +                    in = in.pop();
   1.329 +                } while (in.isInternal());
   1.330 +                fatal("P-002", new Object[]{name});
   1.331 +            } else {
   1.332 +                fatal("P-003", null);
   1.333 +            }
   1.334 +        } catch (RuntimeException e) {
   1.335 +            // Don't discard location that triggered the exception
   1.336 +            // ## Should properly wrap exception
   1.337 +            System.err.print("Internal DTD parser error: "); // ##
   1.338 +            e.printStackTrace();
   1.339 +            throw new SAXParseException(e.getMessage() != null
   1.340 +                    ? e.getMessage() : e.getClass().getName(),
   1.341 +                    getPublicId(), getSystemId(),
   1.342 +                    getLineNumber(), getColumnNumber());
   1.343 +
   1.344 +        } finally {
   1.345 +            // recycle temporary data used during parsing
   1.346 +            strTmp = null;
   1.347 +            nameTmp = null;
   1.348 +            nameCache = null;
   1.349 +
   1.350 +            // ditto input sources etc
   1.351 +            if (in != null) {
   1.352 +                in.close();
   1.353 +                in = null;
   1.354 +            }
   1.355 +
   1.356 +            // get rid of all DTD info ... some of it would be
   1.357 +            // useful for editors etc, investigate later.
   1.358 +
   1.359 +            params.clear();
   1.360 +            entities.clear();
   1.361 +            notations.clear();
   1.362 +            declaredElements.clear();
   1.363 +//        elements.clear();
   1.364 +            ids.clear();
   1.365 +        }
   1.366 +    }
   1.367 +
   1.368 +    void afterRoot() throws SAXException {
   1.369 +        // Make sure all IDREFs match declared ID attributes.  We scan
   1.370 +        // after the document element is parsed, since XML allows forward
   1.371 +        // references, and only now can we know if they're all resolved.
   1.372 +
   1.373 +        for (Enumeration e = ids.keys();
   1.374 +             e.hasMoreElements();
   1.375 +                ) {
   1.376 +            String id = (String) e.nextElement();
   1.377 +            Boolean value = (Boolean) ids.get(id);
   1.378 +            if (Boolean.FALSE == value)
   1.379 +                error("V-024", new Object[]{id});
   1.380 +        }
   1.381 +    }
   1.382 +
   1.383 +
   1.384 +    // role is for diagnostics
   1.385 +    private void whitespace(String roleId)
   1.386 +            throws IOException, SAXException {
   1.387 +
   1.388 +        // [3] S ::= (#x20 | #x9 | #xd | #xa)+
   1.389 +        if (!maybeWhitespace()) {
   1.390 +            fatal("P-004", new Object[]
   1.391 +            {messages.getMessage(locale, roleId)});
   1.392 +        }
   1.393 +    }
   1.394 +
   1.395 +    // S?
   1.396 +    private boolean maybeWhitespace()
   1.397 +            throws IOException, SAXException {
   1.398 +
   1.399 +        if (!doLexicalPE)
   1.400 +            return in.maybeWhitespace();
   1.401 +
   1.402 +        // see getc() for the PE logic -- this lets us splice
   1.403 +        // expansions of PEs in "anywhere".  getc() has smarts,
   1.404 +        // so for external PEs we don't bypass it.
   1.405 +
   1.406 +        // XXX we can marginally speed PE handling, and certainly
   1.407 +        // be cleaner (hence potentially more correct), by using
   1.408 +        // the observations that expanded PEs only start and stop
   1.409 +        // where whitespace is allowed.  getc wouldn't need any
   1.410 +        // "lexical" PE expansion logic, and no other method needs
   1.411 +        // to handle termination of PEs.  (parsing of literals would
   1.412 +        // still need to pop entities, but not parsing of references
   1.413 +        // in content.)
   1.414 +
   1.415 +        char c = getc();
   1.416 +        boolean saw = false;
   1.417 +
   1.418 +        while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
   1.419 +            saw = true;
   1.420 +
   1.421 +            // this gracefully ends things when we stop playing
   1.422 +            // with internal parameters.  caller should have a
   1.423 +            // grammar rule allowing whitespace at end of entity.
   1.424 +            if (in.isEOF() && !in.isInternal())
   1.425 +                return saw;
   1.426 +            c = getc();
   1.427 +        }
   1.428 +        ungetc();
   1.429 +        return saw;
   1.430 +    }
   1.431 +
   1.432 +    private String maybeGetName()
   1.433 +            throws IOException, SAXException {
   1.434 +
   1.435 +        NameCacheEntry entry = maybeGetNameCacheEntry();
   1.436 +        return (entry == null) ? null : entry.name;
   1.437 +    }
   1.438 +
   1.439 +    private NameCacheEntry maybeGetNameCacheEntry()
   1.440 +            throws IOException, SAXException {
   1.441 +
   1.442 +        // [5] Name ::= (Letter|'_'|':') (Namechar)*
   1.443 +        char c = getc();
   1.444 +
   1.445 +        if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
   1.446 +            ungetc();
   1.447 +            return null;
   1.448 +        }
   1.449 +        return nameCharString(c);
   1.450 +    }
   1.451 +
   1.452 +    // Used when parsing enumerations
   1.453 +    private String getNmtoken()
   1.454 +            throws IOException, SAXException {
   1.455 +
   1.456 +        // [7] Nmtoken ::= (Namechar)+
   1.457 +        char c = getc();
   1.458 +        if (!XmlChars.isNameChar(c))
   1.459 +            fatal("P-006", new Object[]{new Character(c)});
   1.460 +        return nameCharString(c).name;
   1.461 +    }
   1.462 +
   1.463 +    // n.b. this gets used when parsing attribute values (for
   1.464 +    // internal references) so we can't use strTmp; it's also
   1.465 +    // a hotspot for CPU and memory in the parser (called at least
   1.466 +    // once for each element) so this has been optimized a bit.
   1.467 +
   1.468 +    private NameCacheEntry nameCharString(char c)
   1.469 +            throws IOException, SAXException {
   1.470 +
   1.471 +        int i = 1;
   1.472 +
   1.473 +        nameTmp[0] = c;
   1.474 +        for (; ;) {
   1.475 +            if ((c = in.getNameChar()) == 0)
   1.476 +                break;
   1.477 +            if (i >= nameTmp.length) {
   1.478 +                char tmp [] = new char[nameTmp.length + 10];
   1.479 +                System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
   1.480 +                nameTmp = tmp;
   1.481 +            }
   1.482 +            nameTmp[i++] = c;
   1.483 +        }
   1.484 +        return nameCache.lookupEntry(nameTmp, i);
   1.485 +    }
   1.486 +
   1.487 +    //
   1.488 +    // much similarity between parsing entity values in DTD
   1.489 +    // and attribute values (in DTD or content) ... both follow
   1.490 +    // literal parsing rules, newline canonicalization, etc
   1.491 +    //
   1.492 +    // leaves value in 'strTmp' ... either a "replacement text" (4.5),
   1.493 +    // or else partially normalized attribute value (the first bit
   1.494 +    // of 3.3.3's spec, without the "if not CDATA" bits).
   1.495 +    //
   1.496 +    private void parseLiteral(boolean isEntityValue)
   1.497 +            throws IOException, SAXException {
   1.498 +
   1.499 +        // [9] EntityValue ::=
   1.500 +        //    '"' ([^"&%] | Reference | PEReference)* '"'
   1.501 +        //    |    "'" ([^'&%] | Reference | PEReference)* "'"
   1.502 +        // [10] AttValue ::=
   1.503 +        //    '"' ([^"&]  | Reference             )* '"'
   1.504 +        //    |    "'" ([^'&]  | Reference             )* "'"
   1.505 +        char quote = getc();
   1.506 +        char c;
   1.507 +        InputEntity source = in;
   1.508 +
   1.509 +        if (quote != '\'' && quote != '"') {
   1.510 +            fatal("P-007");
   1.511 +        }
   1.512 +
   1.513 +        // don't report entity expansions within attributes,
   1.514 +        // they're reported "fully expanded" via SAX
   1.515 +//    isInAttribute = !isEntityValue;
   1.516 +
   1.517 +        // get value into strTmp
   1.518 +        strTmp = new StringBuffer();
   1.519 +
   1.520 +        // scan, allowing entity push/pop wherever ...
   1.521 +        // expanded entities can't terminate the literal!
   1.522 +        for (; ;) {
   1.523 +            if (in != source && in.isEOF()) {
   1.524 +                // we don't report end of parsed entities
   1.525 +                // within attributes (no SAX hooks)
   1.526 +                in = in.pop();
   1.527 +                continue;
   1.528 +            }
   1.529 +            if ((c = getc()) == quote && in == source) {
   1.530 +                break;
   1.531 +            }
   1.532 +
   1.533 +            //
   1.534 +            // Basically the "reference in attribute value"
   1.535 +            // row of the chart in section 4.4 of the spec
   1.536 +            //
   1.537 +            if (c == '&') {
   1.538 +                String entityName = maybeGetName();
   1.539 +
   1.540 +                if (entityName != null) {
   1.541 +                    nextChar(';', "F-020", entityName);
   1.542 +
   1.543 +                    // 4.4 says:  bypass these here ... we'll catch
   1.544 +                    // forbidden refs to unparsed entities on use
   1.545 +                    if (isEntityValue) {
   1.546 +                        strTmp.append('&');
   1.547 +                        strTmp.append(entityName);
   1.548 +                        strTmp.append(';');
   1.549 +                        continue;
   1.550 +                    }
   1.551 +                    expandEntityInLiteral(entityName, entities, isEntityValue);
   1.552 +
   1.553 +
   1.554 +                    // character references are always included immediately
   1.555 +                } else if ((c = getc()) == '#') {
   1.556 +                    int tmp = parseCharNumber();
   1.557 +
   1.558 +                    if (tmp > 0xffff) {
   1.559 +                        tmp = surrogatesToCharTmp(tmp);
   1.560 +                        strTmp.append(charTmp[0]);
   1.561 +                        if (tmp == 2)
   1.562 +                            strTmp.append(charTmp[1]);
   1.563 +                    } else
   1.564 +                        strTmp.append((char) tmp);
   1.565 +                } else
   1.566 +                    fatal("P-009");
   1.567 +                continue;
   1.568 +
   1.569 +            }
   1.570 +
   1.571 +            // expand parameter entities only within entity value literals
   1.572 +            if (c == '%' && isEntityValue) {
   1.573 +                String entityName = maybeGetName();
   1.574 +
   1.575 +                if (entityName != null) {
   1.576 +                    nextChar(';', "F-021", entityName);
   1.577 +                    expandEntityInLiteral(entityName, params, isEntityValue);
   1.578 +                    continue;
   1.579 +                } else
   1.580 +                    fatal("P-011");
   1.581 +            }
   1.582 +
   1.583 +            // For attribute values ...
   1.584 +            if (!isEntityValue) {
   1.585 +                // 3.3.3 says whitespace normalizes to space...
   1.586 +                if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
   1.587 +                    strTmp.append(' ');
   1.588 +                    continue;
   1.589 +                }
   1.590 +
   1.591 +                // "<" not legal in parsed literals ...
   1.592 +                if (c == '<')
   1.593 +                    fatal("P-012");
   1.594 +            }
   1.595 +
   1.596 +            strTmp.append(c);
   1.597 +        }
   1.598 +//    isInAttribute = false;
   1.599 +    }
   1.600 +
   1.601 +    // does a SINGLE expansion of the entity (often reparsed later)
   1.602 +    private void expandEntityInLiteral(String name, SimpleHashtable table,
   1.603 +                                       boolean isEntityValue)
   1.604 +            throws IOException, SAXException {
   1.605 +
   1.606 +        Object entity = table.get(name);
   1.607 +
   1.608 +        if (entity instanceof InternalEntity) {
   1.609 +            InternalEntity value = (InternalEntity) entity;
   1.610 +            pushReader(value.buf, name, !value.isPE);
   1.611 +
   1.612 +        } else if (entity instanceof ExternalEntity) {
   1.613 +            if (!isEntityValue)    // must be a PE ...
   1.614 +                fatal("P-013", new Object[]{name});
   1.615 +            // XXX if this returns false ...
   1.616 +            pushReader((ExternalEntity) entity);
   1.617 +
   1.618 +        } else if (entity == null) {
   1.619 +            //
   1.620 +            // Note:  much confusion about whether spec requires such
   1.621 +            // errors to be fatal in many cases, but none about whether
   1.622 +            // it allows "normal" errors to be unrecoverable!
   1.623 +            //
   1.624 +            fatal((table == params) ? "V-022" : "P-014",
   1.625 +                    new Object[]{name});
   1.626 +        }
   1.627 +    }
   1.628 +
   1.629 +    // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
   1.630 +    // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
   1.631 +
   1.632 +    // NOTE:  XML spec should explicitly say that PE ref syntax is
   1.633 +    // ignored in PIs, comments, SystemLiterals, and Pubid Literal
   1.634 +    // values ... can't process the XML spec's own DTD without doing
   1.635 +    // that for comments.
   1.636 +
   1.637 +    private String getQuotedString(String type, String extra)
   1.638 +            throws IOException, SAXException {
   1.639 +
   1.640 +        // use in.getc to bypass PE processing
   1.641 +        char quote = in.getc();
   1.642 +
   1.643 +        if (quote != '\'' && quote != '"')
   1.644 +            fatal("P-015", new Object[]{
   1.645 +                messages.getMessage(locale, type, new Object[]{extra})
   1.646 +            });
   1.647 +
   1.648 +        char c;
   1.649 +
   1.650 +        strTmp = new StringBuffer();
   1.651 +        while ((c = in.getc()) != quote)
   1.652 +            strTmp.append((char) c);
   1.653 +        return strTmp.toString();
   1.654 +    }
   1.655 +
   1.656 +
   1.657 +    private String parsePublicId() throws IOException, SAXException {
   1.658 +
   1.659 +        // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
   1.660 +        // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
   1.661 +        String retval = getQuotedString("F-033", null);
   1.662 +        for (int i = 0; i < retval.length(); i++) {
   1.663 +            char c = retval.charAt(i);
   1.664 +            if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
   1.665 +                    && !(c >= 'A' && c <= 'Z')
   1.666 +                    && !(c >= 'a' && c <= 'z'))
   1.667 +                fatal("P-016", new Object[]{new Character(c)});
   1.668 +        }
   1.669 +        strTmp = new StringBuffer();
   1.670 +        strTmp.append(retval);
   1.671 +        return normalize(false);
   1.672 +    }
   1.673 +
   1.674 +    // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
   1.675 +    // handled by:  InputEntity.parsedContent()
   1.676 +
   1.677 +    private boolean maybeComment(boolean skipStart)
   1.678 +            throws IOException, SAXException {
   1.679 +
   1.680 +        // [15] Comment ::= '<!--'
   1.681 +        //        ( (Char - '-') | ('-' (Char - '-'))*
   1.682 +        //        '-->'
   1.683 +        if (!in.peek(skipStart ? "!--" : "<!--", null))
   1.684 +            return false;
   1.685 +
   1.686 +        boolean savedLexicalPE = doLexicalPE;
   1.687 +        boolean saveCommentText;
   1.688 +
   1.689 +        doLexicalPE = false;
   1.690 +        saveCommentText = false;
   1.691 +        if (saveCommentText)
   1.692 +            strTmp = new StringBuffer();
   1.693 +
   1.694 +        oneComment:
   1.695 +        for (; ;) {
   1.696 +            try {
   1.697 +                // bypass PE expansion, but permit PEs
   1.698 +                // to complete ... valid docs won't care.
   1.699 +                for (; ;) {
   1.700 +                    int c = getc();
   1.701 +                    if (c == '-') {
   1.702 +                        c = getc();
   1.703 +                        if (c != '-') {
   1.704 +                            if (saveCommentText)
   1.705 +                                strTmp.append('-');
   1.706 +                            ungetc();
   1.707 +                            continue;
   1.708 +                        }
   1.709 +                        nextChar('>', "F-022", null);
   1.710 +                        break oneComment;
   1.711 +                    }
   1.712 +                    if (saveCommentText)
   1.713 +                        strTmp.append((char) c);
   1.714 +                }
   1.715 +            } catch (EndOfInputException e) {
   1.716 +                //
   1.717 +                // This is fatal EXCEPT when we're processing a PE...
   1.718 +                // in which case a validating processor reports an error.
   1.719 +                // External PEs are easy to detect; internal ones we
   1.720 +                // infer by being an internal entity outside an element.
   1.721 +                //
   1.722 +                if (in.isInternal()) {
   1.723 +                    error("V-021", null);
   1.724 +                }
   1.725 +                fatal("P-017");
   1.726 +            }
   1.727 +        }
   1.728 +        doLexicalPE = savedLexicalPE;
   1.729 +        if (saveCommentText)
   1.730 +            dtdHandler.comment(strTmp.toString());
   1.731 +        return true;
   1.732 +    }
   1.733 +
   1.734 +    private boolean maybePI(boolean skipStart)
   1.735 +            throws IOException, SAXException {
   1.736 +
   1.737 +        // [16] PI ::= '<?' PITarget
   1.738 +        //        (S (Char* - (Char* '?>' Char*)))?
   1.739 +        //        '?>'
   1.740 +        // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
   1.741 +        boolean savedLexicalPE = doLexicalPE;
   1.742 +
   1.743 +        if (!in.peek(skipStart ? "?" : "<?", null))
   1.744 +            return false;
   1.745 +        doLexicalPE = false;
   1.746 +
   1.747 +        String target = maybeGetName();
   1.748 +
   1.749 +        if (target == null) {
   1.750 +            fatal("P-018");
   1.751 +        }
   1.752 +        if ("xml".equals(target)) {
   1.753 +            fatal("P-019");
   1.754 +        }
   1.755 +        if ("xml".equalsIgnoreCase(target)) {
   1.756 +            fatal("P-020", new Object[]{target});
   1.757 +        }
   1.758 +
   1.759 +        if (maybeWhitespace()) {
   1.760 +            strTmp = new StringBuffer();
   1.761 +            try {
   1.762 +                for (; ;) {
   1.763 +                    // use in.getc to bypass PE processing
   1.764 +                    char c = in.getc();
   1.765 +                    //Reached the end of PI.
   1.766 +                    if (c == '?' && in.peekc('>'))
   1.767 +                        break;
   1.768 +                    strTmp.append(c);
   1.769 +                }
   1.770 +            } catch (EndOfInputException e) {
   1.771 +                fatal("P-021");
   1.772 +            }
   1.773 +            dtdHandler.processingInstruction(target, strTmp.toString());
   1.774 +        } else {
   1.775 +            if (!in.peek("?>", null)) {
   1.776 +                fatal("P-022");
   1.777 +            }
   1.778 +            dtdHandler.processingInstruction(target, "");
   1.779 +        }
   1.780 +
   1.781 +        doLexicalPE = savedLexicalPE;
   1.782 +        return true;
   1.783 +    }
   1.784 +
   1.785 +    // [18] CDSect ::= CDStart CData CDEnd
   1.786 +    // [19] CDStart ::= '<![CDATA['
   1.787 +    // [20] CData ::= (Char* - (Char* ']]>' Char*))
   1.788 +    // [21] CDEnd ::= ']]>'
   1.789 +    //
   1.790 +    //    ... handled by InputEntity.unparsedContent()
   1.791 +
   1.792 +    // collapsing several rules together ...
   1.793 +    // simpler than attribute literals -- no reference parsing!
   1.794 +    private String maybeReadAttribute(String name, boolean must)
   1.795 +            throws IOException, SAXException {
   1.796 +
   1.797 +        // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
   1.798 +        // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
   1.799 +        // [32] SDDecl ::=  S 'standalone' Eq \'|\" ... \'|\"
   1.800 +        if (!maybeWhitespace()) {
   1.801 +            if (!must) {
   1.802 +                return null;
   1.803 +            }
   1.804 +            fatal("P-024", new Object[]{name});
   1.805 +            // NOTREACHED
   1.806 +        }
   1.807 +
   1.808 +        if (!peek(name)) {
   1.809 +            if (must) {
   1.810 +                fatal("P-024", new Object[]{name});
   1.811 +            } else {
   1.812 +                // To ensure that the whitespace is there so that when we
   1.813 +                // check for the next attribute we assure that the
   1.814 +                // whitespace still exists.
   1.815 +                ungetc();
   1.816 +                return null;
   1.817 +            }
   1.818 +        }
   1.819 +
   1.820 +        // [25] Eq ::= S? '=' S?
   1.821 +        maybeWhitespace();
   1.822 +        nextChar('=', "F-023", null);
   1.823 +        maybeWhitespace();
   1.824 +
   1.825 +        return getQuotedString("F-035", name);
   1.826 +    }
   1.827 +
   1.828 +    private void readVersion(boolean must, String versionNum)
   1.829 +            throws IOException, SAXException {
   1.830 +
   1.831 +        String value = maybeReadAttribute("version", must);
   1.832 +
   1.833 +        // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
   1.834 +
   1.835 +        if (must && value == null)
   1.836 +            fatal("P-025", new Object[]{versionNum});
   1.837 +        if (value != null) {
   1.838 +            int length = value.length();
   1.839 +            for (int i = 0; i < length; i++) {
   1.840 +                char c = value.charAt(i);
   1.841 +                if (!((c >= '0' && c <= '9')
   1.842 +                        || c == '_' || c == '.'
   1.843 +                        || (c >= 'a' && c <= 'z')
   1.844 +                        || (c >= 'A' && c <= 'Z')
   1.845 +                        || c == ':' || c == '-')
   1.846 +                )
   1.847 +                    fatal("P-026", new Object[]{value});
   1.848 +            }
   1.849 +        }
   1.850 +        if (value != null && !value.equals(versionNum))
   1.851 +            error("P-027", new Object[]{versionNum, value});
   1.852 +    }
   1.853 +
   1.854 +    // common code used by most markup declarations
   1.855 +    // ... S (Q)Name ...
   1.856 +    private String getMarkupDeclname(String roleId, boolean qname)
   1.857 +            throws IOException, SAXException {
   1.858 +
   1.859 +        String name;
   1.860 +
   1.861 +        whitespace(roleId);
   1.862 +        name = maybeGetName();
   1.863 +        if (name == null)
   1.864 +            fatal("P-005", new Object[]
   1.865 +            {messages.getMessage(locale, roleId)});
   1.866 +        return name;
   1.867 +    }
   1.868 +
   1.869 +    private boolean maybeMarkupDecl()
   1.870 +            throws IOException, SAXException {
   1.871 +
   1.872 +        // [29] markupdecl ::= elementdecl | Attlistdecl
   1.873 +        //           | EntityDecl | NotationDecl | PI | Comment
   1.874 +        return maybeElementDecl()
   1.875 +                || maybeAttlistDecl()
   1.876 +                || maybeEntityDecl()
   1.877 +                || maybeNotationDecl()
   1.878 +                || maybePI(false)
   1.879 +                || maybeComment(false);
   1.880 +    }
   1.881 +
   1.882 +    private static final String XmlLang = "xml:lang";
   1.883 +
   1.884 +    private boolean isXmlLang(String value) {
   1.885 +
   1.886 +        // [33] LanguageId ::= Langcode ('-' Subcode)*
   1.887 +        // [34] Langcode ::= ISO639Code | IanaCode | UserCode
   1.888 +        // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
   1.889 +        // [36] IanaCode ::= [iI] '-' SubCode
   1.890 +        // [37] UserCode ::= [xX] '-' SubCode
   1.891 +        // [38] SubCode ::= [a-zA-Z]+
   1.892 +
   1.893 +        // the ISO and IANA codes (and subcodes) are registered,
   1.894 +        // but that's neither a WF nor a validity constraint.
   1.895 +
   1.896 +        int nextSuffix;
   1.897 +        char c;
   1.898 +
   1.899 +        if (value.length() < 2)
   1.900 +            return false;
   1.901 +        c = value.charAt(1);
   1.902 +        if (c == '-') {        // IANA, or user, code
   1.903 +            c = value.charAt(0);
   1.904 +            if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
   1.905 +                return false;
   1.906 +            nextSuffix = 1;
   1.907 +        } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
   1.908 +            // 2 letter ISO code, or error
   1.909 +            c = value.charAt(0);
   1.910 +            if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
   1.911 +                return false;
   1.912 +            nextSuffix = 2;
   1.913 +        } else
   1.914 +            return false;
   1.915 +
   1.916 +        // here "suffix" ::= '-' [a-zA-Z]+ suffix*
   1.917 +        while (nextSuffix < value.length()) {
   1.918 +            c = value.charAt(nextSuffix);
   1.919 +            if (c != '-')
   1.920 +                break;
   1.921 +            while (++nextSuffix < value.length()) {
   1.922 +                c = value.charAt(nextSuffix);
   1.923 +                if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
   1.924 +                    break;
   1.925 +            }
   1.926 +        }
   1.927 +        return value.length() == nextSuffix && c != '-';
   1.928 +    }
   1.929 +
   1.930 +
   1.931 +    //
   1.932 +    // CHAPTER 3:  Logical Structures
   1.933 +    //
   1.934 +
   1.935 +    /**
   1.936 +     * To validate, subclassers should at this time make sure that
   1.937 +     * values are of the declared types:<UL>
   1.938 +     * <LI> ID and IDREF(S) values are Names
   1.939 +     * <LI> NMTOKEN(S) are Nmtokens
   1.940 +     * <LI> ENUMERATION values match one of the tokens
   1.941 +     * <LI> NOTATION values match a notation name
   1.942 +     * <LI> ENTITY(IES) values match an unparsed external entity
   1.943 +     * </UL>
   1.944 +     * <p/>
   1.945 +     * <P> Separately, make sure IDREF values match some ID
   1.946 +     * provided in the document (in the afterRoot method).
   1.947 +     */
   1.948 +/*    void validateAttributeSyntax (Attribute attr, String value)
   1.949 +         throws DTDParseException {
   1.950 +        // ID, IDREF(S) ... values are Names
   1.951 +        if (Attribute.ID == attr.type()) {
   1.952 +            if (!XmlNames.isName (value))
   1.953 +                error ("V-025", new Object [] { value });
   1.954 +
   1.955 +            Boolean             b = (Boolean) ids.getNonInterned (value);
   1.956 +            if (b == null || b.equals (Boolean.FALSE))
   1.957 +                ids.put (value.intern (), Boolean.TRUE);
   1.958 +            else
   1.959 +                error ("V-026", new Object [] { value });
   1.960 +
   1.961 +        } else if (Attribute.IDREF == attr.type()) {
   1.962 +            if (!XmlNames.isName (value))
   1.963 +                error ("V-027", new Object [] { value });
   1.964 +
   1.965 +            Boolean             b = (Boolean) ids.getNonInterned (value);
   1.966 +            if (b == null)
   1.967 +                ids.put (value.intern (), Boolean.FALSE);
   1.968 +
   1.969 +        } else if (Attribute.IDREFS == attr.type()) {
   1.970 +            StringTokenizer     tokenizer = new StringTokenizer (value);
   1.971 +            Boolean             b;
   1.972 +            boolean             sawValue = false;
   1.973 +
   1.974 +            while (tokenizer.hasMoreTokens ()) {
   1.975 +                value = tokenizer.nextToken ();
   1.976 +                if (!XmlNames.isName (value))
   1.977 +                    error ("V-027", new Object [] { value });
   1.978 +                b = (Boolean) ids.getNonInterned (value);
   1.979 +                if (b == null)
   1.980 +                    ids.put (value.intern (), Boolean.FALSE);
   1.981 +                sawValue = true;
   1.982 +            }
   1.983 +            if (!sawValue)
   1.984 +                error ("V-039", null);
   1.985 +
   1.986 +
   1.987 +        // NMTOKEN(S) ... values are Nmtoken(s)
   1.988 +        } else if (Attribute.NMTOKEN == attr.type()) {
   1.989 +            if (!XmlNames.isNmtoken (value))
   1.990 +                error ("V-028", new Object [] { value });
   1.991 +
   1.992 +        } else if (Attribute.NMTOKENS == attr.type()) {
   1.993 +            StringTokenizer     tokenizer = new StringTokenizer (value);
   1.994 +            boolean             sawValue = false;
   1.995 +
   1.996 +            while (tokenizer.hasMoreTokens ()) {
   1.997 +                value = tokenizer.nextToken ();
   1.998 +                if (!XmlNames.isNmtoken (value))
   1.999 +                    error ("V-028", new Object [] { value });
  1.1000 +                sawValue = true;
  1.1001 +            }
  1.1002 +            if (!sawValue)
  1.1003 +                error ("V-032", null);
  1.1004 +
  1.1005 +        // ENUMERATION ... values match one of the tokens
  1.1006 +        } else if (Attribute.ENUMERATION == attr.type()) {
  1.1007 +            for (int i = 0; i < attr.values().length; i++)
  1.1008 +                if (value.equals (attr.values()[i]))
  1.1009 +                    return;
  1.1010 +            error ("V-029", new Object [] { value });
  1.1011 +
  1.1012 +        // NOTATION values match a notation name
  1.1013 +        } else if (Attribute.NOTATION == attr.type()) {
  1.1014 +            //
  1.1015 +            // XXX XML 1.0 spec should probably list references to
  1.1016 +            // externally defined notations in standalone docs as
  1.1017 +            // validity errors.  Ditto externally defined unparsed
  1.1018 +            // entities; neither should show up in attributes, else
  1.1019 +            // one needs to read the external declarations in order
  1.1020 +            // to make sense of the document (exactly what tagging
  1.1021 +            // a doc as "standalone" intends you won't need to do).
  1.1022 +            //
  1.1023 +            for (int i = 0; i < attr.values().length; i++)
  1.1024 +                if (value.equals (attr.values()[i]))
  1.1025 +                    return;
  1.1026 +            error ("V-030", new Object [] { value });
  1.1027 +
  1.1028 +        // ENTITY(IES) values match an unparsed entity(ies)
  1.1029 +        } else if (Attribute.ENTITY == attr.type()) {
  1.1030 +            // see note above re standalone
  1.1031 +            if (!isUnparsedEntity (value))
  1.1032 +                error ("V-031", new Object [] { value });
  1.1033 +
  1.1034 +        } else if (Attribute.ENTITIES == attr.type()) {
  1.1035 +            StringTokenizer     tokenizer = new StringTokenizer (value);
  1.1036 +            boolean             sawValue = false;
  1.1037 +
  1.1038 +            while (tokenizer.hasMoreTokens ()) {
  1.1039 +                value = tokenizer.nextToken ();
  1.1040 +                // see note above re standalone
  1.1041 +                if (!isUnparsedEntity (value))
  1.1042 +                    error ("V-031", new Object [] { value });
  1.1043 +                sawValue = true;
  1.1044 +            }
  1.1045 +            if (!sawValue)
  1.1046 +                error ("V-040", null);
  1.1047 +
  1.1048 +        } else if (Attribute.CDATA != attr.type())
  1.1049 +            throw new InternalError (attr.type());
  1.1050 +    }
  1.1051 +*/
  1.1052 +/*
  1.1053 +    private boolean isUnparsedEntity (String name)
  1.1054 +    {
  1.1055 +        Object e = entities.getNonInterned (name);
  1.1056 +        if (e == null || !(e instanceof ExternalEntity))
  1.1057 +            return false;
  1.1058 +        return ((ExternalEntity)e).notation != null;
  1.1059 +    }
  1.1060 +*/
  1.1061 +    private boolean maybeElementDecl()
  1.1062 +            throws IOException, SAXException {
  1.1063 +
  1.1064 +        // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  1.1065 +        // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  1.1066 +        InputEntity start = peekDeclaration("!ELEMENT");
  1.1067 +
  1.1068 +        if (start == null)
  1.1069 +            return false;
  1.1070 +
  1.1071 +        // n.b. for content models where inter-element whitespace is
  1.1072 +        // ignorable, we mark that fact here.
  1.1073 +        String name = getMarkupDeclname("F-015", true);
  1.1074 +//    Element        element = (Element) elements.get (name);
  1.1075 +//    boolean        declEffective = false;
  1.1076 +
  1.1077 +/*
  1.1078 +    if (element != null) {
  1.1079 +        if (element.contentModel() != null) {
  1.1080 +            error ("V-012", new Object [] { name });
  1.1081 +        } // else <!ATTLIST name ...> came first
  1.1082 +    } else {
  1.1083 +        element = new Element(name);
  1.1084 +        elements.put (element.name(), element);
  1.1085 +        declEffective = true;
  1.1086 +    }
  1.1087 +*/
  1.1088 +        if (declaredElements.contains(name))
  1.1089 +            error("V-012", new Object[]{name});
  1.1090 +        else {
  1.1091 +            declaredElements.add(name);
  1.1092 +//        declEffective = true;
  1.1093 +        }
  1.1094 +
  1.1095 +        short modelType;
  1.1096 +        whitespace("F-000");
  1.1097 +        if (peek(strEMPTY)) {
  1.1098 +///        // leave element.contentModel as null for this case.
  1.1099 +            dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
  1.1100 +        } else if (peek(strANY)) {
  1.1101 +///        element.setContentModel(new StringModel(StringModelType.ANY));
  1.1102 +            dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
  1.1103 +        } else {
  1.1104 +            modelType = getMixedOrChildren(name);
  1.1105 +        }
  1.1106 +
  1.1107 +        dtdHandler.endContentModel(name, modelType);
  1.1108 +
  1.1109 +        maybeWhitespace();
  1.1110 +        char c = getc();
  1.1111 +        if (c != '>')
  1.1112 +            fatal("P-036", new Object[]{name, new Character(c)});
  1.1113 +        if (start != in)
  1.1114 +            error("V-013", null);
  1.1115 +
  1.1116 +///        dtdHandler.elementDecl(element);
  1.1117 +
  1.1118 +        return true;
  1.1119 +    }
  1.1120 +
  1.1121 +    // We're leaving the content model as a regular expression;
  1.1122 +    // it's an efficient natural way to express such things, and
  1.1123 +    // libraries often interpret them.  No whitespace in the
  1.1124 +    // model we store, though!
  1.1125 +
  1.1126 +    /**
  1.1127 +     * returns content model type.
  1.1128 +     */
  1.1129 +    private short getMixedOrChildren(String elementName/*Element element*/)
  1.1130 +            throws IOException, SAXException {
  1.1131 +
  1.1132 +        InputEntity start;
  1.1133 +
  1.1134 +        // [47] children ::= (choice|seq) ('?'|'*'|'+')?
  1.1135 +        strTmp = new StringBuffer();
  1.1136 +
  1.1137 +        nextChar('(', "F-028", elementName);
  1.1138 +        start = in;
  1.1139 +        maybeWhitespace();
  1.1140 +        strTmp.append('(');
  1.1141 +
  1.1142 +        short modelType;
  1.1143 +        if (peek("#PCDATA")) {
  1.1144 +            strTmp.append("#PCDATA");
  1.1145 +            dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);
  1.1146 +            getMixed(elementName, start);
  1.1147 +        } else {
  1.1148 +            dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
  1.1149 +            getcps(elementName, start);
  1.1150 +        }
  1.1151 +
  1.1152 +        return modelType;
  1.1153 +    }
  1.1154 +
    /**
     * Parses the inside of a choice or sequence group and reports it to
     * {@code dtdHandler} as {@code startModelGroup}, a series of
     * {@code childElement} / nested-group / {@code connector} events, and a
     * closing {@code endModelGroup} carrying the group's occurrence
     * indicator.  Nested groups are handled by recursion.
     *
     * <p>On entry {@code '(' S?} has already been consumed.  The matching
     * {@code ')'} is expected in the {@code start} entity; if it is found
     * in a different one, "V-014" is reported.
     *
     * <pre>
     * [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
     * [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
     * [50] seq    ::= '(' S? cp (S? ',' S? cp)* S? ')'
     * </pre>
     *
     * @param elementName name of the element being declared (used in
     *                    diagnostics and passed on to nested groups)
     * @param start       the input entity that was current when this
     *                    group's opening paren was read
     * @throws IOException  propagated from the underlying input
     * @throws SAXException propagated from the handler and error reporting
     */
    private void getcps(/*Element element,*/String elementName, InputEntity start)
            throws IOException, SAXException {

        // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
        // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
        // [50] seq    ::= '(' S? cp (S? ',' S? cp)* S? ')'

        // "decided" becomes true once the first connector has been seen;
        // "type" then holds that connector ('|' or ',') and every later
        // connector in this group must be the same character.
        boolean decided = false;
        char type = 0;
//        ContentModel       retval, temp, current;

//        retval = temp = current = null;

        dtdHandler.startModelGroup();

        do {
            String tag;

            // --- one content particle: a Name or a nested group ---
            tag = maybeGetName();
            if (tag != null) {
                strTmp.append(tag);
//                temp = new ElementModel(tag);
//                getFrequency((RepeatableContent)temp);
///->
                dtdHandler.childElement(tag, getFrequency());
///<-
            } else if (peek("(")) {
                // remember the entity holding the nested '(' so the
                // recursive call can check where its ')' is found
                InputEntity next = in;
                strTmp.append('(');
                maybeWhitespace();
//                temp = getcps(element, next);
//                getFrequency(temp);
///->
                getcps(elementName, next);
                // The recursive call reads the nested group's occurrence
                // indicator itself (see its endModelGroup call), so it
                // must not be read a second time here.
///                getFrequency();        <- this looks like a bug
///<-
            } else
                // neither a name nor '(' -- the message depends on which
                // connector, if any, has been seen so far
                fatal((type == 0) ? "P-039" :
                        ((type == ',') ? "P-037" : "P-038"),
                        new Object[]{new Character(getc())});

            // --- what follows the particle: connector or ')' ---
            maybeWhitespace();
            if (decided) {
                char c = getc();

//                if (current != null) {
//                    current.addChild(temp);
//                }
                if (c == type) {
                    strTmp.append(type);
                    maybeWhitespace();
                    reportConnector(type);
                    // "continue" in a do/while goes to the loop condition
                    continue;
                } else if (c == '\u0029') {    // rparen
                    // put ')' back so the loop condition can consume it
                    ungetc();
                    continue;
                } else {
                    fatal((type == 0) ? "P-041" : "P-040",
                            new Object[]{
                                new Character(c),
                                new Character(type)
                            });
                }
            } else {
                type = getc();
                switch (type) {
                case '|':
                case ',':
                    reportConnector(type);
                    break;
                default:
                    // Not a connector: push the character back and let the
                    // loop condition look for ')'.
                    // NOTE(review): "type" keeps the pushed-back character
                    // and, if it is not ')', the loop reads another
                    // particle without any connector having been seen
                    // (e.g. "(a b)") -- confirm this leniency is intended.
//                        retval = temp;
                    ungetc();
                    continue;
                }
//                retval = (ContentModel)current;
                decided = true;
//                current.addChild(temp);
                strTmp.append(type);
            }
            maybeWhitespace();
        } while (!peek(")"));

        // the group has to close in the entity in which it was opened
        if (in != start)
            error("V-014", new Object[]{elementName});
        strTmp.append(')');

        // the occurrence indicator after ')' belongs to this group
        dtdHandler.endModelGroup(getFrequency());
//        return retval;
    }
  1.1246 +
  1.1247 +    private void reportConnector(char type) throws SAXException {
  1.1248 +        switch (type) {
  1.1249 +        case '|':
  1.1250 +            dtdHandler.connector(DTDEventListener.CHOICE);    ///<-
  1.1251 +            return;
  1.1252 +        case ',':
  1.1253 +            dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
  1.1254 +            return;
  1.1255 +        default:
  1.1256 +            throw new Error();    //assertion failed.
  1.1257 +        }
  1.1258 +    }
  1.1259 +
  1.1260 +    private short getFrequency()
  1.1261 +            throws IOException, SAXException {
  1.1262 +
  1.1263 +        final char c = getc();
  1.1264 +
  1.1265 +        if (c == '?') {
  1.1266 +            strTmp.append(c);
  1.1267 +            return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
  1.1268 +            //        original.setRepeat(Repeat.ZERO_OR_ONE);
  1.1269 +        } else if (c == '+') {
  1.1270 +            strTmp.append(c);
  1.1271 +            return DTDEventListener.OCCURENCE_ONE_OR_MORE;
  1.1272 +            //        original.setRepeat(Repeat.ONE_OR_MORE);
  1.1273 +        } else if (c == '*') {
  1.1274 +            strTmp.append(c);
  1.1275 +            return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
  1.1276 +            //        original.setRepeat(Repeat.ZERO_OR_MORE);
  1.1277 +        } else {
  1.1278 +            ungetc();
  1.1279 +            return DTDEventListener.OCCURENCE_ONCE;
  1.1280 +        }
  1.1281 +    }
  1.1282 +
  1.1283 +    // '(' S? '#PCDATA' already consumed
  1.1284 +    // matching ')' must be in "start" entity if validating
  1.1285 +    private void getMixed(String elementName, /*Element element,*/ InputEntity start)
  1.1286 +            throws IOException, SAXException {
  1.1287 +
  1.1288 +        // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
  1.1289 +        //        | '(' S? '#PCDATA'                   S? ')'
  1.1290 +        maybeWhitespace();
  1.1291 +        if (peek("\u0029*") || peek("\u0029")) {
  1.1292 +            if (in != start)
  1.1293 +                error("V-014", new Object[]{elementName});
  1.1294 +            strTmp.append(')');
  1.1295 +//            element.setContentModel(new StringModel(StringModelType.PCDATA));
  1.1296 +            return;
  1.1297 +        }
  1.1298 +
  1.1299 +        ArrayList l = new ArrayList();
  1.1300 +//    l.add(new StringModel(StringModelType.PCDATA));
  1.1301 +
  1.1302 +
  1.1303 +        while (peek("|")) {
  1.1304 +            String name;
  1.1305 +
  1.1306 +            strTmp.append('|');
  1.1307 +            maybeWhitespace();
  1.1308 +
  1.1309 +            doLexicalPE = true;
  1.1310 +            name = maybeGetName();
  1.1311 +            if (name == null)
  1.1312 +                fatal("P-042", new Object[]
  1.1313 +                {elementName, Integer.toHexString(getc())});
  1.1314 +            if (l.contains(name)) {
  1.1315 +                error("V-015", new Object[]{name});
  1.1316 +            } else {
  1.1317 +                l.add(name);
  1.1318 +                dtdHandler.mixedElement(name);
  1.1319 +            }
  1.1320 +            strTmp.append(name);
  1.1321 +            maybeWhitespace();
  1.1322 +        }
  1.1323 +
  1.1324 +        if (!peek("\u0029*"))    // right paren
  1.1325 +            fatal("P-043", new Object[]
  1.1326 +            {elementName, new Character(getc())});
  1.1327 +        if (in != start)
  1.1328 +            error("V-014", new Object[]{elementName});
  1.1329 +        strTmp.append(')');
  1.1330 +//        ChoiceModel cm = new ChoiceModel((Collection)l);
  1.1331 +//    cm.setRepeat(Repeat.ZERO_OR_MORE);
  1.1332 +//       element.setContentModel(cm);
  1.1333 +    }
  1.1334 +
  1.1335 +    private boolean maybeAttlistDecl()
  1.1336 +            throws IOException, SAXException {
  1.1337 +
  1.1338 +        // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  1.1339 +        InputEntity start = peekDeclaration("!ATTLIST");
  1.1340 +
  1.1341 +        if (start == null)
  1.1342 +            return false;
  1.1343 +
  1.1344 +        String elementName = getMarkupDeclname("F-016", true);
  1.1345 +//    Element    element = (Element) elements.get (name);
  1.1346 +
  1.1347 +//    if (element == null) {
  1.1348 +//        // not yet declared -- no problem.
  1.1349 +//        element = new Element(name);
  1.1350 +//        elements.put(name, element);
  1.1351 +//    }
  1.1352 +
  1.1353 +        while (!peek(">")) {
  1.1354 +
  1.1355 +            // [53] AttDef ::= S Name S AttType S DefaultDecl
  1.1356 +            // [54] AttType ::= StringType | TokenizedType | EnumeratedType
  1.1357 +
  1.1358 +            // look for global attribute definitions, don't expand for now...
  1.1359 +            maybeWhitespace();
  1.1360 +            char c = getc();
  1.1361 +            if (c == '%') {
  1.1362 +                String entityName = maybeGetName();
  1.1363 +                if (entityName != null) {
  1.1364 +                    nextChar(';', "F-021", entityName);
  1.1365 +                    whitespace("F-021");
  1.1366 +                    continue;
  1.1367 +                } else
  1.1368 +                    fatal("P-011");
  1.1369 +            }
  1.1370 +
  1.1371 +            ungetc();
  1.1372 +            // look for attribute name otherwise
  1.1373 +            String attName = maybeGetName();
  1.1374 +            if (attName == null) {
  1.1375 +                fatal("P-044", new Object[]{new Character(getc())});
  1.1376 +            }
  1.1377 +            whitespace("F-001");
  1.1378 +
  1.1379 +///        Attribute    a = new Attribute (name);
  1.1380 +
  1.1381 +            String typeName;
  1.1382 +            Vector values = null;    // notation/enumeration values
  1.1383 +
  1.1384 +            // Note:  use the type constants from Attribute
  1.1385 +            // so that "==" may be used (faster)
  1.1386 +
  1.1387 +            // [55] StringType ::= 'CDATA'
  1.1388 +            if (peek(TYPE_CDATA))
  1.1389 +///            a.setType(Attribute.CDATA);
  1.1390 +                typeName = TYPE_CDATA;
  1.1391 +
  1.1392 +            // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
  1.1393 +            //        | 'ENTITY' | 'ENTITIES'
  1.1394 +            //        | 'NMTOKEN' | 'NMTOKENS'
  1.1395 +            // n.b. if "IDREFS" is there, both "ID" and "IDREF"
  1.1396 +            // match peekahead ... so this order matters!
  1.1397 +            else if (peek(TYPE_IDREFS))
  1.1398 +                typeName = TYPE_IDREFS;
  1.1399 +            else if (peek(TYPE_IDREF))
  1.1400 +                typeName = TYPE_IDREF;
  1.1401 +            else if (peek(TYPE_ID)) {
  1.1402 +                typeName = TYPE_ID;
  1.1403 +// TODO: should implement this error check?
  1.1404 +///        if (element.id() != null) {
  1.1405 +///                    error ("V-016", new Object [] { element.id() });
  1.1406 +///        } else
  1.1407 +///            element.setId(name);
  1.1408 +            } else if (peek(TYPE_ENTITY))
  1.1409 +                typeName = TYPE_ENTITY;
  1.1410 +            else if (peek(TYPE_ENTITIES))
  1.1411 +                typeName = TYPE_ENTITIES;
  1.1412 +            else if (peek(TYPE_NMTOKENS))
  1.1413 +                typeName = TYPE_NMTOKENS;
  1.1414 +            else if (peek(TYPE_NMTOKEN))
  1.1415 +                typeName = TYPE_NMTOKEN;
  1.1416 +
  1.1417 +            // [57] EnumeratedType ::= NotationType | Enumeration
  1.1418 +            // [58] NotationType ::= 'NOTATION' S '(' S? Name
  1.1419 +            //        (S? '|' S? Name)* S? ')'
  1.1420 +            else if (peek(TYPE_NOTATION)) {
  1.1421 +                typeName = TYPE_NOTATION;
  1.1422 +                whitespace("F-002");
  1.1423 +                nextChar('(', "F-029", null);
  1.1424 +                maybeWhitespace();
  1.1425 +
  1.1426 +                values = new Vector();
  1.1427 +                do {
  1.1428 +                    String name;
  1.1429 +                    if ((name = maybeGetName()) == null)
  1.1430 +                        fatal("P-068");
  1.1431 +                    // permit deferred declarations
  1.1432 +                    if (notations.get(name) == null)
  1.1433 +                        notations.put(name, name);
  1.1434 +                    values.addElement(name);
  1.1435 +                    maybeWhitespace();
  1.1436 +                    if (peek("|"))
  1.1437 +                        maybeWhitespace();
  1.1438 +                } while (!peek(")"));
  1.1439 +///            a.setValues(new String [v.size ()]);
  1.1440 +///            for (int i = 0; i < v.size (); i++)
  1.1441 +///                a.setValue(i, (String)v.elementAt(i));
  1.1442 +
  1.1443 +                // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
  1.1444 +            } else if (peek("(")) {
  1.1445 +///            a.setType(Attribute.ENUMERATION);
  1.1446 +                typeName = TYPE_ENUMERATION;
  1.1447 +
  1.1448 +                maybeWhitespace();
  1.1449 +
  1.1450 +///            Vector v = new Vector ();
  1.1451 +                values = new Vector();
  1.1452 +                do {
  1.1453 +                    String name = getNmtoken();
  1.1454 +///                v.addElement (name);
  1.1455 +                    values.addElement(name);
  1.1456 +                    maybeWhitespace();
  1.1457 +                    if (peek("|"))
  1.1458 +                        maybeWhitespace();
  1.1459 +                } while (!peek(")"));
  1.1460 +///            a.setValues(new String [v.size ()]);
  1.1461 +///            for (int i = 0; i < v.size (); i++)
  1.1462 +///                a.setValue(i, (String)v.elementAt(i));
  1.1463 +            } else {
  1.1464 +                fatal("P-045",
  1.1465 +                        new Object[]{attName, new Character(getc())});
  1.1466 +                typeName = null;
  1.1467 +            }
  1.1468 +
  1.1469 +            short attributeUse;
  1.1470 +            String defaultValue = null;
  1.1471 +
  1.1472 +            // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
  1.1473 +            //        | (('#FIXED' S)? AttValue)
  1.1474 +            whitespace("F-003");
  1.1475 +            if (peek("#REQUIRED"))
  1.1476 +                attributeUse = DTDEventListener.USE_REQUIRED;
  1.1477 +///            a.setIsRequired(true);
  1.1478 +            else if (peek("#FIXED")) {
  1.1479 +///            if (a.type() == Attribute.ID)
  1.1480 +                if (typeName == TYPE_ID)
  1.1481 +                    error("V-017", new Object[]{attName});
  1.1482 +///            a.setIsFixed(true);
  1.1483 +                attributeUse = DTDEventListener.USE_FIXED;
  1.1484 +                whitespace("F-004");
  1.1485 +                parseLiteral(false);
  1.1486 +///            if (a.type() != Attribute.CDATA)
  1.1487 +///                a.setDefaultValue(normalize(false));
  1.1488 +///            else
  1.1489 +///                a.setDefaultValue(strTmp.toString());
  1.1490 +
  1.1491 +                if (typeName == TYPE_CDATA)
  1.1492 +                    defaultValue = normalize(false);
  1.1493 +                else
  1.1494 +                    defaultValue = strTmp.toString();
  1.1495 +
  1.1496 +// TODO: implement this check
  1.1497 +///            if (a.type() != Attribute.CDATA)
  1.1498 +///                validateAttributeSyntax (a, a.defaultValue());
  1.1499 +            } else if (!peek("#IMPLIED")) {
  1.1500 +                attributeUse = DTDEventListener.USE_IMPLIED;
  1.1501 +
  1.1502 +///            if (a.type() == Attribute.ID)
  1.1503 +                if (typeName == TYPE_ID)
  1.1504 +                    error("V-018", new Object[]{attName});
  1.1505 +                parseLiteral(false);
  1.1506 +///            if (a.type() != Attribute.CDATA)
  1.1507 +///                a.setDefaultValue(normalize(false));
  1.1508 +///            else
  1.1509 +///                a.setDefaultValue(strTmp.toString());
  1.1510 +                if (typeName == TYPE_CDATA)
  1.1511 +                    defaultValue = normalize(false);
  1.1512 +                else
  1.1513 +                    defaultValue = strTmp.toString();
  1.1514 +
  1.1515 +// TODO: implement this check
  1.1516 +///            if (a.type() != Attribute.CDATA)
  1.1517 +///                validateAttributeSyntax (a, a.defaultValue());
  1.1518 +            } else {
  1.1519 +                // TODO: this looks like an fatal error.
  1.1520 +                attributeUse = DTDEventListener.USE_NORMAL;
  1.1521 +            }
  1.1522 +
  1.1523 +            if (XmlLang.equals(attName)
  1.1524 +                    && defaultValue/* a.defaultValue()*/ != null
  1.1525 +                    && !isXmlLang(defaultValue/*a.defaultValue()*/))
  1.1526 +                error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});
  1.1527 +
  1.1528 +// TODO: isn't it an error to specify the same attribute twice?
  1.1529 +///        if (!element.attributes().contains(a)) {
  1.1530 +///            element.addAttribute(a);
  1.1531 +///            dtdHandler.attributeDecl(a);
  1.1532 +///        }
  1.1533 +
  1.1534 +            String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null;
  1.1535 +            dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
  1.1536 +            maybeWhitespace();
  1.1537 +        }
  1.1538 +        if (start != in)
  1.1539 +            error("V-013", null);
  1.1540 +        return true;
  1.1541 +    }
  1.1542 +
  1.1543 +    // used when parsing literal attribute values,
  1.1544 +    // or public identifiers.
  1.1545 +    //
  1.1546 +    // input in strTmp
  1.1547 +    private String normalize(boolean invalidIfNeeded) {
  1.1548 +
  1.1549 +        // this can allocate an extra string...
  1.1550 +
  1.1551 +        String s = strTmp.toString();
  1.1552 +        String s2 = s.trim();
  1.1553 +        boolean didStrip = false;
  1.1554 +
  1.1555 +        if (s != s2) {
  1.1556 +            s = s2;
  1.1557 +            s2 = null;
  1.1558 +            didStrip = true;
  1.1559 +        }
  1.1560 +        strTmp = new StringBuffer();
  1.1561 +        for (int i = 0; i < s.length(); i++) {
  1.1562 +            char c = s.charAt(i);
  1.1563 +            if (!XmlChars.isSpace(c)) {
  1.1564 +                strTmp.append(c);
  1.1565 +                continue;
  1.1566 +            }
  1.1567 +            strTmp.append(' ');
  1.1568 +            while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
  1.1569 +                didStrip = true;
  1.1570 +            i--;
  1.1571 +        }
  1.1572 +        if (didStrip)
  1.1573 +            return strTmp.toString();
  1.1574 +        else
  1.1575 +            return s;
  1.1576 +    }
  1.1577 +
    /**
     * Parses a conditional section ({@code <![INCLUDE[ ... ]]>} or
     * {@code <![IGNORE[ ... ]]>}) if one starts at the current position.
     *
     * <p>An INCLUDE section is processed like external-subset content:
     * whitespace, parameter-entity references, markup declarations and
     * nested conditional sections are consumed until {@code ]]>} is seen.
     * An IGNORE section is skipped character by character while tracking
     * the nesting of {@code <![} / {@code ]]>} pairs.
     *
     * @return {@code false} if the input does not start with {@code <![},
     *         {@code true} once a conditional section has been handled
     * @throws IOException  propagated from the underlying input
     * @throws SAXException propagated from the error reporting routines
     */
    private boolean maybeConditionalSect()
            throws IOException, SAXException {

        // [61] conditionalSect ::= includeSect | ignoreSect

        if (!peek("<!["))
            return false;

        String keyword;
        // remember the entity the section started in, for the nesting checks below
        InputEntity start = in;

        maybeWhitespace();

        if ((keyword = maybeGetName()) == null)
            fatal("P-046");
        maybeWhitespace();
        nextChar('[', "F-030", null);

        // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
        //                extSubsetDecl ']]>'
        if ("INCLUDE".equals(keyword)) {
            for (; ;) {
                // drop exhausted entities that were pushed after the section began
                while (in.isEOF() && in != start)
                    in = in.pop();
                // still at EOF here: reported as (non-fatal) error V-020
                if (in.isEOF()) {
                    error("V-020", null);
                }
                if (peek("]]>"))
                    break;

                // whitespace and PE references are handled syntactically here,
                // so lexical PE expansion in getc() is switched off for them
                doLexicalPE = false;
                if (maybeWhitespace())
                    continue;
                if (maybePEReference())
                    continue;
                doLexicalPE = true;
                if (maybeMarkupDecl() || maybeConditionalSect())
                    continue;

                // nothing recognizable inside the INCLUDE section
                fatal("P-047");
            }

            // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
            //            ignoreSectcontents ']]>'
            // [64] ignoreSectcontents ::= Ignore ('<!['
            //            ignoreSectcontents ']]>' Ignore)*
            // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
        } else if ("IGNORE".equals(keyword)) {
            int nestlevel = 1;
            // ignoreSectcontents
            // NOTE(review): doLexicalPE is not set back to true when this
            // branch finishes -- confirm callers reset it as needed.
            doLexicalPE = false;
            while (nestlevel > 0) {
                // NOTE(review): with doLexicalPE false, getc() in this class
                // delegates straight to in.getc(); any popping of entities
                // would have to happen there -- confirm.
                char c = getc();    // will pop input entities
                if (c == '<') {
                    // "<![" opens a nested section
                    if (peek("!["))
                        nestlevel++;
                } else if (c == ']') {
                    // "]]>" closes the innermost open section
                    if (peek("]>"))
                        nestlevel--;
                } else
                    continue;
            }
        } else
            fatal("P-048", new Object[]{keyword});
        return true;
    }
  1.1644 +
  1.1645 +
  1.1646 +    //
  1.1647 +    // CHAPTER 4:  Physical Structures
  1.1648 +    //
  1.1649 +
  1.1650 +    // parse decimal or hex numeric character reference
  1.1651 +    private int parseCharNumber()
  1.1652 +            throws IOException, SAXException {
  1.1653 +
  1.1654 +        char c;
  1.1655 +        int retval = 0;
  1.1656 +
  1.1657 +        // n.b. we ignore overflow ...
  1.1658 +        if (getc() != 'x') {
  1.1659 +            ungetc();
  1.1660 +            for (; ;) {
  1.1661 +                c = getc();
  1.1662 +                if (c >= '0' && c <= '9') {
  1.1663 +                    retval *= 10;
  1.1664 +                    retval += (c - '0');
  1.1665 +                    continue;
  1.1666 +                }
  1.1667 +                if (c == ';')
  1.1668 +                    return retval;
  1.1669 +                fatal("P-049");
  1.1670 +            }
  1.1671 +        } else
  1.1672 +            for (; ;) {
  1.1673 +                c = getc();
  1.1674 +                if (c >= '0' && c <= '9') {
  1.1675 +                    retval <<= 4;
  1.1676 +                    retval += (c - '0');
  1.1677 +                    continue;
  1.1678 +                }
  1.1679 +                if (c >= 'a' && c <= 'f') {
  1.1680 +                    retval <<= 4;
  1.1681 +                    retval += 10 + (c - 'a');
  1.1682 +                    continue;
  1.1683 +                }
  1.1684 +                if (c >= 'A' && c <= 'F') {
  1.1685 +                    retval <<= 4;
  1.1686 +                    retval += 10 + (c - 'A');
  1.1687 +                    continue;
  1.1688 +                }
  1.1689 +                if (c == ';')
  1.1690 +                    return retval;
  1.1691 +                fatal("P-050");
  1.1692 +            }
  1.1693 +    }
  1.1694 +
  1.1695 +    // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
  1.1696 +    // though still subject to the 'Char' construct in XML
  1.1697 +    private int surrogatesToCharTmp(int ucs4)
  1.1698 +            throws SAXException {
  1.1699 +
  1.1700 +        if (ucs4 <= 0xffff) {
  1.1701 +            if (XmlChars.isChar(ucs4)) {
  1.1702 +                charTmp[0] = (char) ucs4;
  1.1703 +                return 1;
  1.1704 +            }
  1.1705 +        } else if (ucs4 <= 0x0010ffff) {
  1.1706 +            // we represent these as UNICODE surrogate pairs
  1.1707 +            ucs4 -= 0x10000;
  1.1708 +            charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
  1.1709 +            charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
  1.1710 +            return 2;
  1.1711 +        }
  1.1712 +        fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
  1.1713 +        // NOTREACHED
  1.1714 +        return -1;
  1.1715 +    }
  1.1716 +
  1.1717 +    private boolean maybePEReference()
  1.1718 +            throws IOException, SAXException {
  1.1719 +
  1.1720 +        // This is the SYNTACTIC version of this construct.
  1.1721 +        // When processing external entities, there is also
  1.1722 +        // a LEXICAL version; see getc() and doLexicalPE.
  1.1723 +
  1.1724 +        // [69] PEReference ::= '%' Name ';'
  1.1725 +        if (!in.peekc('%'))
  1.1726 +            return false;
  1.1727 +
  1.1728 +        String name = maybeGetName();
  1.1729 +        Object entity;
  1.1730 +
  1.1731 +        if (name == null)
  1.1732 +            fatal("P-011");
  1.1733 +        nextChar(';', "F-021", name);
  1.1734 +        entity = params.get(name);
  1.1735 +
  1.1736 +        if (entity instanceof InternalEntity) {
  1.1737 +            InternalEntity value = (InternalEntity) entity;
  1.1738 +            pushReader(value.buf, name, false);
  1.1739 +
  1.1740 +        } else if (entity instanceof ExternalEntity) {
  1.1741 +            pushReader((ExternalEntity) entity);
  1.1742 +            externalParameterEntity((ExternalEntity) entity);
  1.1743 +
  1.1744 +        } else if (entity == null) {
  1.1745 +            error("V-022", new Object[]{name});
  1.1746 +        }
  1.1747 +        return true;
  1.1748 +    }
  1.1749 +
    /**
     * Parses an {@code <!ENTITY ...>} declaration if one starts at the
     * current position.
     *
     * <p>General entities are recorded in {@code entities}, parameter
     * entities in {@code params}.  Only the first declaration of a name is
     * stored; a redeclared general entity produces warning "P-054".  Stored
     * general entities are reported to {@code dtdHandler} as internal,
     * external or unparsed entity declarations.
     *
     * @return {@code false} if no entity declaration starts here,
     *         {@code true} after one has been parsed
     * @throws IOException  propagated from reading the input
     * @throws SAXException propagated from error reporting or the handler
     */
    private boolean maybeEntityDecl()
            throws IOException, SAXException {

        // [70] EntityDecl ::= GEDecl | PEDecl
        // [71] GEDecl ::= '<!ENTITY' S       Name S EntityDef S? '>'
        // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF     S? '>'
        // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
        // [74] PEDef     ::= EntityValue |  ExternalID
        //
        InputEntity start = peekDeclaration("!ENTITY");

        if (start == null)
            return false;

        String entityName;
        SimpleHashtable defns;      // table the new entity belongs to
        ExternalEntity externalId;  // non-null for external entities
        boolean doStore;            // false when the name is already declared

        // PE expansion gets selectively turned off several places:
        // in ENTITY declarations (here), in comments, in PIs.

        // Here, we allow PE entities to be declared, and allows
        // literals to include PE refs without the added spaces
        // required with their expansion in markup decls.

        doLexicalPE = false;
        whitespace("F-005");
        // a '%' right after the keyword marks a parameter-entity declaration
        if (in.peekc('%')) {
            whitespace("F-006");
            defns = params;
        } else
            defns = entities;

        ungetc();    // leave some whitespace
        doLexicalPE = true;
        entityName = getMarkupDeclname("F-017", false);
        whitespace("F-007");
        externalId = maybeExternalID();

        //
        // first definition sticks ... e.g. internal subset PEs are used
        // to override DTD defaults.  It's also an "error" to incorrectly
        // redefine builtin internal entities, but since reporting such
        // errors is optional we only give warnings ("just in case") for
        // non-parameter entities.
        //
        doStore = (defns.get(entityName) == null);
        if (!doStore && defns == entities)
            warning("P-054", new Object[]{entityName});

        // internal entities
        if (externalId == null) {
            char value [];
            InternalEntity entity;

            doLexicalPE = false;        // "ab%bar;cd" -maybe-> "abcd"
            parseLiteral(true);
            doLexicalPE = true;
            if (doStore) {
                // copy the literal text accumulated in strTmp
                value = new char[strTmp.length()];
                if (value.length != 0)
                    strTmp.getChars(0, value.length, value, 0);
                entity = new InternalEntity(entityName, value);
                entity.isPE = (defns == params);
                entity.isFromInternalSubset = false;
                defns.put(entityName, entity);
                // only general entities are reported to the handler
                if (defns == entities)
                    dtdHandler.internalGeneralEntityDecl(entityName,
                            new String(value));
            }

            // external entities (including unparsed)
        } else {
            // [76] NDataDecl ::= S 'NDATA' S Name
            // NDATA is only looked for on general entities
            if (defns == entities && maybeWhitespace()
                    && peek("NDATA")) {
                externalId.notation = getMarkupDeclname("F-018", false);

                // flag undeclared notation for checking after
                // the DTD is fully processed
                if (notations.get(externalId.notation) == null)
                    notations.put(externalId.notation, Boolean.TRUE);
            }
            externalId.name = entityName;
            externalId.isPE = (defns == params);
            externalId.isFromInternalSubset = false;
            if (doStore) {
                defns.put(entityName, externalId);
                // a notation makes this an unparsed entity; otherwise only
                // general (non-parameter) entities are reported
                if (externalId.notation != null)
                    dtdHandler.unparsedEntityDecl(entityName,
                            externalId.publicId, externalId.systemId,
                            externalId.notation);
                else if (defns == entities)
                    dtdHandler.externalGeneralEntityDecl(entityName,
                            externalId.publicId, externalId.systemId);
            }
        }
        maybeWhitespace();
        nextChar('>', "F-031", entityName);
        // the declaration must end in the entity it started in
        if (start != in)
            error("V-013", null);
        return true;
    }
  1.1854 +
  1.1855 +    private ExternalEntity maybeExternalID()
  1.1856 +            throws IOException, SAXException {
  1.1857 +
  1.1858 +        // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
  1.1859 +        //        | 'PUBLIC' S' PubidLiteral S Systemliteral
  1.1860 +        String temp = null;
  1.1861 +        ExternalEntity retval;
  1.1862 +
  1.1863 +        if (peek("PUBLIC")) {
  1.1864 +            whitespace("F-009");
  1.1865 +            temp = parsePublicId();
  1.1866 +        } else if (!peek("SYSTEM"))
  1.1867 +            return null;
  1.1868 +
  1.1869 +        retval = new ExternalEntity(in);
  1.1870 +        retval.publicId = temp;
  1.1871 +        whitespace("F-008");
  1.1872 +        retval.systemId = parseSystemId();
  1.1873 +        return retval;
  1.1874 +    }
  1.1875 +
  1.1876 +    private String parseSystemId()
  1.1877 +            throws IOException, SAXException {
  1.1878 +
  1.1879 +        String uri = getQuotedString("F-034", null);
  1.1880 +        int temp = uri.indexOf(':');
  1.1881 +
  1.1882 +        // resolve relative URIs ... must do it here since
  1.1883 +        // it's relative to the source file holding the URI!
  1.1884 +
  1.1885 +        // "new java.net.URL (URL, string)" conforms to RFC 1630,
  1.1886 +        // but we can't use that except when the URI is a URL.
  1.1887 +        // The entity resolver is allowed to handle URIs that are
  1.1888 +        // not URLs, so we pass URIs through with scheme intact
  1.1889 +        if (temp == -1 || uri.indexOf('/') < temp) {
  1.1890 +            String baseURI;
  1.1891 +
  1.1892 +            baseURI = in.getSystemId();
  1.1893 +            if (baseURI == null)
  1.1894 +                fatal("P-055", new Object[]{uri});
  1.1895 +            if (uri.length() == 0)
  1.1896 +                uri = ".";
  1.1897 +            baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
  1.1898 +            if (uri.charAt(0) != '/')
  1.1899 +                uri = baseURI + uri;
  1.1900 +            else {
  1.1901 +                // XXX slashes at the beginning of a relative URI are
  1.1902 +                // a special case we don't handle.
  1.1903 +                throw new InternalError();
  1.1904 +            }
  1.1905 +
  1.1906 +            // letting other code map any "/xxx/../" or "/./" to "/",
  1.1907 +            // since all URIs must handle it the same.
  1.1908 +        }
  1.1909 +        // check for fragment ID in URI
  1.1910 +        if (uri.indexOf('#') != -1)
  1.1911 +            error("P-056", new Object[]{uri});
  1.1912 +        return uri;
  1.1913 +    }
  1.1914 +
  1.1915 +    private void maybeTextDecl()
  1.1916 +            throws IOException, SAXException {
  1.1917 +
  1.1918 +        // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  1.1919 +        if (peek("<?xml")) {
  1.1920 +            readVersion(false, "1.0");
  1.1921 +            readEncoding(true);
  1.1922 +            maybeWhitespace();
  1.1923 +            if (!peek("?>"))
  1.1924 +                fatal("P-057");
  1.1925 +        }
  1.1926 +    }
  1.1927 +
    /**
     * Parses the content of the input entity that is current when this
     * method is called: an optional text declaration followed by whitespace,
     * parameter-entity references, markup declarations and conditional
     * sections, until that entity is exhausted.
     *
     * <p>If parsing stops before the entity reaches EOF, the condition is
     * fatal ("P-059").
     *
     * @param next the external entity being processed; not read by this
     *             method's body, which works on the current input {@code in}
     * @throws IOException  propagated from reading the input
     * @throws SAXException propagated from error reporting
     */
    private void externalParameterEntity(ExternalEntity next)
            throws IOException, SAXException {

        //
        // Reap the intended benefits of standalone declarations:
        // don't deal with external parameter entities, except to
        // validate the standalone declaration.
        //

        // n.b. "in external parameter entities" (and external
        // DTD subset, same grammar) parameter references can
        // occur "within" markup declarations ... expansions can
        // cross syntax rules.  Flagged here; affects getc().

        // [79] ExtPE ::= TextDecl? extSubsetDecl
        // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
        //        | PEReference | S )*
        InputEntity pe;

        // XXX if this returns false ...

        // remember the entity being parsed; nested inputs may be pushed on
        // top of it while the loop runs
        pe = in;
        maybeTextDecl();
        while (!pe.isEOF()) {
            // pop internal PEs (and whitespace before/after)
            if (in.isEOF()) {
                in = in.pop();
                continue;
            }
            // whitespace and PE references are matched syntactically, with
            // lexical PE expansion in getc() disabled
            doLexicalPE = false;
            if (maybeWhitespace())
                continue;
            if (maybePEReference())
                continue;
            doLexicalPE = true;
            if (maybeMarkupDecl() || maybeConditionalSect())
                continue;
            // nothing matched: leave the loop with pe not at EOF
            break;
        }
        // if (in != pe) throw new InternalError("who popped my PE?");
        if (!pe.isEOF())
            fatal("P-059", new Object[]{in.getName()});
    }
  1.1971 +
  1.1972 +    private void readEncoding(boolean must)
  1.1973 +            throws IOException, SAXException {
  1.1974 +
  1.1975 +        // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  1.1976 +        String name = maybeReadAttribute("encoding", must);
  1.1977 +
  1.1978 +        if (name == null)
  1.1979 +            return;
  1.1980 +        for (int i = 0; i < name.length(); i++) {
  1.1981 +            char c = name.charAt(i);
  1.1982 +            if ((c >= 'A' && c <= 'Z')
  1.1983 +                    || (c >= 'a' && c <= 'z'))
  1.1984 +                continue;
  1.1985 +            if (i != 0
  1.1986 +                    && ((c >= '0' && c <= '9')
  1.1987 +                    || c == '-'
  1.1988 +                    || c == '_'
  1.1989 +                    || c == '.'
  1.1990 +                    ))
  1.1991 +                continue;
  1.1992 +            fatal("P-060", new Object[]{new Character(c)});
  1.1993 +        }
  1.1994 +
  1.1995 +        //
  1.1996 +        // This should be the encoding in use, and it's even an error for
  1.1997 +        // it to be anything else (in certain cases that are impractical to
  1.1998 +        // to test, and may even be insufficient).  So, we do the best we
  1.1999 +        // can, and warn if things look suspicious.  Note that Java doesn't
  1.2000 +        // uniformly expose the encodings, and that the names it uses
  1.2001 +        // internally are nonstandard.  Also, that the XML spec allows
  1.2002 +        // such "errors" not to be reported at all.
  1.2003 +        //
  1.2004 +        String currentEncoding = in.getEncoding();
  1.2005 +
  1.2006 +        if (currentEncoding != null
  1.2007 +                && !name.equalsIgnoreCase(currentEncoding))
  1.2008 +            warning("P-061", new Object[]{name, currentEncoding});
  1.2009 +    }
  1.2010 +
  1.2011 +    private boolean maybeNotationDecl()
  1.2012 +            throws IOException, SAXException {
  1.2013 +
  1.2014 +        // [82] NotationDecl ::= '<!NOTATION' S Name S
  1.2015 +        //        (ExternalID | PublicID) S? '>'
  1.2016 +        // [83] PublicID ::= 'PUBLIC' S PubidLiteral
  1.2017 +        InputEntity start = peekDeclaration("!NOTATION");
  1.2018 +
  1.2019 +        if (start == null)
  1.2020 +            return false;
  1.2021 +
  1.2022 +        String name = getMarkupDeclname("F-019", false);
  1.2023 +        ExternalEntity entity = new ExternalEntity(in);
  1.2024 +
  1.2025 +        whitespace("F-011");
  1.2026 +        if (peek("PUBLIC")) {
  1.2027 +            whitespace("F-009");
  1.2028 +            entity.publicId = parsePublicId();
  1.2029 +            if (maybeWhitespace()) {
  1.2030 +                if (!peek(">"))
  1.2031 +                    entity.systemId = parseSystemId();
  1.2032 +                else
  1.2033 +                    ungetc();
  1.2034 +            }
  1.2035 +        } else if (peek("SYSTEM")) {
  1.2036 +            whitespace("F-008");
  1.2037 +            entity.systemId = parseSystemId();
  1.2038 +        } else
  1.2039 +            fatal("P-062");
  1.2040 +        maybeWhitespace();
  1.2041 +        nextChar('>', "F-032", name);
  1.2042 +        if (start != in)
  1.2043 +            error("V-013", null);
  1.2044 +        if (entity.systemId != null && entity.systemId.indexOf('#') != -1)
  1.2045 +            error("P-056", new Object[]{entity.systemId});
  1.2046 +
  1.2047 +        Object value = notations.get(name);
  1.2048 +        if (value != null && value instanceof ExternalEntity)
  1.2049 +            warning("P-063", new Object[]{name});
  1.2050 +
  1.2051 +        else {
  1.2052 +            notations.put(name, entity);
  1.2053 +            dtdHandler.notationDecl(name, entity.publicId,
  1.2054 +                    entity.systemId);
  1.2055 +        }
  1.2056 +        return true;
  1.2057 +    }
  1.2058 +
  1.2059 +
  1.2060 +    ////////////////////////////////////////////////////////////////
  1.2061 +    //
  1.2062 +    //    UTILITIES
  1.2063 +    //
  1.2064 +    ////////////////////////////////////////////////////////////////
  1.2065 +
  1.2066 +    private char getc() throws IOException, SAXException {
  1.2067 +
  1.2068 +        if (!doLexicalPE) {
  1.2069 +            char c = in.getc();
  1.2070 +            return c;
  1.2071 +        }
  1.2072 +
  1.2073 +        //
  1.2074 +        // External parameter entities get funky processing of '%param;'
  1.2075 +        // references.  It's not clearly defined in the XML spec; but it
  1.2076 +        // boils down to having those refs be _lexical_ in most cases to
  1.2077 +        // include partial syntax productions.  It also needs selective
  1.2078 +        // enabling; "<!ENTITY % foo ...>" must work, for example, and
  1.2079 +        // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
  1.2080 +        // if it's expanded in a literal, else "ab  cd".  PEs also do
  1.2081 +        // not expand within comments or PIs, and external PEs are only
  1.2082 +        // allowed to have markup decls (and so aren't handled lexically).
  1.2083 +        //
  1.2084 +        // This PE handling should be merged into maybeWhitespace, where
  1.2085 +        // it can be dealt with more consistently.
  1.2086 +        //
  1.2087 +        // Also, there are some validity constraints in this area.
  1.2088 +        //
  1.2089 +        char c;
  1.2090 +
  1.2091 +        while (in.isEOF()) {
  1.2092 +            if (in.isInternal() || (doLexicalPE && !in.isDocument()))
  1.2093 +                in = in.pop();
  1.2094 +            else {
  1.2095 +                fatal("P-064", new Object[]{in.getName()});
  1.2096 +            }
  1.2097 +        }
  1.2098 +        if ((c = in.getc()) == '%' && doLexicalPE) {
  1.2099 +            // PE ref ::= '%' name ';'
  1.2100 +            String name = maybeGetName();
  1.2101 +            Object entity;
  1.2102 +
  1.2103 +            if (name == null)
  1.2104 +                fatal("P-011");
  1.2105 +            nextChar(';', "F-021", name);
  1.2106 +            entity = params.get(name);
  1.2107 +
  1.2108 +            // push a magic "entity" before and after the
  1.2109 +            // real one, so ungetc() behaves uniformly
  1.2110 +            pushReader(" ".toCharArray(), null, false);
  1.2111 +            if (entity instanceof InternalEntity)
  1.2112 +                pushReader(((InternalEntity) entity).buf, name, false);
  1.2113 +            else if (entity instanceof ExternalEntity)
  1.2114 +            // PEs can't be unparsed!
  1.2115 +            // XXX if this returns false ...
  1.2116 +                pushReader((ExternalEntity) entity);
  1.2117 +            else if (entity == null)
  1.2118 +            // see note in maybePEReference re making this be nonfatal.
  1.2119 +                fatal("V-022");
  1.2120 +            else
  1.2121 +                throw new InternalError();
  1.2122 +            pushReader(" ".toCharArray(), null, false);
  1.2123 +            return in.getc();
  1.2124 +        }
  1.2125 +        return c;
  1.2126 +    }
  1.2127 +
    /**
     * Delegates to {@code ungetc()} on the current input entity {@code in}.
     */
    private void ungetc() {

        in.ungetc();
    }
  1.2132 +
    /**
     * Asks the current input entity whether the upcoming text is {@code s}.
     *
     * @param s the text to look for
     * @return the result of {@code in.peek(s, null)}
     */
    // NOTE(review): callers rely on a successful match consuming the text --
    // confirm against InputEntity.peek.
    private boolean peek(String s)
            throws IOException, SAXException {

        return in.peek(s, null);
    }
  1.2138 +
  1.2139 +    // Return the entity starting the specified declaration
  1.2140 +    // (for validating declaration nesting) else null.
  1.2141 +
  1.2142 +    private InputEntity peekDeclaration(String s)
  1.2143 +            throws IOException, SAXException {
  1.2144 +
  1.2145 +        InputEntity start;
  1.2146 +
  1.2147 +        if (!in.peekc('<'))
  1.2148 +            return null;
  1.2149 +        start = in;
  1.2150 +        if (in.peek(s, null))
  1.2151 +            return start;
  1.2152 +        in.ungetc();
  1.2153 +        return null;
  1.2154 +    }
  1.2155 +
  1.2156 +    private void nextChar(char c, String location, String near)
  1.2157 +            throws IOException, SAXException {
  1.2158 +
  1.2159 +        while (in.isEOF() && !in.isDocument())
  1.2160 +            in = in.pop();
  1.2161 +        if (!in.peekc(c))
  1.2162 +            fatal("P-008", new Object[]
  1.2163 +            {new Character(c),
  1.2164 +             messages.getMessage(locale, location),
  1.2165 +             (near == null ? "" : ('"' + near + '"'))});
  1.2166 +    }
  1.2167 +
  1.2168 +
  1.2169 +    private void pushReader(char buf [], String name, boolean isGeneral)
  1.2170 +            throws SAXException {
  1.2171 +
  1.2172 +        InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
  1.2173 +        r.init(buf, name, in, !isGeneral);
  1.2174 +        in = r;
  1.2175 +    }
  1.2176 +
  1.2177 +    private boolean pushReader(ExternalEntity next)
  1.2178 +            throws IOException, SAXException {
  1.2179 +
  1.2180 +        InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
  1.2181 +        InputSource s;
  1.2182 +        try {
  1.2183 +            s = next.getInputSource(resolver);
  1.2184 +        } catch (IOException e) {
  1.2185 +            String msg =
  1.2186 +                    "unable to open the external entity from :" + next.systemId;
  1.2187 +            if (next.publicId != null)
  1.2188 +                msg += " (public id:" + next.publicId + ")";
  1.2189 +
  1.2190 +            SAXParseException spe = new SAXParseException(msg,
  1.2191 +                    getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
  1.2192 +            dtdHandler.fatalError(spe);
  1.2193 +            throw e;
  1.2194 +        }
  1.2195 +
  1.2196 +        r.init(s, next.name, in, next.isPE);
  1.2197 +        in = r;
  1.2198 +        return true;
  1.2199 +    }
  1.2200 +
  1.2201 +    public String getPublicId() {
  1.2202 +
  1.2203 +        return (in == null) ? null : in.getPublicId();
  1.2204 +    }
  1.2205 +
  1.2206 +    public String getSystemId() {
  1.2207 +
  1.2208 +        return (in == null) ? null : in.getSystemId();
  1.2209 +    }
  1.2210 +
  1.2211 +    public int getLineNumber() {
  1.2212 +
  1.2213 +        return (in == null) ? -1 : in.getLineNumber();
  1.2214 +    }
  1.2215 +
  1.2216 +    public int getColumnNumber() {
  1.2217 +
  1.2218 +        return (in == null) ? -1 : in.getColumnNumber();
  1.2219 +    }
  1.2220 +
  1.2221 +    // error handling convenience routines
  1.2222 +
  1.2223 +    private void warning(String messageId, Object parameters [])
  1.2224 +            throws SAXException {
  1.2225 +
  1.2226 +        SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
  1.2227 +                getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
  1.2228 +
  1.2229 +        dtdHandler.warning(e);
  1.2230 +    }
  1.2231 +
  1.2232 +    void error(String messageId, Object parameters [])
  1.2233 +            throws SAXException {
  1.2234 +
  1.2235 +        SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
  1.2236 +                getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
  1.2237 +
  1.2238 +        dtdHandler.error(e);
  1.2239 +    }
  1.2240 +
  1.2241 +    private void fatal(String messageId) throws SAXException {
  1.2242 +
  1.2243 +        fatal(messageId, null);
  1.2244 +    }
  1.2245 +
  1.2246 +    private void fatal(String messageId, Object parameters [])
  1.2247 +            throws SAXException {
  1.2248 +
  1.2249 +        SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
  1.2250 +                getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
  1.2251 +
  1.2252 +        dtdHandler.fatalError(e);
  1.2253 +
  1.2254 +        throw e;
  1.2255 +    }
  1.2256 +
  1.2257 +    //
  1.2258 +    // Map char arrays to strings ... cuts down both on memory and
  1.2259 +    // CPU usage for element/attribute/other names that are reused.
  1.2260 +    //
  1.2261 +    // Documents typically repeat names a lot, so we more or less
  1.2262 +    // intern all the strings within the document; since some strings
  1.2263 +    // are repeated in multiple documents (e.g. stylesheets) we go
  1.2264 +    // a bit further, and intern globally.
  1.2265 +    //
  1.2266 +    static class NameCache {
  1.2267 +        //
  1.2268 +        // Unless we auto-grow this, the default size should be a
  1.2269 +        // reasonable bit larger than needed for most XML files
  1.2270 +        // we've yet seen (and be prime).  If it's too small, the
  1.2271 +        // penalty is just excess cache collisions.
  1.2272 +        //
  1.2273 +        NameCacheEntry hashtable [] = new NameCacheEntry[541];
  1.2274 +
  1.2275 +        //
  1.2276 +        // Usually we just want to get the 'symbol' for these chars
  1.2277 +        //
  1.2278 +        String lookup(char value [], int len) {
  1.2279 +
  1.2280 +            return lookupEntry(value, len).name;
  1.2281 +        }
  1.2282 +
  1.2283 +        //
  1.2284 +        // Sometimes we need to scan the chars in the resulting
  1.2285 +        // string, so there's an accessor which exposes them.
  1.2286 +        // (Mostly for element end tags.)
  1.2287 +        //
  1.2288 +        NameCacheEntry lookupEntry(char value [], int len) {
  1.2289 +
  1.2290 +            int index = 0;
  1.2291 +            NameCacheEntry entry;
  1.2292 +
  1.2293 +            // hashing to get index
  1.2294 +            for (int i = 0; i < len; i++)
  1.2295 +                index = index * 31 + value[i];
  1.2296 +            index &= 0x7fffffff;
  1.2297 +            index %= hashtable.length;
  1.2298 +
  1.2299 +            // return entry if one's there ...
  1.2300 +            for (entry = hashtable[index];
  1.2301 +                 entry != null;
  1.2302 +                 entry = entry.next) {
  1.2303 +                if (entry.matches(value, len))
  1.2304 +                    return entry;
  1.2305 +            }
  1.2306 +
  1.2307 +            // else create new one
  1.2308 +            entry = new NameCacheEntry();
  1.2309 +            entry.chars = new char[len];
  1.2310 +            System.arraycopy(value, 0, entry.chars, 0, len);
  1.2311 +            entry.name = new String(entry.chars);
  1.2312 +            //
  1.2313 +            // NOTE:  JDK 1.1 has a fixed size string intern table,
  1.2314 +            // with non-GC'd entries.  It can panic here; that's a
  1.2315 +            // JDK problem, use 1.2 or later with many identifiers.
  1.2316 +            //
  1.2317 +            entry.name = entry.name.intern();        // "global" intern
  1.2318 +            entry.next = hashtable[index];
  1.2319 +            hashtable[index] = entry;
  1.2320 +            return entry;
  1.2321 +        }
  1.2322 +    }
  1.2323 +
  1.2324 +    static class NameCacheEntry {
  1.2325 +
  1.2326 +        String name;
  1.2327 +        char chars [];
  1.2328 +        NameCacheEntry next;
  1.2329 +
  1.2330 +        boolean matches(char value [], int len) {
  1.2331 +
  1.2332 +            if (chars.length != len)
  1.2333 +                return false;
  1.2334 +            for (int i = 0; i < len; i++)
  1.2335 +                if (value[i] != chars[i])
  1.2336 +                    return false;
  1.2337 +            return true;
  1.2338 +        }
  1.2339 +    }
  1.2340 +
  1.2341 +    //
  1.2342 +    // Message catalog for diagnostics.
  1.2343 +    //
  1.2344 +    static final Catalog messages = new Catalog();
  1.2345 +
  1.2346 +    static final class Catalog extends MessageCatalog {
  1.2347 +
  1.2348 +        Catalog() {
  1.2349 +            super(DTDParser.class);
  1.2350 +        }
  1.2351 +    }
  1.2352 +
  1.2353 +}

mercurial