1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/jaxws_classes/com/sun/xml/internal/dtdparser/DTDParser.java Wed Apr 27 01:27:09 2016 +0800 1.3 @@ -0,0 +1,2350 @@ 1.4 +/* 1.5 + * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. Oracle designates this 1.11 + * particular file as subject to the "Classpath" exception as provided 1.12 + * by Oracle in the LICENSE file that accompanied this code. 1.13 + * 1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.17 + * version 2 for more details (a copy is included in the LICENSE file that 1.18 + * accompanied this code). 1.19 + * 1.20 + * You should have received a copy of the GNU General Public License version 1.21 + * 2 along with this work; if not, write to the Free Software Foundation, 1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.23 + * 1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.25 + * or visit www.oracle.com if you need additional information or have any 1.26 + * questions. 1.27 + */ 1.28 + 1.29 +package com.sun.xml.internal.dtdparser; 1.30 + 1.31 +import org.xml.sax.EntityResolver; 1.32 +import org.xml.sax.InputSource; 1.33 +import org.xml.sax.Locator; 1.34 +import org.xml.sax.SAXException; 1.35 +import org.xml.sax.SAXParseException; 1.36 + 1.37 +import java.io.IOException; 1.38 +import java.util.ArrayList; 1.39 +import java.util.Enumeration; 1.40 +import java.util.Hashtable; 1.41 +import java.util.Locale; 1.42 +import java.util.Set; 1.43 +import java.util.Vector; 1.44 + 1.45 +/** 1.46 + * This implements parsing of XML 1.0 DTDs. 1.47 + * <p/> 1.48 + * This conforms to the portion of the XML 1.0 specification related 1.49 + * to the external DTD subset. 1.50 + * <p/> 1.51 + * For multi-language applications (such as web servers using XML 1.52 + * processing to create dynamic content), a method supports choosing 1.53 + * a locale for parser diagnostics which is both understood by the 1.54 + * message recipient and supported by the parser. 1.55 + * <p/> 1.56 + * This parser produces a stream of parse events. It supports some 1.57 + * features (exposing comments, CDATA sections, and entity references) 1.58 + * which are not required to be reported by conformant XML processors. 1.59 + * 1.60 + * @author David Brownell 1.61 + * @author Janet Koenig 1.62 + * @author Kohsuke KAWAGUCHI 1.63 + * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $ 1.64 + */ 1.65 +public class DTDParser { 1.66 + public final static String TYPE_CDATA = "CDATA"; 1.67 + public final static String TYPE_ID = "ID"; 1.68 + public final static String TYPE_IDREF = "IDREF"; 1.69 + public final static String TYPE_IDREFS = "IDREFS"; 1.70 + public final static String TYPE_ENTITY = "ENTITY"; 1.71 + public final static String TYPE_ENTITIES = "ENTITIES"; 1.72 + public final static String TYPE_NMTOKEN = "NMTOKEN"; 1.73 + public final static String TYPE_NMTOKENS = "NMTOKENS"; 1.74 + public final static String TYPE_NOTATION = "NOTATION"; 1.75 + public final static String TYPE_ENUMERATION = "ENUMERATION"; 1.76 + 1.77 + 1.78 + // stack of input entities being merged 1.79 + private InputEntity in; 1.80 + 1.81 + // temporaries reused during parsing 1.82 + private StringBuffer strTmp; 1.83 + private char nameTmp []; 1.84 + private NameCache nameCache; 1.85 + private char charTmp [] = new char[2]; 1.86 + 1.87 + // temporary DTD parsing state 1.88 + private boolean doLexicalPE; 1.89 + 1.90 + // DTD state, used during parsing 1.91 +// private SimpleHashtable elements = new SimpleHashtable (47); 1.92 + protected final Set declaredElements = new java.util.HashSet(); 1.93 + private SimpleHashtable params = new SimpleHashtable(7); 1.94 + 1.95 + // exposed to package-private subclass 1.96 + Hashtable notations = new Hashtable(7); 1.97 + SimpleHashtable entities = new SimpleHashtable(17); 1.98 + 1.99 + private SimpleHashtable ids = new SimpleHashtable(); 1.100 + 1.101 + // listeners for DTD parsing events 1.102 + private DTDEventListener dtdHandler; 1.103 + 1.104 + private EntityResolver resolver; 1.105 + private Locale locale; 1.106 + 1.107 + // string constants -- use these copies so "==" works 1.108 + // package private 1.109 + static final String strANY = "ANY"; 1.110 + static final String strEMPTY = "EMPTY"; 1.111 + 1.112 + /** 1.113 + * Used by applications to request locale for diagnostics. 1.114 + * 1.115 + * @param l The locale to use, or null to use system defaults 1.116 + * (which may include only message IDs). 1.117 + */ 1.118 + public void setLocale(Locale l) throws SAXException { 1.119 + 1.120 + if (l != null && !messages.isLocaleSupported(l.toString())) { 1.121 + throw new SAXException(messages.getMessage(locale, 1.122 + "P-078", new Object[]{l})); 1.123 + } 1.124 + locale = l; 1.125 + } 1.126 + 1.127 + /** 1.128 + * Returns the diagnostic locale. 1.129 + */ 1.130 + public Locale getLocale() { 1.131 + return locale; 1.132 + } 1.133 + 1.134 + /** 1.135 + * Chooses a client locale to use for diagnostics, using the first 1.136 + * language specified in the list that is supported by this parser. 1.137 + * That locale is then set using <a href="#setLocale(java.util.Locale)"> 1.138 + * setLocale()</a>. Such a list could be provided by a variety of user 1.139 + * preference mechanisms, including the HTTP <em>Accept-Language</em> 1.140 + * header field. 1.141 + * 1.142 + * @param languages Array of language specifiers, ordered with the most 1.143 + * preferable one at the front. For example, "en-ca" then "fr-ca", 1.144 + * followed by "zh_CN". Both RFC 1766 and Java styles are supported. 1.145 + * @return The chosen locale, or null. 1.146 + * @see MessageCatalog 1.147 + */ 1.148 + public Locale chooseLocale(String languages []) 1.149 + throws SAXException { 1.150 + 1.151 + Locale l = messages.chooseLocale(languages); 1.152 + 1.153 + if (l != null) { 1.154 + setLocale(l); 1.155 + } 1.156 + return l; 1.157 + } 1.158 + 1.159 + /** 1.160 + * Lets applications control entity resolution. 1.161 + */ 1.162 + public void setEntityResolver(EntityResolver r) { 1.163 + 1.164 + resolver = r; 1.165 + } 1.166 + 1.167 + /** 1.168 + * Returns the object used to resolve entities 1.169 + */ 1.170 + public EntityResolver getEntityResolver() { 1.171 + 1.172 + return resolver; 1.173 + } 1.174 + 1.175 + /** 1.176 + * Used by applications to set handling of DTD parsing events. 1.177 + */ 1.178 + public void setDtdHandler(DTDEventListener handler) { 1.179 + dtdHandler = handler; 1.180 + if (handler != null) 1.181 + handler.setDocumentLocator(new Locator() { 1.182 + public String getPublicId() { 1.183 + return DTDParser.this.getPublicId(); 1.184 + } 1.185 + 1.186 + public String getSystemId() { 1.187 + return DTDParser.this.getSystemId(); 1.188 + } 1.189 + 1.190 + public int getLineNumber() { 1.191 + return DTDParser.this.getLineNumber(); 1.192 + } 1.193 + 1.194 + public int getColumnNumber() { 1.195 + return DTDParser.this.getColumnNumber(); 1.196 + } 1.197 + }); 1.198 + } 1.199 + 1.200 + /** 1.201 + * Returns the handler used to for DTD parsing events. 1.202 + */ 1.203 + public DTDEventListener getDtdHandler() { 1.204 + return dtdHandler; 1.205 + } 1.206 + 1.207 + /** 1.208 + * Parse a DTD. 1.209 + */ 1.210 + public void parse(InputSource in) 1.211 + throws IOException, SAXException { 1.212 + init(); 1.213 + parseInternal(in); 1.214 + } 1.215 + 1.216 + /** 1.217 + * Parse a DTD. 1.218 + */ 1.219 + public void parse(String uri) 1.220 + throws IOException, SAXException { 1.221 + InputSource in; 1.222 + 1.223 + init(); 1.224 + // System.out.println ("parse (\"" + uri + "\")"); 1.225 + in = resolver.resolveEntity(null, uri); 1.226 + 1.227 + // If custom resolver punts resolution to parser, handle it ... 1.228 + if (in == null) { 1.229 + in = Resolver.createInputSource(new java.net.URL(uri), false); 1.230 + 1.231 + // ... or if custom resolver doesn't correctly construct the 1.232 + // input entity, patch it up enough so relative URIs work, and 1.233 + // issue a warning to minimize later confusion. 1.234 + } else if (in.getSystemId() == null) { 1.235 + warning("P-065", null); 1.236 + in.setSystemId(uri); 1.237 + } 1.238 + 1.239 + parseInternal(in); 1.240 + } 1.241 + 1.242 + // makes sure the parser is reset to "before a document" 1.243 + private void init() { 1.244 + in = null; 1.245 + 1.246 + // alloc temporary data used in parsing 1.247 + strTmp = new StringBuffer(); 1.248 + nameTmp = new char[20]; 1.249 + nameCache = new NameCache(); 1.250 + 1.251 + // reset doc info 1.252 +// isInAttribute = false; 1.253 + 1.254 + doLexicalPE = false; 1.255 + 1.256 + entities.clear(); 1.257 + notations.clear(); 1.258 + params.clear(); 1.259 + // elements.clear (); 1.260 + declaredElements.clear(); 1.261 + 1.262 + // initialize predefined references ... re-interpreted later 1.263 + builtin("amp", "&"); 1.264 + builtin("lt", "<"); 1.265 + builtin("gt", ">"); 1.266 + builtin("quot", "\""); 1.267 + builtin("apos", "'"); 1.268 + 1.269 + if (locale == null) 1.270 + locale = Locale.getDefault(); 1.271 + if (resolver == null) 1.272 + resolver = new Resolver(); 1.273 + if (dtdHandler == null) 1.274 + dtdHandler = new DTDHandlerBase(); 1.275 + } 1.276 + 1.277 + private void builtin(String entityName, String entityValue) { 1.278 + InternalEntity entity; 1.279 + entity = new InternalEntity(entityName, entityValue.toCharArray()); 1.280 + entities.put(entityName, entity); 1.281 + } 1.282 + 1.283 + 1.284 + //////////////////////////////////////////////////////////////// 1.285 + // 1.286 + // parsing is by recursive descent, code roughly 1.287 + // following the BNF rules except tweaked for simple 1.288 + // lookahead. rules are more or less in numeric order, 1.289 + // except where code sharing suggests other structures. 1.290 + // 1.291 + // a classic benefit of recursive descent parsers: it's 1.292 + // relatively easy to get diagnostics that make sense. 1.293 + // 1.294 + //////////////////////////////////////////////////////////////// 1.295 + 1.296 + 1.297 + private void parseInternal(InputSource input) 1.298 + throws IOException, SAXException { 1.299 + 1.300 + if (input == null) 1.301 + fatal("P-000"); 1.302 + 1.303 + try { 1.304 + in = InputEntity.getInputEntity(dtdHandler, locale); 1.305 + in.init(input, null, null, false); 1.306 + 1.307 + dtdHandler.startDTD(in); 1.308 + 1.309 + // [30] extSubset ::= TextDecl? extSubsetDecl 1.310 + // [31] extSubsetDecl ::= ( markupdecl | conditionalSect 1.311 + // | PEReference | S )* 1.312 + // ... same as [79] extPE, which is where the code is 1.313 + 1.314 + ExternalEntity externalSubset = new ExternalEntity(in); 1.315 + externalParameterEntity(externalSubset); 1.316 + 1.317 + if (!in.isEOF()) { 1.318 + fatal("P-001", new Object[] 1.319 + {Integer.toHexString(((int) getc()))}); 1.320 + } 1.321 + afterRoot(); 1.322 + dtdHandler.endDTD(); 1.323 + 1.324 + } catch (EndOfInputException e) { 1.325 + if (!in.isDocument()) { 1.326 + String name = in.getName(); 1.327 + do { // force a relevant URI and line number 1.328 + in = in.pop(); 1.329 + } while (in.isInternal()); 1.330 + fatal("P-002", new Object[]{name}); 1.331 + } else { 1.332 + fatal("P-003", null); 1.333 + } 1.334 + } catch (RuntimeException e) { 1.335 + // Don't discard location that triggered the exception 1.336 + // ## Should properly wrap exception 1.337 + System.err.print("Internal DTD parser error: "); // ## 1.338 + e.printStackTrace(); 1.339 + throw new SAXParseException(e.getMessage() != null 1.340 + ? e.getMessage() : e.getClass().getName(), 1.341 + getPublicId(), getSystemId(), 1.342 + getLineNumber(), getColumnNumber()); 1.343 + 1.344 + } finally { 1.345 + // recycle temporary data used during parsing 1.346 + strTmp = null; 1.347 + nameTmp = null; 1.348 + nameCache = null; 1.349 + 1.350 + // ditto input sources etc 1.351 + if (in != null) { 1.352 + in.close(); 1.353 + in = null; 1.354 + } 1.355 + 1.356 + // get rid of all DTD info ... some of it would be 1.357 + // useful for editors etc, investigate later. 1.358 + 1.359 + params.clear(); 1.360 + entities.clear(); 1.361 + notations.clear(); 1.362 + declaredElements.clear(); 1.363 +// elements.clear(); 1.364 + ids.clear(); 1.365 + } 1.366 + } 1.367 + 1.368 + void afterRoot() throws SAXException { 1.369 + // Make sure all IDREFs match declared ID attributes. We scan 1.370 + // after the document element is parsed, since XML allows forward 1.371 + // references, and only now can we know if they're all resolved. 1.372 + 1.373 + for (Enumeration e = ids.keys(); 1.374 + e.hasMoreElements(); 1.375 + ) { 1.376 + String id = (String) e.nextElement(); 1.377 + Boolean value = (Boolean) ids.get(id); 1.378 + if (Boolean.FALSE == value) 1.379 + error("V-024", new Object[]{id}); 1.380 + } 1.381 + } 1.382 + 1.383 + 1.384 + // role is for diagnostics 1.385 + private void whitespace(String roleId) 1.386 + throws IOException, SAXException { 1.387 + 1.388 + // [3] S ::= (#x20 | #x9 | #xd | #xa)+ 1.389 + if (!maybeWhitespace()) { 1.390 + fatal("P-004", new Object[] 1.391 + {messages.getMessage(locale, roleId)}); 1.392 + } 1.393 + } 1.394 + 1.395 + // S? 1.396 + private boolean maybeWhitespace() 1.397 + throws IOException, SAXException { 1.398 + 1.399 + if (!doLexicalPE) 1.400 + return in.maybeWhitespace(); 1.401 + 1.402 + // see getc() for the PE logic -- this lets us splice 1.403 + // expansions of PEs in "anywhere". getc() has smarts, 1.404 + // so for external PEs we don't bypass it. 1.405 + 1.406 + // XXX we can marginally speed PE handling, and certainly 1.407 + // be cleaner (hence potentially more correct), by using 1.408 + // the observations that expanded PEs only start and stop 1.409 + // where whitespace is allowed. getc wouldn't need any 1.410 + // "lexical" PE expansion logic, and no other method needs 1.411 + // to handle termination of PEs. (parsing of literals would 1.412 + // still need to pop entities, but not parsing of references 1.413 + // in content.) 1.414 + 1.415 + char c = getc(); 1.416 + boolean saw = false; 1.417 + 1.418 + while (c == ' ' || c == '\t' || c == '\n' || c == '\r') { 1.419 + saw = true; 1.420 + 1.421 + // this gracefully ends things when we stop playing 1.422 + // with internal parameters. caller should have a 1.423 + // grammar rule allowing whitespace at end of entity. 1.424 + if (in.isEOF() && !in.isInternal()) 1.425 + return saw; 1.426 + c = getc(); 1.427 + } 1.428 + ungetc(); 1.429 + return saw; 1.430 + } 1.431 + 1.432 + private String maybeGetName() 1.433 + throws IOException, SAXException { 1.434 + 1.435 + NameCacheEntry entry = maybeGetNameCacheEntry(); 1.436 + return (entry == null) ? null : entry.name; 1.437 + } 1.438 + 1.439 + private NameCacheEntry maybeGetNameCacheEntry() 1.440 + throws IOException, SAXException { 1.441 + 1.442 + // [5] Name ::= (Letter|'_'|':') (Namechar)* 1.443 + char c = getc(); 1.444 + 1.445 + if (!XmlChars.isLetter(c) && c != ':' && c != '_') { 1.446 + ungetc(); 1.447 + return null; 1.448 + } 1.449 + return nameCharString(c); 1.450 + } 1.451 + 1.452 + // Used when parsing enumerations 1.453 + private String getNmtoken() 1.454 + throws IOException, SAXException { 1.455 + 1.456 + // [7] Nmtoken ::= (Namechar)+ 1.457 + char c = getc(); 1.458 + if (!XmlChars.isNameChar(c)) 1.459 + fatal("P-006", new Object[]{new Character(c)}); 1.460 + return nameCharString(c).name; 1.461 + } 1.462 + 1.463 + // n.b. this gets used when parsing attribute values (for 1.464 + // internal references) so we can't use strTmp; it's also 1.465 + // a hotspot for CPU and memory in the parser (called at least 1.466 + // once for each element) so this has been optimized a bit. 1.467 + 1.468 + private NameCacheEntry nameCharString(char c) 1.469 + throws IOException, SAXException { 1.470 + 1.471 + int i = 1; 1.472 + 1.473 + nameTmp[0] = c; 1.474 + for (; ;) { 1.475 + if ((c = in.getNameChar()) == 0) 1.476 + break; 1.477 + if (i >= nameTmp.length) { 1.478 + char tmp [] = new char[nameTmp.length + 10]; 1.479 + System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length); 1.480 + nameTmp = tmp; 1.481 + } 1.482 + nameTmp[i++] = c; 1.483 + } 1.484 + return nameCache.lookupEntry(nameTmp, i); 1.485 + } 1.486 + 1.487 + // 1.488 + // much similarity between parsing entity values in DTD 1.489 + // and attribute values (in DTD or content) ... both follow 1.490 + // literal parsing rules, newline canonicalization, etc 1.491 + // 1.492 + // leaves value in 'strTmp' ... either a "replacement text" (4.5), 1.493 + // or else partially normalized attribute value (the first bit 1.494 + // of 3.3.3's spec, without the "if not CDATA" bits). 1.495 + // 1.496 + private void parseLiteral(boolean isEntityValue) 1.497 + throws IOException, SAXException { 1.498 + 1.499 + // [9] EntityValue ::= 1.500 + // '"' ([^"&%] | Reference | PEReference)* '"' 1.501 + // | "'" ([^'&%] | Reference | PEReference)* "'" 1.502 + // [10] AttValue ::= 1.503 + // '"' ([^"&] | Reference )* '"' 1.504 + // | "'" ([^'&] | Reference )* "'" 1.505 + char quote = getc(); 1.506 + char c; 1.507 + InputEntity source = in; 1.508 + 1.509 + if (quote != '\'' && quote != '"') { 1.510 + fatal("P-007"); 1.511 + } 1.512 + 1.513 + // don't report entity expansions within attributes, 1.514 + // they're reported "fully expanded" via SAX 1.515 +// isInAttribute = !isEntityValue; 1.516 + 1.517 + // get value into strTmp 1.518 + strTmp = new StringBuffer(); 1.519 + 1.520 + // scan, allowing entity push/pop wherever ... 1.521 + // expanded entities can't terminate the literal! 1.522 + for (; ;) { 1.523 + if (in != source && in.isEOF()) { 1.524 + // we don't report end of parsed entities 1.525 + // within attributes (no SAX hooks) 1.526 + in = in.pop(); 1.527 + continue; 1.528 + } 1.529 + if ((c = getc()) == quote && in == source) { 1.530 + break; 1.531 + } 1.532 + 1.533 + // 1.534 + // Basically the "reference in attribute value" 1.535 + // row of the chart in section 4.4 of the spec 1.536 + // 1.537 + if (c == '&') { 1.538 + String entityName = maybeGetName(); 1.539 + 1.540 + if (entityName != null) { 1.541 + nextChar(';', "F-020", entityName); 1.542 + 1.543 + // 4.4 says: bypass these here ... we'll catch 1.544 + // forbidden refs to unparsed entities on use 1.545 + if (isEntityValue) { 1.546 + strTmp.append('&'); 1.547 + strTmp.append(entityName); 1.548 + strTmp.append(';'); 1.549 + continue; 1.550 + } 1.551 + expandEntityInLiteral(entityName, entities, isEntityValue); 1.552 + 1.553 + 1.554 + // character references are always included immediately 1.555 + } else if ((c = getc()) == '#') { 1.556 + int tmp = parseCharNumber(); 1.557 + 1.558 + if (tmp > 0xffff) { 1.559 + tmp = surrogatesToCharTmp(tmp); 1.560 + strTmp.append(charTmp[0]); 1.561 + if (tmp == 2) 1.562 + strTmp.append(charTmp[1]); 1.563 + } else 1.564 + strTmp.append((char) tmp); 1.565 + } else 1.566 + fatal("P-009"); 1.567 + continue; 1.568 + 1.569 + } 1.570 + 1.571 + // expand parameter entities only within entity value literals 1.572 + if (c == '%' && isEntityValue) { 1.573 + String entityName = maybeGetName(); 1.574 + 1.575 + if (entityName != null) { 1.576 + nextChar(';', "F-021", entityName); 1.577 + expandEntityInLiteral(entityName, params, isEntityValue); 1.578 + continue; 1.579 + } else 1.580 + fatal("P-011"); 1.581 + } 1.582 + 1.583 + // For attribute values ... 1.584 + if (!isEntityValue) { 1.585 + // 3.3.3 says whitespace normalizes to space... 1.586 + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { 1.587 + strTmp.append(' '); 1.588 + continue; 1.589 + } 1.590 + 1.591 + // "<" not legal in parsed literals ... 1.592 + if (c == '<') 1.593 + fatal("P-012"); 1.594 + } 1.595 + 1.596 + strTmp.append(c); 1.597 + } 1.598 +// isInAttribute = false; 1.599 + } 1.600 + 1.601 + // does a SINGLE expansion of the entity (often reparsed later) 1.602 + private void expandEntityInLiteral(String name, SimpleHashtable table, 1.603 + boolean isEntityValue) 1.604 + throws IOException, SAXException { 1.605 + 1.606 + Object entity = table.get(name); 1.607 + 1.608 + if (entity instanceof InternalEntity) { 1.609 + InternalEntity value = (InternalEntity) entity; 1.610 + pushReader(value.buf, name, !value.isPE); 1.611 + 1.612 + } else if (entity instanceof ExternalEntity) { 1.613 + if (!isEntityValue) // must be a PE ... 1.614 + fatal("P-013", new Object[]{name}); 1.615 + // XXX if this returns false ... 1.616 + pushReader((ExternalEntity) entity); 1.617 + 1.618 + } else if (entity == null) { 1.619 + // 1.620 + // Note: much confusion about whether spec requires such 1.621 + // errors to be fatal in many cases, but none about whether 1.622 + // it allows "normal" errors to be unrecoverable! 1.623 + // 1.624 + fatal((table == params) ? "V-022" : "P-014", 1.625 + new Object[]{name}); 1.626 + } 1.627 + } 1.628 + 1.629 + // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 1.630 + // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>' 1.631 + 1.632 + // NOTE: XML spec should explicitly say that PE ref syntax is 1.633 + // ignored in PIs, comments, SystemLiterals, and Pubid Literal 1.634 + // values ... can't process the XML spec's own DTD without doing 1.635 + // that for comments. 1.636 + 1.637 + private String getQuotedString(String type, String extra) 1.638 + throws IOException, SAXException { 1.639 + 1.640 + // use in.getc to bypass PE processing 1.641 + char quote = in.getc(); 1.642 + 1.643 + if (quote != '\'' && quote != '"') 1.644 + fatal("P-015", new Object[]{ 1.645 + messages.getMessage(locale, type, new Object[]{extra}) 1.646 + }); 1.647 + 1.648 + char c; 1.649 + 1.650 + strTmp = new StringBuffer(); 1.651 + while ((c = in.getc()) != quote) 1.652 + strTmp.append((char) c); 1.653 + return strTmp.toString(); 1.654 + } 1.655 + 1.656 + 1.657 + private String parsePublicId() throws IOException, SAXException { 1.658 + 1.659 + // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'") 1.660 + // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%] 1.661 + String retval = getQuotedString("F-033", null); 1.662 + for (int i = 0; i < retval.length(); i++) { 1.663 + char c = retval.charAt(i); 1.664 + if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1 1.665 + && !(c >= 'A' && c <= 'Z') 1.666 + && !(c >= 'a' && c <= 'z')) 1.667 + fatal("P-016", new Object[]{new Character(c)}); 1.668 + } 1.669 + strTmp = new StringBuffer(); 1.670 + strTmp.append(retval); 1.671 + return normalize(false); 1.672 + } 1.673 + 1.674 + // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 1.675 + // handled by: InputEntity.parsedContent() 1.676 + 1.677 + private boolean maybeComment(boolean skipStart) 1.678 + throws IOException, SAXException { 1.679 + 1.680 + // [15] Comment ::= '<!--' 1.681 + // ( (Char - '-') | ('-' (Char - '-'))* 1.682 + // '-->' 1.683 + if (!in.peek(skipStart ? "!--" : "<!--", null)) 1.684 + return false; 1.685 + 1.686 + boolean savedLexicalPE = doLexicalPE; 1.687 + boolean saveCommentText; 1.688 + 1.689 + doLexicalPE = false; 1.690 + saveCommentText = false; 1.691 + if (saveCommentText) 1.692 + strTmp = new StringBuffer(); 1.693 + 1.694 + oneComment: 1.695 + for (; ;) { 1.696 + try { 1.697 + // bypass PE expansion, but permit PEs 1.698 + // to complete ... valid docs won't care. 1.699 + for (; ;) { 1.700 + int c = getc(); 1.701 + if (c == '-') { 1.702 + c = getc(); 1.703 + if (c != '-') { 1.704 + if (saveCommentText) 1.705 + strTmp.append('-'); 1.706 + ungetc(); 1.707 + continue; 1.708 + } 1.709 + nextChar('>', "F-022", null); 1.710 + break oneComment; 1.711 + } 1.712 + if (saveCommentText) 1.713 + strTmp.append((char) c); 1.714 + } 1.715 + } catch (EndOfInputException e) { 1.716 + // 1.717 + // This is fatal EXCEPT when we're processing a PE... 1.718 + // in which case a validating processor reports an error. 1.719 + // External PEs are easy to detect; internal ones we 1.720 + // infer by being an internal entity outside an element. 1.721 + // 1.722 + if (in.isInternal()) { 1.723 + error("V-021", null); 1.724 + } 1.725 + fatal("P-017"); 1.726 + } 1.727 + } 1.728 + doLexicalPE = savedLexicalPE; 1.729 + if (saveCommentText) 1.730 + dtdHandler.comment(strTmp.toString()); 1.731 + return true; 1.732 + } 1.733 + 1.734 + private boolean maybePI(boolean skipStart) 1.735 + throws IOException, SAXException { 1.736 + 1.737 + // [16] PI ::= '<?' PITarget 1.738 + // (S (Char* - (Char* '?>' Char*)))? 1.739 + // '?>' 1.740 + // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l') 1.741 + boolean savedLexicalPE = doLexicalPE; 1.742 + 1.743 + if (!in.peek(skipStart ? "?" : "<?", null)) 1.744 + return false; 1.745 + doLexicalPE = false; 1.746 + 1.747 + String target = maybeGetName(); 1.748 + 1.749 + if (target == null) { 1.750 + fatal("P-018"); 1.751 + } 1.752 + if ("xml".equals(target)) { 1.753 + fatal("P-019"); 1.754 + } 1.755 + if ("xml".equalsIgnoreCase(target)) { 1.756 + fatal("P-020", new Object[]{target}); 1.757 + } 1.758 + 1.759 + if (maybeWhitespace()) { 1.760 + strTmp = new StringBuffer(); 1.761 + try { 1.762 + for (; ;) { 1.763 + // use in.getc to bypass PE processing 1.764 + char c = in.getc(); 1.765 + //Reached the end of PI. 1.766 + if (c == '?' && in.peekc('>')) 1.767 + break; 1.768 + strTmp.append(c); 1.769 + } 1.770 + } catch (EndOfInputException e) { 1.771 + fatal("P-021"); 1.772 + } 1.773 + dtdHandler.processingInstruction(target, strTmp.toString()); 1.774 + } else { 1.775 + if (!in.peek("?>", null)) { 1.776 + fatal("P-022"); 1.777 + } 1.778 + dtdHandler.processingInstruction(target, ""); 1.779 + } 1.780 + 1.781 + doLexicalPE = savedLexicalPE; 1.782 + return true; 1.783 + } 1.784 + 1.785 + // [18] CDSect ::= CDStart CData CDEnd 1.786 + // [19] CDStart ::= '<![CDATA[' 1.787 + // [20] CData ::= (Char* - (Char* ']]>' Char*)) 1.788 + // [21] CDEnd ::= ']]>' 1.789 + // 1.790 + // ... handled by InputEntity.unparsedContent() 1.791 + 1.792 + // collapsing several rules together ... 1.793 + // simpler than attribute literals -- no reference parsing! 1.794 + private String maybeReadAttribute(String name, boolean must) 1.795 + throws IOException, SAXException { 1.796 + 1.797 + // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\" 1.798 + // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\" 1.799 + // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\" 1.800 + if (!maybeWhitespace()) { 1.801 + if (!must) { 1.802 + return null; 1.803 + } 1.804 + fatal("P-024", new Object[]{name}); 1.805 + // NOTREACHED 1.806 + } 1.807 + 1.808 + if (!peek(name)) { 1.809 + if (must) { 1.810 + fatal("P-024", new Object[]{name}); 1.811 + } else { 1.812 + // To ensure that the whitespace is there so that when we 1.813 + // check for the next attribute we assure that the 1.814 + // whitespace still exists. 1.815 + ungetc(); 1.816 + return null; 1.817 + } 1.818 + } 1.819 + 1.820 + // [25] Eq ::= S? '=' S? 1.821 + maybeWhitespace(); 1.822 + nextChar('=', "F-023", null); 1.823 + maybeWhitespace(); 1.824 + 1.825 + return getQuotedString("F-035", name); 1.826 + } 1.827 + 1.828 + private void readVersion(boolean must, String versionNum) 1.829 + throws IOException, SAXException { 1.830 + 1.831 + String value = maybeReadAttribute("version", must); 1.832 + 1.833 + // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+ 1.834 + 1.835 + if (must && value == null) 1.836 + fatal("P-025", new Object[]{versionNum}); 1.837 + if (value != null) { 1.838 + int length = value.length(); 1.839 + for (int i = 0; i < length; i++) { 1.840 + char c = value.charAt(i); 1.841 + if (!((c >= '0' && c <= '9') 1.842 + || c == '_' || c == '.' 1.843 + || (c >= 'a' && c <= 'z') 1.844 + || (c >= 'A' && c <= 'Z') 1.845 + || c == ':' || c == '-') 1.846 + ) 1.847 + fatal("P-026", new Object[]{value}); 1.848 + } 1.849 + } 1.850 + if (value != null && !value.equals(versionNum)) 1.851 + error("P-027", new Object[]{versionNum, value}); 1.852 + } 1.853 + 1.854 + // common code used by most markup declarations 1.855 + // ... S (Q)Name ... 1.856 + private String getMarkupDeclname(String roleId, boolean qname) 1.857 + throws IOException, SAXException { 1.858 + 1.859 + String name; 1.860 + 1.861 + whitespace(roleId); 1.862 + name = maybeGetName(); 1.863 + if (name == null) 1.864 + fatal("P-005", new Object[] 1.865 + {messages.getMessage(locale, roleId)}); 1.866 + return name; 1.867 + } 1.868 + 1.869 + private boolean maybeMarkupDecl() 1.870 + throws IOException, SAXException { 1.871 + 1.872 + // [29] markupdecl ::= elementdecl | Attlistdecl 1.873 + // | EntityDecl | NotationDecl | PI | Comment 1.874 + return maybeElementDecl() 1.875 + || maybeAttlistDecl() 1.876 + || maybeEntityDecl() 1.877 + || maybeNotationDecl() 1.878 + || maybePI(false) 1.879 + || maybeComment(false); 1.880 + } 1.881 + 1.882 + private static final String XmlLang = "xml:lang"; 1.883 + 1.884 + private boolean isXmlLang(String value) { 1.885 + 1.886 + // [33] LanguageId ::= Langcode ('-' Subcode)* 1.887 + // [34] Langcode ::= ISO639Code | IanaCode | UserCode 1.888 + // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z] 1.889 + // [36] IanaCode ::= [iI] '-' SubCode 1.890 + // [37] UserCode ::= [xX] '-' SubCode 1.891 + // [38] SubCode ::= [a-zA-Z]+ 1.892 + 1.893 + // the ISO and IANA codes (and subcodes) are registered, 1.894 + // but that's neither a WF nor a validity constraint. 1.895 + 1.896 + int nextSuffix; 1.897 + char c; 1.898 + 1.899 + if (value.length() < 2) 1.900 + return false; 1.901 + c = value.charAt(1); 1.902 + if (c == '-') { // IANA, or user, code 1.903 + c = value.charAt(0); 1.904 + if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X')) 1.905 + return false; 1.906 + nextSuffix = 1; 1.907 + } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { 1.908 + // 2 letter ISO code, or error 1.909 + c = value.charAt(0); 1.910 + if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) 1.911 + return false; 1.912 + nextSuffix = 2; 1.913 + } else 1.914 + return false; 1.915 + 1.916 + // here "suffix" ::= '-' [a-zA-Z]+ suffix* 1.917 + while (nextSuffix < value.length()) { 1.918 + c = value.charAt(nextSuffix); 1.919 + if (c != '-') 1.920 + break; 1.921 + while (++nextSuffix < value.length()) { 1.922 + c = value.charAt(nextSuffix); 1.923 + if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) 1.924 + break; 1.925 + } 1.926 + } 1.927 + return value.length() == nextSuffix && c != '-'; 1.928 + } 1.929 + 1.930 + 1.931 + // 1.932 + // CHAPTER 3: Logical Structures 1.933 + // 1.934 + 1.935 + /** 1.936 + * To validate, subclassers should at this time make sure that 1.937 + * values are of the declared types:<UL> 1.938 + * <LI> ID and IDREF(S) values are Names 1.939 + * <LI> NMTOKEN(S) are Nmtokens 1.940 + * <LI> ENUMERATION values match one of the tokens 1.941 + * <LI> NOTATION values match a notation name 1.942 + * <LI> ENTITIY(IES) values match an unparsed external entity 1.943 + * </UL> 1.944 + * <p/> 1.945 + * <P> Separately, make sure IDREF values match some ID 1.946 + * provided in the document (in the afterRoot method). 1.947 + */ 1.948 +/* void validateAttributeSyntax (Attribute attr, String value) 1.949 + throws DTDParseException { 1.950 + // ID, IDREF(S) ... values are Names 1.951 + if (Attribute.ID == attr.type()) { 1.952 + if (!XmlNames.isName (value)) 1.953 + error ("V-025", new Object [] { value }); 1.954 + 1.955 + Boolean b = (Boolean) ids.getNonInterned (value); 1.956 + if (b == null || b.equals (Boolean.FALSE)) 1.957 + ids.put (value.intern (), Boolean.TRUE); 1.958 + else 1.959 + error ("V-026", new Object [] { value }); 1.960 + 1.961 + } else if (Attribute.IDREF == attr.type()) { 1.962 + if (!XmlNames.isName (value)) 1.963 + error ("V-027", new Object [] { value }); 1.964 + 1.965 + Boolean b = (Boolean) ids.getNonInterned (value); 1.966 + if (b == null) 1.967 + ids.put (value.intern (), Boolean.FALSE); 1.968 + 1.969 + } else if (Attribute.IDREFS == attr.type()) { 1.970 + StringTokenizer tokenizer = new StringTokenizer (value); 1.971 + Boolean b; 1.972 + boolean sawValue = false; 1.973 + 1.974 + while (tokenizer.hasMoreTokens ()) { 1.975 + value = tokenizer.nextToken (); 1.976 + if (!XmlNames.isName (value)) 1.977 + error ("V-027", new Object [] { value }); 1.978 + b = (Boolean) ids.getNonInterned (value); 1.979 + if (b == null) 1.980 + ids.put (value.intern (), Boolean.FALSE); 1.981 + sawValue = true; 1.982 + } 1.983 + if (!sawValue) 1.984 + error ("V-039", null); 1.985 + 1.986 + 1.987 + // NMTOKEN(S) ... values are Nmtoken(s) 1.988 + } else if (Attribute.NMTOKEN == attr.type()) { 1.989 + if (!XmlNames.isNmtoken (value)) 1.990 + error ("V-028", new Object [] { value }); 1.991 + 1.992 + } else if (Attribute.NMTOKENS == attr.type()) { 1.993 + StringTokenizer tokenizer = new StringTokenizer (value); 1.994 + boolean sawValue = false; 1.995 + 1.996 + while (tokenizer.hasMoreTokens ()) { 1.997 + value = tokenizer.nextToken (); 1.998 + if (!XmlNames.isNmtoken (value)) 1.999 + error ("V-028", new Object [] { value }); 1.1000 + sawValue = true; 1.1001 + } 1.1002 + if (!sawValue) 1.1003 + error ("V-032", null); 1.1004 + 1.1005 + // ENUMERATION ... values match one of the tokens 1.1006 + } else if (Attribute.ENUMERATION == attr.type()) { 1.1007 + for (int i = 0; i < attr.values().length; i++) 1.1008 + if (value.equals (attr.values()[i])) 1.1009 + return; 1.1010 + error ("V-029", new Object [] { value }); 1.1011 + 1.1012 + // NOTATION values match a notation name 1.1013 + } else if (Attribute.NOTATION == attr.type()) { 1.1014 + // 1.1015 + // XXX XML 1.0 spec should probably list references to 1.1016 + // externally defined notations in standalone docs as 1.1017 + // validity errors. Ditto externally defined unparsed 1.1018 + // entities; neither should show up in attributes, else 1.1019 + // one needs to read the external declarations in order 1.1020 + // to make sense of the document (exactly what tagging 1.1021 + // a doc as "standalone" intends you won't need to do). 1.1022 + // 1.1023 + for (int i = 0; i < attr.values().length; i++) 1.1024 + if (value.equals (attr.values()[i])) 1.1025 + return; 1.1026 + error ("V-030", new Object [] { value }); 1.1027 + 1.1028 + // ENTITY(IES) values match an unparsed entity(ies) 1.1029 + } else if (Attribute.ENTITY == attr.type()) { 1.1030 + // see note above re standalone 1.1031 + if (!isUnparsedEntity (value)) 1.1032 + error ("V-031", new Object [] { value }); 1.1033 + 1.1034 + } else if (Attribute.ENTITIES == attr.type()) { 1.1035 + StringTokenizer tokenizer = new StringTokenizer (value); 1.1036 + boolean sawValue = false; 1.1037 + 1.1038 + while (tokenizer.hasMoreTokens ()) { 1.1039 + value = tokenizer.nextToken (); 1.1040 + // see note above re standalone 1.1041 + if (!isUnparsedEntity (value)) 1.1042 + error ("V-031", new Object [] { value }); 1.1043 + sawValue = true; 1.1044 + } 1.1045 + if (!sawValue) 1.1046 + error ("V-040", null); 1.1047 + 1.1048 + } else if (Attribute.CDATA != attr.type()) 1.1049 + throw new InternalError (attr.type()); 1.1050 + } 1.1051 +*/ 1.1052 +/* 1.1053 + private boolean isUnparsedEntity (String name) 1.1054 + { 1.1055 + Object e = entities.getNonInterned (name); 1.1056 + if (e == null || !(e instanceof ExternalEntity)) 1.1057 + return false; 1.1058 + return ((ExternalEntity)e).notation != null; 1.1059 + } 1.1060 +*/ 1.1061 + private boolean maybeElementDecl() 1.1062 + throws IOException, SAXException { 1.1063 + 1.1064 + // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>' 1.1065 + // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 1.1066 + InputEntity start = peekDeclaration("!ELEMENT"); 1.1067 + 1.1068 + if (start == null) 1.1069 + return false; 1.1070 + 1.1071 + // n.b. for content models where inter-element whitespace is 1.1072 + // ignorable, we mark that fact here. 1.1073 + String name = getMarkupDeclname("F-015", true); 1.1074 +// Element element = (Element) elements.get (name); 1.1075 +// boolean declEffective = false; 1.1076 + 1.1077 +/* 1.1078 + if (element != null) { 1.1079 + if (element.contentModel() != null) { 1.1080 + error ("V-012", new Object [] { name }); 1.1081 + } // else <!ATTLIST name ...> came first 1.1082 + } else { 1.1083 + element = new Element(name); 1.1084 + elements.put (element.name(), element); 1.1085 + declEffective = true; 1.1086 + } 1.1087 +*/ 1.1088 + if (declaredElements.contains(name)) 1.1089 + error("V-012", new Object[]{name}); 1.1090 + else { 1.1091 + declaredElements.add(name); 1.1092 +// declEffective = true; 1.1093 + } 1.1094 + 1.1095 + short modelType; 1.1096 + whitespace("F-000"); 1.1097 + if (peek(strEMPTY)) { 1.1098 +/// // leave element.contentModel as null for this case. 1.1099 + dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY); 1.1100 + } else if (peek(strANY)) { 1.1101 +/// element.setContentModel(new StringModel(StringModelType.ANY)); 1.1102 + dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY); 1.1103 + } else { 1.1104 + modelType = getMixedOrChildren(name); 1.1105 + } 1.1106 + 1.1107 + dtdHandler.endContentModel(name, modelType); 1.1108 + 1.1109 + maybeWhitespace(); 1.1110 + char c = getc(); 1.1111 + if (c != '>') 1.1112 + fatal("P-036", new Object[]{name, new Character(c)}); 1.1113 + if (start != in) 1.1114 + error("V-013", null); 1.1115 + 1.1116 +/// dtdHandler.elementDecl(element); 1.1117 + 1.1118 + return true; 1.1119 + } 1.1120 + 1.1121 + // We're leaving the content model as a regular expression; 1.1122 + // it's an efficient natural way to express such things, and 1.1123 + // libraries often interpret them. No whitespace in the 1.1124 + // model we store, though! 1.1125 + 1.1126 + /** 1.1127 + * returns content model type. 1.1128 + */ 1.1129 + private short getMixedOrChildren(String elementName/*Element element*/) 1.1130 + throws IOException, SAXException { 1.1131 + 1.1132 + InputEntity start; 1.1133 + 1.1134 + // [47] children ::= (choice|seq) ('?'|'*'|'+')? 1.1135 + strTmp = new StringBuffer(); 1.1136 + 1.1137 + nextChar('(', "F-028", elementName); 1.1138 + start = in; 1.1139 + maybeWhitespace(); 1.1140 + strTmp.append('('); 1.1141 + 1.1142 + short modelType; 1.1143 + if (peek("#PCDATA")) { 1.1144 + strTmp.append("#PCDATA"); 1.1145 + dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED); 1.1146 + getMixed(elementName, start); 1.1147 + } else { 1.1148 + dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN); 1.1149 + getcps(elementName, start); 1.1150 + } 1.1151 + 1.1152 + return modelType; 1.1153 + } 1.1154 + 1.1155 + // '(' S? already consumed 1.1156 + // matching ')' must be in "start" entity if validating 1.1157 + private void getcps(/*Element element,*/String elementName, InputEntity start) 1.1158 + throws IOException, SAXException { 1.1159 + 1.1160 + // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')? 1.1161 + // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')' 1.1162 + // [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')' 1.1163 + boolean decided = false; 1.1164 + char type = 0; 1.1165 +// ContentModel retval, temp, current; 1.1166 + 1.1167 +// retval = temp = current = null; 1.1168 + 1.1169 + dtdHandler.startModelGroup(); 1.1170 + 1.1171 + do { 1.1172 + String tag; 1.1173 + 1.1174 + tag = maybeGetName(); 1.1175 + if (tag != null) { 1.1176 + strTmp.append(tag); 1.1177 +// temp = new ElementModel(tag); 1.1178 +// getFrequency((RepeatableContent)temp); 1.1179 +///-> 1.1180 + dtdHandler.childElement(tag, getFrequency()); 1.1181 +///<- 1.1182 + } else if (peek("(")) { 1.1183 + InputEntity next = in; 1.1184 + strTmp.append('('); 1.1185 + maybeWhitespace(); 1.1186 +// temp = getcps(element, next); 1.1187 +// getFrequency(temp); 1.1188 +///-> 1.1189 + getcps(elementName, next); 1.1190 +/// getFrequency(); <- this looks like a bug 1.1191 +///<- 1.1192 + } else 1.1193 + fatal((type == 0) ? "P-039" : 1.1194 + ((type == ',') ? "P-037" : "P-038"), 1.1195 + new Object[]{new Character(getc())}); 1.1196 + 1.1197 + maybeWhitespace(); 1.1198 + if (decided) { 1.1199 + char c = getc(); 1.1200 + 1.1201 +// if (current != null) { 1.1202 +// current.addChild(temp); 1.1203 +// } 1.1204 + if (c == type) { 1.1205 + strTmp.append(type); 1.1206 + maybeWhitespace(); 1.1207 + reportConnector(type); 1.1208 + continue; 1.1209 + } else if (c == '\u0029') { // rparen 1.1210 + ungetc(); 1.1211 + continue; 1.1212 + } else { 1.1213 + fatal((type == 0) ? "P-041" : "P-040", 1.1214 + new Object[]{ 1.1215 + new Character(c), 1.1216 + new Character(type) 1.1217 + }); 1.1218 + } 1.1219 + } else { 1.1220 + type = getc(); 1.1221 + switch (type) { 1.1222 + case '|': 1.1223 + case ',': 1.1224 + reportConnector(type); 1.1225 + break; 1.1226 + default: 1.1227 +// retval = temp; 1.1228 + ungetc(); 1.1229 + continue; 1.1230 + } 1.1231 +// retval = (ContentModel)current; 1.1232 + decided = true; 1.1233 +// current.addChild(temp); 1.1234 + strTmp.append(type); 1.1235 + } 1.1236 + maybeWhitespace(); 1.1237 + } while (!peek(")")); 1.1238 + 1.1239 + if (in != start) 1.1240 + error("V-014", new Object[]{elementName}); 1.1241 + strTmp.append(')'); 1.1242 + 1.1243 + dtdHandler.endModelGroup(getFrequency()); 1.1244 +// return retval; 1.1245 + } 1.1246 + 1.1247 + private void reportConnector(char type) throws SAXException { 1.1248 + switch (type) { 1.1249 + case '|': 1.1250 + dtdHandler.connector(DTDEventListener.CHOICE); ///<- 1.1251 + return; 1.1252 + case ',': 1.1253 + dtdHandler.connector(DTDEventListener.SEQUENCE); ///<- 1.1254 + return; 1.1255 + default: 1.1256 + throw new Error(); //assertion failed. 1.1257 + } 1.1258 + } 1.1259 + 1.1260 + private short getFrequency() 1.1261 + throws IOException, SAXException { 1.1262 + 1.1263 + final char c = getc(); 1.1264 + 1.1265 + if (c == '?') { 1.1266 + strTmp.append(c); 1.1267 + return DTDEventListener.OCCURENCE_ZERO_OR_ONE; 1.1268 + // original.setRepeat(Repeat.ZERO_OR_ONE); 1.1269 + } else if (c == '+') { 1.1270 + strTmp.append(c); 1.1271 + return DTDEventListener.OCCURENCE_ONE_OR_MORE; 1.1272 + // original.setRepeat(Repeat.ONE_OR_MORE); 1.1273 + } else if (c == '*') { 1.1274 + strTmp.append(c); 1.1275 + return DTDEventListener.OCCURENCE_ZERO_OR_MORE; 1.1276 + // original.setRepeat(Repeat.ZERO_OR_MORE); 1.1277 + } else { 1.1278 + ungetc(); 1.1279 + return DTDEventListener.OCCURENCE_ONCE; 1.1280 + } 1.1281 + } 1.1282 + 1.1283 + // '(' S? '#PCDATA' already consumed 1.1284 + // matching ')' must be in "start" entity if validating 1.1285 + private void getMixed(String elementName, /*Element element,*/ InputEntity start) 1.1286 + throws IOException, SAXException { 1.1287 + 1.1288 + // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' 1.1289 + // | '(' S? '#PCDATA' S? ')' 1.1290 + maybeWhitespace(); 1.1291 + if (peek("\u0029*") || peek("\u0029")) { 1.1292 + if (in != start) 1.1293 + error("V-014", new Object[]{elementName}); 1.1294 + strTmp.append(')'); 1.1295 +// element.setContentModel(new StringModel(StringModelType.PCDATA)); 1.1296 + return; 1.1297 + } 1.1298 + 1.1299 + ArrayList l = new ArrayList(); 1.1300 +// l.add(new StringModel(StringModelType.PCDATA)); 1.1301 + 1.1302 + 1.1303 + while (peek("|")) { 1.1304 + String name; 1.1305 + 1.1306 + strTmp.append('|'); 1.1307 + maybeWhitespace(); 1.1308 + 1.1309 + doLexicalPE = true; 1.1310 + name = maybeGetName(); 1.1311 + if (name == null) 1.1312 + fatal("P-042", new Object[] 1.1313 + {elementName, Integer.toHexString(getc())}); 1.1314 + if (l.contains(name)) { 1.1315 + error("V-015", new Object[]{name}); 1.1316 + } else { 1.1317 + l.add(name); 1.1318 + dtdHandler.mixedElement(name); 1.1319 + } 1.1320 + strTmp.append(name); 1.1321 + maybeWhitespace(); 1.1322 + } 1.1323 + 1.1324 + if (!peek("\u0029*")) // right paren 1.1325 + fatal("P-043", new Object[] 1.1326 + {elementName, new Character(getc())}); 1.1327 + if (in != start) 1.1328 + error("V-014", new Object[]{elementName}); 1.1329 + strTmp.append(')'); 1.1330 +// ChoiceModel cm = new ChoiceModel((Collection)l); 1.1331 +// cm.setRepeat(Repeat.ZERO_OR_MORE); 1.1332 +// element.setContentModel(cm); 1.1333 + } 1.1334 + 1.1335 + private boolean maybeAttlistDecl() 1.1336 + throws IOException, SAXException { 1.1337 + 1.1338 + // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 1.1339 + InputEntity start = peekDeclaration("!ATTLIST"); 1.1340 + 1.1341 + if (start == null) 1.1342 + return false; 1.1343 + 1.1344 + String elementName = getMarkupDeclname("F-016", true); 1.1345 +// Element element = (Element) elements.get (name); 1.1346 + 1.1347 +// if (element == null) { 1.1348 +// // not yet declared -- no problem. 1.1349 +// element = new Element(name); 1.1350 +// elements.put(name, element); 1.1351 +// } 1.1352 + 1.1353 + while (!peek(">")) { 1.1354 + 1.1355 + // [53] AttDef ::= S Name S AttType S DefaultDecl 1.1356 + // [54] AttType ::= StringType | TokenizedType | EnumeratedType 1.1357 + 1.1358 + // look for global attribute definitions, don't expand for now... 1.1359 + maybeWhitespace(); 1.1360 + char c = getc(); 1.1361 + if (c == '%') { 1.1362 + String entityName = maybeGetName(); 1.1363 + if (entityName != null) { 1.1364 + nextChar(';', "F-021", entityName); 1.1365 + whitespace("F-021"); 1.1366 + continue; 1.1367 + } else 1.1368 + fatal("P-011"); 1.1369 + } 1.1370 + 1.1371 + ungetc(); 1.1372 + // look for attribute name otherwise 1.1373 + String attName = maybeGetName(); 1.1374 + if (attName == null) { 1.1375 + fatal("P-044", new Object[]{new Character(getc())}); 1.1376 + } 1.1377 + whitespace("F-001"); 1.1378 + 1.1379 +/// Attribute a = new Attribute (name); 1.1380 + 1.1381 + String typeName; 1.1382 + Vector values = null; // notation/enumeration values 1.1383 + 1.1384 + // Note: use the type constants from Attribute 1.1385 + // so that "==" may be used (faster) 1.1386 + 1.1387 + // [55] StringType ::= 'CDATA' 1.1388 + if (peek(TYPE_CDATA)) 1.1389 +/// a.setType(Attribute.CDATA); 1.1390 + typeName = TYPE_CDATA; 1.1391 + 1.1392 + // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' 1.1393 + // | 'ENTITY' | 'ENTITIES' 1.1394 + // | 'NMTOKEN' | 'NMTOKENS' 1.1395 + // n.b. if "IDREFS" is there, both "ID" and "IDREF" 1.1396 + // match peekahead ... so this order matters! 1.1397 + else if (peek(TYPE_IDREFS)) 1.1398 + typeName = TYPE_IDREFS; 1.1399 + else if (peek(TYPE_IDREF)) 1.1400 + typeName = TYPE_IDREF; 1.1401 + else if (peek(TYPE_ID)) { 1.1402 + typeName = TYPE_ID; 1.1403 +// TODO: should implement this error check? 1.1404 +/// if (element.id() != null) { 1.1405 +/// error ("V-016", new Object [] { element.id() }); 1.1406 +/// } else 1.1407 +/// element.setId(name); 1.1408 + } else if (peek(TYPE_ENTITY)) 1.1409 + typeName = TYPE_ENTITY; 1.1410 + else if (peek(TYPE_ENTITIES)) 1.1411 + typeName = TYPE_ENTITIES; 1.1412 + else if (peek(TYPE_NMTOKENS)) 1.1413 + typeName = TYPE_NMTOKENS; 1.1414 + else if (peek(TYPE_NMTOKEN)) 1.1415 + typeName = TYPE_NMTOKEN; 1.1416 + 1.1417 + // [57] EnumeratedType ::= NotationType | Enumeration 1.1418 + // [58] NotationType ::= 'NOTATION' S '(' S? Name 1.1419 + // (S? '|' S? Name)* S? ')' 1.1420 + else if (peek(TYPE_NOTATION)) { 1.1421 + typeName = TYPE_NOTATION; 1.1422 + whitespace("F-002"); 1.1423 + nextChar('(', "F-029", null); 1.1424 + maybeWhitespace(); 1.1425 + 1.1426 + values = new Vector(); 1.1427 + do { 1.1428 + String name; 1.1429 + if ((name = maybeGetName()) == null) 1.1430 + fatal("P-068"); 1.1431 + // permit deferred declarations 1.1432 + if (notations.get(name) == null) 1.1433 + notations.put(name, name); 1.1434 + values.addElement(name); 1.1435 + maybeWhitespace(); 1.1436 + if (peek("|")) 1.1437 + maybeWhitespace(); 1.1438 + } while (!peek(")")); 1.1439 +/// a.setValues(new String [v.size ()]); 1.1440 +/// for (int i = 0; i < v.size (); i++) 1.1441 +/// a.setValue(i, (String)v.elementAt(i)); 1.1442 + 1.1443 + // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')' 1.1444 + } else if (peek("(")) { 1.1445 +/// a.setType(Attribute.ENUMERATION); 1.1446 + typeName = TYPE_ENUMERATION; 1.1447 + 1.1448 + maybeWhitespace(); 1.1449 + 1.1450 +/// Vector v = new Vector (); 1.1451 + values = new Vector(); 1.1452 + do { 1.1453 + String name = getNmtoken(); 1.1454 +/// v.addElement (name); 1.1455 + values.addElement(name); 1.1456 + maybeWhitespace(); 1.1457 + if (peek("|")) 1.1458 + maybeWhitespace(); 1.1459 + } while (!peek(")")); 1.1460 +/// a.setValues(new String [v.size ()]); 1.1461 +/// for (int i = 0; i < v.size (); i++) 1.1462 +/// a.setValue(i, (String)v.elementAt(i)); 1.1463 + } else { 1.1464 + fatal("P-045", 1.1465 + new Object[]{attName, new Character(getc())}); 1.1466 + typeName = null; 1.1467 + } 1.1468 + 1.1469 + short attributeUse; 1.1470 + String defaultValue = null; 1.1471 + 1.1472 + // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' 1.1473 + // | (('#FIXED' S)? AttValue) 1.1474 + whitespace("F-003"); 1.1475 + if (peek("#REQUIRED")) 1.1476 + attributeUse = DTDEventListener.USE_REQUIRED; 1.1477 +/// a.setIsRequired(true); 1.1478 + else if (peek("#FIXED")) { 1.1479 +/// if (a.type() == Attribute.ID) 1.1480 + if (typeName == TYPE_ID) 1.1481 + error("V-017", new Object[]{attName}); 1.1482 +/// a.setIsFixed(true); 1.1483 + attributeUse = DTDEventListener.USE_FIXED; 1.1484 + whitespace("F-004"); 1.1485 + parseLiteral(false); 1.1486 +/// if (a.type() != Attribute.CDATA) 1.1487 +/// a.setDefaultValue(normalize(false)); 1.1488 +/// else 1.1489 +/// a.setDefaultValue(strTmp.toString()); 1.1490 + 1.1491 + if (typeName == TYPE_CDATA) 1.1492 + defaultValue = normalize(false); 1.1493 + else 1.1494 + defaultValue = strTmp.toString(); 1.1495 + 1.1496 +// TODO: implement this check 1.1497 +/// if (a.type() != Attribute.CDATA) 1.1498 +/// validateAttributeSyntax (a, a.defaultValue()); 1.1499 + } else if (!peek("#IMPLIED")) { 1.1500 + attributeUse = DTDEventListener.USE_IMPLIED; 1.1501 + 1.1502 +/// if (a.type() == Attribute.ID) 1.1503 + if (typeName == TYPE_ID) 1.1504 + error("V-018", new Object[]{attName}); 1.1505 + parseLiteral(false); 1.1506 +/// if (a.type() != Attribute.CDATA) 1.1507 +/// a.setDefaultValue(normalize(false)); 1.1508 +/// else 1.1509 +/// a.setDefaultValue(strTmp.toString()); 1.1510 + if (typeName == TYPE_CDATA) 1.1511 + defaultValue = normalize(false); 1.1512 + else 1.1513 + defaultValue = strTmp.toString(); 1.1514 + 1.1515 +// TODO: implement this check 1.1516 +/// if (a.type() != Attribute.CDATA) 1.1517 +/// validateAttributeSyntax (a, a.defaultValue()); 1.1518 + } else { 1.1519 + // TODO: this looks like an fatal error. 1.1520 + attributeUse = DTDEventListener.USE_NORMAL; 1.1521 + } 1.1522 + 1.1523 + if (XmlLang.equals(attName) 1.1524 + && defaultValue/* a.defaultValue()*/ != null 1.1525 + && !isXmlLang(defaultValue/*a.defaultValue()*/)) 1.1526 + error("P-033", new Object[]{defaultValue /*a.defaultValue()*/}); 1.1527 + 1.1528 +// TODO: isn't it an error to specify the same attribute twice? 1.1529 +/// if (!element.attributes().contains(a)) { 1.1530 +/// element.addAttribute(a); 1.1531 +/// dtdHandler.attributeDecl(a); 1.1532 +/// } 1.1533 + 1.1534 + String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null; 1.1535 + dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue); 1.1536 + maybeWhitespace(); 1.1537 + } 1.1538 + if (start != in) 1.1539 + error("V-013", null); 1.1540 + return true; 1.1541 + } 1.1542 + 1.1543 + // used when parsing literal attribute values, 1.1544 + // or public identifiers. 1.1545 + // 1.1546 + // input in strTmp 1.1547 + private String normalize(boolean invalidIfNeeded) { 1.1548 + 1.1549 + // this can allocate an extra string... 1.1550 + 1.1551 + String s = strTmp.toString(); 1.1552 + String s2 = s.trim(); 1.1553 + boolean didStrip = false; 1.1554 + 1.1555 + if (s != s2) { 1.1556 + s = s2; 1.1557 + s2 = null; 1.1558 + didStrip = true; 1.1559 + } 1.1560 + strTmp = new StringBuffer(); 1.1561 + for (int i = 0; i < s.length(); i++) { 1.1562 + char c = s.charAt(i); 1.1563 + if (!XmlChars.isSpace(c)) { 1.1564 + strTmp.append(c); 1.1565 + continue; 1.1566 + } 1.1567 + strTmp.append(' '); 1.1568 + while (++i < s.length() && XmlChars.isSpace(s.charAt(i))) 1.1569 + didStrip = true; 1.1570 + i--; 1.1571 + } 1.1572 + if (didStrip) 1.1573 + return strTmp.toString(); 1.1574 + else 1.1575 + return s; 1.1576 + } 1.1577 + 1.1578 + private boolean maybeConditionalSect() 1.1579 + throws IOException, SAXException { 1.1580 + 1.1581 + // [61] conditionalSect ::= includeSect | ignoreSect 1.1582 + 1.1583 + if (!peek("<![")) 1.1584 + return false; 1.1585 + 1.1586 + String keyword; 1.1587 + InputEntity start = in; 1.1588 + 1.1589 + maybeWhitespace(); 1.1590 + 1.1591 + if ((keyword = maybeGetName()) == null) 1.1592 + fatal("P-046"); 1.1593 + maybeWhitespace(); 1.1594 + nextChar('[', "F-030", null); 1.1595 + 1.1596 + // [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' 1.1597 + // extSubsetDecl ']]>' 1.1598 + if ("INCLUDE".equals(keyword)) { 1.1599 + for (; ;) { 1.1600 + while (in.isEOF() && in != start) 1.1601 + in = in.pop(); 1.1602 + if (in.isEOF()) { 1.1603 + error("V-020", null); 1.1604 + } 1.1605 + if (peek("]]>")) 1.1606 + break; 1.1607 + 1.1608 + doLexicalPE = false; 1.1609 + if (maybeWhitespace()) 1.1610 + continue; 1.1611 + if (maybePEReference()) 1.1612 + continue; 1.1613 + doLexicalPE = true; 1.1614 + if (maybeMarkupDecl() || maybeConditionalSect()) 1.1615 + continue; 1.1616 + 1.1617 + fatal("P-047"); 1.1618 + } 1.1619 + 1.1620 + // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' 1.1621 + // ignoreSectcontents ']]>' 1.1622 + // [64] ignoreSectcontents ::= Ignore ('<![' 1.1623 + // ignoreSectcontents ']]>' Ignore)* 1.1624 + // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 1.1625 + } else if ("IGNORE".equals(keyword)) { 1.1626 + int nestlevel = 1; 1.1627 + // ignoreSectcontents 1.1628 + doLexicalPE = false; 1.1629 + while (nestlevel > 0) { 1.1630 + char c = getc(); // will pop input entities 1.1631 + if (c == '<') { 1.1632 + if (peek("![")) 1.1633 + nestlevel++; 1.1634 + } else if (c == ']') { 1.1635 + if (peek("]>")) 1.1636 + nestlevel--; 1.1637 + } else 1.1638 + continue; 1.1639 + } 1.1640 + } else 1.1641 + fatal("P-048", new Object[]{keyword}); 1.1642 + return true; 1.1643 + } 1.1644 + 1.1645 + 1.1646 + // 1.1647 + // CHAPTER 4: Physical Structures 1.1648 + // 1.1649 + 1.1650 + // parse decimal or hex numeric character reference 1.1651 + private int parseCharNumber() 1.1652 + throws IOException, SAXException { 1.1653 + 1.1654 + char c; 1.1655 + int retval = 0; 1.1656 + 1.1657 + // n.b. we ignore overflow ... 1.1658 + if (getc() != 'x') { 1.1659 + ungetc(); 1.1660 + for (; ;) { 1.1661 + c = getc(); 1.1662 + if (c >= '0' && c <= '9') { 1.1663 + retval *= 10; 1.1664 + retval += (c - '0'); 1.1665 + continue; 1.1666 + } 1.1667 + if (c == ';') 1.1668 + return retval; 1.1669 + fatal("P-049"); 1.1670 + } 1.1671 + } else 1.1672 + for (; ;) { 1.1673 + c = getc(); 1.1674 + if (c >= '0' && c <= '9') { 1.1675 + retval <<= 4; 1.1676 + retval += (c - '0'); 1.1677 + continue; 1.1678 + } 1.1679 + if (c >= 'a' && c <= 'f') { 1.1680 + retval <<= 4; 1.1681 + retval += 10 + (c - 'a'); 1.1682 + continue; 1.1683 + } 1.1684 + if (c >= 'A' && c <= 'F') { 1.1685 + retval <<= 4; 1.1686 + retval += 10 + (c - 'A'); 1.1687 + continue; 1.1688 + } 1.1689 + if (c == ';') 1.1690 + return retval; 1.1691 + fatal("P-050"); 1.1692 + } 1.1693 + } 1.1694 + 1.1695 + // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE, 1.1696 + // though still subject to the 'Char' construct in XML 1.1697 + private int surrogatesToCharTmp(int ucs4) 1.1698 + throws SAXException { 1.1699 + 1.1700 + if (ucs4 <= 0xffff) { 1.1701 + if (XmlChars.isChar(ucs4)) { 1.1702 + charTmp[0] = (char) ucs4; 1.1703 + return 1; 1.1704 + } 1.1705 + } else if (ucs4 <= 0x0010ffff) { 1.1706 + // we represent these as UNICODE surrogate pairs 1.1707 + ucs4 -= 0x10000; 1.1708 + charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff)); 1.1709 + charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff)); 1.1710 + return 2; 1.1711 + } 1.1712 + fatal("P-051", new Object[]{Integer.toHexString(ucs4)}); 1.1713 + // NOTREACHED 1.1714 + return -1; 1.1715 + } 1.1716 + 1.1717 + private boolean maybePEReference() 1.1718 + throws IOException, SAXException { 1.1719 + 1.1720 + // This is the SYNTACTIC version of this construct. 1.1721 + // When processing external entities, there is also 1.1722 + // a LEXICAL version; see getc() and doLexicalPE. 1.1723 + 1.1724 + // [69] PEReference ::= '%' Name ';' 1.1725 + if (!in.peekc('%')) 1.1726 + return false; 1.1727 + 1.1728 + String name = maybeGetName(); 1.1729 + Object entity; 1.1730 + 1.1731 + if (name == null) 1.1732 + fatal("P-011"); 1.1733 + nextChar(';', "F-021", name); 1.1734 + entity = params.get(name); 1.1735 + 1.1736 + if (entity instanceof InternalEntity) { 1.1737 + InternalEntity value = (InternalEntity) entity; 1.1738 + pushReader(value.buf, name, false); 1.1739 + 1.1740 + } else if (entity instanceof ExternalEntity) { 1.1741 + pushReader((ExternalEntity) entity); 1.1742 + externalParameterEntity((ExternalEntity) entity); 1.1743 + 1.1744 + } else if (entity == null) { 1.1745 + error("V-022", new Object[]{name}); 1.1746 + } 1.1747 + return true; 1.1748 + } 1.1749 + 1.1750 + private boolean maybeEntityDecl() 1.1751 + throws IOException, SAXException { 1.1752 + 1.1753 + // [70] EntityDecl ::= GEDecl | PEDecl 1.1754 + // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 1.1755 + // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>' 1.1756 + // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 1.1757 + // [74] PEDef ::= EntityValue | ExternalID 1.1758 + // 1.1759 + InputEntity start = peekDeclaration("!ENTITY"); 1.1760 + 1.1761 + if (start == null) 1.1762 + return false; 1.1763 + 1.1764 + String entityName; 1.1765 + SimpleHashtable defns; 1.1766 + ExternalEntity externalId; 1.1767 + boolean doStore; 1.1768 + 1.1769 + // PE expansion gets selectively turned off several places: 1.1770 + // in ENTITY declarations (here), in comments, in PIs. 1.1771 + 1.1772 + // Here, we allow PE entities to be declared, and allows 1.1773 + // literals to include PE refs without the added spaces 1.1774 + // required with their expansion in markup decls. 1.1775 + 1.1776 + doLexicalPE = false; 1.1777 + whitespace("F-005"); 1.1778 + if (in.peekc('%')) { 1.1779 + whitespace("F-006"); 1.1780 + defns = params; 1.1781 + } else 1.1782 + defns = entities; 1.1783 + 1.1784 + ungetc(); // leave some whitespace 1.1785 + doLexicalPE = true; 1.1786 + entityName = getMarkupDeclname("F-017", false); 1.1787 + whitespace("F-007"); 1.1788 + externalId = maybeExternalID(); 1.1789 + 1.1790 + // 1.1791 + // first definition sticks ... e.g. internal subset PEs are used 1.1792 + // to override DTD defaults. It's also an "error" to incorrectly 1.1793 + // redefine builtin internal entities, but since reporting such 1.1794 + // errors is optional we only give warnings ("just in case") for 1.1795 + // non-parameter entities. 1.1796 + // 1.1797 + doStore = (defns.get(entityName) == null); 1.1798 + if (!doStore && defns == entities) 1.1799 + warning("P-054", new Object[]{entityName}); 1.1800 + 1.1801 + // internal entities 1.1802 + if (externalId == null) { 1.1803 + char value []; 1.1804 + InternalEntity entity; 1.1805 + 1.1806 + doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd" 1.1807 + parseLiteral(true); 1.1808 + doLexicalPE = true; 1.1809 + if (doStore) { 1.1810 + value = new char[strTmp.length()]; 1.1811 + if (value.length != 0) 1.1812 + strTmp.getChars(0, value.length, value, 0); 1.1813 + entity = new InternalEntity(entityName, value); 1.1814 + entity.isPE = (defns == params); 1.1815 + entity.isFromInternalSubset = false; 1.1816 + defns.put(entityName, entity); 1.1817 + if (defns == entities) 1.1818 + dtdHandler.internalGeneralEntityDecl(entityName, 1.1819 + new String(value)); 1.1820 + } 1.1821 + 1.1822 + // external entities (including unparsed) 1.1823 + } else { 1.1824 + // [76] NDataDecl ::= S 'NDATA' S Name 1.1825 + if (defns == entities && maybeWhitespace() 1.1826 + && peek("NDATA")) { 1.1827 + externalId.notation = getMarkupDeclname("F-018", false); 1.1828 + 1.1829 + // flag undeclared notation for checking after 1.1830 + // the DTD is fully processed 1.1831 + if (notations.get(externalId.notation) == null) 1.1832 + notations.put(externalId.notation, Boolean.TRUE); 1.1833 + } 1.1834 + externalId.name = entityName; 1.1835 + externalId.isPE = (defns == params); 1.1836 + externalId.isFromInternalSubset = false; 1.1837 + if (doStore) { 1.1838 + defns.put(entityName, externalId); 1.1839 + if (externalId.notation != null) 1.1840 + dtdHandler.unparsedEntityDecl(entityName, 1.1841 + externalId.publicId, externalId.systemId, 1.1842 + externalId.notation); 1.1843 + else if (defns == entities) 1.1844 + dtdHandler.externalGeneralEntityDecl(entityName, 1.1845 + externalId.publicId, externalId.systemId); 1.1846 + } 1.1847 + } 1.1848 + maybeWhitespace(); 1.1849 + nextChar('>', "F-031", entityName); 1.1850 + if (start != in) 1.1851 + error("V-013", null); 1.1852 + return true; 1.1853 + } 1.1854 + 1.1855 + private ExternalEntity maybeExternalID() 1.1856 + throws IOException, SAXException { 1.1857 + 1.1858 + // [75] ExternalID ::= 'SYSTEM' S SystemLiteral 1.1859 + // | 'PUBLIC' S' PubidLiteral S Systemliteral 1.1860 + String temp = null; 1.1861 + ExternalEntity retval; 1.1862 + 1.1863 + if (peek("PUBLIC")) { 1.1864 + whitespace("F-009"); 1.1865 + temp = parsePublicId(); 1.1866 + } else if (!peek("SYSTEM")) 1.1867 + return null; 1.1868 + 1.1869 + retval = new ExternalEntity(in); 1.1870 + retval.publicId = temp; 1.1871 + whitespace("F-008"); 1.1872 + retval.systemId = parseSystemId(); 1.1873 + return retval; 1.1874 + } 1.1875 + 1.1876 + private String parseSystemId() 1.1877 + throws IOException, SAXException { 1.1878 + 1.1879 + String uri = getQuotedString("F-034", null); 1.1880 + int temp = uri.indexOf(':'); 1.1881 + 1.1882 + // resolve relative URIs ... must do it here since 1.1883 + // it's relative to the source file holding the URI! 1.1884 + 1.1885 + // "new java.net.URL (URL, string)" conforms to RFC 1630, 1.1886 + // but we can't use that except when the URI is a URL. 1.1887 + // The entity resolver is allowed to handle URIs that are 1.1888 + // not URLs, so we pass URIs through with scheme intact 1.1889 + if (temp == -1 || uri.indexOf('/') < temp) { 1.1890 + String baseURI; 1.1891 + 1.1892 + baseURI = in.getSystemId(); 1.1893 + if (baseURI == null) 1.1894 + fatal("P-055", new Object[]{uri}); 1.1895 + if (uri.length() == 0) 1.1896 + uri = "."; 1.1897 + baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1); 1.1898 + if (uri.charAt(0) != '/') 1.1899 + uri = baseURI + uri; 1.1900 + else { 1.1901 + // XXX slashes at the beginning of a relative URI are 1.1902 + // a special case we don't handle. 1.1903 + throw new InternalError(); 1.1904 + } 1.1905 + 1.1906 + // letting other code map any "/xxx/../" or "/./" to "/", 1.1907 + // since all URIs must handle it the same. 1.1908 + } 1.1909 + // check for fragment ID in URI 1.1910 + if (uri.indexOf('#') != -1) 1.1911 + error("P-056", new Object[]{uri}); 1.1912 + return uri; 1.1913 + } 1.1914 + 1.1915 + private void maybeTextDecl() 1.1916 + throws IOException, SAXException { 1.1917 + 1.1918 + // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 1.1919 + if (peek("<?xml")) { 1.1920 + readVersion(false, "1.0"); 1.1921 + readEncoding(true); 1.1922 + maybeWhitespace(); 1.1923 + if (!peek("?>")) 1.1924 + fatal("P-057"); 1.1925 + } 1.1926 + } 1.1927 + 1.1928 + private void externalParameterEntity(ExternalEntity next) 1.1929 + throws IOException, SAXException { 1.1930 + 1.1931 + // 1.1932 + // Reap the intended benefits of standalone declarations: 1.1933 + // don't deal with external parameter entities, except to 1.1934 + // validate the standalone declaration. 1.1935 + // 1.1936 + 1.1937 + // n.b. "in external parameter entities" (and external 1.1938 + // DTD subset, same grammar) parameter references can 1.1939 + // occur "within" markup declarations ... expansions can 1.1940 + // cross syntax rules. Flagged here; affects getc(). 1.1941 + 1.1942 + // [79] ExtPE ::= TextDecl? extSubsetDecl 1.1943 + // [31] extSubsetDecl ::= ( markupdecl | conditionalSect 1.1944 + // | PEReference | S )* 1.1945 + InputEntity pe; 1.1946 + 1.1947 + // XXX if this returns false ... 1.1948 + 1.1949 + pe = in; 1.1950 + maybeTextDecl(); 1.1951 + while (!pe.isEOF()) { 1.1952 + // pop internal PEs (and whitespace before/after) 1.1953 + if (in.isEOF()) { 1.1954 + in = in.pop(); 1.1955 + continue; 1.1956 + } 1.1957 + doLexicalPE = false; 1.1958 + if (maybeWhitespace()) 1.1959 + continue; 1.1960 + if (maybePEReference()) 1.1961 + continue; 1.1962 + doLexicalPE = true; 1.1963 + if (maybeMarkupDecl() || maybeConditionalSect()) 1.1964 + continue; 1.1965 + break; 1.1966 + } 1.1967 + // if (in != pe) throw new InternalError("who popped my PE?"); 1.1968 + if (!pe.isEOF()) 1.1969 + fatal("P-059", new Object[]{in.getName()}); 1.1970 + } 1.1971 + 1.1972 + private void readEncoding(boolean must) 1.1973 + throws IOException, SAXException { 1.1974 + 1.1975 + // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 1.1976 + String name = maybeReadAttribute("encoding", must); 1.1977 + 1.1978 + if (name == null) 1.1979 + return; 1.1980 + for (int i = 0; i < name.length(); i++) { 1.1981 + char c = name.charAt(i); 1.1982 + if ((c >= 'A' && c <= 'Z') 1.1983 + || (c >= 'a' && c <= 'z')) 1.1984 + continue; 1.1985 + if (i != 0 1.1986 + && ((c >= '0' && c <= '9') 1.1987 + || c == '-' 1.1988 + || c == '_' 1.1989 + || c == '.' 1.1990 + )) 1.1991 + continue; 1.1992 + fatal("P-060", new Object[]{new Character(c)}); 1.1993 + } 1.1994 + 1.1995 + // 1.1996 + // This should be the encoding in use, and it's even an error for 1.1997 + // it to be anything else (in certain cases that are impractical to 1.1998 + // to test, and may even be insufficient). So, we do the best we 1.1999 + // can, and warn if things look suspicious. Note that Java doesn't 1.2000 + // uniformly expose the encodings, and that the names it uses 1.2001 + // internally are nonstandard. Also, that the XML spec allows 1.2002 + // such "errors" not to be reported at all. 1.2003 + // 1.2004 + String currentEncoding = in.getEncoding(); 1.2005 + 1.2006 + if (currentEncoding != null 1.2007 + && !name.equalsIgnoreCase(currentEncoding)) 1.2008 + warning("P-061", new Object[]{name, currentEncoding}); 1.2009 + } 1.2010 + 1.2011 + private boolean maybeNotationDecl() 1.2012 + throws IOException, SAXException { 1.2013 + 1.2014 + // [82] NotationDecl ::= '<!NOTATION' S Name S 1.2015 + // (ExternalID | PublicID) S? '>' 1.2016 + // [83] PublicID ::= 'PUBLIC' S PubidLiteral 1.2017 + InputEntity start = peekDeclaration("!NOTATION"); 1.2018 + 1.2019 + if (start == null) 1.2020 + return false; 1.2021 + 1.2022 + String name = getMarkupDeclname("F-019", false); 1.2023 + ExternalEntity entity = new ExternalEntity(in); 1.2024 + 1.2025 + whitespace("F-011"); 1.2026 + if (peek("PUBLIC")) { 1.2027 + whitespace("F-009"); 1.2028 + entity.publicId = parsePublicId(); 1.2029 + if (maybeWhitespace()) { 1.2030 + if (!peek(">")) 1.2031 + entity.systemId = parseSystemId(); 1.2032 + else 1.2033 + ungetc(); 1.2034 + } 1.2035 + } else if (peek("SYSTEM")) { 1.2036 + whitespace("F-008"); 1.2037 + entity.systemId = parseSystemId(); 1.2038 + } else 1.2039 + fatal("P-062"); 1.2040 + maybeWhitespace(); 1.2041 + nextChar('>', "F-032", name); 1.2042 + if (start != in) 1.2043 + error("V-013", null); 1.2044 + if (entity.systemId != null && entity.systemId.indexOf('#') != -1) 1.2045 + error("P-056", new Object[]{entity.systemId}); 1.2046 + 1.2047 + Object value = notations.get(name); 1.2048 + if (value != null && value instanceof ExternalEntity) 1.2049 + warning("P-063", new Object[]{name}); 1.2050 + 1.2051 + else { 1.2052 + notations.put(name, entity); 1.2053 + dtdHandler.notationDecl(name, entity.publicId, 1.2054 + entity.systemId); 1.2055 + } 1.2056 + return true; 1.2057 + } 1.2058 + 1.2059 + 1.2060 + //////////////////////////////////////////////////////////////// 1.2061 + // 1.2062 + // UTILITIES 1.2063 + // 1.2064 + //////////////////////////////////////////////////////////////// 1.2065 + 1.2066 + private char getc() throws IOException, SAXException { 1.2067 + 1.2068 + if (!doLexicalPE) { 1.2069 + char c = in.getc(); 1.2070 + return c; 1.2071 + } 1.2072 + 1.2073 + // 1.2074 + // External parameter entities get funky processing of '%param;' 1.2075 + // references. It's not clearly defined in the XML spec; but it 1.2076 + // boils down to having those refs be _lexical_ in most cases to 1.2077 + // include partial syntax productions. It also needs selective 1.2078 + // enabling; "<!ENTITY % foo ...>" must work, for example, and 1.2079 + // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd" 1.2080 + // if it's expanded in a literal, else "ab cd". PEs also do 1.2081 + // not expand within comments or PIs, and external PEs are only 1.2082 + // allowed to have markup decls (and so aren't handled lexically). 1.2083 + // 1.2084 + // This PE handling should be merged into maybeWhitespace, where 1.2085 + // it can be dealt with more consistently. 1.2086 + // 1.2087 + // Also, there are some validity constraints in this area. 1.2088 + // 1.2089 + char c; 1.2090 + 1.2091 + while (in.isEOF()) { 1.2092 + if (in.isInternal() || (doLexicalPE && !in.isDocument())) 1.2093 + in = in.pop(); 1.2094 + else { 1.2095 + fatal("P-064", new Object[]{in.getName()}); 1.2096 + } 1.2097 + } 1.2098 + if ((c = in.getc()) == '%' && doLexicalPE) { 1.2099 + // PE ref ::= '%' name ';' 1.2100 + String name = maybeGetName(); 1.2101 + Object entity; 1.2102 + 1.2103 + if (name == null) 1.2104 + fatal("P-011"); 1.2105 + nextChar(';', "F-021", name); 1.2106 + entity = params.get(name); 1.2107 + 1.2108 + // push a magic "entity" before and after the 1.2109 + // real one, so ungetc() behaves uniformly 1.2110 + pushReader(" ".toCharArray(), null, false); 1.2111 + if (entity instanceof InternalEntity) 1.2112 + pushReader(((InternalEntity) entity).buf, name, false); 1.2113 + else if (entity instanceof ExternalEntity) 1.2114 + // PEs can't be unparsed! 1.2115 + // XXX if this returns false ... 1.2116 + pushReader((ExternalEntity) entity); 1.2117 + else if (entity == null) 1.2118 + // see note in maybePEReference re making this be nonfatal. 1.2119 + fatal("V-022"); 1.2120 + else 1.2121 + throw new InternalError(); 1.2122 + pushReader(" ".toCharArray(), null, false); 1.2123 + return in.getc(); 1.2124 + } 1.2125 + return c; 1.2126 + } 1.2127 + 1.2128 + private void ungetc() { 1.2129 + 1.2130 + in.ungetc(); 1.2131 + } 1.2132 + 1.2133 + private boolean peek(String s) 1.2134 + throws IOException, SAXException { 1.2135 + 1.2136 + return in.peek(s, null); 1.2137 + } 1.2138 + 1.2139 + // Return the entity starting the specified declaration 1.2140 + // (for validating declaration nesting) else null. 1.2141 + 1.2142 + private InputEntity peekDeclaration(String s) 1.2143 + throws IOException, SAXException { 1.2144 + 1.2145 + InputEntity start; 1.2146 + 1.2147 + if (!in.peekc('<')) 1.2148 + return null; 1.2149 + start = in; 1.2150 + if (in.peek(s, null)) 1.2151 + return start; 1.2152 + in.ungetc(); 1.2153 + return null; 1.2154 + } 1.2155 + 1.2156 + private void nextChar(char c, String location, String near) 1.2157 + throws IOException, SAXException { 1.2158 + 1.2159 + while (in.isEOF() && !in.isDocument()) 1.2160 + in = in.pop(); 1.2161 + if (!in.peekc(c)) 1.2162 + fatal("P-008", new Object[] 1.2163 + {new Character(c), 1.2164 + messages.getMessage(locale, location), 1.2165 + (near == null ? "" : ('"' + near + '"'))}); 1.2166 + } 1.2167 + 1.2168 + 1.2169 + private void pushReader(char buf [], String name, boolean isGeneral) 1.2170 + throws SAXException { 1.2171 + 1.2172 + InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); 1.2173 + r.init(buf, name, in, !isGeneral); 1.2174 + in = r; 1.2175 + } 1.2176 + 1.2177 + private boolean pushReader(ExternalEntity next) 1.2178 + throws IOException, SAXException { 1.2179 + 1.2180 + InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); 1.2181 + InputSource s; 1.2182 + try { 1.2183 + s = next.getInputSource(resolver); 1.2184 + } catch (IOException e) { 1.2185 + String msg = 1.2186 + "unable to open the external entity from :" + next.systemId; 1.2187 + if (next.publicId != null) 1.2188 + msg += " (public id:" + next.publicId + ")"; 1.2189 + 1.2190 + SAXParseException spe = new SAXParseException(msg, 1.2191 + getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e); 1.2192 + dtdHandler.fatalError(spe); 1.2193 + throw e; 1.2194 + } 1.2195 + 1.2196 + r.init(s, next.name, in, next.isPE); 1.2197 + in = r; 1.2198 + return true; 1.2199 + } 1.2200 + 1.2201 + public String getPublicId() { 1.2202 + 1.2203 + return (in == null) ? null : in.getPublicId(); 1.2204 + } 1.2205 + 1.2206 + public String getSystemId() { 1.2207 + 1.2208 + return (in == null) ? null : in.getSystemId(); 1.2209 + } 1.2210 + 1.2211 + public int getLineNumber() { 1.2212 + 1.2213 + return (in == null) ? -1 : in.getLineNumber(); 1.2214 + } 1.2215 + 1.2216 + public int getColumnNumber() { 1.2217 + 1.2218 + return (in == null) ? -1 : in.getColumnNumber(); 1.2219 + } 1.2220 + 1.2221 + // error handling convenience routines 1.2222 + 1.2223 + private void warning(String messageId, Object parameters []) 1.2224 + throws SAXException { 1.2225 + 1.2226 + SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), 1.2227 + getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); 1.2228 + 1.2229 + dtdHandler.warning(e); 1.2230 + } 1.2231 + 1.2232 + void error(String messageId, Object parameters []) 1.2233 + throws SAXException { 1.2234 + 1.2235 + SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), 1.2236 + getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); 1.2237 + 1.2238 + dtdHandler.error(e); 1.2239 + } 1.2240 + 1.2241 + private void fatal(String messageId) throws SAXException { 1.2242 + 1.2243 + fatal(messageId, null); 1.2244 + } 1.2245 + 1.2246 + private void fatal(String messageId, Object parameters []) 1.2247 + throws SAXException { 1.2248 + 1.2249 + SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), 1.2250 + getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); 1.2251 + 1.2252 + dtdHandler.fatalError(e); 1.2253 + 1.2254 + throw e; 1.2255 + } 1.2256 + 1.2257 + // 1.2258 + // Map char arrays to strings ... cuts down both on memory and 1.2259 + // CPU usage for element/attribute/other names that are reused. 1.2260 + // 1.2261 + // Documents typically repeat names a lot, so we more or less 1.2262 + // intern all the strings within the document; since some strings 1.2263 + // are repeated in multiple documents (e.g. stylesheets) we go 1.2264 + // a bit further, and intern globally. 1.2265 + // 1.2266 + static class NameCache { 1.2267 + // 1.2268 + // Unless we auto-grow this, the default size should be a 1.2269 + // reasonable bit larger than needed for most XML files 1.2270 + // we've yet seen (and be prime). If it's too small, the 1.2271 + // penalty is just excess cache collisions. 1.2272 + // 1.2273 + NameCacheEntry hashtable [] = new NameCacheEntry[541]; 1.2274 + 1.2275 + // 1.2276 + // Usually we just want to get the 'symbol' for these chars 1.2277 + // 1.2278 + String lookup(char value [], int len) { 1.2279 + 1.2280 + return lookupEntry(value, len).name; 1.2281 + } 1.2282 + 1.2283 + // 1.2284 + // Sometimes we need to scan the chars in the resulting 1.2285 + // string, so there's an accessor which exposes them. 1.2286 + // (Mostly for element end tags.) 1.2287 + // 1.2288 + NameCacheEntry lookupEntry(char value [], int len) { 1.2289 + 1.2290 + int index = 0; 1.2291 + NameCacheEntry entry; 1.2292 + 1.2293 + // hashing to get index 1.2294 + for (int i = 0; i < len; i++) 1.2295 + index = index * 31 + value[i]; 1.2296 + index &= 0x7fffffff; 1.2297 + index %= hashtable.length; 1.2298 + 1.2299 + // return entry if one's there ... 1.2300 + for (entry = hashtable[index]; 1.2301 + entry != null; 1.2302 + entry = entry.next) { 1.2303 + if (entry.matches(value, len)) 1.2304 + return entry; 1.2305 + } 1.2306 + 1.2307 + // else create new one 1.2308 + entry = new NameCacheEntry(); 1.2309 + entry.chars = new char[len]; 1.2310 + System.arraycopy(value, 0, entry.chars, 0, len); 1.2311 + entry.name = new String(entry.chars); 1.2312 + // 1.2313 + // NOTE: JDK 1.1 has a fixed size string intern table, 1.2314 + // with non-GC'd entries. It can panic here; that's a 1.2315 + // JDK problem, use 1.2 or later with many identifiers. 1.2316 + // 1.2317 + entry.name = entry.name.intern(); // "global" intern 1.2318 + entry.next = hashtable[index]; 1.2319 + hashtable[index] = entry; 1.2320 + return entry; 1.2321 + } 1.2322 + } 1.2323 + 1.2324 + static class NameCacheEntry { 1.2325 + 1.2326 + String name; 1.2327 + char chars []; 1.2328 + NameCacheEntry next; 1.2329 + 1.2330 + boolean matches(char value [], int len) { 1.2331 + 1.2332 + if (chars.length != len) 1.2333 + return false; 1.2334 + for (int i = 0; i < len; i++) 1.2335 + if (value[i] != chars[i]) 1.2336 + return false; 1.2337 + return true; 1.2338 + } 1.2339 + } 1.2340 + 1.2341 + // 1.2342 + // Message catalog for diagnostics. 1.2343 + // 1.2344 + static final Catalog messages = new Catalog(); 1.2345 + 1.2346 + static final class Catalog extends MessageCatalog { 1.2347 + 1.2348 + Catalog() { 1.2349 + super(DTDParser.class); 1.2350 + } 1.2351 + } 1.2352 + 1.2353 +}