src/share/jaxws_classes/com/sun/xml/internal/dtdparser/DTDParser.java

Tue, 06 Mar 2012 16:09:35 -0800

author
ohair
date
Tue, 06 Mar 2012 16:09:35 -0800
changeset 286
f50545b5e2f1
child 397
b99d7e355d4b
permissions
-rw-r--r--

7150322: Stop using drop source bundles in jaxws
Reviewed-by: darcy, ohrstrom

ohair@286 1 /*
ohair@286 2 * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
ohair@286 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
ohair@286 4 *
ohair@286 5 * This code is free software; you can redistribute it and/or modify it
ohair@286 6 * under the terms of the GNU General Public License version 2 only, as
ohair@286 7 * published by the Free Software Foundation. Oracle designates this
ohair@286 8 * particular file as subject to the "Classpath" exception as provided
ohair@286 9 * by Oracle in the LICENSE file that accompanied this code.
ohair@286 10 *
ohair@286 11 * This code is distributed in the hope that it will be useful, but WITHOUT
ohair@286 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
ohair@286 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
ohair@286 14 * version 2 for more details (a copy is included in the LICENSE file that
ohair@286 15 * accompanied this code).
ohair@286 16 *
ohair@286 17 * You should have received a copy of the GNU General Public License version
ohair@286 18 * 2 along with this work; if not, write to the Free Software Foundation,
ohair@286 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
ohair@286 20 *
ohair@286 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
ohair@286 22 * or visit www.oracle.com if you need additional information or have any
ohair@286 23 * questions.
ohair@286 24 */
ohair@286 25
ohair@286 26 package com.sun.xml.internal.dtdparser;
ohair@286 27
ohair@286 28 import org.xml.sax.EntityResolver;
ohair@286 29 import org.xml.sax.InputSource;
ohair@286 30 import org.xml.sax.Locator;
ohair@286 31 import org.xml.sax.SAXException;
ohair@286 32 import org.xml.sax.SAXParseException;
ohair@286 33
ohair@286 34 import java.io.IOException;
ohair@286 35 import java.util.ArrayList;
ohair@286 36 import java.util.Enumeration;
ohair@286 37 import java.util.Hashtable;
ohair@286 38 import java.util.Locale;
ohair@286 39 import java.util.Set;
ohair@286 40 import java.util.Vector;
ohair@286 41
ohair@286 42 /**
ohair@286 43 * This implements parsing of XML 1.0 DTDs.
ohair@286 44 * <p/>
ohair@286 45 * This conforms to the portion of the XML 1.0 specification related
ohair@286 46 * to the external DTD subset.
ohair@286 47 * <p/>
ohair@286 48 * For multi-language applications (such as web servers using XML
ohair@286 49 * processing to create dynamic content), a method supports choosing
ohair@286 50 * a locale for parser diagnostics which is both understood by the
ohair@286 51 * message recipient and supported by the parser.
ohair@286 52 * <p/>
ohair@286 53 * This parser produces a stream of parse events. It supports some
ohair@286 54 * features (exposing comments, CDATA sections, and entity references)
ohair@286 55 * which are not required to be reported by conformant XML processors.
ohair@286 56 *
ohair@286 57 * @author David Brownell
ohair@286 58 * @author Janet Koenig
ohair@286 59 * @author Kohsuke KAWAGUCHI
ohair@286 60 * @version $Id: DTDParser.java,v 1.2 2009-04-16 15:25:49 snajper Exp $
ohair@286 61 */
ohair@286 62 public class DTDParser {
ohair@286 63 public final static String TYPE_CDATA = "CDATA";
ohair@286 64 public final static String TYPE_ID = "ID";
ohair@286 65 public final static String TYPE_IDREF = "IDREF";
ohair@286 66 public final static String TYPE_IDREFS = "IDREFS";
ohair@286 67 public final static String TYPE_ENTITY = "ENTITY";
ohair@286 68 public final static String TYPE_ENTITIES = "ENTITIES";
ohair@286 69 public final static String TYPE_NMTOKEN = "NMTOKEN";
ohair@286 70 public final static String TYPE_NMTOKENS = "NMTOKENS";
ohair@286 71 public final static String TYPE_NOTATION = "NOTATION";
ohair@286 72 public final static String TYPE_ENUMERATION = "ENUMERATION";
ohair@286 73
ohair@286 74
ohair@286 75 // stack of input entities being merged
ohair@286 76 private InputEntity in;
ohair@286 77
ohair@286 78 // temporaries reused during parsing
ohair@286 79 private StringBuffer strTmp;
ohair@286 80 private char nameTmp [];
ohair@286 81 private NameCache nameCache;
ohair@286 82 private char charTmp [] = new char[2];
ohair@286 83
ohair@286 84 // temporary DTD parsing state
ohair@286 85 private boolean doLexicalPE;
ohair@286 86
ohair@286 87 // DTD state, used during parsing
ohair@286 88 // private SimpleHashtable elements = new SimpleHashtable (47);
ohair@286 89 protected final Set declaredElements = new java.util.HashSet();
ohair@286 90 private SimpleHashtable params = new SimpleHashtable(7);
ohair@286 91
ohair@286 92 // exposed to package-private subclass
ohair@286 93 Hashtable notations = new Hashtable(7);
ohair@286 94 SimpleHashtable entities = new SimpleHashtable(17);
ohair@286 95
ohair@286 96 private SimpleHashtable ids = new SimpleHashtable();
ohair@286 97
ohair@286 98 // listeners for DTD parsing events
ohair@286 99 private DTDEventListener dtdHandler;
ohair@286 100
ohair@286 101 private EntityResolver resolver;
ohair@286 102 private Locale locale;
ohair@286 103
ohair@286 104 // string constants -- use these copies so "==" works
ohair@286 105 // package private
ohair@286 106 static final String strANY = "ANY";
ohair@286 107 static final String strEMPTY = "EMPTY";
ohair@286 108
ohair@286 109 /**
ohair@286 110 * Used by applications to request locale for diagnostics.
ohair@286 111 *
ohair@286 112 * @param l The locale to use, or null to use system defaults
ohair@286 113 * (which may include only message IDs).
ohair@286 114 */
ohair@286 115 public void setLocale(Locale l) throws SAXException {
ohair@286 116
ohair@286 117 if (l != null && !messages.isLocaleSupported(l.toString())) {
ohair@286 118 throw new SAXException(messages.getMessage(locale,
ohair@286 119 "P-078", new Object[]{l}));
ohair@286 120 }
ohair@286 121 locale = l;
ohair@286 122 }
ohair@286 123
ohair@286 124 /**
ohair@286 125 * Returns the diagnostic locale.
ohair@286 126 */
ohair@286 127 public Locale getLocale() {
ohair@286 128 return locale;
ohair@286 129 }
ohair@286 130
ohair@286 131 /**
ohair@286 132 * Chooses a client locale to use for diagnostics, using the first
ohair@286 133 * language specified in the list that is supported by this parser.
ohair@286 134 * That locale is then set using <a href="#setLocale(java.util.Locale)">
ohair@286 135 * setLocale()</a>. Such a list could be provided by a variety of user
ohair@286 136 * preference mechanisms, including the HTTP <em>Accept-Language</em>
ohair@286 137 * header field.
ohair@286 138 *
ohair@286 139 * @param languages Array of language specifiers, ordered with the most
ohair@286 140 * preferable one at the front. For example, "en-ca" then "fr-ca",
ohair@286 141 * followed by "zh_CN". Both RFC 1766 and Java styles are supported.
ohair@286 142 * @return The chosen locale, or null.
ohair@286 143 * @see MessageCatalog
ohair@286 144 */
ohair@286 145 public Locale chooseLocale(String languages [])
ohair@286 146 throws SAXException {
ohair@286 147
ohair@286 148 Locale l = messages.chooseLocale(languages);
ohair@286 149
ohair@286 150 if (l != null) {
ohair@286 151 setLocale(l);
ohair@286 152 }
ohair@286 153 return l;
ohair@286 154 }
ohair@286 155
ohair@286 156 /**
ohair@286 157 * Lets applications control entity resolution.
ohair@286 158 */
ohair@286 159 public void setEntityResolver(EntityResolver r) {
ohair@286 160
ohair@286 161 resolver = r;
ohair@286 162 }
ohair@286 163
ohair@286 164 /**
ohair@286 165 * Returns the object used to resolve entities
ohair@286 166 */
ohair@286 167 public EntityResolver getEntityResolver() {
ohair@286 168
ohair@286 169 return resolver;
ohair@286 170 }
ohair@286 171
ohair@286 172 /**
ohair@286 173 * Used by applications to set handling of DTD parsing events.
ohair@286 174 */
ohair@286 175 public void setDtdHandler(DTDEventListener handler) {
ohair@286 176 dtdHandler = handler;
ohair@286 177 if (handler != null)
ohair@286 178 handler.setDocumentLocator(new Locator() {
ohair@286 179 public String getPublicId() {
ohair@286 180 return DTDParser.this.getPublicId();
ohair@286 181 }
ohair@286 182
ohair@286 183 public String getSystemId() {
ohair@286 184 return DTDParser.this.getSystemId();
ohair@286 185 }
ohair@286 186
ohair@286 187 public int getLineNumber() {
ohair@286 188 return DTDParser.this.getLineNumber();
ohair@286 189 }
ohair@286 190
ohair@286 191 public int getColumnNumber() {
ohair@286 192 return DTDParser.this.getColumnNumber();
ohair@286 193 }
ohair@286 194 });
ohair@286 195 }
ohair@286 196
ohair@286 197 /**
ohair@286 198 * Returns the handler used to for DTD parsing events.
ohair@286 199 */
ohair@286 200 public DTDEventListener getDtdHandler() {
ohair@286 201 return dtdHandler;
ohair@286 202 }
ohair@286 203
ohair@286 204 /**
ohair@286 205 * Parse a DTD.
ohair@286 206 */
ohair@286 207 public void parse(InputSource in)
ohair@286 208 throws IOException, SAXException {
ohair@286 209 init();
ohair@286 210 parseInternal(in);
ohair@286 211 }
ohair@286 212
ohair@286 213 /**
ohair@286 214 * Parse a DTD.
ohair@286 215 */
ohair@286 216 public void parse(String uri)
ohair@286 217 throws IOException, SAXException {
ohair@286 218 InputSource inSource;
ohair@286 219
ohair@286 220 init();
ohair@286 221 // System.out.println ("parse (\"" + uri + "\")");
ohair@286 222 inSource = resolver.resolveEntity(null, uri);
ohair@286 223
ohair@286 224 // If custom resolver punts resolution to parser, handle it ...
ohair@286 225 if (inSource == null) {
ohair@286 226 inSource = Resolver.createInputSource(new java.net.URL(uri), false);
ohair@286 227
ohair@286 228 // ... or if custom resolver doesn't correctly construct the
ohair@286 229 // input entity, patch it up enough so relative URIs work, and
ohair@286 230 // issue a warning to minimize later confusion.
ohair@286 231 } else if (inSource.getSystemId() == null) {
ohair@286 232 warning("P-065", null);
ohair@286 233 inSource.setSystemId(uri);
ohair@286 234 }
ohair@286 235
ohair@286 236 parseInternal(inSource);
ohair@286 237 }
ohair@286 238
ohair@286 239 // makes sure the parser is reset to "before a document"
ohair@286 240 private void init() {
ohair@286 241 in = null;
ohair@286 242
ohair@286 243 // alloc temporary data used in parsing
ohair@286 244 strTmp = new StringBuffer();
ohair@286 245 nameTmp = new char[20];
ohair@286 246 nameCache = new NameCache();
ohair@286 247
ohair@286 248 // reset doc info
ohair@286 249 // isInAttribute = false;
ohair@286 250
ohair@286 251 doLexicalPE = false;
ohair@286 252
ohair@286 253 entities.clear();
ohair@286 254 notations.clear();
ohair@286 255 params.clear();
ohair@286 256 // elements.clear ();
ohair@286 257 declaredElements.clear();
ohair@286 258
ohair@286 259 // initialize predefined references ... re-interpreted later
ohair@286 260 builtin("amp", "&#38;");
ohair@286 261 builtin("lt", "&#60;");
ohair@286 262 builtin("gt", ">");
ohair@286 263 builtin("quot", "\"");
ohair@286 264 builtin("apos", "'");
ohair@286 265
ohair@286 266 if (locale == null)
ohair@286 267 locale = Locale.getDefault();
ohair@286 268 if (resolver == null)
ohair@286 269 resolver = new Resolver();
ohair@286 270 if (dtdHandler == null)
ohair@286 271 dtdHandler = new DTDHandlerBase();
ohair@286 272 }
ohair@286 273
ohair@286 274 private void builtin(String entityName, String entityValue) {
ohair@286 275 InternalEntity entity;
ohair@286 276 entity = new InternalEntity(entityName, entityValue.toCharArray());
ohair@286 277 entities.put(entityName, entity);
ohair@286 278 }
ohair@286 279
ohair@286 280
ohair@286 281 ////////////////////////////////////////////////////////////////
ohair@286 282 //
ohair@286 283 // parsing is by recursive descent, code roughly
ohair@286 284 // following the BNF rules except tweaked for simple
ohair@286 285 // lookahead. rules are more or less in numeric order,
ohair@286 286 // except where code sharing suggests other structures.
ohair@286 287 //
ohair@286 288 // a classic benefit of recursive descent parsers: it's
ohair@286 289 // relatively easy to get diagnostics that make sense.
ohair@286 290 //
ohair@286 291 ////////////////////////////////////////////////////////////////
ohair@286 292
ohair@286 293
ohair@286 294 private void parseInternal(InputSource input)
ohair@286 295 throws IOException, SAXException {
ohair@286 296
ohair@286 297 if (input == null)
ohair@286 298 fatal("P-000");
ohair@286 299
ohair@286 300 try {
ohair@286 301 in = InputEntity.getInputEntity(dtdHandler, locale);
ohair@286 302 in.init(input, null, null, false);
ohair@286 303
ohair@286 304 dtdHandler.startDTD(in);
ohair@286 305
ohair@286 306 // [30] extSubset ::= TextDecl? extSubsetDecl
ohair@286 307 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
ohair@286 308 // | PEReference | S )*
ohair@286 309 // ... same as [79] extPE, which is where the code is
ohair@286 310
ohair@286 311 ExternalEntity externalSubset = new ExternalEntity(in);
ohair@286 312 externalParameterEntity(externalSubset);
ohair@286 313
ohair@286 314 if (!in.isEOF()) {
ohair@286 315 fatal("P-001", new Object[]
ohair@286 316 {Integer.toHexString(((int) getc()))});
ohair@286 317 }
ohair@286 318 afterRoot();
ohair@286 319 dtdHandler.endDTD();
ohair@286 320
ohair@286 321 } catch (EndOfInputException e) {
ohair@286 322 if (!in.isDocument()) {
ohair@286 323 String name = in.getName();
ohair@286 324 do { // force a relevant URI and line number
ohair@286 325 in = in.pop();
ohair@286 326 } while (in.isInternal());
ohair@286 327 fatal("P-002", new Object[]{name});
ohair@286 328 } else {
ohair@286 329 fatal("P-003", null);
ohair@286 330 }
ohair@286 331 } catch (RuntimeException e) {
ohair@286 332 // Don't discard location that triggered the exception
ohair@286 333 // ## Should properly wrap exception
ohair@286 334 System.err.print("Internal DTD parser error: "); // ##
ohair@286 335 e.printStackTrace();
ohair@286 336 throw new SAXParseException(e.getMessage() != null
ohair@286 337 ? e.getMessage() : e.getClass().getName(),
ohair@286 338 getPublicId(), getSystemId(),
ohair@286 339 getLineNumber(), getColumnNumber());
ohair@286 340
ohair@286 341 } finally {
ohair@286 342 // recycle temporary data used during parsing
ohair@286 343 strTmp = null;
ohair@286 344 nameTmp = null;
ohair@286 345 nameCache = null;
ohair@286 346
ohair@286 347 // ditto input sources etc
ohair@286 348 if (in != null) {
ohair@286 349 in.close();
ohair@286 350 in = null;
ohair@286 351 }
ohair@286 352
ohair@286 353 // get rid of all DTD info ... some of it would be
ohair@286 354 // useful for editors etc, investigate later.
ohair@286 355
ohair@286 356 params.clear();
ohair@286 357 entities.clear();
ohair@286 358 notations.clear();
ohair@286 359 declaredElements.clear();
ohair@286 360 // elements.clear();
ohair@286 361 ids.clear();
ohair@286 362 }
ohair@286 363 }
ohair@286 364
ohair@286 365 void afterRoot() throws SAXException {
ohair@286 366 // Make sure all IDREFs match declared ID attributes. We scan
ohair@286 367 // after the document element is parsed, since XML allows forward
ohair@286 368 // references, and only now can we know if they're all resolved.
ohair@286 369
ohair@286 370 for (Enumeration e = ids.keys();
ohair@286 371 e.hasMoreElements();
ohair@286 372 ) {
ohair@286 373 String id = (String) e.nextElement();
ohair@286 374 Boolean value = (Boolean) ids.get(id);
ohair@286 375 if (Boolean.FALSE == value)
ohair@286 376 error("V-024", new Object[]{id});
ohair@286 377 }
ohair@286 378 }
ohair@286 379
ohair@286 380
ohair@286 381 // role is for diagnostics
ohair@286 382 private void whitespace(String roleId)
ohair@286 383 throws IOException, SAXException {
ohair@286 384
ohair@286 385 // [3] S ::= (#x20 | #x9 | #xd | #xa)+
ohair@286 386 if (!maybeWhitespace()) {
ohair@286 387 fatal("P-004", new Object[]
ohair@286 388 {messages.getMessage(locale, roleId)});
ohair@286 389 }
ohair@286 390 }
ohair@286 391
ohair@286 392 // S?
ohair@286 393 private boolean maybeWhitespace()
ohair@286 394 throws IOException, SAXException {
ohair@286 395
ohair@286 396 if (!doLexicalPE)
ohair@286 397 return in.maybeWhitespace();
ohair@286 398
ohair@286 399 // see getc() for the PE logic -- this lets us splice
ohair@286 400 // expansions of PEs in "anywhere". getc() has smarts,
ohair@286 401 // so for external PEs we don't bypass it.
ohair@286 402
ohair@286 403 // XXX we can marginally speed PE handling, and certainly
ohair@286 404 // be cleaner (hence potentially more correct), by using
ohair@286 405 // the observations that expanded PEs only start and stop
ohair@286 406 // where whitespace is allowed. getc wouldn't need any
ohair@286 407 // "lexical" PE expansion logic, and no other method needs
ohair@286 408 // to handle termination of PEs. (parsing of literals would
ohair@286 409 // still need to pop entities, but not parsing of references
ohair@286 410 // in content.)
ohair@286 411
ohair@286 412 char c = getc();
ohair@286 413 boolean saw = false;
ohair@286 414
ohair@286 415 while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
ohair@286 416 saw = true;
ohair@286 417
ohair@286 418 // this gracefully ends things when we stop playing
ohair@286 419 // with internal parameters. caller should have a
ohair@286 420 // grammar rule allowing whitespace at end of entity.
ohair@286 421 if (in.isEOF() && !in.isInternal())
ohair@286 422 return saw;
ohair@286 423 c = getc();
ohair@286 424 }
ohair@286 425 ungetc();
ohair@286 426 return saw;
ohair@286 427 }
ohair@286 428
ohair@286 429 private String maybeGetName()
ohair@286 430 throws IOException, SAXException {
ohair@286 431
ohair@286 432 NameCacheEntry entry = maybeGetNameCacheEntry();
ohair@286 433 return (entry == null) ? null : entry.name;
ohair@286 434 }
ohair@286 435
ohair@286 436 private NameCacheEntry maybeGetNameCacheEntry()
ohair@286 437 throws IOException, SAXException {
ohair@286 438
ohair@286 439 // [5] Name ::= (Letter|'_'|':') (Namechar)*
ohair@286 440 char c = getc();
ohair@286 441
ohair@286 442 if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
ohair@286 443 ungetc();
ohair@286 444 return null;
ohair@286 445 }
ohair@286 446 return nameCharString(c);
ohair@286 447 }
ohair@286 448
ohair@286 449 // Used when parsing enumerations
ohair@286 450 private String getNmtoken()
ohair@286 451 throws IOException, SAXException {
ohair@286 452
ohair@286 453 // [7] Nmtoken ::= (Namechar)+
ohair@286 454 char c = getc();
ohair@286 455 if (!XmlChars.isNameChar(c))
ohair@286 456 fatal("P-006", new Object[]{new Character(c)});
ohair@286 457 return nameCharString(c).name;
ohair@286 458 }
ohair@286 459
ohair@286 460 // n.b. this gets used when parsing attribute values (for
ohair@286 461 // internal references) so we can't use strTmp; it's also
ohair@286 462 // a hotspot for CPU and memory in the parser (called at least
ohair@286 463 // once for each element) so this has been optimized a bit.
ohair@286 464
ohair@286 465 private NameCacheEntry nameCharString(char c)
ohair@286 466 throws IOException, SAXException {
ohair@286 467
ohair@286 468 int i = 1;
ohair@286 469
ohair@286 470 nameTmp[0] = c;
ohair@286 471 for (; ;) {
ohair@286 472 if ((c = in.getNameChar()) == 0)
ohair@286 473 break;
ohair@286 474 if (i >= nameTmp.length) {
ohair@286 475 char tmp [] = new char[nameTmp.length + 10];
ohair@286 476 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
ohair@286 477 nameTmp = tmp;
ohair@286 478 }
ohair@286 479 nameTmp[i++] = c;
ohair@286 480 }
ohair@286 481 return nameCache.lookupEntry(nameTmp, i);
ohair@286 482 }
ohair@286 483
ohair@286 484 //
ohair@286 485 // much similarity between parsing entity values in DTD
ohair@286 486 // and attribute values (in DTD or content) ... both follow
ohair@286 487 // literal parsing rules, newline canonicalization, etc
ohair@286 488 //
ohair@286 489 // leaves value in 'strTmp' ... either a "replacement text" (4.5),
ohair@286 490 // or else partially normalized attribute value (the first bit
ohair@286 491 // of 3.3.3's spec, without the "if not CDATA" bits).
ohair@286 492 //
ohair@286 493 private void parseLiteral(boolean isEntityValue)
ohair@286 494 throws IOException, SAXException {
ohair@286 495
ohair@286 496 // [9] EntityValue ::=
ohair@286 497 // '"' ([^"&%] | Reference | PEReference)* '"'
ohair@286 498 // | "'" ([^'&%] | Reference | PEReference)* "'"
ohair@286 499 // [10] AttValue ::=
ohair@286 500 // '"' ([^"&] | Reference )* '"'
ohair@286 501 // | "'" ([^'&] | Reference )* "'"
ohair@286 502 char quote = getc();
ohair@286 503 char c;
ohair@286 504 InputEntity source = in;
ohair@286 505
ohair@286 506 if (quote != '\'' && quote != '"') {
ohair@286 507 fatal("P-007");
ohair@286 508 }
ohair@286 509
ohair@286 510 // don't report entity expansions within attributes,
ohair@286 511 // they're reported "fully expanded" via SAX
ohair@286 512 // isInAttribute = !isEntityValue;
ohair@286 513
ohair@286 514 // get value into strTmp
ohair@286 515 strTmp = new StringBuffer();
ohair@286 516
ohair@286 517 // scan, allowing entity push/pop wherever ...
ohair@286 518 // expanded entities can't terminate the literal!
ohair@286 519 for (; ;) {
ohair@286 520 if (in != source && in.isEOF()) {
ohair@286 521 // we don't report end of parsed entities
ohair@286 522 // within attributes (no SAX hooks)
ohair@286 523 in = in.pop();
ohair@286 524 continue;
ohair@286 525 }
ohair@286 526 if ((c = getc()) == quote && in == source) {
ohair@286 527 break;
ohair@286 528 }
ohair@286 529
ohair@286 530 //
ohair@286 531 // Basically the "reference in attribute value"
ohair@286 532 // row of the chart in section 4.4 of the spec
ohair@286 533 //
ohair@286 534 if (c == '&') {
ohair@286 535 String entityName = maybeGetName();
ohair@286 536
ohair@286 537 if (entityName != null) {
ohair@286 538 nextChar(';', "F-020", entityName);
ohair@286 539
ohair@286 540 // 4.4 says: bypass these here ... we'll catch
ohair@286 541 // forbidden refs to unparsed entities on use
ohair@286 542 if (isEntityValue) {
ohair@286 543 strTmp.append('&');
ohair@286 544 strTmp.append(entityName);
ohair@286 545 strTmp.append(';');
ohair@286 546 continue;
ohair@286 547 }
ohair@286 548 expandEntityInLiteral(entityName, entities, isEntityValue);
ohair@286 549
ohair@286 550
ohair@286 551 // character references are always included immediately
ohair@286 552 } else if ((c = getc()) == '#') {
ohair@286 553 int tmp = parseCharNumber();
ohair@286 554
ohair@286 555 if (tmp > 0xffff) {
ohair@286 556 tmp = surrogatesToCharTmp(tmp);
ohair@286 557 strTmp.append(charTmp[0]);
ohair@286 558 if (tmp == 2)
ohair@286 559 strTmp.append(charTmp[1]);
ohair@286 560 } else
ohair@286 561 strTmp.append((char) tmp);
ohair@286 562 } else
ohair@286 563 fatal("P-009");
ohair@286 564 continue;
ohair@286 565
ohair@286 566 }
ohair@286 567
ohair@286 568 // expand parameter entities only within entity value literals
ohair@286 569 if (c == '%' && isEntityValue) {
ohair@286 570 String entityName = maybeGetName();
ohair@286 571
ohair@286 572 if (entityName != null) {
ohair@286 573 nextChar(';', "F-021", entityName);
ohair@286 574 expandEntityInLiteral(entityName, params, isEntityValue);
ohair@286 575 continue;
ohair@286 576 } else
ohair@286 577 fatal("P-011");
ohair@286 578 }
ohair@286 579
ohair@286 580 // For attribute values ...
ohair@286 581 if (!isEntityValue) {
ohair@286 582 // 3.3.3 says whitespace normalizes to space...
ohair@286 583 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
ohair@286 584 strTmp.append(' ');
ohair@286 585 continue;
ohair@286 586 }
ohair@286 587
ohair@286 588 // "<" not legal in parsed literals ...
ohair@286 589 if (c == '<')
ohair@286 590 fatal("P-012");
ohair@286 591 }
ohair@286 592
ohair@286 593 strTmp.append(c);
ohair@286 594 }
ohair@286 595 // isInAttribute = false;
ohair@286 596 }
ohair@286 597
ohair@286 598 // does a SINGLE expansion of the entity (often reparsed later)
ohair@286 599 private void expandEntityInLiteral(String name, SimpleHashtable table,
ohair@286 600 boolean isEntityValue)
ohair@286 601 throws IOException, SAXException {
ohair@286 602
ohair@286 603 Object entity = table.get(name);
ohair@286 604
ohair@286 605 if (entity instanceof InternalEntity) {
ohair@286 606 InternalEntity value = (InternalEntity) entity;
ohair@286 607 pushReader(value.buf, name, !value.isPE);
ohair@286 608
ohair@286 609 } else if (entity instanceof ExternalEntity) {
ohair@286 610 if (!isEntityValue) // must be a PE ...
ohair@286 611 fatal("P-013", new Object[]{name});
ohair@286 612 // XXX if this returns false ...
ohair@286 613 pushReader((ExternalEntity) entity);
ohair@286 614
ohair@286 615 } else if (entity == null) {
ohair@286 616 //
ohair@286 617 // Note: much confusion about whether spec requires such
ohair@286 618 // errors to be fatal in many cases, but none about whether
ohair@286 619 // it allows "normal" errors to be unrecoverable!
ohair@286 620 //
ohair@286 621 fatal((table == params) ? "V-022" : "P-014",
ohair@286 622 new Object[]{name});
ohair@286 623 }
ohair@286 624 }
ohair@286 625
ohair@286 626 // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
ohair@286 627 // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
ohair@286 628
ohair@286 629 // NOTE: XML spec should explicitly say that PE ref syntax is
ohair@286 630 // ignored in PIs, comments, SystemLiterals, and Pubid Literal
ohair@286 631 // values ... can't process the XML spec's own DTD without doing
ohair@286 632 // that for comments.
ohair@286 633
ohair@286 634 private String getQuotedString(String type, String extra)
ohair@286 635 throws IOException, SAXException {
ohair@286 636
ohair@286 637 // use in.getc to bypass PE processing
ohair@286 638 char quote = in.getc();
ohair@286 639
ohair@286 640 if (quote != '\'' && quote != '"')
ohair@286 641 fatal("P-015", new Object[]{
ohair@286 642 messages.getMessage(locale, type, new Object[]{extra})
ohair@286 643 });
ohair@286 644
ohair@286 645 char c;
ohair@286 646
ohair@286 647 strTmp = new StringBuffer();
ohair@286 648 while ((c = in.getc()) != quote)
ohair@286 649 strTmp.append((char) c);
ohair@286 650 return strTmp.toString();
ohair@286 651 }
ohair@286 652
ohair@286 653
ohair@286 654 private String parsePublicId() throws IOException, SAXException {
ohair@286 655
ohair@286 656 // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
ohair@286 657 // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
ohair@286 658 String retval = getQuotedString("F-033", null);
ohair@286 659 for (int i = 0; i < retval.length(); i++) {
ohair@286 660 char c = retval.charAt(i);
ohair@286 661 if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
ohair@286 662 && !(c >= 'A' && c <= 'Z')
ohair@286 663 && !(c >= 'a' && c <= 'z'))
ohair@286 664 fatal("P-016", new Object[]{new Character(c)});
ohair@286 665 }
ohair@286 666 strTmp = new StringBuffer();
ohair@286 667 strTmp.append(retval);
ohair@286 668 return normalize(false);
ohair@286 669 }
ohair@286 670
ohair@286 671 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
ohair@286 672 // handled by: InputEntity.parsedContent()
ohair@286 673
ohair@286 674 private boolean maybeComment(boolean skipStart)
ohair@286 675 throws IOException, SAXException {
ohair@286 676
ohair@286 677 // [15] Comment ::= '<!--'
ohair@286 678 // ( (Char - '-') | ('-' (Char - '-'))*
ohair@286 679 // '-->'
ohair@286 680 if (!in.peek(skipStart ? "!--" : "<!--", null))
ohair@286 681 return false;
ohair@286 682
ohair@286 683 boolean savedLexicalPE = doLexicalPE;
ohair@286 684 boolean saveCommentText;
ohair@286 685
ohair@286 686 doLexicalPE = false;
ohair@286 687 saveCommentText = false;
ohair@286 688 if (saveCommentText)
ohair@286 689 strTmp = new StringBuffer();
ohair@286 690
ohair@286 691 oneComment:
ohair@286 692 for (; ;) {
ohair@286 693 try {
ohair@286 694 // bypass PE expansion, but permit PEs
ohair@286 695 // to complete ... valid docs won't care.
ohair@286 696 for (; ;) {
ohair@286 697 int c = getc();
ohair@286 698 if (c == '-') {
ohair@286 699 c = getc();
ohair@286 700 if (c != '-') {
ohair@286 701 if (saveCommentText)
ohair@286 702 strTmp.append('-');
ohair@286 703 ungetc();
ohair@286 704 continue;
ohair@286 705 }
ohair@286 706 nextChar('>', "F-022", null);
ohair@286 707 break oneComment;
ohair@286 708 }
ohair@286 709 if (saveCommentText)
ohair@286 710 strTmp.append((char) c);
ohair@286 711 }
ohair@286 712 } catch (EndOfInputException e) {
ohair@286 713 //
ohair@286 714 // This is fatal EXCEPT when we're processing a PE...
ohair@286 715 // in which case a validating processor reports an error.
ohair@286 716 // External PEs are easy to detect; internal ones we
ohair@286 717 // infer by being an internal entity outside an element.
ohair@286 718 //
ohair@286 719 if (in.isInternal()) {
ohair@286 720 error("V-021", null);
ohair@286 721 }
ohair@286 722 fatal("P-017");
ohair@286 723 }
ohair@286 724 }
ohair@286 725 doLexicalPE = savedLexicalPE;
ohair@286 726 if (saveCommentText)
ohair@286 727 dtdHandler.comment(strTmp.toString());
ohair@286 728 return true;
ohair@286 729 }
ohair@286 730
ohair@286 731 private boolean maybePI(boolean skipStart)
ohair@286 732 throws IOException, SAXException {
ohair@286 733
ohair@286 734 // [16] PI ::= '<?' PITarget
ohair@286 735 // (S (Char* - (Char* '?>' Char*)))?
ohair@286 736 // '?>'
ohair@286 737 // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
ohair@286 738 boolean savedLexicalPE = doLexicalPE;
ohair@286 739
ohair@286 740 if (!in.peek(skipStart ? "?" : "<?", null))
ohair@286 741 return false;
ohair@286 742 doLexicalPE = false;
ohair@286 743
ohair@286 744 String target = maybeGetName();
ohair@286 745
ohair@286 746 if (target == null) {
ohair@286 747 fatal("P-018");
ohair@286 748 }
ohair@286 749 if ("xml".equals(target)) {
ohair@286 750 fatal("P-019");
ohair@286 751 }
ohair@286 752 if ("xml".equalsIgnoreCase(target)) {
ohair@286 753 fatal("P-020", new Object[]{target});
ohair@286 754 }
ohair@286 755
ohair@286 756 if (maybeWhitespace()) {
ohair@286 757 strTmp = new StringBuffer();
ohair@286 758 try {
ohair@286 759 for (; ;) {
ohair@286 760 // use in.getc to bypass PE processing
ohair@286 761 char c = in.getc();
ohair@286 762 //Reached the end of PI.
ohair@286 763 if (c == '?' && in.peekc('>'))
ohair@286 764 break;
ohair@286 765 strTmp.append(c);
ohair@286 766 }
ohair@286 767 } catch (EndOfInputException e) {
ohair@286 768 fatal("P-021");
ohair@286 769 }
ohair@286 770 dtdHandler.processingInstruction(target, strTmp.toString());
ohair@286 771 } else {
ohair@286 772 if (!in.peek("?>", null)) {
ohair@286 773 fatal("P-022");
ohair@286 774 }
ohair@286 775 dtdHandler.processingInstruction(target, "");
ohair@286 776 }
ohair@286 777
ohair@286 778 doLexicalPE = savedLexicalPE;
ohair@286 779 return true;
ohair@286 780 }
ohair@286 781
ohair@286 782 // [18] CDSect ::= CDStart CData CDEnd
ohair@286 783 // [19] CDStart ::= '<![CDATA['
ohair@286 784 // [20] CData ::= (Char* - (Char* ']]>' Char*))
ohair@286 785 // [21] CDEnd ::= ']]>'
ohair@286 786 //
ohair@286 787 // ... handled by InputEntity.unparsedContent()
ohair@286 788
ohair@286 789 // collapsing several rules together ...
ohair@286 790 // simpler than attribute literals -- no reference parsing!
ohair@286 791 private String maybeReadAttribute(String name, boolean must)
ohair@286 792 throws IOException, SAXException {
ohair@286 793
ohair@286 794 // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
ohair@286 795 // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
ohair@286 796 // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\"
ohair@286 797 if (!maybeWhitespace()) {
ohair@286 798 if (!must) {
ohair@286 799 return null;
ohair@286 800 }
ohair@286 801 fatal("P-024", new Object[]{name});
ohair@286 802 // NOTREACHED
ohair@286 803 }
ohair@286 804
ohair@286 805 if (!peek(name)) {
ohair@286 806 if (must) {
ohair@286 807 fatal("P-024", new Object[]{name});
ohair@286 808 } else {
ohair@286 809 // To ensure that the whitespace is there so that when we
ohair@286 810 // check for the next attribute we assure that the
ohair@286 811 // whitespace still exists.
ohair@286 812 ungetc();
ohair@286 813 return null;
ohair@286 814 }
ohair@286 815 }
ohair@286 816
ohair@286 817 // [25] Eq ::= S? '=' S?
ohair@286 818 maybeWhitespace();
ohair@286 819 nextChar('=', "F-023", null);
ohair@286 820 maybeWhitespace();
ohair@286 821
ohair@286 822 return getQuotedString("F-035", name);
ohair@286 823 }
ohair@286 824
ohair@286 825 private void readVersion(boolean must, String versionNum)
ohair@286 826 throws IOException, SAXException {
ohair@286 827
ohair@286 828 String value = maybeReadAttribute("version", must);
ohair@286 829
ohair@286 830 // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
ohair@286 831
ohair@286 832 if (must && value == null)
ohair@286 833 fatal("P-025", new Object[]{versionNum});
ohair@286 834 if (value != null) {
ohair@286 835 int length = value.length();
ohair@286 836 for (int i = 0; i < length; i++) {
ohair@286 837 char c = value.charAt(i);
ohair@286 838 if (!((c >= '0' && c <= '9')
ohair@286 839 || c == '_' || c == '.'
ohair@286 840 || (c >= 'a' && c <= 'z')
ohair@286 841 || (c >= 'A' && c <= 'Z')
ohair@286 842 || c == ':' || c == '-')
ohair@286 843 )
ohair@286 844 fatal("P-026", new Object[]{value});
ohair@286 845 }
ohair@286 846 }
ohair@286 847 if (value != null && !value.equals(versionNum))
ohair@286 848 error("P-027", new Object[]{versionNum, value});
ohair@286 849 }
ohair@286 850
ohair@286 851 // common code used by most markup declarations
ohair@286 852 // ... S (Q)Name ...
ohair@286 853 private String getMarkupDeclname(String roleId, boolean qname)
ohair@286 854 throws IOException, SAXException {
ohair@286 855
ohair@286 856 String name;
ohair@286 857
ohair@286 858 whitespace(roleId);
ohair@286 859 name = maybeGetName();
ohair@286 860 if (name == null)
ohair@286 861 fatal("P-005", new Object[]
ohair@286 862 {messages.getMessage(locale, roleId)});
ohair@286 863 return name;
ohair@286 864 }
ohair@286 865
ohair@286 866 private boolean maybeMarkupDecl()
ohair@286 867 throws IOException, SAXException {
ohair@286 868
ohair@286 869 // [29] markupdecl ::= elementdecl | Attlistdecl
ohair@286 870 // | EntityDecl | NotationDecl | PI | Comment
ohair@286 871 return maybeElementDecl()
ohair@286 872 || maybeAttlistDecl()
ohair@286 873 || maybeEntityDecl()
ohair@286 874 || maybeNotationDecl()
ohair@286 875 || maybePI(false)
ohair@286 876 || maybeComment(false);
ohair@286 877 }
ohair@286 878
ohair@286 879 private static final String XmlLang = "xml:lang";
ohair@286 880
ohair@286 881 private boolean isXmlLang(String value) {
ohair@286 882
ohair@286 883 // [33] LanguageId ::= Langcode ('-' Subcode)*
ohair@286 884 // [34] Langcode ::= ISO639Code | IanaCode | UserCode
ohair@286 885 // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
ohair@286 886 // [36] IanaCode ::= [iI] '-' SubCode
ohair@286 887 // [37] UserCode ::= [xX] '-' SubCode
ohair@286 888 // [38] SubCode ::= [a-zA-Z]+
ohair@286 889
ohair@286 890 // the ISO and IANA codes (and subcodes) are registered,
ohair@286 891 // but that's neither a WF nor a validity constraint.
ohair@286 892
ohair@286 893 int nextSuffix;
ohair@286 894 char c;
ohair@286 895
ohair@286 896 if (value.length() < 2)
ohair@286 897 return false;
ohair@286 898 c = value.charAt(1);
ohair@286 899 if (c == '-') { // IANA, or user, code
ohair@286 900 c = value.charAt(0);
ohair@286 901 if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
ohair@286 902 return false;
ohair@286 903 nextSuffix = 1;
ohair@286 904 } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
ohair@286 905 // 2 letter ISO code, or error
ohair@286 906 c = value.charAt(0);
ohair@286 907 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
ohair@286 908 return false;
ohair@286 909 nextSuffix = 2;
ohair@286 910 } else
ohair@286 911 return false;
ohair@286 912
ohair@286 913 // here "suffix" ::= '-' [a-zA-Z]+ suffix*
ohair@286 914 while (nextSuffix < value.length()) {
ohair@286 915 c = value.charAt(nextSuffix);
ohair@286 916 if (c != '-')
ohair@286 917 break;
ohair@286 918 while (++nextSuffix < value.length()) {
ohair@286 919 c = value.charAt(nextSuffix);
ohair@286 920 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
ohair@286 921 break;
ohair@286 922 }
ohair@286 923 }
ohair@286 924 return value.length() == nextSuffix && c != '-';
ohair@286 925 }
ohair@286 926
ohair@286 927
ohair@286 928 //
ohair@286 929 // CHAPTER 3: Logical Structures
ohair@286 930 //
ohair@286 931
ohair@286 932 /**
ohair@286 933 * To validate, subclassers should at this time make sure that
ohair@286 934 * values are of the declared types:<UL>
ohair@286 935 * <LI> ID and IDREF(S) values are Names
ohair@286 936 * <LI> NMTOKEN(S) are Nmtokens
ohair@286 937 * <LI> ENUMERATION values match one of the tokens
ohair@286 938 * <LI> NOTATION values match a notation name
ohair@286 939 * <LI> ENTITY(IES) values match an unparsed external entity
ohair@286 940 * </UL>
ohair@286 941 * <p/>
ohair@286 942 * <P> Separately, make sure IDREF values match some ID
ohair@286 943 * provided in the document (in the afterRoot method).
ohair@286 944 */
ohair@286 945 /* void validateAttributeSyntax (Attribute attr, String value)
ohair@286 946 throws DTDParseException {
ohair@286 947 // ID, IDREF(S) ... values are Names
ohair@286 948 if (Attribute.ID == attr.type()) {
ohair@286 949 if (!XmlNames.isName (value))
ohair@286 950 error ("V-025", new Object [] { value });
ohair@286 951
ohair@286 952 Boolean b = (Boolean) ids.getNonInterned (value);
ohair@286 953 if (b == null || b.equals (Boolean.FALSE))
ohair@286 954 ids.put (value.intern (), Boolean.TRUE);
ohair@286 955 else
ohair@286 956 error ("V-026", new Object [] { value });
ohair@286 957
ohair@286 958 } else if (Attribute.IDREF == attr.type()) {
ohair@286 959 if (!XmlNames.isName (value))
ohair@286 960 error ("V-027", new Object [] { value });
ohair@286 961
ohair@286 962 Boolean b = (Boolean) ids.getNonInterned (value);
ohair@286 963 if (b == null)
ohair@286 964 ids.put (value.intern (), Boolean.FALSE);
ohair@286 965
ohair@286 966 } else if (Attribute.IDREFS == attr.type()) {
ohair@286 967 StringTokenizer tokenizer = new StringTokenizer (value);
ohair@286 968 Boolean b;
ohair@286 969 boolean sawValue = false;
ohair@286 970
ohair@286 971 while (tokenizer.hasMoreTokens ()) {
ohair@286 972 value = tokenizer.nextToken ();
ohair@286 973 if (!XmlNames.isName (value))
ohair@286 974 error ("V-027", new Object [] { value });
ohair@286 975 b = (Boolean) ids.getNonInterned (value);
ohair@286 976 if (b == null)
ohair@286 977 ids.put (value.intern (), Boolean.FALSE);
ohair@286 978 sawValue = true;
ohair@286 979 }
ohair@286 980 if (!sawValue)
ohair@286 981 error ("V-039", null);
ohair@286 982
ohair@286 983
ohair@286 984 // NMTOKEN(S) ... values are Nmtoken(s)
ohair@286 985 } else if (Attribute.NMTOKEN == attr.type()) {
ohair@286 986 if (!XmlNames.isNmtoken (value))
ohair@286 987 error ("V-028", new Object [] { value });
ohair@286 988
ohair@286 989 } else if (Attribute.NMTOKENS == attr.type()) {
ohair@286 990 StringTokenizer tokenizer = new StringTokenizer (value);
ohair@286 991 boolean sawValue = false;
ohair@286 992
ohair@286 993 while (tokenizer.hasMoreTokens ()) {
ohair@286 994 value = tokenizer.nextToken ();
ohair@286 995 if (!XmlNames.isNmtoken (value))
ohair@286 996 error ("V-028", new Object [] { value });
ohair@286 997 sawValue = true;
ohair@286 998 }
ohair@286 999 if (!sawValue)
ohair@286 1000 error ("V-032", null);
ohair@286 1001
ohair@286 1002 // ENUMERATION ... values match one of the tokens
ohair@286 1003 } else if (Attribute.ENUMERATION == attr.type()) {
ohair@286 1004 for (int i = 0; i < attr.values().length; i++)
ohair@286 1005 if (value.equals (attr.values()[i]))
ohair@286 1006 return;
ohair@286 1007 error ("V-029", new Object [] { value });
ohair@286 1008
ohair@286 1009 // NOTATION values match a notation name
ohair@286 1010 } else if (Attribute.NOTATION == attr.type()) {
ohair@286 1011 //
ohair@286 1012 // XXX XML 1.0 spec should probably list references to
ohair@286 1013 // externally defined notations in standalone docs as
ohair@286 1014 // validity errors. Ditto externally defined unparsed
ohair@286 1015 // entities; neither should show up in attributes, else
ohair@286 1016 // one needs to read the external declarations in order
ohair@286 1017 // to make sense of the document (exactly what tagging
ohair@286 1018 // a doc as "standalone" intends you won't need to do).
ohair@286 1019 //
ohair@286 1020 for (int i = 0; i < attr.values().length; i++)
ohair@286 1021 if (value.equals (attr.values()[i]))
ohair@286 1022 return;
ohair@286 1023 error ("V-030", new Object [] { value });
ohair@286 1024
ohair@286 1025 // ENTITY(IES) values match an unparsed entity(ies)
ohair@286 1026 } else if (Attribute.ENTITY == attr.type()) {
ohair@286 1027 // see note above re standalone
ohair@286 1028 if (!isUnparsedEntity (value))
ohair@286 1029 error ("V-031", new Object [] { value });
ohair@286 1030
ohair@286 1031 } else if (Attribute.ENTITIES == attr.type()) {
ohair@286 1032 StringTokenizer tokenizer = new StringTokenizer (value);
ohair@286 1033 boolean sawValue = false;
ohair@286 1034
ohair@286 1035 while (tokenizer.hasMoreTokens ()) {
ohair@286 1036 value = tokenizer.nextToken ();
ohair@286 1037 // see note above re standalone
ohair@286 1038 if (!isUnparsedEntity (value))
ohair@286 1039 error ("V-031", new Object [] { value });
ohair@286 1040 sawValue = true;
ohair@286 1041 }
ohair@286 1042 if (!sawValue)
ohair@286 1043 error ("V-040", null);
ohair@286 1044
ohair@286 1045 } else if (Attribute.CDATA != attr.type())
ohair@286 1046 throw new InternalError (attr.type());
ohair@286 1047 }
ohair@286 1048 */
ohair@286 1049 /*
ohair@286 1050 private boolean isUnparsedEntity (String name)
ohair@286 1051 {
ohair@286 1052 Object e = entities.getNonInterned (name);
ohair@286 1053 if (e == null || !(e instanceof ExternalEntity))
ohair@286 1054 return false;
ohair@286 1055 return ((ExternalEntity)e).notation != null;
ohair@286 1056 }
ohair@286 1057 */
ohair@286 1058 private boolean maybeElementDecl()
ohair@286 1059 throws IOException, SAXException {
ohair@286 1060
ohair@286 1061 // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
ohair@286 1062 // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
ohair@286 1063 InputEntity start = peekDeclaration("!ELEMENT");
ohair@286 1064
ohair@286 1065 if (start == null)
ohair@286 1066 return false;
ohair@286 1067
ohair@286 1068 // n.b. for content models where inter-element whitespace is
ohair@286 1069 // ignorable, we mark that fact here.
ohair@286 1070 String name = getMarkupDeclname("F-015", true);
ohair@286 1071 // Element element = (Element) elements.get (name);
ohair@286 1072 // boolean declEffective = false;
ohair@286 1073
ohair@286 1074 /*
ohair@286 1075 if (element != null) {
ohair@286 1076 if (element.contentModel() != null) {
ohair@286 1077 error ("V-012", new Object [] { name });
ohair@286 1078 } // else <!ATTLIST name ...> came first
ohair@286 1079 } else {
ohair@286 1080 element = new Element(name);
ohair@286 1081 elements.put (element.name(), element);
ohair@286 1082 declEffective = true;
ohair@286 1083 }
ohair@286 1084 */
ohair@286 1085 if (declaredElements.contains(name))
ohair@286 1086 error("V-012", new Object[]{name});
ohair@286 1087 else {
ohair@286 1088 declaredElements.add(name);
ohair@286 1089 // declEffective = true;
ohair@286 1090 }
ohair@286 1091
ohair@286 1092 short modelType;
ohair@286 1093 whitespace("F-000");
ohair@286 1094 if (peek(strEMPTY)) {
ohair@286 1095 /// // leave element.contentModel as null for this case.
ohair@286 1096 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
ohair@286 1097 } else if (peek(strANY)) {
ohair@286 1098 /// element.setContentModel(new StringModel(StringModelType.ANY));
ohair@286 1099 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
ohair@286 1100 } else {
ohair@286 1101 modelType = getMixedOrChildren(name);
ohair@286 1102 }
ohair@286 1103
ohair@286 1104 dtdHandler.endContentModel(name, modelType);
ohair@286 1105
ohair@286 1106 maybeWhitespace();
ohair@286 1107 char c = getc();
ohair@286 1108 if (c != '>')
ohair@286 1109 fatal("P-036", new Object[]{name, new Character(c)});
ohair@286 1110 if (start != in)
ohair@286 1111 error("V-013", null);
ohair@286 1112
ohair@286 1113 /// dtdHandler.elementDecl(element);
ohair@286 1114
ohair@286 1115 return true;
ohair@286 1116 }
ohair@286 1117
ohair@286 1118 // We're leaving the content model as a regular expression;
ohair@286 1119 // it's an efficient natural way to express such things, and
ohair@286 1120 // libraries often interpret them. No whitespace in the
ohair@286 1121 // model we store, though!
ohair@286 1122
ohair@286 1123 /**
ohair@286 1124 * returns content model type.
ohair@286 1125 */
ohair@286 1126 private short getMixedOrChildren(String elementName/*Element element*/)
ohair@286 1127 throws IOException, SAXException {
ohair@286 1128
ohair@286 1129 InputEntity start;
ohair@286 1130
ohair@286 1131 // [47] children ::= (choice|seq) ('?'|'*'|'+')?
ohair@286 1132 strTmp = new StringBuffer();
ohair@286 1133
ohair@286 1134 nextChar('(', "F-028", elementName);
ohair@286 1135 start = in;
ohair@286 1136 maybeWhitespace();
ohair@286 1137 strTmp.append('(');
ohair@286 1138
ohair@286 1139 short modelType;
ohair@286 1140 if (peek("#PCDATA")) {
ohair@286 1141 strTmp.append("#PCDATA");
ohair@286 1142 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);
ohair@286 1143 getMixed(elementName, start);
ohair@286 1144 } else {
ohair@286 1145 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
ohair@286 1146 getcps(elementName, start);
ohair@286 1147 }
ohair@286 1148
ohair@286 1149 return modelType;
ohair@286 1150 }
ohair@286 1151
ohair@286 1152 // '(' S? already consumed
ohair@286 1153 // matching ')' must be in "start" entity if validating
ohair@286 1154 private void getcps(/*Element element,*/String elementName, InputEntity start)
ohair@286 1155 throws IOException, SAXException {
ohair@286 1156
ohair@286 1157 // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
ohair@286 1158 // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
ohair@286 1159 // [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
ohair@286 1160 boolean decided = false;
ohair@286 1161 char type = 0;
ohair@286 1162 // ContentModel retval, temp, current;
ohair@286 1163
ohair@286 1164 // retval = temp = current = null;
ohair@286 1165
ohair@286 1166 dtdHandler.startModelGroup();
ohair@286 1167
ohair@286 1168 do {
ohair@286 1169 String tag;
ohair@286 1170
ohair@286 1171 tag = maybeGetName();
ohair@286 1172 if (tag != null) {
ohair@286 1173 strTmp.append(tag);
ohair@286 1174 // temp = new ElementModel(tag);
ohair@286 1175 // getFrequency((RepeatableContent)temp);
ohair@286 1176 ///->
ohair@286 1177 dtdHandler.childElement(tag, getFrequency());
ohair@286 1178 ///<-
ohair@286 1179 } else if (peek("(")) {
ohair@286 1180 InputEntity next = in;
ohair@286 1181 strTmp.append('(');
ohair@286 1182 maybeWhitespace();
ohair@286 1183 // temp = getcps(element, next);
ohair@286 1184 // getFrequency(temp);
ohair@286 1185 ///->
ohair@286 1186 getcps(elementName, next);
ohair@286 1187 /// getFrequency(); <- this looks like a bug
ohair@286 1188 ///<-
ohair@286 1189 } else
ohair@286 1190 fatal((type == 0) ? "P-039" :
ohair@286 1191 ((type == ',') ? "P-037" : "P-038"),
ohair@286 1192 new Object[]{new Character(getc())});
ohair@286 1193
ohair@286 1194 maybeWhitespace();
ohair@286 1195 if (decided) {
ohair@286 1196 char c = getc();
ohair@286 1197
ohair@286 1198 // if (current != null) {
ohair@286 1199 // current.addChild(temp);
ohair@286 1200 // }
ohair@286 1201 if (c == type) {
ohair@286 1202 strTmp.append(type);
ohair@286 1203 maybeWhitespace();
ohair@286 1204 reportConnector(type);
ohair@286 1205 continue;
ohair@286 1206 } else if (c == '\u0029') { // rparen
ohair@286 1207 ungetc();
ohair@286 1208 continue;
ohair@286 1209 } else {
ohair@286 1210 fatal((type == 0) ? "P-041" : "P-040",
ohair@286 1211 new Object[]{
ohair@286 1212 new Character(c),
ohair@286 1213 new Character(type)
ohair@286 1214 });
ohair@286 1215 }
ohair@286 1216 } else {
ohair@286 1217 type = getc();
ohair@286 1218 switch (type) {
ohair@286 1219 case '|':
ohair@286 1220 case ',':
ohair@286 1221 reportConnector(type);
ohair@286 1222 break;
ohair@286 1223 default:
ohair@286 1224 // retval = temp;
ohair@286 1225 ungetc();
ohair@286 1226 continue;
ohair@286 1227 }
ohair@286 1228 // retval = (ContentModel)current;
ohair@286 1229 decided = true;
ohair@286 1230 // current.addChild(temp);
ohair@286 1231 strTmp.append(type);
ohair@286 1232 }
ohair@286 1233 maybeWhitespace();
ohair@286 1234 } while (!peek(")"));
ohair@286 1235
ohair@286 1236 if (in != start)
ohair@286 1237 error("V-014", new Object[]{elementName});
ohair@286 1238 strTmp.append(')');
ohair@286 1239
ohair@286 1240 dtdHandler.endModelGroup(getFrequency());
ohair@286 1241 // return retval;
ohair@286 1242 }
ohair@286 1243
ohair@286 1244 private void reportConnector(char type) throws SAXException {
ohair@286 1245 switch (type) {
ohair@286 1246 case '|':
ohair@286 1247 dtdHandler.connector(DTDEventListener.CHOICE); ///<-
ohair@286 1248 return;
ohair@286 1249 case ',':
ohair@286 1250 dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
ohair@286 1251 return;
ohair@286 1252 default:
ohair@286 1253 throw new Error(); //assertion failed.
ohair@286 1254 }
ohair@286 1255 }
ohair@286 1256
ohair@286 1257 private short getFrequency()
ohair@286 1258 throws IOException, SAXException {
ohair@286 1259
ohair@286 1260 final char c = getc();
ohair@286 1261
ohair@286 1262 if (c == '?') {
ohair@286 1263 strTmp.append(c);
ohair@286 1264 return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
ohair@286 1265 // original.setRepeat(Repeat.ZERO_OR_ONE);
ohair@286 1266 } else if (c == '+') {
ohair@286 1267 strTmp.append(c);
ohair@286 1268 return DTDEventListener.OCCURENCE_ONE_OR_MORE;
ohair@286 1269 // original.setRepeat(Repeat.ONE_OR_MORE);
ohair@286 1270 } else if (c == '*') {
ohair@286 1271 strTmp.append(c);
ohair@286 1272 return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
ohair@286 1273 // original.setRepeat(Repeat.ZERO_OR_MORE);
ohair@286 1274 } else {
ohair@286 1275 ungetc();
ohair@286 1276 return DTDEventListener.OCCURENCE_ONCE;
ohair@286 1277 }
ohair@286 1278 }
ohair@286 1279
ohair@286 1280 // '(' S? '#PCDATA' already consumed
ohair@286 1281 // matching ')' must be in "start" entity if validating
ohair@286 1282 private void getMixed(String elementName, /*Element element,*/ InputEntity start)
ohair@286 1283 throws IOException, SAXException {
ohair@286 1284
ohair@286 1285 // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
ohair@286 1286 // | '(' S? '#PCDATA' S? ')'
ohair@286 1287 maybeWhitespace();
ohair@286 1288 if (peek("\u0029*") || peek("\u0029")) {
ohair@286 1289 if (in != start)
ohair@286 1290 error("V-014", new Object[]{elementName});
ohair@286 1291 strTmp.append(')');
ohair@286 1292 // element.setContentModel(new StringModel(StringModelType.PCDATA));
ohair@286 1293 return;
ohair@286 1294 }
ohair@286 1295
ohair@286 1296 ArrayList l = new ArrayList();
ohair@286 1297 // l.add(new StringModel(StringModelType.PCDATA));
ohair@286 1298
ohair@286 1299
ohair@286 1300 while (peek("|")) {
ohair@286 1301 String name;
ohair@286 1302
ohair@286 1303 strTmp.append('|');
ohair@286 1304 maybeWhitespace();
ohair@286 1305
ohair@286 1306 doLexicalPE = true;
ohair@286 1307 name = maybeGetName();
ohair@286 1308 if (name == null)
ohair@286 1309 fatal("P-042", new Object[]
ohair@286 1310 {elementName, Integer.toHexString(getc())});
ohair@286 1311 if (l.contains(name)) {
ohair@286 1312 error("V-015", new Object[]{name});
ohair@286 1313 } else {
ohair@286 1314 l.add(name);
ohair@286 1315 dtdHandler.mixedElement(name);
ohair@286 1316 }
ohair@286 1317 strTmp.append(name);
ohair@286 1318 maybeWhitespace();
ohair@286 1319 }
ohair@286 1320
ohair@286 1321 if (!peek("\u0029*")) // right paren
ohair@286 1322 fatal("P-043", new Object[]
ohair@286 1323 {elementName, new Character(getc())});
ohair@286 1324 if (in != start)
ohair@286 1325 error("V-014", new Object[]{elementName});
ohair@286 1326 strTmp.append(')');
ohair@286 1327 // ChoiceModel cm = new ChoiceModel((Collection)l);
ohair@286 1328 // cm.setRepeat(Repeat.ZERO_OR_MORE);
ohair@286 1329 // element.setContentModel(cm);
ohair@286 1330 }
ohair@286 1331
ohair@286 1332 private boolean maybeAttlistDecl()
ohair@286 1333 throws IOException, SAXException {
ohair@286 1334
ohair@286 1335 // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
ohair@286 1336 InputEntity start = peekDeclaration("!ATTLIST");
ohair@286 1337
ohair@286 1338 if (start == null)
ohair@286 1339 return false;
ohair@286 1340
ohair@286 1341 String elementName = getMarkupDeclname("F-016", true);
ohair@286 1342 // Element element = (Element) elements.get (name);
ohair@286 1343
ohair@286 1344 // if (element == null) {
ohair@286 1345 // // not yet declared -- no problem.
ohair@286 1346 // element = new Element(name);
ohair@286 1347 // elements.put(name, element);
ohair@286 1348 // }
ohair@286 1349
ohair@286 1350 while (!peek(">")) {
ohair@286 1351
ohair@286 1352 // [53] AttDef ::= S Name S AttType S DefaultDecl
ohair@286 1353 // [54] AttType ::= StringType | TokenizedType | EnumeratedType
ohair@286 1354
ohair@286 1355 // look for global attribute definitions, don't expand for now...
ohair@286 1356 maybeWhitespace();
ohair@286 1357 char c = getc();
ohair@286 1358 if (c == '%') {
ohair@286 1359 String entityName = maybeGetName();
ohair@286 1360 if (entityName != null) {
ohair@286 1361 nextChar(';', "F-021", entityName);
ohair@286 1362 whitespace("F-021");
ohair@286 1363 continue;
ohair@286 1364 } else
ohair@286 1365 fatal("P-011");
ohair@286 1366 }
ohair@286 1367
ohair@286 1368 ungetc();
ohair@286 1369 // look for attribute name otherwise
ohair@286 1370 String attName = maybeGetName();
ohair@286 1371 if (attName == null) {
ohair@286 1372 fatal("P-044", new Object[]{new Character(getc())});
ohair@286 1373 }
ohair@286 1374 whitespace("F-001");
ohair@286 1375
ohair@286 1376 /// Attribute a = new Attribute (name);
ohair@286 1377
ohair@286 1378 String typeName;
ohair@286 1379 Vector values = null; // notation/enumeration values
ohair@286 1380
ohair@286 1381 // Note: use the type constants from Attribute
ohair@286 1382 // so that "==" may be used (faster)
ohair@286 1383
ohair@286 1384 // [55] StringType ::= 'CDATA'
ohair@286 1385 if (peek(TYPE_CDATA))
ohair@286 1386 /// a.setType(Attribute.CDATA);
ohair@286 1387 typeName = TYPE_CDATA;
ohair@286 1388
ohair@286 1389 // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
ohair@286 1390 // | 'ENTITY' | 'ENTITIES'
ohair@286 1391 // | 'NMTOKEN' | 'NMTOKENS'
ohair@286 1392 // n.b. if "IDREFS" is there, both "ID" and "IDREF"
ohair@286 1393 // match peekahead ... so this order matters!
ohair@286 1394 else if (peek(TYPE_IDREFS))
ohair@286 1395 typeName = TYPE_IDREFS;
ohair@286 1396 else if (peek(TYPE_IDREF))
ohair@286 1397 typeName = TYPE_IDREF;
ohair@286 1398 else if (peek(TYPE_ID)) {
ohair@286 1399 typeName = TYPE_ID;
ohair@286 1400 // TODO: should implement this error check?
ohair@286 1401 /// if (element.id() != null) {
ohair@286 1402 /// error ("V-016", new Object [] { element.id() });
ohair@286 1403 /// } else
ohair@286 1404 /// element.setId(name);
ohair@286 1405 } else if (peek(TYPE_ENTITY))
ohair@286 1406 typeName = TYPE_ENTITY;
ohair@286 1407 else if (peek(TYPE_ENTITIES))
ohair@286 1408 typeName = TYPE_ENTITIES;
ohair@286 1409 else if (peek(TYPE_NMTOKENS))
ohair@286 1410 typeName = TYPE_NMTOKENS;
ohair@286 1411 else if (peek(TYPE_NMTOKEN))
ohair@286 1412 typeName = TYPE_NMTOKEN;
ohair@286 1413
ohair@286 1414 // [57] EnumeratedType ::= NotationType | Enumeration
ohair@286 1415 // [58] NotationType ::= 'NOTATION' S '(' S? Name
ohair@286 1416 // (S? '|' S? Name)* S? ')'
ohair@286 1417 else if (peek(TYPE_NOTATION)) {
ohair@286 1418 typeName = TYPE_NOTATION;
ohair@286 1419 whitespace("F-002");
ohair@286 1420 nextChar('(', "F-029", null);
ohair@286 1421 maybeWhitespace();
ohair@286 1422
ohair@286 1423 values = new Vector();
ohair@286 1424 do {
ohair@286 1425 String name;
ohair@286 1426 if ((name = maybeGetName()) == null)
ohair@286 1427 fatal("P-068");
ohair@286 1428 // permit deferred declarations
ohair@286 1429 if (notations.get(name) == null)
ohair@286 1430 notations.put(name, name);
ohair@286 1431 values.addElement(name);
ohair@286 1432 maybeWhitespace();
ohair@286 1433 if (peek("|"))
ohair@286 1434 maybeWhitespace();
ohair@286 1435 } while (!peek(")"));
ohair@286 1436 /// a.setValues(new String [v.size ()]);
ohair@286 1437 /// for (int i = 0; i < v.size (); i++)
ohair@286 1438 /// a.setValue(i, (String)v.elementAt(i));
ohair@286 1439
ohair@286 1440 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
ohair@286 1441 } else if (peek("(")) {
ohair@286 1442 /// a.setType(Attribute.ENUMERATION);
ohair@286 1443 typeName = TYPE_ENUMERATION;
ohair@286 1444
ohair@286 1445 maybeWhitespace();
ohair@286 1446
ohair@286 1447 /// Vector v = new Vector ();
ohair@286 1448 values = new Vector();
ohair@286 1449 do {
ohair@286 1450 String name = getNmtoken();
ohair@286 1451 /// v.addElement (name);
ohair@286 1452 values.addElement(name);
ohair@286 1453 maybeWhitespace();
ohair@286 1454 if (peek("|"))
ohair@286 1455 maybeWhitespace();
ohair@286 1456 } while (!peek(")"));
ohair@286 1457 /// a.setValues(new String [v.size ()]);
ohair@286 1458 /// for (int i = 0; i < v.size (); i++)
ohair@286 1459 /// a.setValue(i, (String)v.elementAt(i));
ohair@286 1460 } else {
ohair@286 1461 fatal("P-045",
ohair@286 1462 new Object[]{attName, new Character(getc())});
ohair@286 1463 typeName = null;
ohair@286 1464 }
ohair@286 1465
ohair@286 1466 short attributeUse;
ohair@286 1467 String defaultValue = null;
ohair@286 1468
ohair@286 1469 // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
ohair@286 1470 // | (('#FIXED' S)? AttValue)
ohair@286 1471 whitespace("F-003");
ohair@286 1472 if (peek("#REQUIRED"))
ohair@286 1473 attributeUse = DTDEventListener.USE_REQUIRED;
ohair@286 1474 /// a.setIsRequired(true);
ohair@286 1475 else if (peek("#FIXED")) {
ohair@286 1476 /// if (a.type() == Attribute.ID)
ohair@286 1477 if (typeName == TYPE_ID)
ohair@286 1478 error("V-017", new Object[]{attName});
ohair@286 1479 /// a.setIsFixed(true);
ohair@286 1480 attributeUse = DTDEventListener.USE_FIXED;
ohair@286 1481 whitespace("F-004");
ohair@286 1482 parseLiteral(false);
ohair@286 1483 /// if (a.type() != Attribute.CDATA)
ohair@286 1484 /// a.setDefaultValue(normalize(false));
ohair@286 1485 /// else
ohair@286 1486 /// a.setDefaultValue(strTmp.toString());
ohair@286 1487
ohair@286 1488 if (typeName == TYPE_CDATA)
ohair@286 1489 defaultValue = normalize(false);
ohair@286 1490 else
ohair@286 1491 defaultValue = strTmp.toString();
ohair@286 1492
ohair@286 1493 // TODO: implement this check
ohair@286 1494 /// if (a.type() != Attribute.CDATA)
ohair@286 1495 /// validateAttributeSyntax (a, a.defaultValue());
ohair@286 1496 } else if (!peek("#IMPLIED")) {
ohair@286 1497 attributeUse = DTDEventListener.USE_IMPLIED;
ohair@286 1498
ohair@286 1499 /// if (a.type() == Attribute.ID)
ohair@286 1500 if (typeName == TYPE_ID)
ohair@286 1501 error("V-018", new Object[]{attName});
ohair@286 1502 parseLiteral(false);
ohair@286 1503 /// if (a.type() != Attribute.CDATA)
ohair@286 1504 /// a.setDefaultValue(normalize(false));
ohair@286 1505 /// else
ohair@286 1506 /// a.setDefaultValue(strTmp.toString());
ohair@286 1507 if (typeName == TYPE_CDATA)
ohair@286 1508 defaultValue = normalize(false);
ohair@286 1509 else
ohair@286 1510 defaultValue = strTmp.toString();
ohair@286 1511
ohair@286 1512 // TODO: implement this check
ohair@286 1513 /// if (a.type() != Attribute.CDATA)
ohair@286 1514 /// validateAttributeSyntax (a, a.defaultValue());
ohair@286 1515 } else {
ohair@286 1516 // TODO: this looks like an fatal error.
ohair@286 1517 attributeUse = DTDEventListener.USE_NORMAL;
ohair@286 1518 }
ohair@286 1519
ohair@286 1520 if (XmlLang.equals(attName)
ohair@286 1521 && defaultValue/* a.defaultValue()*/ != null
ohair@286 1522 && !isXmlLang(defaultValue/*a.defaultValue()*/))
ohair@286 1523 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});
ohair@286 1524
ohair@286 1525 // TODO: isn't it an error to specify the same attribute twice?
ohair@286 1526 /// if (!element.attributes().contains(a)) {
ohair@286 1527 /// element.addAttribute(a);
ohair@286 1528 /// dtdHandler.attributeDecl(a);
ohair@286 1529 /// }
ohair@286 1530
ohair@286 1531 String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null;
ohair@286 1532 dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
ohair@286 1533 maybeWhitespace();
ohair@286 1534 }
ohair@286 1535 if (start != in)
ohair@286 1536 error("V-013", null);
ohair@286 1537 return true;
ohair@286 1538 }
ohair@286 1539
ohair@286 1540 // used when parsing literal attribute values,
ohair@286 1541 // or public identifiers.
ohair@286 1542 //
ohair@286 1543 // input in strTmp
ohair@286 1544 private String normalize(boolean invalidIfNeeded) {
ohair@286 1545
ohair@286 1546 // this can allocate an extra string...
ohair@286 1547
ohair@286 1548 String s = strTmp.toString();
ohair@286 1549 String s2 = s.trim();
ohair@286 1550 boolean didStrip = false;
ohair@286 1551
ohair@286 1552 if (s != s2) {
ohair@286 1553 s = s2;
ohair@286 1554 s2 = null;
ohair@286 1555 didStrip = true;
ohair@286 1556 }
ohair@286 1557 strTmp = new StringBuffer();
ohair@286 1558 for (int i = 0; i < s.length(); i++) {
ohair@286 1559 char c = s.charAt(i);
ohair@286 1560 if (!XmlChars.isSpace(c)) {
ohair@286 1561 strTmp.append(c);
ohair@286 1562 continue;
ohair@286 1563 }
ohair@286 1564 strTmp.append(' ');
ohair@286 1565 while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
ohair@286 1566 didStrip = true;
ohair@286 1567 i--;
ohair@286 1568 }
ohair@286 1569 if (didStrip)
ohair@286 1570 return strTmp.toString();
ohair@286 1571 else
ohair@286 1572 return s;
ohair@286 1573 }
ohair@286 1574
ohair@286 1575 private boolean maybeConditionalSect()
ohair@286 1576 throws IOException, SAXException {
ohair@286 1577
ohair@286 1578 // [61] conditionalSect ::= includeSect | ignoreSect
ohair@286 1579
ohair@286 1580 if (!peek("<!["))
ohair@286 1581 return false;
ohair@286 1582
ohair@286 1583 String keyword;
ohair@286 1584 InputEntity start = in;
ohair@286 1585
ohair@286 1586 maybeWhitespace();
ohair@286 1587
ohair@286 1588 if ((keyword = maybeGetName()) == null)
ohair@286 1589 fatal("P-046");
ohair@286 1590 maybeWhitespace();
ohair@286 1591 nextChar('[', "F-030", null);
ohair@286 1592
ohair@286 1593 // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
ohair@286 1594 // extSubsetDecl ']]>'
ohair@286 1595 if ("INCLUDE".equals(keyword)) {
ohair@286 1596 for (; ;) {
ohair@286 1597 while (in.isEOF() && in != start)
ohair@286 1598 in = in.pop();
ohair@286 1599 if (in.isEOF()) {
ohair@286 1600 error("V-020", null);
ohair@286 1601 }
ohair@286 1602 if (peek("]]>"))
ohair@286 1603 break;
ohair@286 1604
ohair@286 1605 doLexicalPE = false;
ohair@286 1606 if (maybeWhitespace())
ohair@286 1607 continue;
ohair@286 1608 if (maybePEReference())
ohair@286 1609 continue;
ohair@286 1610 doLexicalPE = true;
ohair@286 1611 if (maybeMarkupDecl() || maybeConditionalSect())
ohair@286 1612 continue;
ohair@286 1613
ohair@286 1614 fatal("P-047");
ohair@286 1615 }
ohair@286 1616
ohair@286 1617 // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
ohair@286 1618 // ignoreSectcontents ']]>'
ohair@286 1619 // [64] ignoreSectcontents ::= Ignore ('<!['
ohair@286 1620 // ignoreSectcontents ']]>' Ignore)*
ohair@286 1621 // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
ohair@286 1622 } else if ("IGNORE".equals(keyword)) {
ohair@286 1623 int nestlevel = 1;
ohair@286 1624 // ignoreSectcontents
ohair@286 1625 doLexicalPE = false;
ohair@286 1626 while (nestlevel > 0) {
ohair@286 1627 char c = getc(); // will pop input entities
ohair@286 1628 if (c == '<') {
ohair@286 1629 if (peek("!["))
ohair@286 1630 nestlevel++;
ohair@286 1631 } else if (c == ']') {
ohair@286 1632 if (peek("]>"))
ohair@286 1633 nestlevel--;
ohair@286 1634 } else
ohair@286 1635 continue;
ohair@286 1636 }
ohair@286 1637 } else
ohair@286 1638 fatal("P-048", new Object[]{keyword});
ohair@286 1639 return true;
ohair@286 1640 }
ohair@286 1641
ohair@286 1642
ohair@286 1643 //
ohair@286 1644 // CHAPTER 4: Physical Structures
ohair@286 1645 //
ohair@286 1646
ohair@286 1647 // parse decimal or hex numeric character reference
ohair@286 1648 private int parseCharNumber()
ohair@286 1649 throws IOException, SAXException {
ohair@286 1650
ohair@286 1651 char c;
ohair@286 1652 int retval = 0;
ohair@286 1653
ohair@286 1654 // n.b. we ignore overflow ...
ohair@286 1655 if (getc() != 'x') {
ohair@286 1656 ungetc();
ohair@286 1657 for (; ;) {
ohair@286 1658 c = getc();
ohair@286 1659 if (c >= '0' && c <= '9') {
ohair@286 1660 retval *= 10;
ohair@286 1661 retval += (c - '0');
ohair@286 1662 continue;
ohair@286 1663 }
ohair@286 1664 if (c == ';')
ohair@286 1665 return retval;
ohair@286 1666 fatal("P-049");
ohair@286 1667 }
ohair@286 1668 } else
ohair@286 1669 for (; ;) {
ohair@286 1670 c = getc();
ohair@286 1671 if (c >= '0' && c <= '9') {
ohair@286 1672 retval <<= 4;
ohair@286 1673 retval += (c - '0');
ohair@286 1674 continue;
ohair@286 1675 }
ohair@286 1676 if (c >= 'a' && c <= 'f') {
ohair@286 1677 retval <<= 4;
ohair@286 1678 retval += 10 + (c - 'a');
ohair@286 1679 continue;
ohair@286 1680 }
ohair@286 1681 if (c >= 'A' && c <= 'F') {
ohair@286 1682 retval <<= 4;
ohair@286 1683 retval += 10 + (c - 'A');
ohair@286 1684 continue;
ohair@286 1685 }
ohair@286 1686 if (c == ';')
ohair@286 1687 return retval;
ohair@286 1688 fatal("P-050");
ohair@286 1689 }
ohair@286 1690 }
ohair@286 1691
ohair@286 1692 // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
ohair@286 1693 // though still subject to the 'Char' construct in XML
ohair@286 1694 private int surrogatesToCharTmp(int ucs4)
ohair@286 1695 throws SAXException {
ohair@286 1696
ohair@286 1697 if (ucs4 <= 0xffff) {
ohair@286 1698 if (XmlChars.isChar(ucs4)) {
ohair@286 1699 charTmp[0] = (char) ucs4;
ohair@286 1700 return 1;
ohair@286 1701 }
ohair@286 1702 } else if (ucs4 <= 0x0010ffff) {
ohair@286 1703 // we represent these as UNICODE surrogate pairs
ohair@286 1704 ucs4 -= 0x10000;
ohair@286 1705 charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
ohair@286 1706 charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
ohair@286 1707 return 2;
ohair@286 1708 }
ohair@286 1709 fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
ohair@286 1710 // NOTREACHED
ohair@286 1711 return -1;
ohair@286 1712 }
ohair@286 1713
ohair@286 1714 private boolean maybePEReference()
ohair@286 1715 throws IOException, SAXException {
ohair@286 1716
ohair@286 1717 // This is the SYNTACTIC version of this construct.
ohair@286 1718 // When processing external entities, there is also
ohair@286 1719 // a LEXICAL version; see getc() and doLexicalPE.
ohair@286 1720
ohair@286 1721 // [69] PEReference ::= '%' Name ';'
ohair@286 1722 if (!in.peekc('%'))
ohair@286 1723 return false;
ohair@286 1724
ohair@286 1725 String name = maybeGetName();
ohair@286 1726 Object entity;
ohair@286 1727
ohair@286 1728 if (name == null)
ohair@286 1729 fatal("P-011");
ohair@286 1730 nextChar(';', "F-021", name);
ohair@286 1731 entity = params.get(name);
ohair@286 1732
ohair@286 1733 if (entity instanceof InternalEntity) {
ohair@286 1734 InternalEntity value = (InternalEntity) entity;
ohair@286 1735 pushReader(value.buf, name, false);
ohair@286 1736
ohair@286 1737 } else if (entity instanceof ExternalEntity) {
ohair@286 1738 pushReader((ExternalEntity) entity);
ohair@286 1739 externalParameterEntity((ExternalEntity) entity);
ohair@286 1740
ohair@286 1741 } else if (entity == null) {
ohair@286 1742 error("V-022", new Object[]{name});
ohair@286 1743 }
ohair@286 1744 return true;
ohair@286 1745 }
ohair@286 1746
ohair@286 1747 private boolean maybeEntityDecl()
ohair@286 1748 throws IOException, SAXException {
ohair@286 1749
ohair@286 1750 // [70] EntityDecl ::= GEDecl | PEDecl
ohair@286 1751 // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
ohair@286 1752 // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>'
ohair@286 1753 // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
ohair@286 1754 // [74] PEDef ::= EntityValue | ExternalID
ohair@286 1755 //
ohair@286 1756 InputEntity start = peekDeclaration("!ENTITY");
ohair@286 1757
ohair@286 1758 if (start == null)
ohair@286 1759 return false;
ohair@286 1760
ohair@286 1761 String entityName;
ohair@286 1762 SimpleHashtable defns;
ohair@286 1763 ExternalEntity externalId;
ohair@286 1764 boolean doStore;
ohair@286 1765
ohair@286 1766 // PE expansion gets selectively turned off several places:
ohair@286 1767 // in ENTITY declarations (here), in comments, in PIs.
ohair@286 1768
ohair@286 1769 // Here, we allow PE entities to be declared, and allows
ohair@286 1770 // literals to include PE refs without the added spaces
ohair@286 1771 // required with their expansion in markup decls.
ohair@286 1772
ohair@286 1773 doLexicalPE = false;
ohair@286 1774 whitespace("F-005");
ohair@286 1775 if (in.peekc('%')) {
ohair@286 1776 whitespace("F-006");
ohair@286 1777 defns = params;
ohair@286 1778 } else
ohair@286 1779 defns = entities;
ohair@286 1780
ohair@286 1781 ungetc(); // leave some whitespace
ohair@286 1782 doLexicalPE = true;
ohair@286 1783 entityName = getMarkupDeclname("F-017", false);
ohair@286 1784 whitespace("F-007");
ohair@286 1785 externalId = maybeExternalID();
ohair@286 1786
ohair@286 1787 //
ohair@286 1788 // first definition sticks ... e.g. internal subset PEs are used
ohair@286 1789 // to override DTD defaults. It's also an "error" to incorrectly
ohair@286 1790 // redefine builtin internal entities, but since reporting such
ohair@286 1791 // errors is optional we only give warnings ("just in case") for
ohair@286 1792 // non-parameter entities.
ohair@286 1793 //
ohair@286 1794 doStore = (defns.get(entityName) == null);
ohair@286 1795 if (!doStore && defns == entities)
ohair@286 1796 warning("P-054", new Object[]{entityName});
ohair@286 1797
ohair@286 1798 // internal entities
ohair@286 1799 if (externalId == null) {
ohair@286 1800 char value [];
ohair@286 1801 InternalEntity entity;
ohair@286 1802
ohair@286 1803 doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd"
ohair@286 1804 parseLiteral(true);
ohair@286 1805 doLexicalPE = true;
ohair@286 1806 if (doStore) {
ohair@286 1807 value = new char[strTmp.length()];
ohair@286 1808 if (value.length != 0)
ohair@286 1809 strTmp.getChars(0, value.length, value, 0);
ohair@286 1810 entity = new InternalEntity(entityName, value);
ohair@286 1811 entity.isPE = (defns == params);
ohair@286 1812 entity.isFromInternalSubset = false;
ohair@286 1813 defns.put(entityName, entity);
ohair@286 1814 if (defns == entities)
ohair@286 1815 dtdHandler.internalGeneralEntityDecl(entityName,
ohair@286 1816 new String(value));
ohair@286 1817 }
ohair@286 1818
ohair@286 1819 // external entities (including unparsed)
ohair@286 1820 } else {
ohair@286 1821 // [76] NDataDecl ::= S 'NDATA' S Name
ohair@286 1822 if (defns == entities && maybeWhitespace()
ohair@286 1823 && peek("NDATA")) {
ohair@286 1824 externalId.notation = getMarkupDeclname("F-018", false);
ohair@286 1825
ohair@286 1826 // flag undeclared notation for checking after
ohair@286 1827 // the DTD is fully processed
ohair@286 1828 if (notations.get(externalId.notation) == null)
ohair@286 1829 notations.put(externalId.notation, Boolean.TRUE);
ohair@286 1830 }
ohair@286 1831 externalId.name = entityName;
ohair@286 1832 externalId.isPE = (defns == params);
ohair@286 1833 externalId.isFromInternalSubset = false;
ohair@286 1834 if (doStore) {
ohair@286 1835 defns.put(entityName, externalId);
ohair@286 1836 if (externalId.notation != null)
ohair@286 1837 dtdHandler.unparsedEntityDecl(entityName,
ohair@286 1838 externalId.publicId, externalId.systemId,
ohair@286 1839 externalId.notation);
ohair@286 1840 else if (defns == entities)
ohair@286 1841 dtdHandler.externalGeneralEntityDecl(entityName,
ohair@286 1842 externalId.publicId, externalId.systemId);
ohair@286 1843 }
ohair@286 1844 }
ohair@286 1845 maybeWhitespace();
ohair@286 1846 nextChar('>', "F-031", entityName);
ohair@286 1847 if (start != in)
ohair@286 1848 error("V-013", null);
ohair@286 1849 return true;
ohair@286 1850 }
ohair@286 1851
ohair@286 1852 private ExternalEntity maybeExternalID()
ohair@286 1853 throws IOException, SAXException {
ohair@286 1854
ohair@286 1855 // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
ohair@286 1856 // | 'PUBLIC' S' PubidLiteral S Systemliteral
ohair@286 1857 String temp = null;
ohair@286 1858 ExternalEntity retval;
ohair@286 1859
ohair@286 1860 if (peek("PUBLIC")) {
ohair@286 1861 whitespace("F-009");
ohair@286 1862 temp = parsePublicId();
ohair@286 1863 } else if (!peek("SYSTEM"))
ohair@286 1864 return null;
ohair@286 1865
ohair@286 1866 retval = new ExternalEntity(in);
ohair@286 1867 retval.publicId = temp;
ohair@286 1868 whitespace("F-008");
ohair@286 1869 retval.systemId = parseSystemId();
ohair@286 1870 return retval;
ohair@286 1871 }
ohair@286 1872
ohair@286 1873 private String parseSystemId()
ohair@286 1874 throws IOException, SAXException {
ohair@286 1875
ohair@286 1876 String uri = getQuotedString("F-034", null);
ohair@286 1877 int temp = uri.indexOf(':');
ohair@286 1878
ohair@286 1879 // resolve relative URIs ... must do it here since
ohair@286 1880 // it's relative to the source file holding the URI!
ohair@286 1881
ohair@286 1882 // "new java.net.URL (URL, string)" conforms to RFC 1630,
ohair@286 1883 // but we can't use that except when the URI is a URL.
ohair@286 1884 // The entity resolver is allowed to handle URIs that are
ohair@286 1885 // not URLs, so we pass URIs through with scheme intact
ohair@286 1886 if (temp == -1 || uri.indexOf('/') < temp) {
ohair@286 1887 String baseURI;
ohair@286 1888
ohair@286 1889 baseURI = in.getSystemId();
ohair@286 1890 if (baseURI == null)
ohair@286 1891 fatal("P-055", new Object[]{uri});
ohair@286 1892 if (uri.length() == 0)
ohair@286 1893 uri = ".";
ohair@286 1894 baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
ohair@286 1895 if (uri.charAt(0) != '/')
ohair@286 1896 uri = baseURI + uri;
ohair@286 1897 else {
ohair@286 1898 // XXX slashes at the beginning of a relative URI are
ohair@286 1899 // a special case we don't handle.
ohair@286 1900 throw new InternalError();
ohair@286 1901 }
ohair@286 1902
ohair@286 1903 // letting other code map any "/xxx/../" or "/./" to "/",
ohair@286 1904 // since all URIs must handle it the same.
ohair@286 1905 }
ohair@286 1906 // check for fragment ID in URI
ohair@286 1907 if (uri.indexOf('#') != -1)
ohair@286 1908 error("P-056", new Object[]{uri});
ohair@286 1909 return uri;
ohair@286 1910 }
ohair@286 1911
ohair@286 1912 private void maybeTextDecl()
ohair@286 1913 throws IOException, SAXException {
ohair@286 1914
ohair@286 1915 // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
ohair@286 1916 if (peek("<?xml")) {
ohair@286 1917 readVersion(false, "1.0");
ohair@286 1918 readEncoding(true);
ohair@286 1919 maybeWhitespace();
ohair@286 1920 if (!peek("?>"))
ohair@286 1921 fatal("P-057");
ohair@286 1922 }
ohair@286 1923 }
ohair@286 1924
ohair@286 1925 private void externalParameterEntity(ExternalEntity next)
ohair@286 1926 throws IOException, SAXException {
ohair@286 1927
ohair@286 1928 //
ohair@286 1929 // Reap the intended benefits of standalone declarations:
ohair@286 1930 // don't deal with external parameter entities, except to
ohair@286 1931 // validate the standalone declaration.
ohair@286 1932 //
ohair@286 1933
ohair@286 1934 // n.b. "in external parameter entities" (and external
ohair@286 1935 // DTD subset, same grammar) parameter references can
ohair@286 1936 // occur "within" markup declarations ... expansions can
ohair@286 1937 // cross syntax rules. Flagged here; affects getc().
ohair@286 1938
ohair@286 1939 // [79] ExtPE ::= TextDecl? extSubsetDecl
ohair@286 1940 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
ohair@286 1941 // | PEReference | S )*
ohair@286 1942 InputEntity pe;
ohair@286 1943
ohair@286 1944 // XXX if this returns false ...
ohair@286 1945
ohair@286 1946 pe = in;
ohair@286 1947 maybeTextDecl();
ohair@286 1948 while (!pe.isEOF()) {
ohair@286 1949 // pop internal PEs (and whitespace before/after)
ohair@286 1950 if (in.isEOF()) {
ohair@286 1951 in = in.pop();
ohair@286 1952 continue;
ohair@286 1953 }
ohair@286 1954 doLexicalPE = false;
ohair@286 1955 if (maybeWhitespace())
ohair@286 1956 continue;
ohair@286 1957 if (maybePEReference())
ohair@286 1958 continue;
ohair@286 1959 doLexicalPE = true;
ohair@286 1960 if (maybeMarkupDecl() || maybeConditionalSect())
ohair@286 1961 continue;
ohair@286 1962 break;
ohair@286 1963 }
ohair@286 1964 // if (in != pe) throw new InternalError("who popped my PE?");
ohair@286 1965 if (!pe.isEOF())
ohair@286 1966 fatal("P-059", new Object[]{in.getName()});
ohair@286 1967 }
ohair@286 1968
ohair@286 1969 private void readEncoding(boolean must)
ohair@286 1970 throws IOException, SAXException {
ohair@286 1971
ohair@286 1972 // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
ohair@286 1973 String name = maybeReadAttribute("encoding", must);
ohair@286 1974
ohair@286 1975 if (name == null)
ohair@286 1976 return;
ohair@286 1977 for (int i = 0; i < name.length(); i++) {
ohair@286 1978 char c = name.charAt(i);
ohair@286 1979 if ((c >= 'A' && c <= 'Z')
ohair@286 1980 || (c >= 'a' && c <= 'z'))
ohair@286 1981 continue;
ohair@286 1982 if (i != 0
ohair@286 1983 && ((c >= '0' && c <= '9')
ohair@286 1984 || c == '-'
ohair@286 1985 || c == '_'
ohair@286 1986 || c == '.'
ohair@286 1987 ))
ohair@286 1988 continue;
ohair@286 1989 fatal("P-060", new Object[]{new Character(c)});
ohair@286 1990 }
ohair@286 1991
ohair@286 1992 //
ohair@286 1993 // This should be the encoding in use, and it's even an error for
ohair@286 1994 // it to be anything else (in certain cases that are impractical to
ohair@286 1995 // to test, and may even be insufficient). So, we do the best we
ohair@286 1996 // can, and warn if things look suspicious. Note that Java doesn't
ohair@286 1997 // uniformly expose the encodings, and that the names it uses
ohair@286 1998 // internally are nonstandard. Also, that the XML spec allows
ohair@286 1999 // such "errors" not to be reported at all.
ohair@286 2000 //
ohair@286 2001 String currentEncoding = in.getEncoding();
ohair@286 2002
ohair@286 2003 if (currentEncoding != null
ohair@286 2004 && !name.equalsIgnoreCase(currentEncoding))
ohair@286 2005 warning("P-061", new Object[]{name, currentEncoding});
ohair@286 2006 }
ohair@286 2007
ohair@286 2008 private boolean maybeNotationDecl()
ohair@286 2009 throws IOException, SAXException {
ohair@286 2010
ohair@286 2011 // [82] NotationDecl ::= '<!NOTATION' S Name S
ohair@286 2012 // (ExternalID | PublicID) S? '>'
ohair@286 2013 // [83] PublicID ::= 'PUBLIC' S PubidLiteral
ohair@286 2014 InputEntity start = peekDeclaration("!NOTATION");
ohair@286 2015
ohair@286 2016 if (start == null)
ohair@286 2017 return false;
ohair@286 2018
ohair@286 2019 String name = getMarkupDeclname("F-019", false);
ohair@286 2020 ExternalEntity entity = new ExternalEntity(in);
ohair@286 2021
ohair@286 2022 whitespace("F-011");
ohair@286 2023 if (peek("PUBLIC")) {
ohair@286 2024 whitespace("F-009");
ohair@286 2025 entity.publicId = parsePublicId();
ohair@286 2026 if (maybeWhitespace()) {
ohair@286 2027 if (!peek(">"))
ohair@286 2028 entity.systemId = parseSystemId();
ohair@286 2029 else
ohair@286 2030 ungetc();
ohair@286 2031 }
ohair@286 2032 } else if (peek("SYSTEM")) {
ohair@286 2033 whitespace("F-008");
ohair@286 2034 entity.systemId = parseSystemId();
ohair@286 2035 } else
ohair@286 2036 fatal("P-062");
ohair@286 2037 maybeWhitespace();
ohair@286 2038 nextChar('>', "F-032", name);
ohair@286 2039 if (start != in)
ohair@286 2040 error("V-013", null);
ohair@286 2041 if (entity.systemId != null && entity.systemId.indexOf('#') != -1)
ohair@286 2042 error("P-056", new Object[]{entity.systemId});
ohair@286 2043
ohair@286 2044 Object value = notations.get(name);
ohair@286 2045 if (value != null && value instanceof ExternalEntity)
ohair@286 2046 warning("P-063", new Object[]{name});
ohair@286 2047
ohair@286 2048 else {
ohair@286 2049 notations.put(name, entity);
ohair@286 2050 dtdHandler.notationDecl(name, entity.publicId,
ohair@286 2051 entity.systemId);
ohair@286 2052 }
ohair@286 2053 return true;
ohair@286 2054 }
ohair@286 2055
ohair@286 2056
ohair@286 2057 ////////////////////////////////////////////////////////////////
ohair@286 2058 //
ohair@286 2059 // UTILITIES
ohair@286 2060 //
ohair@286 2061 ////////////////////////////////////////////////////////////////
ohair@286 2062
ohair@286 2063 private char getc() throws IOException, SAXException {
ohair@286 2064
ohair@286 2065 if (!doLexicalPE) {
ohair@286 2066 char c = in.getc();
ohair@286 2067 return c;
ohair@286 2068 }
ohair@286 2069
ohair@286 2070 //
ohair@286 2071 // External parameter entities get funky processing of '%param;'
ohair@286 2072 // references. It's not clearly defined in the XML spec; but it
ohair@286 2073 // boils down to having those refs be _lexical_ in most cases to
ohair@286 2074 // include partial syntax productions. It also needs selective
ohair@286 2075 // enabling; "<!ENTITY % foo ...>" must work, for example, and
ohair@286 2076 // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
ohair@286 2077 // if it's expanded in a literal, else "ab cd". PEs also do
ohair@286 2078 // not expand within comments or PIs, and external PEs are only
ohair@286 2079 // allowed to have markup decls (and so aren't handled lexically).
ohair@286 2080 //
ohair@286 2081 // This PE handling should be merged into maybeWhitespace, where
ohair@286 2082 // it can be dealt with more consistently.
ohair@286 2083 //
ohair@286 2084 // Also, there are some validity constraints in this area.
ohair@286 2085 //
ohair@286 2086 char c;
ohair@286 2087
ohair@286 2088 while (in.isEOF()) {
ohair@286 2089 if (in.isInternal() || (doLexicalPE && !in.isDocument()))
ohair@286 2090 in = in.pop();
ohair@286 2091 else {
ohair@286 2092 fatal("P-064", new Object[]{in.getName()});
ohair@286 2093 }
ohair@286 2094 }
ohair@286 2095 if ((c = in.getc()) == '%' && doLexicalPE) {
ohair@286 2096 // PE ref ::= '%' name ';'
ohair@286 2097 String name = maybeGetName();
ohair@286 2098 Object entity;
ohair@286 2099
ohair@286 2100 if (name == null)
ohair@286 2101 fatal("P-011");
ohair@286 2102 nextChar(';', "F-021", name);
ohair@286 2103 entity = params.get(name);
ohair@286 2104
ohair@286 2105 // push a magic "entity" before and after the
ohair@286 2106 // real one, so ungetc() behaves uniformly
ohair@286 2107 pushReader(" ".toCharArray(), null, false);
ohair@286 2108 if (entity instanceof InternalEntity)
ohair@286 2109 pushReader(((InternalEntity) entity).buf, name, false);
ohair@286 2110 else if (entity instanceof ExternalEntity)
ohair@286 2111 // PEs can't be unparsed!
ohair@286 2112 // XXX if this returns false ...
ohair@286 2113 pushReader((ExternalEntity) entity);
ohair@286 2114 else if (entity == null)
ohair@286 2115 // see note in maybePEReference re making this be nonfatal.
ohair@286 2116 fatal("V-022");
ohair@286 2117 else
ohair@286 2118 throw new InternalError();
ohair@286 2119 pushReader(" ".toCharArray(), null, false);
ohair@286 2120 return in.getc();
ohair@286 2121 }
ohair@286 2122 return c;
ohair@286 2123 }
ohair@286 2124
ohair@286 2125 private void ungetc() {
ohair@286 2126
ohair@286 2127 in.ungetc();
ohair@286 2128 }
ohair@286 2129
ohair@286 2130 private boolean peek(String s)
ohair@286 2131 throws IOException, SAXException {
ohair@286 2132
ohair@286 2133 return in.peek(s, null);
ohair@286 2134 }
ohair@286 2135
ohair@286 2136 // Return the entity starting the specified declaration
ohair@286 2137 // (for validating declaration nesting) else null.
ohair@286 2138
ohair@286 2139 private InputEntity peekDeclaration(String s)
ohair@286 2140 throws IOException, SAXException {
ohair@286 2141
ohair@286 2142 InputEntity start;
ohair@286 2143
ohair@286 2144 if (!in.peekc('<'))
ohair@286 2145 return null;
ohair@286 2146 start = in;
ohair@286 2147 if (in.peek(s, null))
ohair@286 2148 return start;
ohair@286 2149 in.ungetc();
ohair@286 2150 return null;
ohair@286 2151 }
ohair@286 2152
ohair@286 2153 private void nextChar(char c, String location, String near)
ohair@286 2154 throws IOException, SAXException {
ohair@286 2155
ohair@286 2156 while (in.isEOF() && !in.isDocument())
ohair@286 2157 in = in.pop();
ohair@286 2158 if (!in.peekc(c))
ohair@286 2159 fatal("P-008", new Object[]
ohair@286 2160 {new Character(c),
ohair@286 2161 messages.getMessage(locale, location),
ohair@286 2162 (near == null ? "" : ('"' + near + '"'))});
ohair@286 2163 }
ohair@286 2164
ohair@286 2165
ohair@286 2166 private void pushReader(char buf [], String name, boolean isGeneral)
ohair@286 2167 throws SAXException {
ohair@286 2168
ohair@286 2169 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
ohair@286 2170 r.init(buf, name, in, !isGeneral);
ohair@286 2171 in = r;
ohair@286 2172 }
ohair@286 2173
ohair@286 2174 private boolean pushReader(ExternalEntity next)
ohair@286 2175 throws IOException, SAXException {
ohair@286 2176
ohair@286 2177 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
ohair@286 2178 InputSource s;
ohair@286 2179 try {
ohair@286 2180 s = next.getInputSource(resolver);
ohair@286 2181 } catch (IOException e) {
ohair@286 2182 String msg =
ohair@286 2183 "unable to open the external entity from :" + next.systemId;
ohair@286 2184 if (next.publicId != null)
ohair@286 2185 msg += " (public id:" + next.publicId + ")";
ohair@286 2186
ohair@286 2187 SAXParseException spe = new SAXParseException(msg,
ohair@286 2188 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
ohair@286 2189 dtdHandler.fatalError(spe);
ohair@286 2190 throw e;
ohair@286 2191 }
ohair@286 2192
ohair@286 2193 r.init(s, next.name, in, next.isPE);
ohair@286 2194 in = r;
ohair@286 2195 return true;
ohair@286 2196 }
ohair@286 2197
ohair@286 2198 public String getPublicId() {
ohair@286 2199
ohair@286 2200 return (in == null) ? null : in.getPublicId();
ohair@286 2201 }
ohair@286 2202
ohair@286 2203 public String getSystemId() {
ohair@286 2204
ohair@286 2205 return (in == null) ? null : in.getSystemId();
ohair@286 2206 }
ohair@286 2207
ohair@286 2208 public int getLineNumber() {
ohair@286 2209
ohair@286 2210 return (in == null) ? -1 : in.getLineNumber();
ohair@286 2211 }
ohair@286 2212
ohair@286 2213 public int getColumnNumber() {
ohair@286 2214
ohair@286 2215 return (in == null) ? -1 : in.getColumnNumber();
ohair@286 2216 }
ohair@286 2217
ohair@286 2218 // error handling convenience routines
ohair@286 2219
ohair@286 2220 private void warning(String messageId, Object parameters [])
ohair@286 2221 throws SAXException {
ohair@286 2222
ohair@286 2223 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
ohair@286 2224 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
ohair@286 2225
ohair@286 2226 dtdHandler.warning(e);
ohair@286 2227 }
ohair@286 2228
ohair@286 2229 void error(String messageId, Object parameters [])
ohair@286 2230 throws SAXException {
ohair@286 2231
ohair@286 2232 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
ohair@286 2233 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
ohair@286 2234
ohair@286 2235 dtdHandler.error(e);
ohair@286 2236 }
ohair@286 2237
ohair@286 2238 private void fatal(String messageId) throws SAXException {
ohair@286 2239
ohair@286 2240 fatal(messageId, null);
ohair@286 2241 }
ohair@286 2242
ohair@286 2243 private void fatal(String messageId, Object parameters [])
ohair@286 2244 throws SAXException {
ohair@286 2245
ohair@286 2246 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
ohair@286 2247 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
ohair@286 2248
ohair@286 2249 dtdHandler.fatalError(e);
ohair@286 2250
ohair@286 2251 throw e;
ohair@286 2252 }
ohair@286 2253
ohair@286 2254 //
ohair@286 2255 // Map char arrays to strings ... cuts down both on memory and
ohair@286 2256 // CPU usage for element/attribute/other names that are reused.
ohair@286 2257 //
ohair@286 2258 // Documents typically repeat names a lot, so we more or less
ohair@286 2259 // intern all the strings within the document; since some strings
ohair@286 2260 // are repeated in multiple documents (e.g. stylesheets) we go
ohair@286 2261 // a bit further, and intern globally.
ohair@286 2262 //
ohair@286 2263 static class NameCache {
ohair@286 2264 //
ohair@286 2265 // Unless we auto-grow this, the default size should be a
ohair@286 2266 // reasonable bit larger than needed for most XML files
ohair@286 2267 // we've yet seen (and be prime). If it's too small, the
ohair@286 2268 // penalty is just excess cache collisions.
ohair@286 2269 //
ohair@286 2270 NameCacheEntry hashtable [] = new NameCacheEntry[541];
ohair@286 2271
ohair@286 2272 //
ohair@286 2273 // Usually we just want to get the 'symbol' for these chars
ohair@286 2274 //
ohair@286 2275 String lookup(char value [], int len) {
ohair@286 2276
ohair@286 2277 return lookupEntry(value, len).name;
ohair@286 2278 }
ohair@286 2279
ohair@286 2280 //
ohair@286 2281 // Sometimes we need to scan the chars in the resulting
ohair@286 2282 // string, so there's an accessor which exposes them.
ohair@286 2283 // (Mostly for element end tags.)
ohair@286 2284 //
ohair@286 2285 NameCacheEntry lookupEntry(char value [], int len) {
ohair@286 2286
ohair@286 2287 int index = 0;
ohair@286 2288 NameCacheEntry entry;
ohair@286 2289
ohair@286 2290 // hashing to get index
ohair@286 2291 for (int i = 0; i < len; i++)
ohair@286 2292 index = index * 31 + value[i];
ohair@286 2293 index &= 0x7fffffff;
ohair@286 2294 index %= hashtable.length;
ohair@286 2295
ohair@286 2296 // return entry if one's there ...
ohair@286 2297 for (entry = hashtable[index];
ohair@286 2298 entry != null;
ohair@286 2299 entry = entry.next) {
ohair@286 2300 if (entry.matches(value, len))
ohair@286 2301 return entry;
ohair@286 2302 }
ohair@286 2303
ohair@286 2304 // else create new one
ohair@286 2305 entry = new NameCacheEntry();
ohair@286 2306 entry.chars = new char[len];
ohair@286 2307 System.arraycopy(value, 0, entry.chars, 0, len);
ohair@286 2308 entry.name = new String(entry.chars);
ohair@286 2309 //
ohair@286 2310 // NOTE: JDK 1.1 has a fixed size string intern table,
ohair@286 2311 // with non-GC'd entries. It can panic here; that's a
ohair@286 2312 // JDK problem, use 1.2 or later with many identifiers.
ohair@286 2313 //
ohair@286 2314 entry.name = entry.name.intern(); // "global" intern
ohair@286 2315 entry.next = hashtable[index];
ohair@286 2316 hashtable[index] = entry;
ohair@286 2317 return entry;
ohair@286 2318 }
ohair@286 2319 }
ohair@286 2320
ohair@286 2321 static class NameCacheEntry {
ohair@286 2322
ohair@286 2323 String name;
ohair@286 2324 char chars [];
ohair@286 2325 NameCacheEntry next;
ohair@286 2326
ohair@286 2327 boolean matches(char value [], int len) {
ohair@286 2328
ohair@286 2329 if (chars.length != len)
ohair@286 2330 return false;
ohair@286 2331 for (int i = 0; i < len; i++)
ohair@286 2332 if (value[i] != chars[i])
ohair@286 2333 return false;
ohair@286 2334 return true;
ohair@286 2335 }
ohair@286 2336 }
ohair@286 2337
ohair@286 2338 //
ohair@286 2339 // Message catalog for diagnostics.
ohair@286 2340 //
ohair@286 2341 static final Catalog messages = new Catalog();
ohair@286 2342
ohair@286 2343 static final class Catalog extends MessageCatalog {
ohair@286 2344
ohair@286 2345 Catalog() {
ohair@286 2346 super(DTDParser.class);
ohair@286 2347 }
ohair@286 2348 }
ohair@286 2349
ohair@286 2350 }

mercurial