src/share/jaxws_classes/com/sun/xml/internal/dtdparser/DTDParser.java

Thu, 31 Aug 2017 15:18:52 +0800

author
aoqi
date
Thu, 31 Aug 2017 15:18:52 +0800
changeset 637
9c07ef4934dd
parent 397
b99d7e355d4b
parent 0
373ffda63c9a
permissions
-rw-r--r--

merge

aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation. Oracle designates this
aoqi@0 8 * particular file as subject to the "Classpath" exception as provided
aoqi@0 9 * by Oracle in the LICENSE file that accompanied this code.
aoqi@0 10 *
aoqi@0 11 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 14 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 15 * accompanied this code).
aoqi@0 16 *
aoqi@0 17 * You should have received a copy of the GNU General Public License version
aoqi@0 18 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 20 *
aoqi@0 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 22 * or visit www.oracle.com if you need additional information or have any
aoqi@0 23 * questions.
aoqi@0 24 */
aoqi@0 25
aoqi@0 26 package com.sun.xml.internal.dtdparser;
aoqi@0 27
aoqi@0 28 import org.xml.sax.EntityResolver;
aoqi@0 29 import org.xml.sax.InputSource;
aoqi@0 30 import org.xml.sax.Locator;
aoqi@0 31 import org.xml.sax.SAXException;
aoqi@0 32 import org.xml.sax.SAXParseException;
aoqi@0 33
aoqi@0 34 import java.io.IOException;
aoqi@0 35 import java.util.ArrayList;
aoqi@0 36 import java.util.Enumeration;
aoqi@0 37 import java.util.Hashtable;
aoqi@0 38 import java.util.Locale;
aoqi@0 39 import java.util.Set;
aoqi@0 40 import java.util.Vector;
aoqi@0 41
aoqi@0 42 /**
aoqi@0 43 * This implements parsing of XML 1.0 DTDs.
aoqi@0 44 * <p/>
aoqi@0 45 * This conforms to the portion of the XML 1.0 specification related
aoqi@0 46 * to the external DTD subset.
aoqi@0 47 * <p/>
aoqi@0 48 * For multi-language applications (such as web servers using XML
aoqi@0 49 * processing to create dynamic content), a method supports choosing
aoqi@0 50 * a locale for parser diagnostics which is both understood by the
aoqi@0 51 * message recipient and supported by the parser.
aoqi@0 52 * <p/>
aoqi@0 53 * This parser produces a stream of parse events. It supports some
aoqi@0 54 * features (exposing comments, CDATA sections, and entity references)
aoqi@0 55 * which are not required to be reported by conformant XML processors.
aoqi@0 56 *
aoqi@0 57 * @author David Brownell
aoqi@0 58 * @author Janet Koenig
aoqi@0 59 * @author Kohsuke KAWAGUCHI
aoqi@0 60 * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $
aoqi@0 61 */
aoqi@0 62 public class DTDParser {
aoqi@0 63 public final static String TYPE_CDATA = "CDATA";
aoqi@0 64 public final static String TYPE_ID = "ID";
aoqi@0 65 public final static String TYPE_IDREF = "IDREF";
aoqi@0 66 public final static String TYPE_IDREFS = "IDREFS";
aoqi@0 67 public final static String TYPE_ENTITY = "ENTITY";
aoqi@0 68 public final static String TYPE_ENTITIES = "ENTITIES";
aoqi@0 69 public final static String TYPE_NMTOKEN = "NMTOKEN";
aoqi@0 70 public final static String TYPE_NMTOKENS = "NMTOKENS";
aoqi@0 71 public final static String TYPE_NOTATION = "NOTATION";
aoqi@0 72 public final static String TYPE_ENUMERATION = "ENUMERATION";
aoqi@0 73
aoqi@0 74
aoqi@0 75 // stack of input entities being merged
aoqi@0 76 private InputEntity in;
aoqi@0 77
aoqi@0 78 // temporaries reused during parsing
aoqi@0 79 private StringBuffer strTmp;
aoqi@0 80 private char nameTmp [];
aoqi@0 81 private NameCache nameCache;
aoqi@0 82 private char charTmp [] = new char[2];
aoqi@0 83
aoqi@0 84 // temporary DTD parsing state
aoqi@0 85 private boolean doLexicalPE;
aoqi@0 86
aoqi@0 87 // DTD state, used during parsing
aoqi@0 88 // private SimpleHashtable elements = new SimpleHashtable (47);
aoqi@0 89 protected final Set declaredElements = new java.util.HashSet();
aoqi@0 90 private SimpleHashtable params = new SimpleHashtable(7);
aoqi@0 91
aoqi@0 92 // exposed to package-private subclass
aoqi@0 93 Hashtable notations = new Hashtable(7);
aoqi@0 94 SimpleHashtable entities = new SimpleHashtable(17);
aoqi@0 95
aoqi@0 96 private SimpleHashtable ids = new SimpleHashtable();
aoqi@0 97
aoqi@0 98 // listeners for DTD parsing events
aoqi@0 99 private DTDEventListener dtdHandler;
aoqi@0 100
aoqi@0 101 private EntityResolver resolver;
aoqi@0 102 private Locale locale;
aoqi@0 103
aoqi@0 104 // string constants -- use these copies so "==" works
aoqi@0 105 // package private
aoqi@0 106 static final String strANY = "ANY";
aoqi@0 107 static final String strEMPTY = "EMPTY";
aoqi@0 108
aoqi@0 109 /**
aoqi@0 110 * Used by applications to request locale for diagnostics.
aoqi@0 111 *
aoqi@0 112 * @param l The locale to use, or null to use system defaults
aoqi@0 113 * (which may include only message IDs).
aoqi@0 114 */
aoqi@0 115 public void setLocale(Locale l) throws SAXException {
aoqi@0 116
aoqi@0 117 if (l != null && !messages.isLocaleSupported(l.toString())) {
aoqi@0 118 throw new SAXException(messages.getMessage(locale,
aoqi@0 119 "P-078", new Object[]{l}));
aoqi@0 120 }
aoqi@0 121 locale = l;
aoqi@0 122 }
aoqi@0 123
aoqi@0 124 /**
aoqi@0 125 * Returns the diagnostic locale.
aoqi@0 126 */
aoqi@0 127 public Locale getLocale() {
aoqi@0 128 return locale;
aoqi@0 129 }
aoqi@0 130
aoqi@0 131 /**
aoqi@0 132 * Chooses a client locale to use for diagnostics, using the first
aoqi@0 133 * language specified in the list that is supported by this parser.
aoqi@0 134 * That locale is then set using <a href="#setLocale(java.util.Locale)">
aoqi@0 135 * setLocale()</a>. Such a list could be provided by a variety of user
aoqi@0 136 * preference mechanisms, including the HTTP <em>Accept-Language</em>
aoqi@0 137 * header field.
aoqi@0 138 *
aoqi@0 139 * @param languages Array of language specifiers, ordered with the most
aoqi@0 140 * preferable one at the front. For example, "en-ca" then "fr-ca",
aoqi@0 141 * followed by "zh_CN". Both RFC 1766 and Java styles are supported.
aoqi@0 142 * @return The chosen locale, or null.
aoqi@0 143 * @see MessageCatalog
aoqi@0 144 */
aoqi@0 145 public Locale chooseLocale(String languages [])
aoqi@0 146 throws SAXException {
aoqi@0 147
aoqi@0 148 Locale l = messages.chooseLocale(languages);
aoqi@0 149
aoqi@0 150 if (l != null) {
aoqi@0 151 setLocale(l);
aoqi@0 152 }
aoqi@0 153 return l;
aoqi@0 154 }
aoqi@0 155
aoqi@0 156 /**
aoqi@0 157 * Lets applications control entity resolution.
aoqi@0 158 */
aoqi@0 159 public void setEntityResolver(EntityResolver r) {
aoqi@0 160
aoqi@0 161 resolver = r;
aoqi@0 162 }
aoqi@0 163
aoqi@0 164 /**
aoqi@0 165 * Returns the object used to resolve entities
aoqi@0 166 */
aoqi@0 167 public EntityResolver getEntityResolver() {
aoqi@0 168
aoqi@0 169 return resolver;
aoqi@0 170 }
aoqi@0 171
aoqi@0 172 /**
aoqi@0 173 * Used by applications to set handling of DTD parsing events.
aoqi@0 174 */
aoqi@0 175 public void setDtdHandler(DTDEventListener handler) {
aoqi@0 176 dtdHandler = handler;
aoqi@0 177 if (handler != null)
aoqi@0 178 handler.setDocumentLocator(new Locator() {
aoqi@0 179 public String getPublicId() {
aoqi@0 180 return DTDParser.this.getPublicId();
aoqi@0 181 }
aoqi@0 182
aoqi@0 183 public String getSystemId() {
aoqi@0 184 return DTDParser.this.getSystemId();
aoqi@0 185 }
aoqi@0 186
aoqi@0 187 public int getLineNumber() {
aoqi@0 188 return DTDParser.this.getLineNumber();
aoqi@0 189 }
aoqi@0 190
aoqi@0 191 public int getColumnNumber() {
aoqi@0 192 return DTDParser.this.getColumnNumber();
aoqi@0 193 }
aoqi@0 194 });
aoqi@0 195 }
aoqi@0 196
aoqi@0 197 /**
aoqi@0 198 * Returns the handler used to for DTD parsing events.
aoqi@0 199 */
aoqi@0 200 public DTDEventListener getDtdHandler() {
aoqi@0 201 return dtdHandler;
aoqi@0 202 }
aoqi@0 203
aoqi@0 204 /**
aoqi@0 205 * Parse a DTD.
aoqi@0 206 */
aoqi@0 207 public void parse(InputSource in)
aoqi@0 208 throws IOException, SAXException {
aoqi@0 209 init();
aoqi@0 210 parseInternal(in);
aoqi@0 211 }
aoqi@0 212
aoqi@0 213 /**
aoqi@0 214 * Parse a DTD.
aoqi@0 215 */
aoqi@0 216 public void parse(String uri)
aoqi@0 217 throws IOException, SAXException {
aoqi@0 218 InputSource in;
aoqi@0 219
aoqi@0 220 init();
aoqi@0 221 // System.out.println ("parse (\"" + uri + "\")");
aoqi@0 222 in = resolver.resolveEntity(null, uri);
aoqi@0 223
aoqi@0 224 // If custom resolver punts resolution to parser, handle it ...
aoqi@0 225 if (in == null) {
aoqi@0 226 in = Resolver.createInputSource(new java.net.URL(uri), false);
aoqi@0 227
aoqi@0 228 // ... or if custom resolver doesn't correctly construct the
aoqi@0 229 // input entity, patch it up enough so relative URIs work, and
aoqi@0 230 // issue a warning to minimize later confusion.
aoqi@0 231 } else if (in.getSystemId() == null) {
aoqi@0 232 warning("P-065", null);
aoqi@0 233 in.setSystemId(uri);
aoqi@0 234 }
aoqi@0 235
aoqi@0 236 parseInternal(in);
aoqi@0 237 }
aoqi@0 238
aoqi@0 239 // makes sure the parser is reset to "before a document"
aoqi@0 240 private void init() {
aoqi@0 241 in = null;
aoqi@0 242
aoqi@0 243 // alloc temporary data used in parsing
aoqi@0 244 strTmp = new StringBuffer();
aoqi@0 245 nameTmp = new char[20];
aoqi@0 246 nameCache = new NameCache();
aoqi@0 247
aoqi@0 248 // reset doc info
aoqi@0 249 // isInAttribute = false;
aoqi@0 250
aoqi@0 251 doLexicalPE = false;
aoqi@0 252
aoqi@0 253 entities.clear();
aoqi@0 254 notations.clear();
aoqi@0 255 params.clear();
aoqi@0 256 // elements.clear ();
aoqi@0 257 declaredElements.clear();
aoqi@0 258
aoqi@0 259 // initialize predefined references ... re-interpreted later
aoqi@0 260 builtin("amp", "&#38;");
aoqi@0 261 builtin("lt", "&#60;");
aoqi@0 262 builtin("gt", ">");
aoqi@0 263 builtin("quot", "\"");
aoqi@0 264 builtin("apos", "'");
aoqi@0 265
aoqi@0 266 if (locale == null)
aoqi@0 267 locale = Locale.getDefault();
aoqi@0 268 if (resolver == null)
aoqi@0 269 resolver = new Resolver();
aoqi@0 270 if (dtdHandler == null)
aoqi@0 271 dtdHandler = new DTDHandlerBase();
aoqi@0 272 }
aoqi@0 273
aoqi@0 274 private void builtin(String entityName, String entityValue) {
aoqi@0 275 InternalEntity entity;
aoqi@0 276 entity = new InternalEntity(entityName, entityValue.toCharArray());
aoqi@0 277 entities.put(entityName, entity);
aoqi@0 278 }
aoqi@0 279
aoqi@0 280
aoqi@0 281 ////////////////////////////////////////////////////////////////
aoqi@0 282 //
aoqi@0 283 // parsing is by recursive descent, code roughly
aoqi@0 284 // following the BNF rules except tweaked for simple
aoqi@0 285 // lookahead. rules are more or less in numeric order,
aoqi@0 286 // except where code sharing suggests other structures.
aoqi@0 287 //
aoqi@0 288 // a classic benefit of recursive descent parsers: it's
aoqi@0 289 // relatively easy to get diagnostics that make sense.
aoqi@0 290 //
aoqi@0 291 ////////////////////////////////////////////////////////////////
aoqi@0 292
aoqi@0 293
aoqi@0 294 private void parseInternal(InputSource input)
aoqi@0 295 throws IOException, SAXException {
aoqi@0 296
aoqi@0 297 if (input == null)
aoqi@0 298 fatal("P-000");
aoqi@0 299
aoqi@0 300 try {
aoqi@0 301 in = InputEntity.getInputEntity(dtdHandler, locale);
aoqi@0 302 in.init(input, null, null, false);
aoqi@0 303
aoqi@0 304 dtdHandler.startDTD(in);
aoqi@0 305
aoqi@0 306 // [30] extSubset ::= TextDecl? extSubsetDecl
aoqi@0 307 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
aoqi@0 308 // | PEReference | S )*
aoqi@0 309 // ... same as [79] extPE, which is where the code is
aoqi@0 310
aoqi@0 311 ExternalEntity externalSubset = new ExternalEntity(in);
aoqi@0 312 externalParameterEntity(externalSubset);
aoqi@0 313
aoqi@0 314 if (!in.isEOF()) {
aoqi@0 315 fatal("P-001", new Object[]
aoqi@0 316 {Integer.toHexString(((int) getc()))});
aoqi@0 317 }
aoqi@0 318 afterRoot();
aoqi@0 319 dtdHandler.endDTD();
aoqi@0 320
aoqi@0 321 } catch (EndOfInputException e) {
aoqi@0 322 if (!in.isDocument()) {
aoqi@0 323 String name = in.getName();
aoqi@0 324 do { // force a relevant URI and line number
aoqi@0 325 in = in.pop();
aoqi@0 326 } while (in.isInternal());
aoqi@0 327 fatal("P-002", new Object[]{name});
aoqi@0 328 } else {
aoqi@0 329 fatal("P-003", null);
aoqi@0 330 }
aoqi@0 331 } catch (RuntimeException e) {
aoqi@0 332 // Don't discard location that triggered the exception
aoqi@0 333 // ## Should properly wrap exception
aoqi@0 334 System.err.print("Internal DTD parser error: "); // ##
aoqi@0 335 e.printStackTrace();
aoqi@0 336 throw new SAXParseException(e.getMessage() != null
aoqi@0 337 ? e.getMessage() : e.getClass().getName(),
aoqi@0 338 getPublicId(), getSystemId(),
aoqi@0 339 getLineNumber(), getColumnNumber());
aoqi@0 340
aoqi@0 341 } finally {
aoqi@0 342 // recycle temporary data used during parsing
aoqi@0 343 strTmp = null;
aoqi@0 344 nameTmp = null;
aoqi@0 345 nameCache = null;
aoqi@0 346
aoqi@0 347 // ditto input sources etc
aoqi@0 348 if (in != null) {
aoqi@0 349 in.close();
aoqi@0 350 in = null;
aoqi@0 351 }
aoqi@0 352
aoqi@0 353 // get rid of all DTD info ... some of it would be
aoqi@0 354 // useful for editors etc, investigate later.
aoqi@0 355
aoqi@0 356 params.clear();
aoqi@0 357 entities.clear();
aoqi@0 358 notations.clear();
aoqi@0 359 declaredElements.clear();
aoqi@0 360 // elements.clear();
aoqi@0 361 ids.clear();
aoqi@0 362 }
aoqi@0 363 }
aoqi@0 364
aoqi@0 365 void afterRoot() throws SAXException {
aoqi@0 366 // Make sure all IDREFs match declared ID attributes. We scan
aoqi@0 367 // after the document element is parsed, since XML allows forward
aoqi@0 368 // references, and only now can we know if they're all resolved.
aoqi@0 369
aoqi@0 370 for (Enumeration e = ids.keys();
aoqi@0 371 e.hasMoreElements();
aoqi@0 372 ) {
aoqi@0 373 String id = (String) e.nextElement();
aoqi@0 374 Boolean value = (Boolean) ids.get(id);
aoqi@0 375 if (Boolean.FALSE == value)
aoqi@0 376 error("V-024", new Object[]{id});
aoqi@0 377 }
aoqi@0 378 }
aoqi@0 379
aoqi@0 380
aoqi@0 381 // role is for diagnostics
aoqi@0 382 private void whitespace(String roleId)
aoqi@0 383 throws IOException, SAXException {
aoqi@0 384
aoqi@0 385 // [3] S ::= (#x20 | #x9 | #xd | #xa)+
aoqi@0 386 if (!maybeWhitespace()) {
aoqi@0 387 fatal("P-004", new Object[]
aoqi@0 388 {messages.getMessage(locale, roleId)});
aoqi@0 389 }
aoqi@0 390 }
aoqi@0 391
aoqi@0 392 // S?
aoqi@0 393 private boolean maybeWhitespace()
aoqi@0 394 throws IOException, SAXException {
aoqi@0 395
aoqi@0 396 if (!doLexicalPE)
aoqi@0 397 return in.maybeWhitespace();
aoqi@0 398
aoqi@0 399 // see getc() for the PE logic -- this lets us splice
aoqi@0 400 // expansions of PEs in "anywhere". getc() has smarts,
aoqi@0 401 // so for external PEs we don't bypass it.
aoqi@0 402
aoqi@0 403 // XXX we can marginally speed PE handling, and certainly
aoqi@0 404 // be cleaner (hence potentially more correct), by using
aoqi@0 405 // the observations that expanded PEs only start and stop
aoqi@0 406 // where whitespace is allowed. getc wouldn't need any
aoqi@0 407 // "lexical" PE expansion logic, and no other method needs
aoqi@0 408 // to handle termination of PEs. (parsing of literals would
aoqi@0 409 // still need to pop entities, but not parsing of references
aoqi@0 410 // in content.)
aoqi@0 411
aoqi@0 412 char c = getc();
aoqi@0 413 boolean saw = false;
aoqi@0 414
aoqi@0 415 while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
aoqi@0 416 saw = true;
aoqi@0 417
aoqi@0 418 // this gracefully ends things when we stop playing
aoqi@0 419 // with internal parameters. caller should have a
aoqi@0 420 // grammar rule allowing whitespace at end of entity.
aoqi@0 421 if (in.isEOF() && !in.isInternal())
aoqi@0 422 return saw;
aoqi@0 423 c = getc();
aoqi@0 424 }
aoqi@0 425 ungetc();
aoqi@0 426 return saw;
aoqi@0 427 }
aoqi@0 428
aoqi@0 429 private String maybeGetName()
aoqi@0 430 throws IOException, SAXException {
aoqi@0 431
aoqi@0 432 NameCacheEntry entry = maybeGetNameCacheEntry();
aoqi@0 433 return (entry == null) ? null : entry.name;
aoqi@0 434 }
aoqi@0 435
aoqi@0 436 private NameCacheEntry maybeGetNameCacheEntry()
aoqi@0 437 throws IOException, SAXException {
aoqi@0 438
aoqi@0 439 // [5] Name ::= (Letter|'_'|':') (Namechar)*
aoqi@0 440 char c = getc();
aoqi@0 441
aoqi@0 442 if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
aoqi@0 443 ungetc();
aoqi@0 444 return null;
aoqi@0 445 }
aoqi@0 446 return nameCharString(c);
aoqi@0 447 }
aoqi@0 448
aoqi@0 449 // Used when parsing enumerations
aoqi@0 450 private String getNmtoken()
aoqi@0 451 throws IOException, SAXException {
aoqi@0 452
aoqi@0 453 // [7] Nmtoken ::= (Namechar)+
aoqi@0 454 char c = getc();
aoqi@0 455 if (!XmlChars.isNameChar(c))
aoqi@0 456 fatal("P-006", new Object[]{new Character(c)});
aoqi@0 457 return nameCharString(c).name;
aoqi@0 458 }
aoqi@0 459
aoqi@0 460 // n.b. this gets used when parsing attribute values (for
aoqi@0 461 // internal references) so we can't use strTmp; it's also
aoqi@0 462 // a hotspot for CPU and memory in the parser (called at least
aoqi@0 463 // once for each element) so this has been optimized a bit.
aoqi@0 464
aoqi@0 465 private NameCacheEntry nameCharString(char c)
aoqi@0 466 throws IOException, SAXException {
aoqi@0 467
aoqi@0 468 int i = 1;
aoqi@0 469
aoqi@0 470 nameTmp[0] = c;
aoqi@0 471 for (; ;) {
aoqi@0 472 if ((c = in.getNameChar()) == 0)
aoqi@0 473 break;
aoqi@0 474 if (i >= nameTmp.length) {
aoqi@0 475 char tmp [] = new char[nameTmp.length + 10];
aoqi@0 476 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
aoqi@0 477 nameTmp = tmp;
aoqi@0 478 }
aoqi@0 479 nameTmp[i++] = c;
aoqi@0 480 }
aoqi@0 481 return nameCache.lookupEntry(nameTmp, i);
aoqi@0 482 }
aoqi@0 483
//
// Parses a quoted literal: an entity value when 'isEntityValue' is true,
// otherwise an attribute value.
//
// much similarity between parsing entity values in DTD
// and attribute values (in DTD or content) ... both follow
// literal parsing rules, newline canonicalization, etc
//
// leaves value in 'strTmp' ... either a "replacement text" (4.5),
// or else partially normalized attribute value (the first bit
// of 3.3.3's spec, without the "if not CDATA" bits).
//
// Summary of what the loop does with each character:
//   - the closing quote only ends the literal when it is read from the
//     entity the literal started in ('source');
//   - "&name;" is copied through unexpanded for entity values, and
//     expanded via expandEntityInLiteral() for attribute values;
//   - "&#...;" character references are always appended immediately;
//   - "%name;" is expanded only for entity values;
//   - for attribute values, whitespace becomes a single space and '<'
//     is reported as fatal error P-012.
//
private void parseLiteral(boolean isEntityValue)
        throws IOException, SAXException {

    // [9] EntityValue ::=
    //      '"' ([^"&%] | Reference | PEReference)* '"'
    //    | "'" ([^'&%] | Reference | PEReference)* "'"
    // [10] AttValue ::=
    //      '"' ([^"&] | Reference )* '"'
    //    | "'" ([^'&] | Reference )* "'"
    char quote = getc();
    char c;
    // Entity the literal started in; used to tell nested expansions apart.
    InputEntity source = in;

    if (quote != '\'' && quote != '"') {
        fatal("P-007");
    }

    // don't report entity expansions within attributes,
    // they're reported "fully expanded" via SAX
    // isInAttribute = !isEntityValue;

    // get value into strTmp
    strTmp = new StringBuffer();

    // scan, allowing entity push/pop wherever ...
    // expanded entities can't terminate the literal!
    for (; ;) {
        if (in != source && in.isEOF()) {
            // we don't report end of parsed entities
            // within attributes (no SAX hooks)
            in = in.pop();
            continue;
        }
        // A quote character coming from an expanded entity is plain data.
        if ((c = getc()) == quote && in == source) {
            break;
        }

        //
        // Basically the "reference in attribute value"
        // row of the chart in section 4.4 of the spec
        //
        if (c == '&') {
            String entityName = maybeGetName();

            if (entityName != null) {
                nextChar(';', "F-020", entityName);

                // 4.4 says: bypass these here ... we'll catch
                // forbidden refs to unparsed entities on use
                if (isEntityValue) {
                    strTmp.append('&');
                    strTmp.append(entityName);
                    strTmp.append(';');
                    continue;
                }
                // Only reached for attribute values (see 'continue' above).
                expandEntityInLiteral(entityName, entities, isEntityValue);


                // character references are always included immediately
            } else if ((c = getc()) == '#') {
                int tmp = parseCharNumber();

                if (tmp > 0xffff) {
                    // Beyond the 16-bit range: charTmp receives the
                    // converted chars and 'tmp' is reused for the value
                    // returned by surrogatesToCharTmp(); the second char
                    // is appended only when that value is 2.
                    tmp = surrogatesToCharTmp(tmp);
                    strTmp.append(charTmp[0]);
                    if (tmp == 2)
                        strTmp.append(charTmp[1]);
                } else
                    strTmp.append((char) tmp);
            } else
                fatal("P-009");
            continue;

        }

        // expand parameter entities only within entity value literals
        if (c == '%' && isEntityValue) {
            String entityName = maybeGetName();

            if (entityName != null) {
                nextChar(';', "F-021", entityName);
                expandEntityInLiteral(entityName, params, isEntityValue);
                continue;
            } else
                fatal("P-011");
        }

        // For attribute values ...
        if (!isEntityValue) {
            // 3.3.3 says whitespace normalizes to space...
            if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                strTmp.append(' ');
                continue;
            }

            // "<" not legal in parsed literals ...
            if (c == '<')
                fatal("P-012");
        }

        strTmp.append(c);
    }
    // isInAttribute = false;
}
aoqi@0 597
aoqi@0 598 // does a SINGLE expansion of the entity (often reparsed later)
aoqi@0 599 private void expandEntityInLiteral(String name, SimpleHashtable table,
aoqi@0 600 boolean isEntityValue)
aoqi@0 601 throws IOException, SAXException {
aoqi@0 602
aoqi@0 603 Object entity = table.get(name);
aoqi@0 604
aoqi@0 605 if (entity instanceof InternalEntity) {
aoqi@0 606 InternalEntity value = (InternalEntity) entity;
aoqi@0 607 pushReader(value.buf, name, !value.isPE);
aoqi@0 608
aoqi@0 609 } else if (entity instanceof ExternalEntity) {
aoqi@0 610 if (!isEntityValue) // must be a PE ...
aoqi@0 611 fatal("P-013", new Object[]{name});
aoqi@0 612 // XXX if this returns false ...
aoqi@0 613 pushReader((ExternalEntity) entity);
aoqi@0 614
aoqi@0 615 } else if (entity == null) {
aoqi@0 616 //
aoqi@0 617 // Note: much confusion about whether spec requires such
aoqi@0 618 // errors to be fatal in many cases, but none about whether
aoqi@0 619 // it allows "normal" errors to be unrecoverable!
aoqi@0 620 //
aoqi@0 621 fatal((table == params) ? "V-022" : "P-014",
aoqi@0 622 new Object[]{name});
aoqi@0 623 }
aoqi@0 624 }
aoqi@0 625
aoqi@0 626 // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
aoqi@0 627 // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
aoqi@0 628
aoqi@0 629 // NOTE: XML spec should explicitly say that PE ref syntax is
aoqi@0 630 // ignored in PIs, comments, SystemLiterals, and Pubid Literal
aoqi@0 631 // values ... can't process the XML spec's own DTD without doing
aoqi@0 632 // that for comments.
aoqi@0 633
aoqi@0 634 private String getQuotedString(String type, String extra)
aoqi@0 635 throws IOException, SAXException {
aoqi@0 636
aoqi@0 637 // use in.getc to bypass PE processing
aoqi@0 638 char quote = in.getc();
aoqi@0 639
aoqi@0 640 if (quote != '\'' && quote != '"')
aoqi@0 641 fatal("P-015", new Object[]{
aoqi@0 642 messages.getMessage(locale, type, new Object[]{extra})
aoqi@0 643 });
aoqi@0 644
aoqi@0 645 char c;
aoqi@0 646
aoqi@0 647 strTmp = new StringBuffer();
aoqi@0 648 while ((c = in.getc()) != quote)
aoqi@0 649 strTmp.append((char) c);
aoqi@0 650 return strTmp.toString();
aoqi@0 651 }
aoqi@0 652
aoqi@0 653
aoqi@0 654 private String parsePublicId() throws IOException, SAXException {
aoqi@0 655
aoqi@0 656 // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
aoqi@0 657 // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
aoqi@0 658 String retval = getQuotedString("F-033", null);
aoqi@0 659 for (int i = 0; i < retval.length(); i++) {
aoqi@0 660 char c = retval.charAt(i);
aoqi@0 661 if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
aoqi@0 662 && !(c >= 'A' && c <= 'Z')
aoqi@0 663 && !(c >= 'a' && c <= 'z'))
aoqi@0 664 fatal("P-016", new Object[]{new Character(c)});
aoqi@0 665 }
aoqi@0 666 strTmp = new StringBuffer();
aoqi@0 667 strTmp.append(retval);
aoqi@0 668 return normalize(false);
aoqi@0 669 }
aoqi@0 670
aoqi@0 671 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
aoqi@0 672 // handled by: InputEntity.parsedContent()
aoqi@0 673
// Consumes a comment if one starts at the current position.
//
// 'skipStart' selects which opening is looked for: "!--" when true,
// "<!--" when false. Returns false (consuming nothing further) when that
// opening is not present, true once the whole comment has been read.
//
// Lexical PE processing is switched off while the comment is scanned and
// restored afterwards. Note that 'saveCommentText' is hard-coded to
// false, so the comment text is never collected and
// dtdHandler.comment() is never invoked from here.
private boolean maybeComment(boolean skipStart)
        throws IOException, SAXException {

    // [15] Comment ::= '<!--'
    //          ( (Char - '-') | ('-' (Char - '-'))*
    //          '-->'
    if (!in.peek(skipStart ? "!--" : "<!--", null))
        return false;

    boolean savedLexicalPE = doLexicalPE;
    boolean saveCommentText;

    doLexicalPE = false;
    // Comment text reporting is disabled; see the note above.
    saveCommentText = false;
    if (saveCommentText)
        strTmp = new StringBuffer();

    oneComment:
    for (; ;) {
        try {
            // bypass PE expansion, but permit PEs
            // to complete ... valid docs won't care.
            for (; ;) {
                int c = getc();
                if (c == '-') {
                    c = getc();
                    if (c != '-') {
                        // A lone '-' is ordinary comment text; push the
                        // following character back and keep scanning.
                        if (saveCommentText)
                            strTmp.append('-');
                        ungetc();
                        continue;
                    }
                    // "--" must be followed by '>' (diagnostic F-022).
                    nextChar('>', "F-022", null);
                    break oneComment;
                }
                if (saveCommentText)
                    strTmp.append((char) c);
            }
        } catch (EndOfInputException e) {
            //
            // This is fatal EXCEPT when we're processing a PE...
            // in which case a validating processor reports an error.
            // External PEs are easy to detect; internal ones we
            // infer by being an internal entity outside an element.
            //
            if (in.isInternal()) {
                error("V-021", null);
            }
            // NOTE(review): fatal() presumably always throws, in which
            // case the outer loop never iterates a second time -- confirm.
            fatal("P-017");
        }
    }
    doLexicalPE = savedLexicalPE;
    if (saveCommentText)
        dtdHandler.comment(strTmp.toString());
    return true;
}
aoqi@0 730
aoqi@0 731 private boolean maybePI(boolean skipStart)
aoqi@0 732 throws IOException, SAXException {
aoqi@0 733
aoqi@0 734 // [16] PI ::= '<?' PITarget
aoqi@0 735 // (S (Char* - (Char* '?>' Char*)))?
aoqi@0 736 // '?>'
aoqi@0 737 // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
aoqi@0 738 boolean savedLexicalPE = doLexicalPE;
aoqi@0 739
aoqi@0 740 if (!in.peek(skipStart ? "?" : "<?", null))
aoqi@0 741 return false;
aoqi@0 742 doLexicalPE = false;
aoqi@0 743
aoqi@0 744 String target = maybeGetName();
aoqi@0 745
aoqi@0 746 if (target == null) {
aoqi@0 747 fatal("P-018");
aoqi@0 748 }
aoqi@0 749 if ("xml".equals(target)) {
aoqi@0 750 fatal("P-019");
aoqi@0 751 }
aoqi@0 752 if ("xml".equalsIgnoreCase(target)) {
aoqi@0 753 fatal("P-020", new Object[]{target});
aoqi@0 754 }
aoqi@0 755
aoqi@0 756 if (maybeWhitespace()) {
aoqi@0 757 strTmp = new StringBuffer();
aoqi@0 758 try {
aoqi@0 759 for (; ;) {
aoqi@0 760 // use in.getc to bypass PE processing
aoqi@0 761 char c = in.getc();
aoqi@0 762 //Reached the end of PI.
aoqi@0 763 if (c == '?' && in.peekc('>'))
aoqi@0 764 break;
aoqi@0 765 strTmp.append(c);
aoqi@0 766 }
aoqi@0 767 } catch (EndOfInputException e) {
aoqi@0 768 fatal("P-021");
aoqi@0 769 }
aoqi@0 770 dtdHandler.processingInstruction(target, strTmp.toString());
aoqi@0 771 } else {
aoqi@0 772 if (!in.peek("?>", null)) {
aoqi@0 773 fatal("P-022");
aoqi@0 774 }
aoqi@0 775 dtdHandler.processingInstruction(target, "");
aoqi@0 776 }
aoqi@0 777
aoqi@0 778 doLexicalPE = savedLexicalPE;
aoqi@0 779 return true;
aoqi@0 780 }
aoqi@0 781
aoqi@0 782 // [18] CDSect ::= CDStart CData CDEnd
aoqi@0 783 // [19] CDStart ::= '<![CDATA['
aoqi@0 784 // [20] CData ::= (Char* - (Char* ']]>' Char*))
aoqi@0 785 // [21] CDEnd ::= ']]>'
aoqi@0 786 //
aoqi@0 787 // ... handled by InputEntity.unparsedContent()
aoqi@0 788
aoqi@0 789 // collapsing several rules together ...
aoqi@0 790 // simpler than attribute literals -- no reference parsing!
aoqi@0 791 private String maybeReadAttribute(String name, boolean must)
aoqi@0 792 throws IOException, SAXException {
aoqi@0 793
aoqi@0 794 // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
aoqi@0 795 // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
aoqi@0 796 // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\"
aoqi@0 797 if (!maybeWhitespace()) {
aoqi@0 798 if (!must) {
aoqi@0 799 return null;
aoqi@0 800 }
aoqi@0 801 fatal("P-024", new Object[]{name});
aoqi@0 802 // NOTREACHED
aoqi@0 803 }
aoqi@0 804
aoqi@0 805 if (!peek(name)) {
aoqi@0 806 if (must) {
aoqi@0 807 fatal("P-024", new Object[]{name});
aoqi@0 808 } else {
aoqi@0 809 // To ensure that the whitespace is there so that when we
aoqi@0 810 // check for the next attribute we assure that the
aoqi@0 811 // whitespace still exists.
aoqi@0 812 ungetc();
aoqi@0 813 return null;
aoqi@0 814 }
aoqi@0 815 }
aoqi@0 816
aoqi@0 817 // [25] Eq ::= S? '=' S?
aoqi@0 818 maybeWhitespace();
aoqi@0 819 nextChar('=', "F-023", null);
aoqi@0 820 maybeWhitespace();
aoqi@0 821
aoqi@0 822 return getQuotedString("F-035", name);
aoqi@0 823 }
aoqi@0 824
aoqi@0 825 private void readVersion(boolean must, String versionNum)
aoqi@0 826 throws IOException, SAXException {
aoqi@0 827
aoqi@0 828 String value = maybeReadAttribute("version", must);
aoqi@0 829
aoqi@0 830 // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
aoqi@0 831
aoqi@0 832 if (must && value == null)
aoqi@0 833 fatal("P-025", new Object[]{versionNum});
aoqi@0 834 if (value != null) {
aoqi@0 835 int length = value.length();
aoqi@0 836 for (int i = 0; i < length; i++) {
aoqi@0 837 char c = value.charAt(i);
aoqi@0 838 if (!((c >= '0' && c <= '9')
aoqi@0 839 || c == '_' || c == '.'
aoqi@0 840 || (c >= 'a' && c <= 'z')
aoqi@0 841 || (c >= 'A' && c <= 'Z')
aoqi@0 842 || c == ':' || c == '-')
aoqi@0 843 )
aoqi@0 844 fatal("P-026", new Object[]{value});
aoqi@0 845 }
aoqi@0 846 }
aoqi@0 847 if (value != null && !value.equals(versionNum))
aoqi@0 848 error("P-027", new Object[]{versionNum, value});
aoqi@0 849 }
aoqi@0 850
aoqi@0 851 // common code used by most markup declarations
aoqi@0 852 // ... S (Q)Name ...
aoqi@0 853 private String getMarkupDeclname(String roleId, boolean qname)
aoqi@0 854 throws IOException, SAXException {
aoqi@0 855
aoqi@0 856 String name;
aoqi@0 857
aoqi@0 858 whitespace(roleId);
aoqi@0 859 name = maybeGetName();
aoqi@0 860 if (name == null)
aoqi@0 861 fatal("P-005", new Object[]
aoqi@0 862 {messages.getMessage(locale, roleId)});
aoqi@0 863 return name;
aoqi@0 864 }
aoqi@0 865
aoqi@0 866 private boolean maybeMarkupDecl()
aoqi@0 867 throws IOException, SAXException {
aoqi@0 868
aoqi@0 869 // [29] markupdecl ::= elementdecl | Attlistdecl
aoqi@0 870 // | EntityDecl | NotationDecl | PI | Comment
aoqi@0 871 return maybeElementDecl()
aoqi@0 872 || maybeAttlistDecl()
aoqi@0 873 || maybeEntityDecl()
aoqi@0 874 || maybeNotationDecl()
aoqi@0 875 || maybePI(false)
aoqi@0 876 || maybeComment(false);
aoqi@0 877 }
aoqi@0 878
aoqi@0 879 private static final String XmlLang = "xml:lang";
aoqi@0 880
aoqi@0 881 private boolean isXmlLang(String value) {
aoqi@0 882
aoqi@0 883 // [33] LanguageId ::= Langcode ('-' Subcode)*
aoqi@0 884 // [34] Langcode ::= ISO639Code | IanaCode | UserCode
aoqi@0 885 // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
aoqi@0 886 // [36] IanaCode ::= [iI] '-' SubCode
aoqi@0 887 // [37] UserCode ::= [xX] '-' SubCode
aoqi@0 888 // [38] SubCode ::= [a-zA-Z]+
aoqi@0 889
aoqi@0 890 // the ISO and IANA codes (and subcodes) are registered,
aoqi@0 891 // but that's neither a WF nor a validity constraint.
aoqi@0 892
aoqi@0 893 int nextSuffix;
aoqi@0 894 char c;
aoqi@0 895
aoqi@0 896 if (value.length() < 2)
aoqi@0 897 return false;
aoqi@0 898 c = value.charAt(1);
aoqi@0 899 if (c == '-') { // IANA, or user, code
aoqi@0 900 c = value.charAt(0);
aoqi@0 901 if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
aoqi@0 902 return false;
aoqi@0 903 nextSuffix = 1;
aoqi@0 904 } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
aoqi@0 905 // 2 letter ISO code, or error
aoqi@0 906 c = value.charAt(0);
aoqi@0 907 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
aoqi@0 908 return false;
aoqi@0 909 nextSuffix = 2;
aoqi@0 910 } else
aoqi@0 911 return false;
aoqi@0 912
aoqi@0 913 // here "suffix" ::= '-' [a-zA-Z]+ suffix*
aoqi@0 914 while (nextSuffix < value.length()) {
aoqi@0 915 c = value.charAt(nextSuffix);
aoqi@0 916 if (c != '-')
aoqi@0 917 break;
aoqi@0 918 while (++nextSuffix < value.length()) {
aoqi@0 919 c = value.charAt(nextSuffix);
aoqi@0 920 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
aoqi@0 921 break;
aoqi@0 922 }
aoqi@0 923 }
aoqi@0 924 return value.length() == nextSuffix && c != '-';
aoqi@0 925 }
aoqi@0 926
aoqi@0 927
aoqi@0 928 //
aoqi@0 929 // CHAPTER 3: Logical Structures
aoqi@0 930 //
aoqi@0 931
aoqi@0 932 /**
aoqi@0 933 * To validate, subclassers should at this time make sure that
aoqi@0 934 * values are of the declared types:<UL>
aoqi@0 935 * <LI> ID and IDREF(S) values are Names
aoqi@0 936 * <LI> NMTOKEN(S) are Nmtokens
aoqi@0 937 * <LI> ENUMERATION values match one of the tokens
aoqi@0 938 * <LI> NOTATION values match a notation name
aoqi@0 939 * <LI> ENTITY(IES) values match an unparsed external entity
aoqi@0 940 * </UL>
aoqi@0 941 * <p/>
aoqi@0 942 * <P> Separately, make sure IDREF values match some ID
aoqi@0 943 * provided in the document (in the afterRoot method).
aoqi@0 944 */
aoqi@0 945 /* void validateAttributeSyntax (Attribute attr, String value)
aoqi@0 946 throws DTDParseException {
aoqi@0 947 // ID, IDREF(S) ... values are Names
aoqi@0 948 if (Attribute.ID == attr.type()) {
aoqi@0 949 if (!XmlNames.isName (value))
aoqi@0 950 error ("V-025", new Object [] { value });
aoqi@0 951
aoqi@0 952 Boolean b = (Boolean) ids.getNonInterned (value);
aoqi@0 953 if (b == null || b.equals (Boolean.FALSE))
aoqi@0 954 ids.put (value.intern (), Boolean.TRUE);
aoqi@0 955 else
aoqi@0 956 error ("V-026", new Object [] { value });
aoqi@0 957
aoqi@0 958 } else if (Attribute.IDREF == attr.type()) {
aoqi@0 959 if (!XmlNames.isName (value))
aoqi@0 960 error ("V-027", new Object [] { value });
aoqi@0 961
aoqi@0 962 Boolean b = (Boolean) ids.getNonInterned (value);
aoqi@0 963 if (b == null)
aoqi@0 964 ids.put (value.intern (), Boolean.FALSE);
aoqi@0 965
aoqi@0 966 } else if (Attribute.IDREFS == attr.type()) {
aoqi@0 967 StringTokenizer tokenizer = new StringTokenizer (value);
aoqi@0 968 Boolean b;
aoqi@0 969 boolean sawValue = false;
aoqi@0 970
aoqi@0 971 while (tokenizer.hasMoreTokens ()) {
aoqi@0 972 value = tokenizer.nextToken ();
aoqi@0 973 if (!XmlNames.isName (value))
aoqi@0 974 error ("V-027", new Object [] { value });
aoqi@0 975 b = (Boolean) ids.getNonInterned (value);
aoqi@0 976 if (b == null)
aoqi@0 977 ids.put (value.intern (), Boolean.FALSE);
aoqi@0 978 sawValue = true;
aoqi@0 979 }
aoqi@0 980 if (!sawValue)
aoqi@0 981 error ("V-039", null);
aoqi@0 982
aoqi@0 983
aoqi@0 984 // NMTOKEN(S) ... values are Nmtoken(s)
aoqi@0 985 } else if (Attribute.NMTOKEN == attr.type()) {
aoqi@0 986 if (!XmlNames.isNmtoken (value))
aoqi@0 987 error ("V-028", new Object [] { value });
aoqi@0 988
aoqi@0 989 } else if (Attribute.NMTOKENS == attr.type()) {
aoqi@0 990 StringTokenizer tokenizer = new StringTokenizer (value);
aoqi@0 991 boolean sawValue = false;
aoqi@0 992
aoqi@0 993 while (tokenizer.hasMoreTokens ()) {
aoqi@0 994 value = tokenizer.nextToken ();
aoqi@0 995 if (!XmlNames.isNmtoken (value))
aoqi@0 996 error ("V-028", new Object [] { value });
aoqi@0 997 sawValue = true;
aoqi@0 998 }
aoqi@0 999 if (!sawValue)
aoqi@0 1000 error ("V-032", null);
aoqi@0 1001
aoqi@0 1002 // ENUMERATION ... values match one of the tokens
aoqi@0 1003 } else if (Attribute.ENUMERATION == attr.type()) {
aoqi@0 1004 for (int i = 0; i < attr.values().length; i++)
aoqi@0 1005 if (value.equals (attr.values()[i]))
aoqi@0 1006 return;
aoqi@0 1007 error ("V-029", new Object [] { value });
aoqi@0 1008
aoqi@0 1009 // NOTATION values match a notation name
aoqi@0 1010 } else if (Attribute.NOTATION == attr.type()) {
aoqi@0 1011 //
aoqi@0 1012 // XXX XML 1.0 spec should probably list references to
aoqi@0 1013 // externally defined notations in standalone docs as
aoqi@0 1014 // validity errors. Ditto externally defined unparsed
aoqi@0 1015 // entities; neither should show up in attributes, else
aoqi@0 1016 // one needs to read the external declarations in order
aoqi@0 1017 // to make sense of the document (exactly what tagging
aoqi@0 1018 // a doc as "standalone" intends you won't need to do).
aoqi@0 1019 //
aoqi@0 1020 for (int i = 0; i < attr.values().length; i++)
aoqi@0 1021 if (value.equals (attr.values()[i]))
aoqi@0 1022 return;
aoqi@0 1023 error ("V-030", new Object [] { value });
aoqi@0 1024
aoqi@0 1025 // ENTITY(IES) values match an unparsed entity(ies)
aoqi@0 1026 } else if (Attribute.ENTITY == attr.type()) {
aoqi@0 1027 // see note above re standalone
aoqi@0 1028 if (!isUnparsedEntity (value))
aoqi@0 1029 error ("V-031", new Object [] { value });
aoqi@0 1030
aoqi@0 1031 } else if (Attribute.ENTITIES == attr.type()) {
aoqi@0 1032 StringTokenizer tokenizer = new StringTokenizer (value);
aoqi@0 1033 boolean sawValue = false;
aoqi@0 1034
aoqi@0 1035 while (tokenizer.hasMoreTokens ()) {
aoqi@0 1036 value = tokenizer.nextToken ();
aoqi@0 1037 // see note above re standalone
aoqi@0 1038 if (!isUnparsedEntity (value))
aoqi@0 1039 error ("V-031", new Object [] { value });
aoqi@0 1040 sawValue = true;
aoqi@0 1041 }
aoqi@0 1042 if (!sawValue)
aoqi@0 1043 error ("V-040", null);
aoqi@0 1044
aoqi@0 1045 } else if (Attribute.CDATA != attr.type())
aoqi@0 1046 throw new InternalError (attr.type());
aoqi@0 1047 }
aoqi@0 1048 */
aoqi@0 1049 /*
aoqi@0 1050 private boolean isUnparsedEntity (String name)
aoqi@0 1051 {
aoqi@0 1052 Object e = entities.getNonInterned (name);
aoqi@0 1053 if (e == null || !(e instanceof ExternalEntity))
aoqi@0 1054 return false;
aoqi@0 1055 return ((ExternalEntity)e).notation != null;
aoqi@0 1056 }
aoqi@0 1057 */
aoqi@0 1058 private boolean maybeElementDecl()
aoqi@0 1059 throws IOException, SAXException {
aoqi@0 1060
aoqi@0 1061 // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
aoqi@0 1062 // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
aoqi@0 1063 InputEntity start = peekDeclaration("!ELEMENT");
aoqi@0 1064
aoqi@0 1065 if (start == null)
aoqi@0 1066 return false;
aoqi@0 1067
aoqi@0 1068 // n.b. for content models where inter-element whitespace is
aoqi@0 1069 // ignorable, we mark that fact here.
aoqi@0 1070 String name = getMarkupDeclname("F-015", true);
aoqi@0 1071 // Element element = (Element) elements.get (name);
aoqi@0 1072 // boolean declEffective = false;
aoqi@0 1073
aoqi@0 1074 /*
aoqi@0 1075 if (element != null) {
aoqi@0 1076 if (element.contentModel() != null) {
aoqi@0 1077 error ("V-012", new Object [] { name });
aoqi@0 1078 } // else <!ATTLIST name ...> came first
aoqi@0 1079 } else {
aoqi@0 1080 element = new Element(name);
aoqi@0 1081 elements.put (element.name(), element);
aoqi@0 1082 declEffective = true;
aoqi@0 1083 }
aoqi@0 1084 */
aoqi@0 1085 if (declaredElements.contains(name))
aoqi@0 1086 error("V-012", new Object[]{name});
aoqi@0 1087 else {
aoqi@0 1088 declaredElements.add(name);
aoqi@0 1089 // declEffective = true;
aoqi@0 1090 }
aoqi@0 1091
aoqi@0 1092 short modelType;
aoqi@0 1093 whitespace("F-000");
aoqi@0 1094 if (peek(strEMPTY)) {
aoqi@0 1095 /// // leave element.contentModel as null for this case.
aoqi@0 1096 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
aoqi@0 1097 } else if (peek(strANY)) {
aoqi@0 1098 /// element.setContentModel(new StringModel(StringModelType.ANY));
aoqi@0 1099 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
aoqi@0 1100 } else {
aoqi@0 1101 modelType = getMixedOrChildren(name);
aoqi@0 1102 }
aoqi@0 1103
aoqi@0 1104 dtdHandler.endContentModel(name, modelType);
aoqi@0 1105
aoqi@0 1106 maybeWhitespace();
aoqi@0 1107 char c = getc();
aoqi@0 1108 if (c != '>')
aoqi@0 1109 fatal("P-036", new Object[]{name, new Character(c)});
aoqi@0 1110 if (start != in)
aoqi@0 1111 error("V-013", null);
aoqi@0 1112
aoqi@0 1113 /// dtdHandler.elementDecl(element);
aoqi@0 1114
aoqi@0 1115 return true;
aoqi@0 1116 }
aoqi@0 1117
aoqi@0 1118 // We're leaving the content model as a regular expression;
aoqi@0 1119 // it's an efficient natural way to express such things, and
aoqi@0 1120 // libraries often interpret them. No whitespace in the
aoqi@0 1121 // model we store, though!
aoqi@0 1122
aoqi@0 1123 /**
aoqi@0 1124 * returns content model type.
aoqi@0 1125 */
aoqi@0 1126 private short getMixedOrChildren(String elementName/*Element element*/)
aoqi@0 1127 throws IOException, SAXException {
aoqi@0 1128
aoqi@0 1129 InputEntity start;
aoqi@0 1130
aoqi@0 1131 // [47] children ::= (choice|seq) ('?'|'*'|'+')?
aoqi@0 1132 strTmp = new StringBuffer();
aoqi@0 1133
aoqi@0 1134 nextChar('(', "F-028", elementName);
aoqi@0 1135 start = in;
aoqi@0 1136 maybeWhitespace();
aoqi@0 1137 strTmp.append('(');
aoqi@0 1138
aoqi@0 1139 short modelType;
aoqi@0 1140 if (peek("#PCDATA")) {
aoqi@0 1141 strTmp.append("#PCDATA");
aoqi@0 1142 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);
aoqi@0 1143 getMixed(elementName, start);
aoqi@0 1144 } else {
aoqi@0 1145 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
aoqi@0 1146 getcps(elementName, start);
aoqi@0 1147 }
aoqi@0 1148
aoqi@0 1149 return modelType;
aoqi@0 1150 }
aoqi@0 1151
aoqi@0 1152 // '(' S? already consumed
aoqi@0 1153 // matching ')' must be in "start" entity if validating
aoqi@0 1154 private void getcps(/*Element element,*/String elementName, InputEntity start)
aoqi@0 1155 throws IOException, SAXException {
aoqi@0 1156
aoqi@0 1157 // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
aoqi@0 1158 // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
aoqi@0 1159 // [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
aoqi@0 1160 boolean decided = false;
aoqi@0 1161 char type = 0;
aoqi@0 1162 // ContentModel retval, temp, current;
aoqi@0 1163
aoqi@0 1164 // retval = temp = current = null;
aoqi@0 1165
aoqi@0 1166 dtdHandler.startModelGroup();
aoqi@0 1167
aoqi@0 1168 do {
aoqi@0 1169 String tag;
aoqi@0 1170
aoqi@0 1171 tag = maybeGetName();
aoqi@0 1172 if (tag != null) {
aoqi@0 1173 strTmp.append(tag);
aoqi@0 1174 // temp = new ElementModel(tag);
aoqi@0 1175 // getFrequency((RepeatableContent)temp);
aoqi@0 1176 ///->
aoqi@0 1177 dtdHandler.childElement(tag, getFrequency());
aoqi@0 1178 ///<-
aoqi@0 1179 } else if (peek("(")) {
aoqi@0 1180 InputEntity next = in;
aoqi@0 1181 strTmp.append('(');
aoqi@0 1182 maybeWhitespace();
aoqi@0 1183 // temp = getcps(element, next);
aoqi@0 1184 // getFrequency(temp);
aoqi@0 1185 ///->
aoqi@0 1186 getcps(elementName, next);
aoqi@0 1187 /// getFrequency(); <- this looks like a bug
aoqi@0 1188 ///<-
aoqi@0 1189 } else
aoqi@0 1190 fatal((type == 0) ? "P-039" :
aoqi@0 1191 ((type == ',') ? "P-037" : "P-038"),
aoqi@0 1192 new Object[]{new Character(getc())});
aoqi@0 1193
aoqi@0 1194 maybeWhitespace();
aoqi@0 1195 if (decided) {
aoqi@0 1196 char c = getc();
aoqi@0 1197
aoqi@0 1198 // if (current != null) {
aoqi@0 1199 // current.addChild(temp);
aoqi@0 1200 // }
aoqi@0 1201 if (c == type) {
aoqi@0 1202 strTmp.append(type);
aoqi@0 1203 maybeWhitespace();
aoqi@0 1204 reportConnector(type);
aoqi@0 1205 continue;
aoqi@0 1206 } else if (c == '\u0029') { // rparen
aoqi@0 1207 ungetc();
aoqi@0 1208 continue;
aoqi@0 1209 } else {
aoqi@0 1210 fatal((type == 0) ? "P-041" : "P-040",
aoqi@0 1211 new Object[]{
aoqi@0 1212 new Character(c),
aoqi@0 1213 new Character(type)
aoqi@0 1214 });
aoqi@0 1215 }
aoqi@0 1216 } else {
aoqi@0 1217 type = getc();
aoqi@0 1218 switch (type) {
aoqi@0 1219 case '|':
aoqi@0 1220 case ',':
aoqi@0 1221 reportConnector(type);
aoqi@0 1222 break;
aoqi@0 1223 default:
aoqi@0 1224 // retval = temp;
aoqi@0 1225 ungetc();
aoqi@0 1226 continue;
aoqi@0 1227 }
aoqi@0 1228 // retval = (ContentModel)current;
aoqi@0 1229 decided = true;
aoqi@0 1230 // current.addChild(temp);
aoqi@0 1231 strTmp.append(type);
aoqi@0 1232 }
aoqi@0 1233 maybeWhitespace();
aoqi@0 1234 } while (!peek(")"));
aoqi@0 1235
aoqi@0 1236 if (in != start)
aoqi@0 1237 error("V-014", new Object[]{elementName});
aoqi@0 1238 strTmp.append(')');
aoqi@0 1239
aoqi@0 1240 dtdHandler.endModelGroup(getFrequency());
aoqi@0 1241 // return retval;
aoqi@0 1242 }
aoqi@0 1243
aoqi@0 1244 private void reportConnector(char type) throws SAXException {
aoqi@0 1245 switch (type) {
aoqi@0 1246 case '|':
aoqi@0 1247 dtdHandler.connector(DTDEventListener.CHOICE); ///<-
aoqi@0 1248 return;
aoqi@0 1249 case ',':
aoqi@0 1250 dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
aoqi@0 1251 return;
aoqi@0 1252 default:
aoqi@0 1253 throw new Error(); //assertion failed.
aoqi@0 1254 }
aoqi@0 1255 }
aoqi@0 1256
aoqi@0 1257 private short getFrequency()
aoqi@0 1258 throws IOException, SAXException {
aoqi@0 1259
aoqi@0 1260 final char c = getc();
aoqi@0 1261
aoqi@0 1262 if (c == '?') {
aoqi@0 1263 strTmp.append(c);
aoqi@0 1264 return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
aoqi@0 1265 // original.setRepeat(Repeat.ZERO_OR_ONE);
aoqi@0 1266 } else if (c == '+') {
aoqi@0 1267 strTmp.append(c);
aoqi@0 1268 return DTDEventListener.OCCURENCE_ONE_OR_MORE;
aoqi@0 1269 // original.setRepeat(Repeat.ONE_OR_MORE);
aoqi@0 1270 } else if (c == '*') {
aoqi@0 1271 strTmp.append(c);
aoqi@0 1272 return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
aoqi@0 1273 // original.setRepeat(Repeat.ZERO_OR_MORE);
aoqi@0 1274 } else {
aoqi@0 1275 ungetc();
aoqi@0 1276 return DTDEventListener.OCCURENCE_ONCE;
aoqi@0 1277 }
aoqi@0 1278 }
aoqi@0 1279
aoqi@0 1280 // '(' S? '#PCDATA' already consumed
aoqi@0 1281 // matching ')' must be in "start" entity if validating
aoqi@0 1282 private void getMixed(String elementName, /*Element element,*/ InputEntity start)
aoqi@0 1283 throws IOException, SAXException {
aoqi@0 1284
aoqi@0 1285 // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
aoqi@0 1286 // | '(' S? '#PCDATA' S? ')'
aoqi@0 1287 maybeWhitespace();
aoqi@0 1288 if (peek("\u0029*") || peek("\u0029")) {
aoqi@0 1289 if (in != start)
aoqi@0 1290 error("V-014", new Object[]{elementName});
aoqi@0 1291 strTmp.append(')');
aoqi@0 1292 // element.setContentModel(new StringModel(StringModelType.PCDATA));
aoqi@0 1293 return;
aoqi@0 1294 }
aoqi@0 1295
aoqi@0 1296 ArrayList l = new ArrayList();
aoqi@0 1297 // l.add(new StringModel(StringModelType.PCDATA));
aoqi@0 1298
aoqi@0 1299
aoqi@0 1300 while (peek("|")) {
aoqi@0 1301 String name;
aoqi@0 1302
aoqi@0 1303 strTmp.append('|');
aoqi@0 1304 maybeWhitespace();
aoqi@0 1305
aoqi@0 1306 doLexicalPE = true;
aoqi@0 1307 name = maybeGetName();
aoqi@0 1308 if (name == null)
aoqi@0 1309 fatal("P-042", new Object[]
aoqi@0 1310 {elementName, Integer.toHexString(getc())});
aoqi@0 1311 if (l.contains(name)) {
aoqi@0 1312 error("V-015", new Object[]{name});
aoqi@0 1313 } else {
aoqi@0 1314 l.add(name);
aoqi@0 1315 dtdHandler.mixedElement(name);
aoqi@0 1316 }
aoqi@0 1317 strTmp.append(name);
aoqi@0 1318 maybeWhitespace();
aoqi@0 1319 }
aoqi@0 1320
aoqi@0 1321 if (!peek("\u0029*")) // right paren
aoqi@0 1322 fatal("P-043", new Object[]
aoqi@0 1323 {elementName, new Character(getc())});
aoqi@0 1324 if (in != start)
aoqi@0 1325 error("V-014", new Object[]{elementName});
aoqi@0 1326 strTmp.append(')');
aoqi@0 1327 // ChoiceModel cm = new ChoiceModel((Collection)l);
aoqi@0 1328 // cm.setRepeat(Repeat.ZERO_OR_MORE);
aoqi@0 1329 // element.setContentModel(cm);
aoqi@0 1330 }
aoqi@0 1331
aoqi@0 1332 private boolean maybeAttlistDecl()
aoqi@0 1333 throws IOException, SAXException {
aoqi@0 1334
aoqi@0 1335 // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
aoqi@0 1336 InputEntity start = peekDeclaration("!ATTLIST");
aoqi@0 1337
aoqi@0 1338 if (start == null)
aoqi@0 1339 return false;
aoqi@0 1340
aoqi@0 1341 String elementName = getMarkupDeclname("F-016", true);
aoqi@0 1342 // Element element = (Element) elements.get (name);
aoqi@0 1343
aoqi@0 1344 // if (element == null) {
aoqi@0 1345 // // not yet declared -- no problem.
aoqi@0 1346 // element = new Element(name);
aoqi@0 1347 // elements.put(name, element);
aoqi@0 1348 // }
aoqi@0 1349
aoqi@0 1350 while (!peek(">")) {
aoqi@0 1351
aoqi@0 1352 // [53] AttDef ::= S Name S AttType S DefaultDecl
aoqi@0 1353 // [54] AttType ::= StringType | TokenizedType | EnumeratedType
aoqi@0 1354
aoqi@0 1355 // look for global attribute definitions, don't expand for now...
aoqi@0 1356 maybeWhitespace();
aoqi@0 1357 char c = getc();
aoqi@0 1358 if (c == '%') {
aoqi@0 1359 String entityName = maybeGetName();
aoqi@0 1360 if (entityName != null) {
aoqi@0 1361 nextChar(';', "F-021", entityName);
aoqi@0 1362 whitespace("F-021");
aoqi@0 1363 continue;
aoqi@0 1364 } else
aoqi@0 1365 fatal("P-011");
aoqi@0 1366 }
aoqi@0 1367
aoqi@0 1368 ungetc();
aoqi@0 1369 // look for attribute name otherwise
aoqi@0 1370 String attName = maybeGetName();
aoqi@0 1371 if (attName == null) {
aoqi@0 1372 fatal("P-044", new Object[]{new Character(getc())});
aoqi@0 1373 }
aoqi@0 1374 whitespace("F-001");
aoqi@0 1375
aoqi@0 1376 /// Attribute a = new Attribute (name);
aoqi@0 1377
aoqi@0 1378 String typeName;
aoqi@0 1379 Vector values = null; // notation/enumeration values
aoqi@0 1380
aoqi@0 1381 // Note: use the type constants from Attribute
aoqi@0 1382 // so that "==" may be used (faster)
aoqi@0 1383
aoqi@0 1384 // [55] StringType ::= 'CDATA'
aoqi@0 1385 if (peek(TYPE_CDATA))
aoqi@0 1386 /// a.setType(Attribute.CDATA);
aoqi@0 1387 typeName = TYPE_CDATA;
aoqi@0 1388
aoqi@0 1389 // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
aoqi@0 1390 // | 'ENTITY' | 'ENTITIES'
aoqi@0 1391 // | 'NMTOKEN' | 'NMTOKENS'
aoqi@0 1392 // n.b. if "IDREFS" is there, both "ID" and "IDREF"
aoqi@0 1393 // match peekahead ... so this order matters!
aoqi@0 1394 else if (peek(TYPE_IDREFS))
aoqi@0 1395 typeName = TYPE_IDREFS;
aoqi@0 1396 else if (peek(TYPE_IDREF))
aoqi@0 1397 typeName = TYPE_IDREF;
aoqi@0 1398 else if (peek(TYPE_ID)) {
aoqi@0 1399 typeName = TYPE_ID;
aoqi@0 1400 // TODO: should implement this error check?
aoqi@0 1401 /// if (element.id() != null) {
aoqi@0 1402 /// error ("V-016", new Object [] { element.id() });
aoqi@0 1403 /// } else
aoqi@0 1404 /// element.setId(name);
aoqi@0 1405 } else if (peek(TYPE_ENTITY))
aoqi@0 1406 typeName = TYPE_ENTITY;
aoqi@0 1407 else if (peek(TYPE_ENTITIES))
aoqi@0 1408 typeName = TYPE_ENTITIES;
aoqi@0 1409 else if (peek(TYPE_NMTOKENS))
aoqi@0 1410 typeName = TYPE_NMTOKENS;
aoqi@0 1411 else if (peek(TYPE_NMTOKEN))
aoqi@0 1412 typeName = TYPE_NMTOKEN;
aoqi@0 1413
aoqi@0 1414 // [57] EnumeratedType ::= NotationType | Enumeration
aoqi@0 1415 // [58] NotationType ::= 'NOTATION' S '(' S? Name
aoqi@0 1416 // (S? '|' S? Name)* S? ')'
aoqi@0 1417 else if (peek(TYPE_NOTATION)) {
aoqi@0 1418 typeName = TYPE_NOTATION;
aoqi@0 1419 whitespace("F-002");
aoqi@0 1420 nextChar('(', "F-029", null);
aoqi@0 1421 maybeWhitespace();
aoqi@0 1422
aoqi@0 1423 values = new Vector();
aoqi@0 1424 do {
aoqi@0 1425 String name;
aoqi@0 1426 if ((name = maybeGetName()) == null)
aoqi@0 1427 fatal("P-068");
aoqi@0 1428 // permit deferred declarations
aoqi@0 1429 if (notations.get(name) == null)
aoqi@0 1430 notations.put(name, name);
aoqi@0 1431 values.addElement(name);
aoqi@0 1432 maybeWhitespace();
aoqi@0 1433 if (peek("|"))
aoqi@0 1434 maybeWhitespace();
aoqi@0 1435 } while (!peek(")"));
aoqi@0 1436 /// a.setValues(new String [v.size ()]);
aoqi@0 1437 /// for (int i = 0; i < v.size (); i++)
aoqi@0 1438 /// a.setValue(i, (String)v.elementAt(i));
aoqi@0 1439
aoqi@0 1440 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
aoqi@0 1441 } else if (peek("(")) {
aoqi@0 1442 /// a.setType(Attribute.ENUMERATION);
aoqi@0 1443 typeName = TYPE_ENUMERATION;
aoqi@0 1444
aoqi@0 1445 maybeWhitespace();
aoqi@0 1446
aoqi@0 1447 /// Vector v = new Vector ();
aoqi@0 1448 values = new Vector();
aoqi@0 1449 do {
aoqi@0 1450 String name = getNmtoken();
aoqi@0 1451 /// v.addElement (name);
aoqi@0 1452 values.addElement(name);
aoqi@0 1453 maybeWhitespace();
aoqi@0 1454 if (peek("|"))
aoqi@0 1455 maybeWhitespace();
aoqi@0 1456 } while (!peek(")"));
aoqi@0 1457 /// a.setValues(new String [v.size ()]);
aoqi@0 1458 /// for (int i = 0; i < v.size (); i++)
aoqi@0 1459 /// a.setValue(i, (String)v.elementAt(i));
aoqi@0 1460 } else {
aoqi@0 1461 fatal("P-045",
aoqi@0 1462 new Object[]{attName, new Character(getc())});
aoqi@0 1463 typeName = null;
aoqi@0 1464 }
aoqi@0 1465
aoqi@0 1466 short attributeUse;
aoqi@0 1467 String defaultValue = null;
aoqi@0 1468
aoqi@0 1469 // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
aoqi@0 1470 // | (('#FIXED' S)? AttValue)
aoqi@0 1471 whitespace("F-003");
aoqi@0 1472 if (peek("#REQUIRED"))
aoqi@0 1473 attributeUse = DTDEventListener.USE_REQUIRED;
aoqi@0 1474 /// a.setIsRequired(true);
aoqi@0 1475 else if (peek("#FIXED")) {
aoqi@0 1476 /// if (a.type() == Attribute.ID)
aoqi@0 1477 if (typeName == TYPE_ID)
aoqi@0 1478 error("V-017", new Object[]{attName});
aoqi@0 1479 /// a.setIsFixed(true);
aoqi@0 1480 attributeUse = DTDEventListener.USE_FIXED;
aoqi@0 1481 whitespace("F-004");
aoqi@0 1482 parseLiteral(false);
aoqi@0 1483 /// if (a.type() != Attribute.CDATA)
aoqi@0 1484 /// a.setDefaultValue(normalize(false));
aoqi@0 1485 /// else
aoqi@0 1486 /// a.setDefaultValue(strTmp.toString());
aoqi@0 1487
aoqi@0 1488 if (typeName == TYPE_CDATA)
aoqi@0 1489 defaultValue = normalize(false);
aoqi@0 1490 else
aoqi@0 1491 defaultValue = strTmp.toString();
aoqi@0 1492
aoqi@0 1493 // TODO: implement this check
aoqi@0 1494 /// if (a.type() != Attribute.CDATA)
aoqi@0 1495 /// validateAttributeSyntax (a, a.defaultValue());
aoqi@0 1496 } else if (!peek("#IMPLIED")) {
aoqi@0 1497 attributeUse = DTDEventListener.USE_IMPLIED;
aoqi@0 1498
aoqi@0 1499 /// if (a.type() == Attribute.ID)
aoqi@0 1500 if (typeName == TYPE_ID)
aoqi@0 1501 error("V-018", new Object[]{attName});
aoqi@0 1502 parseLiteral(false);
aoqi@0 1503 /// if (a.type() != Attribute.CDATA)
aoqi@0 1504 /// a.setDefaultValue(normalize(false));
aoqi@0 1505 /// else
aoqi@0 1506 /// a.setDefaultValue(strTmp.toString());
aoqi@0 1507 if (typeName == TYPE_CDATA)
aoqi@0 1508 defaultValue = normalize(false);
aoqi@0 1509 else
aoqi@0 1510 defaultValue = strTmp.toString();
aoqi@0 1511
aoqi@0 1512 // TODO: implement this check
aoqi@0 1513 /// if (a.type() != Attribute.CDATA)
aoqi@0 1514 /// validateAttributeSyntax (a, a.defaultValue());
aoqi@0 1515 } else {
aoqi@0 1516 // TODO: this looks like an fatal error.
aoqi@0 1517 attributeUse = DTDEventListener.USE_NORMAL;
aoqi@0 1518 }
aoqi@0 1519
aoqi@0 1520 if (XmlLang.equals(attName)
aoqi@0 1521 && defaultValue/* a.defaultValue()*/ != null
aoqi@0 1522 && !isXmlLang(defaultValue/*a.defaultValue()*/))
aoqi@0 1523 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});
aoqi@0 1524
aoqi@0 1525 // TODO: isn't it an error to specify the same attribute twice?
aoqi@0 1526 /// if (!element.attributes().contains(a)) {
aoqi@0 1527 /// element.addAttribute(a);
aoqi@0 1528 /// dtdHandler.attributeDecl(a);
aoqi@0 1529 /// }
aoqi@0 1530
aoqi@0 1531 String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null;
aoqi@0 1532 dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
aoqi@0 1533 maybeWhitespace();
aoqi@0 1534 }
aoqi@0 1535 if (start != in)
aoqi@0 1536 error("V-013", null);
aoqi@0 1537 return true;
aoqi@0 1538 }
aoqi@0 1539
aoqi@0 1540 // used when parsing literal attribute values,
aoqi@0 1541 // or public identifiers.
aoqi@0 1542 //
aoqi@0 1543 // input in strTmp
aoqi@0 1544 private String normalize(boolean invalidIfNeeded) {
aoqi@0 1545
aoqi@0 1546 // this can allocate an extra string...
aoqi@0 1547
aoqi@0 1548 String s = strTmp.toString();
aoqi@0 1549 String s2 = s.trim();
aoqi@0 1550 boolean didStrip = false;
aoqi@0 1551
aoqi@0 1552 if (s != s2) {
aoqi@0 1553 s = s2;
aoqi@0 1554 s2 = null;
aoqi@0 1555 didStrip = true;
aoqi@0 1556 }
aoqi@0 1557 strTmp = new StringBuffer();
aoqi@0 1558 for (int i = 0; i < s.length(); i++) {
aoqi@0 1559 char c = s.charAt(i);
aoqi@0 1560 if (!XmlChars.isSpace(c)) {
aoqi@0 1561 strTmp.append(c);
aoqi@0 1562 continue;
aoqi@0 1563 }
aoqi@0 1564 strTmp.append(' ');
aoqi@0 1565 while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
aoqi@0 1566 didStrip = true;
aoqi@0 1567 i--;
aoqi@0 1568 }
aoqi@0 1569 if (didStrip)
aoqi@0 1570 return strTmp.toString();
aoqi@0 1571 else
aoqi@0 1572 return s;
aoqi@0 1573 }
aoqi@0 1574
aoqi@0 1575 private boolean maybeConditionalSect()
aoqi@0 1576 throws IOException, SAXException {
aoqi@0 1577
aoqi@0 1578 // [61] conditionalSect ::= includeSect | ignoreSect
aoqi@0 1579
aoqi@0 1580 if (!peek("<!["))
aoqi@0 1581 return false;
aoqi@0 1582
aoqi@0 1583 String keyword;
aoqi@0 1584 InputEntity start = in;
aoqi@0 1585
aoqi@0 1586 maybeWhitespace();
aoqi@0 1587
aoqi@0 1588 if ((keyword = maybeGetName()) == null)
aoqi@0 1589 fatal("P-046");
aoqi@0 1590 maybeWhitespace();
aoqi@0 1591 nextChar('[', "F-030", null);
aoqi@0 1592
aoqi@0 1593 // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
aoqi@0 1594 // extSubsetDecl ']]>'
aoqi@0 1595 if ("INCLUDE".equals(keyword)) {
aoqi@0 1596 for (; ;) {
aoqi@0 1597 while (in.isEOF() && in != start)
aoqi@0 1598 in = in.pop();
aoqi@0 1599 if (in.isEOF()) {
aoqi@0 1600 error("V-020", null);
aoqi@0 1601 }
aoqi@0 1602 if (peek("]]>"))
aoqi@0 1603 break;
aoqi@0 1604
aoqi@0 1605 doLexicalPE = false;
aoqi@0 1606 if (maybeWhitespace())
aoqi@0 1607 continue;
aoqi@0 1608 if (maybePEReference())
aoqi@0 1609 continue;
aoqi@0 1610 doLexicalPE = true;
aoqi@0 1611 if (maybeMarkupDecl() || maybeConditionalSect())
aoqi@0 1612 continue;
aoqi@0 1613
aoqi@0 1614 fatal("P-047");
aoqi@0 1615 }
aoqi@0 1616
aoqi@0 1617 // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
aoqi@0 1618 // ignoreSectcontents ']]>'
aoqi@0 1619 // [64] ignoreSectcontents ::= Ignore ('<!['
aoqi@0 1620 // ignoreSectcontents ']]>' Ignore)*
aoqi@0 1621 // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
aoqi@0 1622 } else if ("IGNORE".equals(keyword)) {
aoqi@0 1623 int nestlevel = 1;
aoqi@0 1624 // ignoreSectcontents
aoqi@0 1625 doLexicalPE = false;
aoqi@0 1626 while (nestlevel > 0) {
aoqi@0 1627 char c = getc(); // will pop input entities
aoqi@0 1628 if (c == '<') {
aoqi@0 1629 if (peek("!["))
aoqi@0 1630 nestlevel++;
aoqi@0 1631 } else if (c == ']') {
aoqi@0 1632 if (peek("]>"))
aoqi@0 1633 nestlevel--;
aoqi@0 1634 } else
aoqi@0 1635 continue;
aoqi@0 1636 }
aoqi@0 1637 } else
aoqi@0 1638 fatal("P-048", new Object[]{keyword});
aoqi@0 1639 return true;
aoqi@0 1640 }
aoqi@0 1641
aoqi@0 1642
aoqi@0 1643 //
aoqi@0 1644 // CHAPTER 4: Physical Structures
aoqi@0 1645 //
aoqi@0 1646
aoqi@0 1647 // parse decimal or hex numeric character reference
aoqi@0 1648 private int parseCharNumber()
aoqi@0 1649 throws IOException, SAXException {
aoqi@0 1650
aoqi@0 1651 char c;
aoqi@0 1652 int retval = 0;
aoqi@0 1653
aoqi@0 1654 // n.b. we ignore overflow ...
aoqi@0 1655 if (getc() != 'x') {
aoqi@0 1656 ungetc();
aoqi@0 1657 for (; ;) {
aoqi@0 1658 c = getc();
aoqi@0 1659 if (c >= '0' && c <= '9') {
aoqi@0 1660 retval *= 10;
aoqi@0 1661 retval += (c - '0');
aoqi@0 1662 continue;
aoqi@0 1663 }
aoqi@0 1664 if (c == ';')
aoqi@0 1665 return retval;
aoqi@0 1666 fatal("P-049");
aoqi@0 1667 }
aoqi@0 1668 } else
aoqi@0 1669 for (; ;) {
aoqi@0 1670 c = getc();
aoqi@0 1671 if (c >= '0' && c <= '9') {
aoqi@0 1672 retval <<= 4;
aoqi@0 1673 retval += (c - '0');
aoqi@0 1674 continue;
aoqi@0 1675 }
aoqi@0 1676 if (c >= 'a' && c <= 'f') {
aoqi@0 1677 retval <<= 4;
aoqi@0 1678 retval += 10 + (c - 'a');
aoqi@0 1679 continue;
aoqi@0 1680 }
aoqi@0 1681 if (c >= 'A' && c <= 'F') {
aoqi@0 1682 retval <<= 4;
aoqi@0 1683 retval += 10 + (c - 'A');
aoqi@0 1684 continue;
aoqi@0 1685 }
aoqi@0 1686 if (c == ';')
aoqi@0 1687 return retval;
aoqi@0 1688 fatal("P-050");
aoqi@0 1689 }
aoqi@0 1690 }
aoqi@0 1691
aoqi@0 1692 // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
aoqi@0 1693 // though still subject to the 'Char' construct in XML
aoqi@0 1694 private int surrogatesToCharTmp(int ucs4)
aoqi@0 1695 throws SAXException {
aoqi@0 1696
aoqi@0 1697 if (ucs4 <= 0xffff) {
aoqi@0 1698 if (XmlChars.isChar(ucs4)) {
aoqi@0 1699 charTmp[0] = (char) ucs4;
aoqi@0 1700 return 1;
aoqi@0 1701 }
aoqi@0 1702 } else if (ucs4 <= 0x0010ffff) {
aoqi@0 1703 // we represent these as UNICODE surrogate pairs
aoqi@0 1704 ucs4 -= 0x10000;
aoqi@0 1705 charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
aoqi@0 1706 charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
aoqi@0 1707 return 2;
aoqi@0 1708 }
aoqi@0 1709 fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
aoqi@0 1710 // NOTREACHED
aoqi@0 1711 return -1;
aoqi@0 1712 }
aoqi@0 1713
aoqi@0 1714 private boolean maybePEReference()
aoqi@0 1715 throws IOException, SAXException {
aoqi@0 1716
aoqi@0 1717 // This is the SYNTACTIC version of this construct.
aoqi@0 1718 // When processing external entities, there is also
aoqi@0 1719 // a LEXICAL version; see getc() and doLexicalPE.
aoqi@0 1720
aoqi@0 1721 // [69] PEReference ::= '%' Name ';'
aoqi@0 1722 if (!in.peekc('%'))
aoqi@0 1723 return false;
aoqi@0 1724
aoqi@0 1725 String name = maybeGetName();
aoqi@0 1726 Object entity;
aoqi@0 1727
aoqi@0 1728 if (name == null)
aoqi@0 1729 fatal("P-011");
aoqi@0 1730 nextChar(';', "F-021", name);
aoqi@0 1731 entity = params.get(name);
aoqi@0 1732
aoqi@0 1733 if (entity instanceof InternalEntity) {
aoqi@0 1734 InternalEntity value = (InternalEntity) entity;
aoqi@0 1735 pushReader(value.buf, name, false);
aoqi@0 1736
aoqi@0 1737 } else if (entity instanceof ExternalEntity) {
aoqi@0 1738 pushReader((ExternalEntity) entity);
aoqi@0 1739 externalParameterEntity((ExternalEntity) entity);
aoqi@0 1740
aoqi@0 1741 } else if (entity == null) {
aoqi@0 1742 error("V-022", new Object[]{name});
aoqi@0 1743 }
aoqi@0 1744 return true;
aoqi@0 1745 }
aoqi@0 1746
aoqi@0 1747 private boolean maybeEntityDecl()
aoqi@0 1748 throws IOException, SAXException {
aoqi@0 1749
aoqi@0 1750 // [70] EntityDecl ::= GEDecl | PEDecl
aoqi@0 1751 // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
aoqi@0 1752 // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>'
aoqi@0 1753 // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
aoqi@0 1754 // [74] PEDef ::= EntityValue | ExternalID
aoqi@0 1755 //
aoqi@0 1756 InputEntity start = peekDeclaration("!ENTITY");
aoqi@0 1757
aoqi@0 1758 if (start == null)
aoqi@0 1759 return false;
aoqi@0 1760
aoqi@0 1761 String entityName;
aoqi@0 1762 SimpleHashtable defns;
aoqi@0 1763 ExternalEntity externalId;
aoqi@0 1764 boolean doStore;
aoqi@0 1765
aoqi@0 1766 // PE expansion gets selectively turned off several places:
aoqi@0 1767 // in ENTITY declarations (here), in comments, in PIs.
aoqi@0 1768
aoqi@0 1769 // Here, we allow PE entities to be declared, and allows
aoqi@0 1770 // literals to include PE refs without the added spaces
aoqi@0 1771 // required with their expansion in markup decls.
aoqi@0 1772
aoqi@0 1773 doLexicalPE = false;
aoqi@0 1774 whitespace("F-005");
aoqi@0 1775 if (in.peekc('%')) {
aoqi@0 1776 whitespace("F-006");
aoqi@0 1777 defns = params;
aoqi@0 1778 } else
aoqi@0 1779 defns = entities;
aoqi@0 1780
aoqi@0 1781 ungetc(); // leave some whitespace
aoqi@0 1782 doLexicalPE = true;
aoqi@0 1783 entityName = getMarkupDeclname("F-017", false);
aoqi@0 1784 whitespace("F-007");
aoqi@0 1785 externalId = maybeExternalID();
aoqi@0 1786
aoqi@0 1787 //
aoqi@0 1788 // first definition sticks ... e.g. internal subset PEs are used
aoqi@0 1789 // to override DTD defaults. It's also an "error" to incorrectly
aoqi@0 1790 // redefine builtin internal entities, but since reporting such
aoqi@0 1791 // errors is optional we only give warnings ("just in case") for
aoqi@0 1792 // non-parameter entities.
aoqi@0 1793 //
aoqi@0 1794 doStore = (defns.get(entityName) == null);
aoqi@0 1795 if (!doStore && defns == entities)
aoqi@0 1796 warning("P-054", new Object[]{entityName});
aoqi@0 1797
aoqi@0 1798 // internal entities
aoqi@0 1799 if (externalId == null) {
aoqi@0 1800 char value [];
aoqi@0 1801 InternalEntity entity;
aoqi@0 1802
aoqi@0 1803 doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd"
aoqi@0 1804 parseLiteral(true);
aoqi@0 1805 doLexicalPE = true;
aoqi@0 1806 if (doStore) {
aoqi@0 1807 value = new char[strTmp.length()];
aoqi@0 1808 if (value.length != 0)
aoqi@0 1809 strTmp.getChars(0, value.length, value, 0);
aoqi@0 1810 entity = new InternalEntity(entityName, value);
aoqi@0 1811 entity.isPE = (defns == params);
aoqi@0 1812 entity.isFromInternalSubset = false;
aoqi@0 1813 defns.put(entityName, entity);
aoqi@0 1814 if (defns == entities)
aoqi@0 1815 dtdHandler.internalGeneralEntityDecl(entityName,
aoqi@0 1816 new String(value));
aoqi@0 1817 }
aoqi@0 1818
aoqi@0 1819 // external entities (including unparsed)
aoqi@0 1820 } else {
aoqi@0 1821 // [76] NDataDecl ::= S 'NDATA' S Name
aoqi@0 1822 if (defns == entities && maybeWhitespace()
aoqi@0 1823 && peek("NDATA")) {
aoqi@0 1824 externalId.notation = getMarkupDeclname("F-018", false);
aoqi@0 1825
aoqi@0 1826 // flag undeclared notation for checking after
aoqi@0 1827 // the DTD is fully processed
aoqi@0 1828 if (notations.get(externalId.notation) == null)
aoqi@0 1829 notations.put(externalId.notation, Boolean.TRUE);
aoqi@0 1830 }
aoqi@0 1831 externalId.name = entityName;
aoqi@0 1832 externalId.isPE = (defns == params);
aoqi@0 1833 externalId.isFromInternalSubset = false;
aoqi@0 1834 if (doStore) {
aoqi@0 1835 defns.put(entityName, externalId);
aoqi@0 1836 if (externalId.notation != null)
aoqi@0 1837 dtdHandler.unparsedEntityDecl(entityName,
aoqi@0 1838 externalId.publicId, externalId.systemId,
aoqi@0 1839 externalId.notation);
aoqi@0 1840 else if (defns == entities)
aoqi@0 1841 dtdHandler.externalGeneralEntityDecl(entityName,
aoqi@0 1842 externalId.publicId, externalId.systemId);
aoqi@0 1843 }
aoqi@0 1844 }
aoqi@0 1845 maybeWhitespace();
aoqi@0 1846 nextChar('>', "F-031", entityName);
aoqi@0 1847 if (start != in)
aoqi@0 1848 error("V-013", null);
aoqi@0 1849 return true;
aoqi@0 1850 }
aoqi@0 1851
aoqi@0 1852 private ExternalEntity maybeExternalID()
aoqi@0 1853 throws IOException, SAXException {
aoqi@0 1854
aoqi@0 1855 // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
aoqi@0 1856 // | 'PUBLIC' S' PubidLiteral S Systemliteral
aoqi@0 1857 String temp = null;
aoqi@0 1858 ExternalEntity retval;
aoqi@0 1859
aoqi@0 1860 if (peek("PUBLIC")) {
aoqi@0 1861 whitespace("F-009");
aoqi@0 1862 temp = parsePublicId();
aoqi@0 1863 } else if (!peek("SYSTEM"))
aoqi@0 1864 return null;
aoqi@0 1865
aoqi@0 1866 retval = new ExternalEntity(in);
aoqi@0 1867 retval.publicId = temp;
aoqi@0 1868 whitespace("F-008");
aoqi@0 1869 retval.systemId = parseSystemId();
aoqi@0 1870 return retval;
aoqi@0 1871 }
aoqi@0 1872
aoqi@0 1873 private String parseSystemId()
aoqi@0 1874 throws IOException, SAXException {
aoqi@0 1875
aoqi@0 1876 String uri = getQuotedString("F-034", null);
aoqi@0 1877 int temp = uri.indexOf(':');
aoqi@0 1878
aoqi@0 1879 // resolve relative URIs ... must do it here since
aoqi@0 1880 // it's relative to the source file holding the URI!
aoqi@0 1881
aoqi@0 1882 // "new java.net.URL (URL, string)" conforms to RFC 1630,
aoqi@0 1883 // but we can't use that except when the URI is a URL.
aoqi@0 1884 // The entity resolver is allowed to handle URIs that are
aoqi@0 1885 // not URLs, so we pass URIs through with scheme intact
aoqi@0 1886 if (temp == -1 || uri.indexOf('/') < temp) {
aoqi@0 1887 String baseURI;
aoqi@0 1888
aoqi@0 1889 baseURI = in.getSystemId();
aoqi@0 1890 if (baseURI == null)
aoqi@0 1891 fatal("P-055", new Object[]{uri});
aoqi@0 1892 if (uri.length() == 0)
aoqi@0 1893 uri = ".";
aoqi@0 1894 baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
aoqi@0 1895 if (uri.charAt(0) != '/')
aoqi@0 1896 uri = baseURI + uri;
aoqi@0 1897 else {
aoqi@0 1898 // XXX slashes at the beginning of a relative URI are
aoqi@0 1899 // a special case we don't handle.
aoqi@0 1900 throw new InternalError();
aoqi@0 1901 }
aoqi@0 1902
aoqi@0 1903 // letting other code map any "/xxx/../" or "/./" to "/",
aoqi@0 1904 // since all URIs must handle it the same.
aoqi@0 1905 }
aoqi@0 1906 // check for fragment ID in URI
aoqi@0 1907 if (uri.indexOf('#') != -1)
aoqi@0 1908 error("P-056", new Object[]{uri});
aoqi@0 1909 return uri;
aoqi@0 1910 }
aoqi@0 1911
aoqi@0 1912 private void maybeTextDecl()
aoqi@0 1913 throws IOException, SAXException {
aoqi@0 1914
aoqi@0 1915 // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
aoqi@0 1916 if (peek("<?xml")) {
aoqi@0 1917 readVersion(false, "1.0");
aoqi@0 1918 readEncoding(true);
aoqi@0 1919 maybeWhitespace();
aoqi@0 1920 if (!peek("?>"))
aoqi@0 1921 fatal("P-057");
aoqi@0 1922 }
aoqi@0 1923 }
aoqi@0 1924
aoqi@0 1925 private void externalParameterEntity(ExternalEntity next)
aoqi@0 1926 throws IOException, SAXException {
aoqi@0 1927
aoqi@0 1928 //
aoqi@0 1929 // Reap the intended benefits of standalone declarations:
aoqi@0 1930 // don't deal with external parameter entities, except to
aoqi@0 1931 // validate the standalone declaration.
aoqi@0 1932 //
aoqi@0 1933
aoqi@0 1934 // n.b. "in external parameter entities" (and external
aoqi@0 1935 // DTD subset, same grammar) parameter references can
aoqi@0 1936 // occur "within" markup declarations ... expansions can
aoqi@0 1937 // cross syntax rules. Flagged here; affects getc().
aoqi@0 1938
aoqi@0 1939 // [79] ExtPE ::= TextDecl? extSubsetDecl
aoqi@0 1940 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
aoqi@0 1941 // | PEReference | S )*
aoqi@0 1942 InputEntity pe;
aoqi@0 1943
aoqi@0 1944 // XXX if this returns false ...
aoqi@0 1945
aoqi@0 1946 pe = in;
aoqi@0 1947 maybeTextDecl();
aoqi@0 1948 while (!pe.isEOF()) {
aoqi@0 1949 // pop internal PEs (and whitespace before/after)
aoqi@0 1950 if (in.isEOF()) {
aoqi@0 1951 in = in.pop();
aoqi@0 1952 continue;
aoqi@0 1953 }
aoqi@0 1954 doLexicalPE = false;
aoqi@0 1955 if (maybeWhitespace())
aoqi@0 1956 continue;
aoqi@0 1957 if (maybePEReference())
aoqi@0 1958 continue;
aoqi@0 1959 doLexicalPE = true;
aoqi@0 1960 if (maybeMarkupDecl() || maybeConditionalSect())
aoqi@0 1961 continue;
aoqi@0 1962 break;
aoqi@0 1963 }
aoqi@0 1964 // if (in != pe) throw new InternalError("who popped my PE?");
aoqi@0 1965 if (!pe.isEOF())
aoqi@0 1966 fatal("P-059", new Object[]{in.getName()});
aoqi@0 1967 }
aoqi@0 1968
aoqi@0 1969 private void readEncoding(boolean must)
aoqi@0 1970 throws IOException, SAXException {
aoqi@0 1971
aoqi@0 1972 // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
aoqi@0 1973 String name = maybeReadAttribute("encoding", must);
aoqi@0 1974
aoqi@0 1975 if (name == null)
aoqi@0 1976 return;
aoqi@0 1977 for (int i = 0; i < name.length(); i++) {
aoqi@0 1978 char c = name.charAt(i);
aoqi@0 1979 if ((c >= 'A' && c <= 'Z')
aoqi@0 1980 || (c >= 'a' && c <= 'z'))
aoqi@0 1981 continue;
aoqi@0 1982 if (i != 0
aoqi@0 1983 && ((c >= '0' && c <= '9')
aoqi@0 1984 || c == '-'
aoqi@0 1985 || c == '_'
aoqi@0 1986 || c == '.'
aoqi@0 1987 ))
aoqi@0 1988 continue;
aoqi@0 1989 fatal("P-060", new Object[]{new Character(c)});
aoqi@0 1990 }
aoqi@0 1991
aoqi@0 1992 //
aoqi@0 1993 // This should be the encoding in use, and it's even an error for
aoqi@0 1994 // it to be anything else (in certain cases that are impractical to
aoqi@0 1995 // to test, and may even be insufficient). So, we do the best we
aoqi@0 1996 // can, and warn if things look suspicious. Note that Java doesn't
aoqi@0 1997 // uniformly expose the encodings, and that the names it uses
aoqi@0 1998 // internally are nonstandard. Also, that the XML spec allows
aoqi@0 1999 // such "errors" not to be reported at all.
aoqi@0 2000 //
aoqi@0 2001 String currentEncoding = in.getEncoding();
aoqi@0 2002
aoqi@0 2003 if (currentEncoding != null
aoqi@0 2004 && !name.equalsIgnoreCase(currentEncoding))
aoqi@0 2005 warning("P-061", new Object[]{name, currentEncoding});
aoqi@0 2006 }
aoqi@0 2007
aoqi@0 2008 private boolean maybeNotationDecl()
aoqi@0 2009 throws IOException, SAXException {
aoqi@0 2010
aoqi@0 2011 // [82] NotationDecl ::= '<!NOTATION' S Name S
aoqi@0 2012 // (ExternalID | PublicID) S? '>'
aoqi@0 2013 // [83] PublicID ::= 'PUBLIC' S PubidLiteral
aoqi@0 2014 InputEntity start = peekDeclaration("!NOTATION");
aoqi@0 2015
aoqi@0 2016 if (start == null)
aoqi@0 2017 return false;
aoqi@0 2018
aoqi@0 2019 String name = getMarkupDeclname("F-019", false);
aoqi@0 2020 ExternalEntity entity = new ExternalEntity(in);
aoqi@0 2021
aoqi@0 2022 whitespace("F-011");
aoqi@0 2023 if (peek("PUBLIC")) {
aoqi@0 2024 whitespace("F-009");
aoqi@0 2025 entity.publicId = parsePublicId();
aoqi@0 2026 if (maybeWhitespace()) {
aoqi@0 2027 if (!peek(">"))
aoqi@0 2028 entity.systemId = parseSystemId();
aoqi@0 2029 else
aoqi@0 2030 ungetc();
aoqi@0 2031 }
aoqi@0 2032 } else if (peek("SYSTEM")) {
aoqi@0 2033 whitespace("F-008");
aoqi@0 2034 entity.systemId = parseSystemId();
aoqi@0 2035 } else
aoqi@0 2036 fatal("P-062");
aoqi@0 2037 maybeWhitespace();
aoqi@0 2038 nextChar('>', "F-032", name);
aoqi@0 2039 if (start != in)
aoqi@0 2040 error("V-013", null);
aoqi@0 2041 if (entity.systemId != null && entity.systemId.indexOf('#') != -1)
aoqi@0 2042 error("P-056", new Object[]{entity.systemId});
aoqi@0 2043
aoqi@0 2044 Object value = notations.get(name);
aoqi@0 2045 if (value != null && value instanceof ExternalEntity)
aoqi@0 2046 warning("P-063", new Object[]{name});
aoqi@0 2047
aoqi@0 2048 else {
aoqi@0 2049 notations.put(name, entity);
aoqi@0 2050 dtdHandler.notationDecl(name, entity.publicId,
aoqi@0 2051 entity.systemId);
aoqi@0 2052 }
aoqi@0 2053 return true;
aoqi@0 2054 }
aoqi@0 2055
aoqi@0 2056
aoqi@0 2057 ////////////////////////////////////////////////////////////////
aoqi@0 2058 //
aoqi@0 2059 // UTILITIES
aoqi@0 2060 //
aoqi@0 2061 ////////////////////////////////////////////////////////////////
aoqi@0 2062
aoqi@0 2063 private char getc() throws IOException, SAXException {
aoqi@0 2064
aoqi@0 2065 if (!doLexicalPE) {
aoqi@0 2066 char c = in.getc();
aoqi@0 2067 return c;
aoqi@0 2068 }
aoqi@0 2069
aoqi@0 2070 //
aoqi@0 2071 // External parameter entities get funky processing of '%param;'
aoqi@0 2072 // references. It's not clearly defined in the XML spec; but it
aoqi@0 2073 // boils down to having those refs be _lexical_ in most cases to
aoqi@0 2074 // include partial syntax productions. It also needs selective
aoqi@0 2075 // enabling; "<!ENTITY % foo ...>" must work, for example, and
aoqi@0 2076 // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
aoqi@0 2077 // if it's expanded in a literal, else "ab cd". PEs also do
aoqi@0 2078 // not expand within comments or PIs, and external PEs are only
aoqi@0 2079 // allowed to have markup decls (and so aren't handled lexically).
aoqi@0 2080 //
aoqi@0 2081 // This PE handling should be merged into maybeWhitespace, where
aoqi@0 2082 // it can be dealt with more consistently.
aoqi@0 2083 //
aoqi@0 2084 // Also, there are some validity constraints in this area.
aoqi@0 2085 //
aoqi@0 2086 char c;
aoqi@0 2087
aoqi@0 2088 while (in.isEOF()) {
aoqi@0 2089 if (in.isInternal() || (doLexicalPE && !in.isDocument()))
aoqi@0 2090 in = in.pop();
aoqi@0 2091 else {
aoqi@0 2092 fatal("P-064", new Object[]{in.getName()});
aoqi@0 2093 }
aoqi@0 2094 }
aoqi@0 2095 if ((c = in.getc()) == '%' && doLexicalPE) {
aoqi@0 2096 // PE ref ::= '%' name ';'
aoqi@0 2097 String name = maybeGetName();
aoqi@0 2098 Object entity;
aoqi@0 2099
aoqi@0 2100 if (name == null)
aoqi@0 2101 fatal("P-011");
aoqi@0 2102 nextChar(';', "F-021", name);
aoqi@0 2103 entity = params.get(name);
aoqi@0 2104
aoqi@0 2105 // push a magic "entity" before and after the
aoqi@0 2106 // real one, so ungetc() behaves uniformly
aoqi@0 2107 pushReader(" ".toCharArray(), null, false);
aoqi@0 2108 if (entity instanceof InternalEntity)
aoqi@0 2109 pushReader(((InternalEntity) entity).buf, name, false);
aoqi@0 2110 else if (entity instanceof ExternalEntity)
aoqi@0 2111 // PEs can't be unparsed!
aoqi@0 2112 // XXX if this returns false ...
aoqi@0 2113 pushReader((ExternalEntity) entity);
aoqi@0 2114 else if (entity == null)
aoqi@0 2115 // see note in maybePEReference re making this be nonfatal.
aoqi@0 2116 fatal("V-022");
aoqi@0 2117 else
aoqi@0 2118 throw new InternalError();
aoqi@0 2119 pushReader(" ".toCharArray(), null, false);
aoqi@0 2120 return in.getc();
aoqi@0 2121 }
aoqi@0 2122 return c;
aoqi@0 2123 }
aoqi@0 2124
aoqi@0 2125 private void ungetc() {
aoqi@0 2126
aoqi@0 2127 in.ungetc();
aoqi@0 2128 }
aoqi@0 2129
aoqi@0 2130 private boolean peek(String s)
aoqi@0 2131 throws IOException, SAXException {
aoqi@0 2132
aoqi@0 2133 return in.peek(s, null);
aoqi@0 2134 }
aoqi@0 2135
aoqi@0 2136 // Return the entity starting the specified declaration
aoqi@0 2137 // (for validating declaration nesting) else null.
aoqi@0 2138
aoqi@0 2139 private InputEntity peekDeclaration(String s)
aoqi@0 2140 throws IOException, SAXException {
aoqi@0 2141
aoqi@0 2142 InputEntity start;
aoqi@0 2143
aoqi@0 2144 if (!in.peekc('<'))
aoqi@0 2145 return null;
aoqi@0 2146 start = in;
aoqi@0 2147 if (in.peek(s, null))
aoqi@0 2148 return start;
aoqi@0 2149 in.ungetc();
aoqi@0 2150 return null;
aoqi@0 2151 }
aoqi@0 2152
aoqi@0 2153 private void nextChar(char c, String location, String near)
aoqi@0 2154 throws IOException, SAXException {
aoqi@0 2155
aoqi@0 2156 while (in.isEOF() && !in.isDocument())
aoqi@0 2157 in = in.pop();
aoqi@0 2158 if (!in.peekc(c))
aoqi@0 2159 fatal("P-008", new Object[]
aoqi@0 2160 {new Character(c),
aoqi@0 2161 messages.getMessage(locale, location),
aoqi@0 2162 (near == null ? "" : ('"' + near + '"'))});
aoqi@0 2163 }
aoqi@0 2164
aoqi@0 2165
aoqi@0 2166 private void pushReader(char buf [], String name, boolean isGeneral)
aoqi@0 2167 throws SAXException {
aoqi@0 2168
aoqi@0 2169 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
aoqi@0 2170 r.init(buf, name, in, !isGeneral);
aoqi@0 2171 in = r;
aoqi@0 2172 }
aoqi@0 2173
aoqi@0 2174 private boolean pushReader(ExternalEntity next)
aoqi@0 2175 throws IOException, SAXException {
aoqi@0 2176
aoqi@0 2177 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
aoqi@0 2178 InputSource s;
aoqi@0 2179 try {
aoqi@0 2180 s = next.getInputSource(resolver);
aoqi@0 2181 } catch (IOException e) {
aoqi@0 2182 String msg =
aoqi@0 2183 "unable to open the external entity from :" + next.systemId;
aoqi@0 2184 if (next.publicId != null)
aoqi@0 2185 msg += " (public id:" + next.publicId + ")";
aoqi@0 2186
aoqi@0 2187 SAXParseException spe = new SAXParseException(msg,
aoqi@0 2188 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
aoqi@0 2189 dtdHandler.fatalError(spe);
aoqi@0 2190 throw e;
aoqi@0 2191 }
aoqi@0 2192
aoqi@0 2193 r.init(s, next.name, in, next.isPE);
aoqi@0 2194 in = r;
aoqi@0 2195 return true;
aoqi@0 2196 }
aoqi@0 2197
aoqi@0 2198 public String getPublicId() {
aoqi@0 2199
aoqi@0 2200 return (in == null) ? null : in.getPublicId();
aoqi@0 2201 }
aoqi@0 2202
aoqi@0 2203 public String getSystemId() {
aoqi@0 2204
aoqi@0 2205 return (in == null) ? null : in.getSystemId();
aoqi@0 2206 }
aoqi@0 2207
aoqi@0 2208 public int getLineNumber() {
aoqi@0 2209
aoqi@0 2210 return (in == null) ? -1 : in.getLineNumber();
aoqi@0 2211 }
aoqi@0 2212
aoqi@0 2213 public int getColumnNumber() {
aoqi@0 2214
aoqi@0 2215 return (in == null) ? -1 : in.getColumnNumber();
aoqi@0 2216 }
aoqi@0 2217
aoqi@0 2218 // error handling convenience routines
aoqi@0 2219
aoqi@0 2220 private void warning(String messageId, Object parameters [])
aoqi@0 2221 throws SAXException {
aoqi@0 2222
aoqi@0 2223 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
aoqi@0 2224 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
aoqi@0 2225
aoqi@0 2226 dtdHandler.warning(e);
aoqi@0 2227 }
aoqi@0 2228
aoqi@0 2229 void error(String messageId, Object parameters [])
aoqi@0 2230 throws SAXException {
aoqi@0 2231
aoqi@0 2232 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
aoqi@0 2233 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
aoqi@0 2234
aoqi@0 2235 dtdHandler.error(e);
aoqi@0 2236 }
aoqi@0 2237
aoqi@0 2238 private void fatal(String messageId) throws SAXException {
aoqi@0 2239
aoqi@0 2240 fatal(messageId, null);
aoqi@0 2241 }
aoqi@0 2242
aoqi@0 2243 private void fatal(String messageId, Object parameters [])
aoqi@0 2244 throws SAXException {
aoqi@0 2245
aoqi@0 2246 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
aoqi@0 2247 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
aoqi@0 2248
aoqi@0 2249 dtdHandler.fatalError(e);
aoqi@0 2250
aoqi@0 2251 throw e;
aoqi@0 2252 }
aoqi@0 2253
aoqi@0 2254 //
aoqi@0 2255 // Map char arrays to strings ... cuts down both on memory and
aoqi@0 2256 // CPU usage for element/attribute/other names that are reused.
aoqi@0 2257 //
aoqi@0 2258 // Documents typically repeat names a lot, so we more or less
aoqi@0 2259 // intern all the strings within the document; since some strings
aoqi@0 2260 // are repeated in multiple documents (e.g. stylesheets) we go
aoqi@0 2261 // a bit further, and intern globally.
aoqi@0 2262 //
aoqi@0 2263 static class NameCache {
aoqi@0 2264 //
aoqi@0 2265 // Unless we auto-grow this, the default size should be a
aoqi@0 2266 // reasonable bit larger than needed for most XML files
aoqi@0 2267 // we've yet seen (and be prime). If it's too small, the
aoqi@0 2268 // penalty is just excess cache collisions.
aoqi@0 2269 //
aoqi@0 2270 NameCacheEntry hashtable [] = new NameCacheEntry[541];
aoqi@0 2271
aoqi@0 2272 //
aoqi@0 2273 // Usually we just want to get the 'symbol' for these chars
aoqi@0 2274 //
aoqi@0 2275 String lookup(char value [], int len) {
aoqi@0 2276
aoqi@0 2277 return lookupEntry(value, len).name;
aoqi@0 2278 }
aoqi@0 2279
aoqi@0 2280 //
aoqi@0 2281 // Sometimes we need to scan the chars in the resulting
aoqi@0 2282 // string, so there's an accessor which exposes them.
aoqi@0 2283 // (Mostly for element end tags.)
aoqi@0 2284 //
aoqi@0 2285 NameCacheEntry lookupEntry(char value [], int len) {
aoqi@0 2286
aoqi@0 2287 int index = 0;
aoqi@0 2288 NameCacheEntry entry;
aoqi@0 2289
aoqi@0 2290 // hashing to get index
aoqi@0 2291 for (int i = 0; i < len; i++)
aoqi@0 2292 index = index * 31 + value[i];
aoqi@0 2293 index &= 0x7fffffff;
aoqi@0 2294 index %= hashtable.length;
aoqi@0 2295
aoqi@0 2296 // return entry if one's there ...
aoqi@0 2297 for (entry = hashtable[index];
aoqi@0 2298 entry != null;
aoqi@0 2299 entry = entry.next) {
aoqi@0 2300 if (entry.matches(value, len))
aoqi@0 2301 return entry;
aoqi@0 2302 }
aoqi@0 2303
aoqi@0 2304 // else create new one
aoqi@0 2305 entry = new NameCacheEntry();
aoqi@0 2306 entry.chars = new char[len];
aoqi@0 2307 System.arraycopy(value, 0, entry.chars, 0, len);
aoqi@0 2308 entry.name = new String(entry.chars);
aoqi@0 2309 //
aoqi@0 2310 // NOTE: JDK 1.1 has a fixed size string intern table,
aoqi@0 2311 // with non-GC'd entries. It can panic here; that's a
aoqi@0 2312 // JDK problem, use 1.2 or later with many identifiers.
aoqi@0 2313 //
aoqi@0 2314 entry.name = entry.name.intern(); // "global" intern
aoqi@0 2315 entry.next = hashtable[index];
aoqi@0 2316 hashtable[index] = entry;
aoqi@0 2317 return entry;
aoqi@0 2318 }
aoqi@0 2319 }
aoqi@0 2320
aoqi@0 2321 static class NameCacheEntry {
aoqi@0 2322
aoqi@0 2323 String name;
aoqi@0 2324 char chars [];
aoqi@0 2325 NameCacheEntry next;
aoqi@0 2326
aoqi@0 2327 boolean matches(char value [], int len) {
aoqi@0 2328
aoqi@0 2329 if (chars.length != len)
aoqi@0 2330 return false;
aoqi@0 2331 for (int i = 0; i < len; i++)
aoqi@0 2332 if (value[i] != chars[i])
aoqi@0 2333 return false;
aoqi@0 2334 return true;
aoqi@0 2335 }
aoqi@0 2336 }
aoqi@0 2337
aoqi@0 2338 //
aoqi@0 2339 // Message catalog for diagnostics.
aoqi@0 2340 //
aoqi@0 2341 static final Catalog messages = new Catalog();
aoqi@0 2342
aoqi@0 2343 static final class Catalog extends MessageCatalog {
aoqi@0 2344
aoqi@0 2345 Catalog() {
aoqi@0 2346 super(DTDParser.class);
aoqi@0 2347 }
aoqi@0 2348 }
aoqi@0 2349
aoqi@0 2350 }

mercurial