Thu, 31 Aug 2017 15:18:52 +0800
merge
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. Oracle designates this |
aoqi@0 | 8 | * particular file as subject to the "Classpath" exception as provided |
aoqi@0 | 9 | * by Oracle in the LICENSE file that accompanied this code. |
aoqi@0 | 10 | * |
aoqi@0 | 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 14 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 15 | * accompanied this code). |
aoqi@0 | 16 | * |
aoqi@0 | 17 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 20 | * |
aoqi@0 | 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 22 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 23 | * questions. |
aoqi@0 | 24 | */ |
aoqi@0 | 25 | |
aoqi@0 | 26 | package com.sun.xml.internal.dtdparser; |
aoqi@0 | 27 | |
aoqi@0 | 28 | import org.xml.sax.EntityResolver; |
aoqi@0 | 29 | import org.xml.sax.InputSource; |
aoqi@0 | 30 | import org.xml.sax.Locator; |
aoqi@0 | 31 | import org.xml.sax.SAXException; |
aoqi@0 | 32 | import org.xml.sax.SAXParseException; |
aoqi@0 | 33 | |
aoqi@0 | 34 | import java.io.IOException; |
aoqi@0 | 35 | import java.util.ArrayList; |
aoqi@0 | 36 | import java.util.Enumeration; |
aoqi@0 | 37 | import java.util.Hashtable; |
aoqi@0 | 38 | import java.util.Locale; |
aoqi@0 | 39 | import java.util.Set; |
aoqi@0 | 40 | import java.util.Vector; |
aoqi@0 | 41 | |
aoqi@0 | 42 | /** |
aoqi@0 | 43 | * This implements parsing of XML 1.0 DTDs. |
aoqi@0 | 44 | * <p/> |
aoqi@0 | 45 | * This conforms to the portion of the XML 1.0 specification related |
aoqi@0 | 46 | * to the external DTD subset. |
aoqi@0 | 47 | * <p/> |
aoqi@0 | 48 | * For multi-language applications (such as web servers using XML |
aoqi@0 | 49 | * processing to create dynamic content), a method supports choosing |
aoqi@0 | 50 | * a locale for parser diagnostics which is both understood by the |
aoqi@0 | 51 | * message recipient and supported by the parser. |
aoqi@0 | 52 | * <p/> |
aoqi@0 | 53 | * This parser produces a stream of parse events. It supports some |
aoqi@0 | 54 | * features (exposing comments, CDATA sections, and entity references) |
aoqi@0 | 55 | * which are not required to be reported by conformant XML processors. |
aoqi@0 | 56 | * |
aoqi@0 | 57 | * @author David Brownell |
aoqi@0 | 58 | * @author Janet Koenig |
aoqi@0 | 59 | * @author Kohsuke KAWAGUCHI |
aoqi@0 | 60 | * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $ |
aoqi@0 | 61 | */ |
aoqi@0 | 62 | public class DTDParser { |
aoqi@0 | 63 | public final static String TYPE_CDATA = "CDATA"; |
aoqi@0 | 64 | public final static String TYPE_ID = "ID"; |
aoqi@0 | 65 | public final static String TYPE_IDREF = "IDREF"; |
aoqi@0 | 66 | public final static String TYPE_IDREFS = "IDREFS"; |
aoqi@0 | 67 | public final static String TYPE_ENTITY = "ENTITY"; |
aoqi@0 | 68 | public final static String TYPE_ENTITIES = "ENTITIES"; |
aoqi@0 | 69 | public final static String TYPE_NMTOKEN = "NMTOKEN"; |
aoqi@0 | 70 | public final static String TYPE_NMTOKENS = "NMTOKENS"; |
aoqi@0 | 71 | public final static String TYPE_NOTATION = "NOTATION"; |
aoqi@0 | 72 | public final static String TYPE_ENUMERATION = "ENUMERATION"; |
aoqi@0 | 73 | |
aoqi@0 | 74 | |
aoqi@0 | 75 | // stack of input entities being merged |
aoqi@0 | 76 | private InputEntity in; |
aoqi@0 | 77 | |
aoqi@0 | 78 | // temporaries reused during parsing |
aoqi@0 | 79 | private StringBuffer strTmp; |
aoqi@0 | 80 | private char nameTmp []; |
aoqi@0 | 81 | private NameCache nameCache; |
aoqi@0 | 82 | private char charTmp [] = new char[2]; |
aoqi@0 | 83 | |
aoqi@0 | 84 | // temporary DTD parsing state |
aoqi@0 | 85 | private boolean doLexicalPE; |
aoqi@0 | 86 | |
aoqi@0 | 87 | // DTD state, used during parsing |
aoqi@0 | 88 | // private SimpleHashtable elements = new SimpleHashtable (47); |
aoqi@0 | 89 | protected final Set declaredElements = new java.util.HashSet(); |
aoqi@0 | 90 | private SimpleHashtable params = new SimpleHashtable(7); |
aoqi@0 | 91 | |
aoqi@0 | 92 | // exposed to package-private subclass |
aoqi@0 | 93 | Hashtable notations = new Hashtable(7); |
aoqi@0 | 94 | SimpleHashtable entities = new SimpleHashtable(17); |
aoqi@0 | 95 | |
aoqi@0 | 96 | private SimpleHashtable ids = new SimpleHashtable(); |
aoqi@0 | 97 | |
aoqi@0 | 98 | // listeners for DTD parsing events |
aoqi@0 | 99 | private DTDEventListener dtdHandler; |
aoqi@0 | 100 | |
aoqi@0 | 101 | private EntityResolver resolver; |
aoqi@0 | 102 | private Locale locale; |
aoqi@0 | 103 | |
aoqi@0 | 104 | // string constants -- use these copies so "==" works |
aoqi@0 | 105 | // package private |
aoqi@0 | 106 | static final String strANY = "ANY"; |
aoqi@0 | 107 | static final String strEMPTY = "EMPTY"; |
aoqi@0 | 108 | |
aoqi@0 | 109 | /** |
aoqi@0 | 110 | * Used by applications to request locale for diagnostics. |
aoqi@0 | 111 | * |
aoqi@0 | 112 | * @param l The locale to use, or null to use system defaults |
aoqi@0 | 113 | * (which may include only message IDs). |
aoqi@0 | 114 | */ |
aoqi@0 | 115 | public void setLocale(Locale l) throws SAXException { |
aoqi@0 | 116 | |
aoqi@0 | 117 | if (l != null && !messages.isLocaleSupported(l.toString())) { |
aoqi@0 | 118 | throw new SAXException(messages.getMessage(locale, |
aoqi@0 | 119 | "P-078", new Object[]{l})); |
aoqi@0 | 120 | } |
aoqi@0 | 121 | locale = l; |
aoqi@0 | 122 | } |
aoqi@0 | 123 | |
aoqi@0 | 124 | /** |
aoqi@0 | 125 | * Returns the diagnostic locale. |
aoqi@0 | 126 | */ |
aoqi@0 | 127 | public Locale getLocale() { |
aoqi@0 | 128 | return locale; |
aoqi@0 | 129 | } |
aoqi@0 | 130 | |
aoqi@0 | 131 | /** |
aoqi@0 | 132 | * Chooses a client locale to use for diagnostics, using the first |
aoqi@0 | 133 | * language specified in the list that is supported by this parser. |
aoqi@0 | 134 | * That locale is then set using <a href="#setLocale(java.util.Locale)"> |
aoqi@0 | 135 | * setLocale()</a>. Such a list could be provided by a variety of user |
aoqi@0 | 136 | * preference mechanisms, including the HTTP <em>Accept-Language</em> |
aoqi@0 | 137 | * header field. |
aoqi@0 | 138 | * |
aoqi@0 | 139 | * @param languages Array of language specifiers, ordered with the most |
aoqi@0 | 140 | * preferable one at the front. For example, "en-ca" then "fr-ca", |
aoqi@0 | 141 | * followed by "zh_CN". Both RFC 1766 and Java styles are supported. |
aoqi@0 | 142 | * @return The chosen locale, or null. |
aoqi@0 | 143 | * @see MessageCatalog |
aoqi@0 | 144 | */ |
aoqi@0 | 145 | public Locale chooseLocale(String languages []) |
aoqi@0 | 146 | throws SAXException { |
aoqi@0 | 147 | |
aoqi@0 | 148 | Locale l = messages.chooseLocale(languages); |
aoqi@0 | 149 | |
aoqi@0 | 150 | if (l != null) { |
aoqi@0 | 151 | setLocale(l); |
aoqi@0 | 152 | } |
aoqi@0 | 153 | return l; |
aoqi@0 | 154 | } |
aoqi@0 | 155 | |
aoqi@0 | 156 | /** |
aoqi@0 | 157 | * Lets applications control entity resolution. |
aoqi@0 | 158 | */ |
aoqi@0 | 159 | public void setEntityResolver(EntityResolver r) { |
aoqi@0 | 160 | |
aoqi@0 | 161 | resolver = r; |
aoqi@0 | 162 | } |
aoqi@0 | 163 | |
aoqi@0 | 164 | /** |
aoqi@0 | 165 | * Returns the object used to resolve entities |
aoqi@0 | 166 | */ |
aoqi@0 | 167 | public EntityResolver getEntityResolver() { |
aoqi@0 | 168 | |
aoqi@0 | 169 | return resolver; |
aoqi@0 | 170 | } |
aoqi@0 | 171 | |
aoqi@0 | 172 | /** |
aoqi@0 | 173 | * Used by applications to set handling of DTD parsing events. |
aoqi@0 | 174 | */ |
aoqi@0 | 175 | public void setDtdHandler(DTDEventListener handler) { |
aoqi@0 | 176 | dtdHandler = handler; |
aoqi@0 | 177 | if (handler != null) |
aoqi@0 | 178 | handler.setDocumentLocator(new Locator() { |
aoqi@0 | 179 | public String getPublicId() { |
aoqi@0 | 180 | return DTDParser.this.getPublicId(); |
aoqi@0 | 181 | } |
aoqi@0 | 182 | |
aoqi@0 | 183 | public String getSystemId() { |
aoqi@0 | 184 | return DTDParser.this.getSystemId(); |
aoqi@0 | 185 | } |
aoqi@0 | 186 | |
aoqi@0 | 187 | public int getLineNumber() { |
aoqi@0 | 188 | return DTDParser.this.getLineNumber(); |
aoqi@0 | 189 | } |
aoqi@0 | 190 | |
aoqi@0 | 191 | public int getColumnNumber() { |
aoqi@0 | 192 | return DTDParser.this.getColumnNumber(); |
aoqi@0 | 193 | } |
aoqi@0 | 194 | }); |
aoqi@0 | 195 | } |
aoqi@0 | 196 | |
aoqi@0 | 197 | /** |
aoqi@0 | 198 | * Returns the handler used for DTD parsing events. |
aoqi@0 | 199 | */ |
aoqi@0 | 200 | public DTDEventListener getDtdHandler() { |
aoqi@0 | 201 | return dtdHandler; |
aoqi@0 | 202 | } |
aoqi@0 | 203 | |
aoqi@0 | 204 | /** |
aoqi@0 | 205 | * Parse a DTD. |
aoqi@0 | 206 | */ |
aoqi@0 | 207 | public void parse(InputSource in) |
aoqi@0 | 208 | throws IOException, SAXException { |
aoqi@0 | 209 | init(); |
aoqi@0 | 210 | parseInternal(in); |
aoqi@0 | 211 | } |
aoqi@0 | 212 | |
aoqi@0 | 213 | /** |
aoqi@0 | 214 | * Parse a DTD. |
aoqi@0 | 215 | */ |
aoqi@0 | 216 | public void parse(String uri) |
aoqi@0 | 217 | throws IOException, SAXException { |
aoqi@0 | 218 | InputSource in; |
aoqi@0 | 219 | |
aoqi@0 | 220 | init(); |
aoqi@0 | 221 | // System.out.println ("parse (\"" + uri + "\")"); |
aoqi@0 | 222 | in = resolver.resolveEntity(null, uri); |
aoqi@0 | 223 | |
aoqi@0 | 224 | // If custom resolver punts resolution to parser, handle it ... |
aoqi@0 | 225 | if (in == null) { |
aoqi@0 | 226 | in = Resolver.createInputSource(new java.net.URL(uri), false); |
aoqi@0 | 227 | |
aoqi@0 | 228 | // ... or if custom resolver doesn't correctly construct the |
aoqi@0 | 229 | // input entity, patch it up enough so relative URIs work, and |
aoqi@0 | 230 | // issue a warning to minimize later confusion. |
aoqi@0 | 231 | } else if (in.getSystemId() == null) { |
aoqi@0 | 232 | warning("P-065", null); |
aoqi@0 | 233 | in.setSystemId(uri); |
aoqi@0 | 234 | } |
aoqi@0 | 235 | |
aoqi@0 | 236 | parseInternal(in); |
aoqi@0 | 237 | } |
aoqi@0 | 238 | |
aoqi@0 | 239 | // makes sure the parser is reset to "before a document" |
aoqi@0 | 240 | private void init() { |
aoqi@0 | 241 | in = null; |
aoqi@0 | 242 | |
aoqi@0 | 243 | // alloc temporary data used in parsing |
aoqi@0 | 244 | strTmp = new StringBuffer(); |
aoqi@0 | 245 | nameTmp = new char[20]; |
aoqi@0 | 246 | nameCache = new NameCache(); |
aoqi@0 | 247 | |
aoqi@0 | 248 | // reset doc info |
aoqi@0 | 249 | // isInAttribute = false; |
aoqi@0 | 250 | |
aoqi@0 | 251 | doLexicalPE = false; |
aoqi@0 | 252 | |
aoqi@0 | 253 | entities.clear(); |
aoqi@0 | 254 | notations.clear(); |
aoqi@0 | 255 | params.clear(); |
aoqi@0 | 256 | // elements.clear (); |
aoqi@0 | 257 | declaredElements.clear(); |
aoqi@0 | 258 | |
aoqi@0 | 259 | // initialize predefined references ... re-interpreted later |
aoqi@0 | 260 | builtin("amp", "&"); |
aoqi@0 | 261 | builtin("lt", "<"); |
aoqi@0 | 262 | builtin("gt", ">"); |
aoqi@0 | 263 | builtin("quot", "\""); |
aoqi@0 | 264 | builtin("apos", "'"); |
aoqi@0 | 265 | |
aoqi@0 | 266 | if (locale == null) |
aoqi@0 | 267 | locale = Locale.getDefault(); |
aoqi@0 | 268 | if (resolver == null) |
aoqi@0 | 269 | resolver = new Resolver(); |
aoqi@0 | 270 | if (dtdHandler == null) |
aoqi@0 | 271 | dtdHandler = new DTDHandlerBase(); |
aoqi@0 | 272 | } |
aoqi@0 | 273 | |
aoqi@0 | 274 | private void builtin(String entityName, String entityValue) { |
aoqi@0 | 275 | InternalEntity entity; |
aoqi@0 | 276 | entity = new InternalEntity(entityName, entityValue.toCharArray()); |
aoqi@0 | 277 | entities.put(entityName, entity); |
aoqi@0 | 278 | } |
aoqi@0 | 279 | |
aoqi@0 | 280 | |
aoqi@0 | 281 | //////////////////////////////////////////////////////////////// |
aoqi@0 | 282 | // |
aoqi@0 | 283 | // parsing is by recursive descent, code roughly |
aoqi@0 | 284 | // following the BNF rules except tweaked for simple |
aoqi@0 | 285 | // lookahead. rules are more or less in numeric order, |
aoqi@0 | 286 | // except where code sharing suggests other structures. |
aoqi@0 | 287 | // |
aoqi@0 | 288 | // a classic benefit of recursive descent parsers: it's |
aoqi@0 | 289 | // relatively easy to get diagnostics that make sense. |
aoqi@0 | 290 | // |
aoqi@0 | 291 | //////////////////////////////////////////////////////////////// |
aoqi@0 | 292 | |
aoqi@0 | 293 | |
aoqi@0 | 294 | private void parseInternal(InputSource input) |
aoqi@0 | 295 | throws IOException, SAXException { |
aoqi@0 | 296 | |
aoqi@0 | 297 | if (input == null) |
aoqi@0 | 298 | fatal("P-000"); |
aoqi@0 | 299 | |
aoqi@0 | 300 | try { |
aoqi@0 | 301 | in = InputEntity.getInputEntity(dtdHandler, locale); |
aoqi@0 | 302 | in.init(input, null, null, false); |
aoqi@0 | 303 | |
aoqi@0 | 304 | dtdHandler.startDTD(in); |
aoqi@0 | 305 | |
aoqi@0 | 306 | // [30] extSubset ::= TextDecl? extSubsetDecl |
aoqi@0 | 307 | // [31] extSubsetDecl ::= ( markupdecl | conditionalSect |
aoqi@0 | 308 | // | PEReference | S )* |
aoqi@0 | 309 | // ... same as [79] extPE, which is where the code is |
aoqi@0 | 310 | |
aoqi@0 | 311 | ExternalEntity externalSubset = new ExternalEntity(in); |
aoqi@0 | 312 | externalParameterEntity(externalSubset); |
aoqi@0 | 313 | |
aoqi@0 | 314 | if (!in.isEOF()) { |
aoqi@0 | 315 | fatal("P-001", new Object[] |
aoqi@0 | 316 | {Integer.toHexString(((int) getc()))}); |
aoqi@0 | 317 | } |
aoqi@0 | 318 | afterRoot(); |
aoqi@0 | 319 | dtdHandler.endDTD(); |
aoqi@0 | 320 | |
aoqi@0 | 321 | } catch (EndOfInputException e) { |
aoqi@0 | 322 | if (!in.isDocument()) { |
aoqi@0 | 323 | String name = in.getName(); |
aoqi@0 | 324 | do { // force a relevant URI and line number |
aoqi@0 | 325 | in = in.pop(); |
aoqi@0 | 326 | } while (in.isInternal()); |
aoqi@0 | 327 | fatal("P-002", new Object[]{name}); |
aoqi@0 | 328 | } else { |
aoqi@0 | 329 | fatal("P-003", null); |
aoqi@0 | 330 | } |
aoqi@0 | 331 | } catch (RuntimeException e) { |
aoqi@0 | 332 | // Don't discard location that triggered the exception |
aoqi@0 | 333 | // ## Should properly wrap exception |
aoqi@0 | 334 | System.err.print("Internal DTD parser error: "); // ## |
aoqi@0 | 335 | e.printStackTrace(); |
aoqi@0 | 336 | throw new SAXParseException(e.getMessage() != null |
aoqi@0 | 337 | ? e.getMessage() : e.getClass().getName(), |
aoqi@0 | 338 | getPublicId(), getSystemId(), |
aoqi@0 | 339 | getLineNumber(), getColumnNumber()); |
aoqi@0 | 340 | |
aoqi@0 | 341 | } finally { |
aoqi@0 | 342 | // recycle temporary data used during parsing |
aoqi@0 | 343 | strTmp = null; |
aoqi@0 | 344 | nameTmp = null; |
aoqi@0 | 345 | nameCache = null; |
aoqi@0 | 346 | |
aoqi@0 | 347 | // ditto input sources etc |
aoqi@0 | 348 | if (in != null) { |
aoqi@0 | 349 | in.close(); |
aoqi@0 | 350 | in = null; |
aoqi@0 | 351 | } |
aoqi@0 | 352 | |
aoqi@0 | 353 | // get rid of all DTD info ... some of it would be |
aoqi@0 | 354 | // useful for editors etc, investigate later. |
aoqi@0 | 355 | |
aoqi@0 | 356 | params.clear(); |
aoqi@0 | 357 | entities.clear(); |
aoqi@0 | 358 | notations.clear(); |
aoqi@0 | 359 | declaredElements.clear(); |
aoqi@0 | 360 | // elements.clear(); |
aoqi@0 | 361 | ids.clear(); |
aoqi@0 | 362 | } |
aoqi@0 | 363 | } |
aoqi@0 | 364 | |
aoqi@0 | 365 | void afterRoot() throws SAXException { |
aoqi@0 | 366 | // Make sure all IDREFs match declared ID attributes. We scan |
aoqi@0 | 367 | // after the document element is parsed, since XML allows forward |
aoqi@0 | 368 | // references, and only now can we know if they're all resolved. |
aoqi@0 | 369 | |
aoqi@0 | 370 | for (Enumeration e = ids.keys(); |
aoqi@0 | 371 | e.hasMoreElements(); |
aoqi@0 | 372 | ) { |
aoqi@0 | 373 | String id = (String) e.nextElement(); |
aoqi@0 | 374 | Boolean value = (Boolean) ids.get(id); |
aoqi@0 | 375 | if (Boolean.FALSE == value) |
aoqi@0 | 376 | error("V-024", new Object[]{id}); |
aoqi@0 | 377 | } |
aoqi@0 | 378 | } |
aoqi@0 | 379 | |
aoqi@0 | 380 | |
aoqi@0 | 381 | // role is for diagnostics |
aoqi@0 | 382 | private void whitespace(String roleId) |
aoqi@0 | 383 | throws IOException, SAXException { |
aoqi@0 | 384 | |
aoqi@0 | 385 | // [3] S ::= (#x20 | #x9 | #xd | #xa)+ |
aoqi@0 | 386 | if (!maybeWhitespace()) { |
aoqi@0 | 387 | fatal("P-004", new Object[] |
aoqi@0 | 388 | {messages.getMessage(locale, roleId)}); |
aoqi@0 | 389 | } |
aoqi@0 | 390 | } |
aoqi@0 | 391 | |
aoqi@0 | 392 | // S? |
aoqi@0 | 393 | private boolean maybeWhitespace() |
aoqi@0 | 394 | throws IOException, SAXException { |
aoqi@0 | 395 | |
aoqi@0 | 396 | if (!doLexicalPE) |
aoqi@0 | 397 | return in.maybeWhitespace(); |
aoqi@0 | 398 | |
aoqi@0 | 399 | // see getc() for the PE logic -- this lets us splice |
aoqi@0 | 400 | // expansions of PEs in "anywhere". getc() has smarts, |
aoqi@0 | 401 | // so for external PEs we don't bypass it. |
aoqi@0 | 402 | |
aoqi@0 | 403 | // XXX we can marginally speed PE handling, and certainly |
aoqi@0 | 404 | // be cleaner (hence potentially more correct), by using |
aoqi@0 | 405 | // the observations that expanded PEs only start and stop |
aoqi@0 | 406 | // where whitespace is allowed. getc wouldn't need any |
aoqi@0 | 407 | // "lexical" PE expansion logic, and no other method needs |
aoqi@0 | 408 | // to handle termination of PEs. (parsing of literals would |
aoqi@0 | 409 | // still need to pop entities, but not parsing of references |
aoqi@0 | 410 | // in content.) |
aoqi@0 | 411 | |
aoqi@0 | 412 | char c = getc(); |
aoqi@0 | 413 | boolean saw = false; |
aoqi@0 | 414 | |
aoqi@0 | 415 | while (c == ' ' || c == '\t' || c == '\n' || c == '\r') { |
aoqi@0 | 416 | saw = true; |
aoqi@0 | 417 | |
aoqi@0 | 418 | // this gracefully ends things when we stop playing |
aoqi@0 | 419 | // with internal parameters. caller should have a |
aoqi@0 | 420 | // grammar rule allowing whitespace at end of entity. |
aoqi@0 | 421 | if (in.isEOF() && !in.isInternal()) |
aoqi@0 | 422 | return saw; |
aoqi@0 | 423 | c = getc(); |
aoqi@0 | 424 | } |
aoqi@0 | 425 | ungetc(); |
aoqi@0 | 426 | return saw; |
aoqi@0 | 427 | } |
aoqi@0 | 428 | |
aoqi@0 | 429 | private String maybeGetName() |
aoqi@0 | 430 | throws IOException, SAXException { |
aoqi@0 | 431 | |
aoqi@0 | 432 | NameCacheEntry entry = maybeGetNameCacheEntry(); |
aoqi@0 | 433 | return (entry == null) ? null : entry.name; |
aoqi@0 | 434 | } |
aoqi@0 | 435 | |
aoqi@0 | 436 | private NameCacheEntry maybeGetNameCacheEntry() |
aoqi@0 | 437 | throws IOException, SAXException { |
aoqi@0 | 438 | |
aoqi@0 | 439 | // [5] Name ::= (Letter|'_'|':') (Namechar)* |
aoqi@0 | 440 | char c = getc(); |
aoqi@0 | 441 | |
aoqi@0 | 442 | if (!XmlChars.isLetter(c) && c != ':' && c != '_') { |
aoqi@0 | 443 | ungetc(); |
aoqi@0 | 444 | return null; |
aoqi@0 | 445 | } |
aoqi@0 | 446 | return nameCharString(c); |
aoqi@0 | 447 | } |
aoqi@0 | 448 | |
aoqi@0 | 449 | // Used when parsing enumerations |
aoqi@0 | 450 | private String getNmtoken() |
aoqi@0 | 451 | throws IOException, SAXException { |
aoqi@0 | 452 | |
aoqi@0 | 453 | // [7] Nmtoken ::= (Namechar)+ |
aoqi@0 | 454 | char c = getc(); |
aoqi@0 | 455 | if (!XmlChars.isNameChar(c)) |
aoqi@0 | 456 | fatal("P-006", new Object[]{new Character(c)}); |
aoqi@0 | 457 | return nameCharString(c).name; |
aoqi@0 | 458 | } |
aoqi@0 | 459 | |
aoqi@0 | 460 | // n.b. this gets used when parsing attribute values (for |
aoqi@0 | 461 | // internal references) so we can't use strTmp; it's also |
aoqi@0 | 462 | // a hotspot for CPU and memory in the parser (called at least |
aoqi@0 | 463 | // once for each element) so this has been optimized a bit. |
aoqi@0 | 464 | |
aoqi@0 | 465 | private NameCacheEntry nameCharString(char c) |
aoqi@0 | 466 | throws IOException, SAXException { |
aoqi@0 | 467 | |
aoqi@0 | 468 | int i = 1; |
aoqi@0 | 469 | |
aoqi@0 | 470 | nameTmp[0] = c; |
aoqi@0 | 471 | for (; ;) { |
aoqi@0 | 472 | if ((c = in.getNameChar()) == 0) |
aoqi@0 | 473 | break; |
aoqi@0 | 474 | if (i >= nameTmp.length) { |
aoqi@0 | 475 | char tmp [] = new char[nameTmp.length + 10]; |
aoqi@0 | 476 | System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length); |
aoqi@0 | 477 | nameTmp = tmp; |
aoqi@0 | 478 | } |
aoqi@0 | 479 | nameTmp[i++] = c; |
aoqi@0 | 480 | } |
aoqi@0 | 481 | return nameCache.lookupEntry(nameTmp, i); |
aoqi@0 | 482 | } |
aoqi@0 | 483 | |
aoqi@0 | 484 | // |
aoqi@0 | 485 | // much similarity between parsing entity values in DTD |
aoqi@0 | 486 | // and attribute values (in DTD or content) ... both follow |
aoqi@0 | 487 | // literal parsing rules, newline canonicalization, etc |
aoqi@0 | 488 | // |
aoqi@0 | 489 | // leaves value in 'strTmp' ... either a "replacement text" (4.5), |
aoqi@0 | 490 | // or else partially normalized attribute value (the first bit |
aoqi@0 | 491 | // of 3.3.3's spec, without the "if not CDATA" bits). |
aoqi@0 | 492 | // |
aoqi@0 | 493 | private void parseLiteral(boolean isEntityValue) |
aoqi@0 | 494 | throws IOException, SAXException { |
aoqi@0 | 495 | |
aoqi@0 | 496 | // [9] EntityValue ::= |
aoqi@0 | 497 | // '"' ([^"&%] | Reference | PEReference)* '"' |
aoqi@0 | 498 | // | "'" ([^'&%] | Reference | PEReference)* "'" |
aoqi@0 | 499 | // [10] AttValue ::= |
aoqi@0 | 500 | // '"' ([^"&] | Reference )* '"' |
aoqi@0 | 501 | // | "'" ([^'&] | Reference )* "'" |
aoqi@0 | 502 | char quote = getc(); |
aoqi@0 | 503 | char c; |
aoqi@0 | 504 | InputEntity source = in; |
aoqi@0 | 505 | |
aoqi@0 | 506 | if (quote != '\'' && quote != '"') { |
aoqi@0 | 507 | fatal("P-007"); |
aoqi@0 | 508 | } |
aoqi@0 | 509 | |
aoqi@0 | 510 | // don't report entity expansions within attributes, |
aoqi@0 | 511 | // they're reported "fully expanded" via SAX |
aoqi@0 | 512 | // isInAttribute = !isEntityValue; |
aoqi@0 | 513 | |
aoqi@0 | 514 | // get value into strTmp |
aoqi@0 | 515 | strTmp = new StringBuffer(); |
aoqi@0 | 516 | |
aoqi@0 | 517 | // scan, allowing entity push/pop wherever ... |
aoqi@0 | 518 | // expanded entities can't terminate the literal! |
aoqi@0 | 519 | for (; ;) { |
aoqi@0 | 520 | if (in != source && in.isEOF()) { |
aoqi@0 | 521 | // we don't report end of parsed entities |
aoqi@0 | 522 | // within attributes (no SAX hooks) |
aoqi@0 | 523 | in = in.pop(); |
aoqi@0 | 524 | continue; |
aoqi@0 | 525 | } |
aoqi@0 | 526 | if ((c = getc()) == quote && in == source) { |
aoqi@0 | 527 | break; |
aoqi@0 | 528 | } |
aoqi@0 | 529 | |
aoqi@0 | 530 | // |
aoqi@0 | 531 | // Basically the "reference in attribute value" |
aoqi@0 | 532 | // row of the chart in section 4.4 of the spec |
aoqi@0 | 533 | // |
aoqi@0 | 534 | if (c == '&') { |
aoqi@0 | 535 | String entityName = maybeGetName(); |
aoqi@0 | 536 | |
aoqi@0 | 537 | if (entityName != null) { |
aoqi@0 | 538 | nextChar(';', "F-020", entityName); |
aoqi@0 | 539 | |
aoqi@0 | 540 | // 4.4 says: bypass these here ... we'll catch |
aoqi@0 | 541 | // forbidden refs to unparsed entities on use |
aoqi@0 | 542 | if (isEntityValue) { |
aoqi@0 | 543 | strTmp.append('&'); |
aoqi@0 | 544 | strTmp.append(entityName); |
aoqi@0 | 545 | strTmp.append(';'); |
aoqi@0 | 546 | continue; |
aoqi@0 | 547 | } |
aoqi@0 | 548 | expandEntityInLiteral(entityName, entities, isEntityValue); |
aoqi@0 | 549 | |
aoqi@0 | 550 | |
aoqi@0 | 551 | // character references are always included immediately |
aoqi@0 | 552 | } else if ((c = getc()) == '#') { |
aoqi@0 | 553 | int tmp = parseCharNumber(); |
aoqi@0 | 554 | |
aoqi@0 | 555 | if (tmp > 0xffff) { |
aoqi@0 | 556 | tmp = surrogatesToCharTmp(tmp); |
aoqi@0 | 557 | strTmp.append(charTmp[0]); |
aoqi@0 | 558 | if (tmp == 2) |
aoqi@0 | 559 | strTmp.append(charTmp[1]); |
aoqi@0 | 560 | } else |
aoqi@0 | 561 | strTmp.append((char) tmp); |
aoqi@0 | 562 | } else |
aoqi@0 | 563 | fatal("P-009"); |
aoqi@0 | 564 | continue; |
aoqi@0 | 565 | |
aoqi@0 | 566 | } |
aoqi@0 | 567 | |
aoqi@0 | 568 | // expand parameter entities only within entity value literals |
aoqi@0 | 569 | if (c == '%' && isEntityValue) { |
aoqi@0 | 570 | String entityName = maybeGetName(); |
aoqi@0 | 571 | |
aoqi@0 | 572 | if (entityName != null) { |
aoqi@0 | 573 | nextChar(';', "F-021", entityName); |
aoqi@0 | 574 | expandEntityInLiteral(entityName, params, isEntityValue); |
aoqi@0 | 575 | continue; |
aoqi@0 | 576 | } else |
aoqi@0 | 577 | fatal("P-011"); |
aoqi@0 | 578 | } |
aoqi@0 | 579 | |
aoqi@0 | 580 | // For attribute values ... |
aoqi@0 | 581 | if (!isEntityValue) { |
aoqi@0 | 582 | // 3.3.3 says whitespace normalizes to space... |
aoqi@0 | 583 | if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { |
aoqi@0 | 584 | strTmp.append(' '); |
aoqi@0 | 585 | continue; |
aoqi@0 | 586 | } |
aoqi@0 | 587 | |
aoqi@0 | 588 | // "<" not legal in parsed literals ... |
aoqi@0 | 589 | if (c == '<') |
aoqi@0 | 590 | fatal("P-012"); |
aoqi@0 | 591 | } |
aoqi@0 | 592 | |
aoqi@0 | 593 | strTmp.append(c); |
aoqi@0 | 594 | } |
aoqi@0 | 595 | // isInAttribute = false; |
aoqi@0 | 596 | } |
aoqi@0 | 597 | |
aoqi@0 | 598 | // does a SINGLE expansion of the entity (often reparsed later) |
aoqi@0 | 599 | private void expandEntityInLiteral(String name, SimpleHashtable table, |
aoqi@0 | 600 | boolean isEntityValue) |
aoqi@0 | 601 | throws IOException, SAXException { |
aoqi@0 | 602 | |
aoqi@0 | 603 | Object entity = table.get(name); |
aoqi@0 | 604 | |
aoqi@0 | 605 | if (entity instanceof InternalEntity) { |
aoqi@0 | 606 | InternalEntity value = (InternalEntity) entity; |
aoqi@0 | 607 | pushReader(value.buf, name, !value.isPE); |
aoqi@0 | 608 | |
aoqi@0 | 609 | } else if (entity instanceof ExternalEntity) { |
aoqi@0 | 610 | if (!isEntityValue) // must be a PE ... |
aoqi@0 | 611 | fatal("P-013", new Object[]{name}); |
aoqi@0 | 612 | // XXX if this returns false ... |
aoqi@0 | 613 | pushReader((ExternalEntity) entity); |
aoqi@0 | 614 | |
aoqi@0 | 615 | } else if (entity == null) { |
aoqi@0 | 616 | // |
aoqi@0 | 617 | // Note: much confusion about whether spec requires such |
aoqi@0 | 618 | // errors to be fatal in many cases, but none about whether |
aoqi@0 | 619 | // it allows "normal" errors to be unrecoverable! |
aoqi@0 | 620 | // |
aoqi@0 | 621 | fatal((table == params) ? "V-022" : "P-014", |
aoqi@0 | 622 | new Object[]{name}); |
aoqi@0 | 623 | } |
aoqi@0 | 624 | } |
aoqi@0 | 625 | |
aoqi@0 | 626 | // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
aoqi@0 | 627 | // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>' |
aoqi@0 | 628 | |
aoqi@0 | 629 | // NOTE: XML spec should explicitly say that PE ref syntax is |
aoqi@0 | 630 | // ignored in PIs, comments, SystemLiterals, and Pubid Literal |
aoqi@0 | 631 | // values ... can't process the XML spec's own DTD without doing |
aoqi@0 | 632 | // that for comments. |
aoqi@0 | 633 | |
aoqi@0 | 634 | private String getQuotedString(String type, String extra) |
aoqi@0 | 635 | throws IOException, SAXException { |
aoqi@0 | 636 | |
aoqi@0 | 637 | // use in.getc to bypass PE processing |
aoqi@0 | 638 | char quote = in.getc(); |
aoqi@0 | 639 | |
aoqi@0 | 640 | if (quote != '\'' && quote != '"') |
aoqi@0 | 641 | fatal("P-015", new Object[]{ |
aoqi@0 | 642 | messages.getMessage(locale, type, new Object[]{extra}) |
aoqi@0 | 643 | }); |
aoqi@0 | 644 | |
aoqi@0 | 645 | char c; |
aoqi@0 | 646 | |
aoqi@0 | 647 | strTmp = new StringBuffer(); |
aoqi@0 | 648 | while ((c = in.getc()) != quote) |
aoqi@0 | 649 | strTmp.append((char) c); |
aoqi@0 | 650 | return strTmp.toString(); |
aoqi@0 | 651 | } |
aoqi@0 | 652 | |
aoqi@0 | 653 | |
aoqi@0 | 654 | private String parsePublicId() throws IOException, SAXException { |
aoqi@0 | 655 | |
aoqi@0 | 656 | // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'") |
aoqi@0 | 657 | // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%] |
aoqi@0 | 658 | String retval = getQuotedString("F-033", null); |
aoqi@0 | 659 | for (int i = 0; i < retval.length(); i++) { |
aoqi@0 | 660 | char c = retval.charAt(i); |
aoqi@0 | 661 | if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1 |
aoqi@0 | 662 | && !(c >= 'A' && c <= 'Z') |
aoqi@0 | 663 | && !(c >= 'a' && c <= 'z')) |
aoqi@0 | 664 | fatal("P-016", new Object[]{new Character(c)}); |
aoqi@0 | 665 | } |
aoqi@0 | 666 | strTmp = new StringBuffer(); |
aoqi@0 | 667 | strTmp.append(retval); |
aoqi@0 | 668 | return normalize(false); |
aoqi@0 | 669 | } |
aoqi@0 | 670 | |
aoqi@0 | 671 | // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) |
aoqi@0 | 672 | // handled by: InputEntity.parsedContent() |
aoqi@0 | 673 | |
aoqi@0 | 674 | private boolean maybeComment(boolean skipStart) |
aoqi@0 | 675 | throws IOException, SAXException { |
aoqi@0 | 676 | |
aoqi@0 | 677 | // [15] Comment ::= '<!--' |
aoqi@0 | 678 | // ( (Char - '-') | ('-' (Char - '-'))* |
aoqi@0 | 679 | // '-->' |
aoqi@0 | 680 | if (!in.peek(skipStart ? "!--" : "<!--", null)) |
aoqi@0 | 681 | return false; |
aoqi@0 | 682 | |
aoqi@0 | 683 | boolean savedLexicalPE = doLexicalPE; |
aoqi@0 | 684 | boolean saveCommentText; |
aoqi@0 | 685 | |
aoqi@0 | 686 | doLexicalPE = false; |
aoqi@0 | 687 | saveCommentText = false; |
aoqi@0 | 688 | if (saveCommentText) |
aoqi@0 | 689 | strTmp = new StringBuffer(); |
aoqi@0 | 690 | |
aoqi@0 | 691 | oneComment: |
aoqi@0 | 692 | for (; ;) { |
aoqi@0 | 693 | try { |
aoqi@0 | 694 | // bypass PE expansion, but permit PEs |
aoqi@0 | 695 | // to complete ... valid docs won't care. |
aoqi@0 | 696 | for (; ;) { |
aoqi@0 | 697 | int c = getc(); |
aoqi@0 | 698 | if (c == '-') { |
aoqi@0 | 699 | c = getc(); |
aoqi@0 | 700 | if (c != '-') { |
aoqi@0 | 701 | if (saveCommentText) |
aoqi@0 | 702 | strTmp.append('-'); |
aoqi@0 | 703 | ungetc(); |
aoqi@0 | 704 | continue; |
aoqi@0 | 705 | } |
aoqi@0 | 706 | nextChar('>', "F-022", null); |
aoqi@0 | 707 | break oneComment; |
aoqi@0 | 708 | } |
aoqi@0 | 709 | if (saveCommentText) |
aoqi@0 | 710 | strTmp.append((char) c); |
aoqi@0 | 711 | } |
aoqi@0 | 712 | } catch (EndOfInputException e) { |
aoqi@0 | 713 | // |
aoqi@0 | 714 | // This is fatal EXCEPT when we're processing a PE... |
aoqi@0 | 715 | // in which case a validating processor reports an error. |
aoqi@0 | 716 | // External PEs are easy to detect; internal ones we |
aoqi@0 | 717 | // infer by being an internal entity outside an element. |
aoqi@0 | 718 | // |
aoqi@0 | 719 | if (in.isInternal()) { |
aoqi@0 | 720 | error("V-021", null); |
aoqi@0 | 721 | } |
aoqi@0 | 722 | fatal("P-017"); |
aoqi@0 | 723 | } |
aoqi@0 | 724 | } |
aoqi@0 | 725 | doLexicalPE = savedLexicalPE; |
aoqi@0 | 726 | if (saveCommentText) |
aoqi@0 | 727 | dtdHandler.comment(strTmp.toString()); |
aoqi@0 | 728 | return true; |
aoqi@0 | 729 | } |
aoqi@0 | 730 | |
aoqi@0 | 731 | private boolean maybePI(boolean skipStart) |
aoqi@0 | 732 | throws IOException, SAXException { |
aoqi@0 | 733 | |
aoqi@0 | 734 | // [16] PI ::= '<?' PITarget |
aoqi@0 | 735 | // (S (Char* - (Char* '?>' Char*)))? |
aoqi@0 | 736 | // '?>' |
aoqi@0 | 737 | // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l') |
aoqi@0 | 738 | boolean savedLexicalPE = doLexicalPE; |
aoqi@0 | 739 | |
aoqi@0 | 740 | if (!in.peek(skipStart ? "?" : "<?", null)) |
aoqi@0 | 741 | return false; |
aoqi@0 | 742 | doLexicalPE = false; |
aoqi@0 | 743 | |
aoqi@0 | 744 | String target = maybeGetName(); |
aoqi@0 | 745 | |
aoqi@0 | 746 | if (target == null) { |
aoqi@0 | 747 | fatal("P-018"); |
aoqi@0 | 748 | } |
aoqi@0 | 749 | if ("xml".equals(target)) { |
aoqi@0 | 750 | fatal("P-019"); |
aoqi@0 | 751 | } |
aoqi@0 | 752 | if ("xml".equalsIgnoreCase(target)) { |
aoqi@0 | 753 | fatal("P-020", new Object[]{target}); |
aoqi@0 | 754 | } |
aoqi@0 | 755 | |
aoqi@0 | 756 | if (maybeWhitespace()) { |
aoqi@0 | 757 | strTmp = new StringBuffer(); |
aoqi@0 | 758 | try { |
aoqi@0 | 759 | for (; ;) { |
aoqi@0 | 760 | // use in.getc to bypass PE processing |
aoqi@0 | 761 | char c = in.getc(); |
aoqi@0 | 762 | //Reached the end of PI. |
aoqi@0 | 763 | if (c == '?' && in.peekc('>')) |
aoqi@0 | 764 | break; |
aoqi@0 | 765 | strTmp.append(c); |
aoqi@0 | 766 | } |
aoqi@0 | 767 | } catch (EndOfInputException e) { |
aoqi@0 | 768 | fatal("P-021"); |
aoqi@0 | 769 | } |
aoqi@0 | 770 | dtdHandler.processingInstruction(target, strTmp.toString()); |
aoqi@0 | 771 | } else { |
aoqi@0 | 772 | if (!in.peek("?>", null)) { |
aoqi@0 | 773 | fatal("P-022"); |
aoqi@0 | 774 | } |
aoqi@0 | 775 | dtdHandler.processingInstruction(target, ""); |
aoqi@0 | 776 | } |
aoqi@0 | 777 | |
aoqi@0 | 778 | doLexicalPE = savedLexicalPE; |
aoqi@0 | 779 | return true; |
aoqi@0 | 780 | } |
aoqi@0 | 781 | |
aoqi@0 | 782 | // [18] CDSect ::= CDStart CData CDEnd |
aoqi@0 | 783 | // [19] CDStart ::= '<![CDATA[' |
aoqi@0 | 784 | // [20] CData ::= (Char* - (Char* ']]>' Char*)) |
aoqi@0 | 785 | // [21] CDEnd ::= ']]>' |
aoqi@0 | 786 | // |
aoqi@0 | 787 | // ... handled by InputEntity.unparsedContent() |
aoqi@0 | 788 | |
aoqi@0 | 789 | // collapsing several rules together ... |
aoqi@0 | 790 | // simpler than attribute literals -- no reference parsing! |
aoqi@0 | 791 | private String maybeReadAttribute(String name, boolean must) |
aoqi@0 | 792 | throws IOException, SAXException { |
aoqi@0 | 793 | |
aoqi@0 | 794 | // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\" |
aoqi@0 | 795 | // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\" |
aoqi@0 | 796 | // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\" |
aoqi@0 | 797 | if (!maybeWhitespace()) { |
aoqi@0 | 798 | if (!must) { |
aoqi@0 | 799 | return null; |
aoqi@0 | 800 | } |
aoqi@0 | 801 | fatal("P-024", new Object[]{name}); |
aoqi@0 | 802 | // NOTREACHED |
aoqi@0 | 803 | } |
aoqi@0 | 804 | |
aoqi@0 | 805 | if (!peek(name)) { |
aoqi@0 | 806 | if (must) { |
aoqi@0 | 807 | fatal("P-024", new Object[]{name}); |
aoqi@0 | 808 | } else { |
aoqi@0 | 809 | // To ensure that the whitespace is there so that when we |
aoqi@0 | 810 | // check for the next attribute we assure that the |
aoqi@0 | 811 | // whitespace still exists. |
aoqi@0 | 812 | ungetc(); |
aoqi@0 | 813 | return null; |
aoqi@0 | 814 | } |
aoqi@0 | 815 | } |
aoqi@0 | 816 | |
aoqi@0 | 817 | // [25] Eq ::= S? '=' S? |
aoqi@0 | 818 | maybeWhitespace(); |
aoqi@0 | 819 | nextChar('=', "F-023", null); |
aoqi@0 | 820 | maybeWhitespace(); |
aoqi@0 | 821 | |
aoqi@0 | 822 | return getQuotedString("F-035", name); |
aoqi@0 | 823 | } |
aoqi@0 | 824 | |
aoqi@0 | 825 | private void readVersion(boolean must, String versionNum) |
aoqi@0 | 826 | throws IOException, SAXException { |
aoqi@0 | 827 | |
aoqi@0 | 828 | String value = maybeReadAttribute("version", must); |
aoqi@0 | 829 | |
aoqi@0 | 830 | // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+ |
aoqi@0 | 831 | |
aoqi@0 | 832 | if (must && value == null) |
aoqi@0 | 833 | fatal("P-025", new Object[]{versionNum}); |
aoqi@0 | 834 | if (value != null) { |
aoqi@0 | 835 | int length = value.length(); |
aoqi@0 | 836 | for (int i = 0; i < length; i++) { |
aoqi@0 | 837 | char c = value.charAt(i); |
aoqi@0 | 838 | if (!((c >= '0' && c <= '9') |
aoqi@0 | 839 | || c == '_' || c == '.' |
aoqi@0 | 840 | || (c >= 'a' && c <= 'z') |
aoqi@0 | 841 | || (c >= 'A' && c <= 'Z') |
aoqi@0 | 842 | || c == ':' || c == '-') |
aoqi@0 | 843 | ) |
aoqi@0 | 844 | fatal("P-026", new Object[]{value}); |
aoqi@0 | 845 | } |
aoqi@0 | 846 | } |
aoqi@0 | 847 | if (value != null && !value.equals(versionNum)) |
aoqi@0 | 848 | error("P-027", new Object[]{versionNum, value}); |
aoqi@0 | 849 | } |
aoqi@0 | 850 | |
aoqi@0 | 851 | // common code used by most markup declarations |
aoqi@0 | 852 | // ... S (Q)Name ... |
aoqi@0 | 853 | private String getMarkupDeclname(String roleId, boolean qname) |
aoqi@0 | 854 | throws IOException, SAXException { |
aoqi@0 | 855 | |
aoqi@0 | 856 | String name; |
aoqi@0 | 857 | |
aoqi@0 | 858 | whitespace(roleId); |
aoqi@0 | 859 | name = maybeGetName(); |
aoqi@0 | 860 | if (name == null) |
aoqi@0 | 861 | fatal("P-005", new Object[] |
aoqi@0 | 862 | {messages.getMessage(locale, roleId)}); |
aoqi@0 | 863 | return name; |
aoqi@0 | 864 | } |
aoqi@0 | 865 | |
aoqi@0 | 866 | private boolean maybeMarkupDecl() |
aoqi@0 | 867 | throws IOException, SAXException { |
aoqi@0 | 868 | |
aoqi@0 | 869 | // [29] markupdecl ::= elementdecl | Attlistdecl |
aoqi@0 | 870 | // | EntityDecl | NotationDecl | PI | Comment |
aoqi@0 | 871 | return maybeElementDecl() |
aoqi@0 | 872 | || maybeAttlistDecl() |
aoqi@0 | 873 | || maybeEntityDecl() |
aoqi@0 | 874 | || maybeNotationDecl() |
aoqi@0 | 875 | || maybePI(false) |
aoqi@0 | 876 | || maybeComment(false); |
aoqi@0 | 877 | } |
aoqi@0 | 878 | |
aoqi@0 | 879 | private static final String XmlLang = "xml:lang"; |
aoqi@0 | 880 | |
aoqi@0 | 881 | private boolean isXmlLang(String value) { |
aoqi@0 | 882 | |
aoqi@0 | 883 | // [33] LanguageId ::= Langcode ('-' Subcode)* |
aoqi@0 | 884 | // [34] Langcode ::= ISO639Code | IanaCode | UserCode |
aoqi@0 | 885 | // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z] |
aoqi@0 | 886 | // [36] IanaCode ::= [iI] '-' SubCode |
aoqi@0 | 887 | // [37] UserCode ::= [xX] '-' SubCode |
aoqi@0 | 888 | // [38] SubCode ::= [a-zA-Z]+ |
aoqi@0 | 889 | |
aoqi@0 | 890 | // the ISO and IANA codes (and subcodes) are registered, |
aoqi@0 | 891 | // but that's neither a WF nor a validity constraint. |
aoqi@0 | 892 | |
aoqi@0 | 893 | int nextSuffix; |
aoqi@0 | 894 | char c; |
aoqi@0 | 895 | |
aoqi@0 | 896 | if (value.length() < 2) |
aoqi@0 | 897 | return false; |
aoqi@0 | 898 | c = value.charAt(1); |
aoqi@0 | 899 | if (c == '-') { // IANA, or user, code |
aoqi@0 | 900 | c = value.charAt(0); |
aoqi@0 | 901 | if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X')) |
aoqi@0 | 902 | return false; |
aoqi@0 | 903 | nextSuffix = 1; |
aoqi@0 | 904 | } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { |
aoqi@0 | 905 | // 2 letter ISO code, or error |
aoqi@0 | 906 | c = value.charAt(0); |
aoqi@0 | 907 | if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) |
aoqi@0 | 908 | return false; |
aoqi@0 | 909 | nextSuffix = 2; |
aoqi@0 | 910 | } else |
aoqi@0 | 911 | return false; |
aoqi@0 | 912 | |
aoqi@0 | 913 | // here "suffix" ::= '-' [a-zA-Z]+ suffix* |
aoqi@0 | 914 | while (nextSuffix < value.length()) { |
aoqi@0 | 915 | c = value.charAt(nextSuffix); |
aoqi@0 | 916 | if (c != '-') |
aoqi@0 | 917 | break; |
aoqi@0 | 918 | while (++nextSuffix < value.length()) { |
aoqi@0 | 919 | c = value.charAt(nextSuffix); |
aoqi@0 | 920 | if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) |
aoqi@0 | 921 | break; |
aoqi@0 | 922 | } |
aoqi@0 | 923 | } |
aoqi@0 | 924 | return value.length() == nextSuffix && c != '-'; |
aoqi@0 | 925 | } |
aoqi@0 | 926 | |
aoqi@0 | 927 | |
aoqi@0 | 928 | // |
aoqi@0 | 929 | // CHAPTER 3: Logical Structures |
aoqi@0 | 930 | // |
aoqi@0 | 931 | |
aoqi@0 | 932 | /** |
aoqi@0 | 933 | * To validate, subclassers should at this time make sure that |
aoqi@0 | 934 | * values are of the declared types:<UL> |
aoqi@0 | 935 | * <LI> ID and IDREF(S) values are Names |
aoqi@0 | 936 | * <LI> NMTOKEN(S) are Nmtokens |
aoqi@0 | 937 | * <LI> ENUMERATION values match one of the tokens |
aoqi@0 | 938 | * <LI> NOTATION values match a notation name |
aoqi@0 | 939 | * <LI> ENTITY(IES) values match an unparsed external entity |
aoqi@0 | 940 | * </UL> |
aoqi@0 | 941 | * <p/> |
aoqi@0 | 942 | * <P> Separately, make sure IDREF values match some ID |
aoqi@0 | 943 | * provided in the document (in the afterRoot method). |
aoqi@0 | 944 | */ |
aoqi@0 | 945 | /* void validateAttributeSyntax (Attribute attr, String value) |
aoqi@0 | 946 | throws DTDParseException { |
aoqi@0 | 947 | // ID, IDREF(S) ... values are Names |
aoqi@0 | 948 | if (Attribute.ID == attr.type()) { |
aoqi@0 | 949 | if (!XmlNames.isName (value)) |
aoqi@0 | 950 | error ("V-025", new Object [] { value }); |
aoqi@0 | 951 | |
aoqi@0 | 952 | Boolean b = (Boolean) ids.getNonInterned (value); |
aoqi@0 | 953 | if (b == null || b.equals (Boolean.FALSE)) |
aoqi@0 | 954 | ids.put (value.intern (), Boolean.TRUE); |
aoqi@0 | 955 | else |
aoqi@0 | 956 | error ("V-026", new Object [] { value }); |
aoqi@0 | 957 | |
aoqi@0 | 958 | } else if (Attribute.IDREF == attr.type()) { |
aoqi@0 | 959 | if (!XmlNames.isName (value)) |
aoqi@0 | 960 | error ("V-027", new Object [] { value }); |
aoqi@0 | 961 | |
aoqi@0 | 962 | Boolean b = (Boolean) ids.getNonInterned (value); |
aoqi@0 | 963 | if (b == null) |
aoqi@0 | 964 | ids.put (value.intern (), Boolean.FALSE); |
aoqi@0 | 965 | |
aoqi@0 | 966 | } else if (Attribute.IDREFS == attr.type()) { |
aoqi@0 | 967 | StringTokenizer tokenizer = new StringTokenizer (value); |
aoqi@0 | 968 | Boolean b; |
aoqi@0 | 969 | boolean sawValue = false; |
aoqi@0 | 970 | |
aoqi@0 | 971 | while (tokenizer.hasMoreTokens ()) { |
aoqi@0 | 972 | value = tokenizer.nextToken (); |
aoqi@0 | 973 | if (!XmlNames.isName (value)) |
aoqi@0 | 974 | error ("V-027", new Object [] { value }); |
aoqi@0 | 975 | b = (Boolean) ids.getNonInterned (value); |
aoqi@0 | 976 | if (b == null) |
aoqi@0 | 977 | ids.put (value.intern (), Boolean.FALSE); |
aoqi@0 | 978 | sawValue = true; |
aoqi@0 | 979 | } |
aoqi@0 | 980 | if (!sawValue) |
aoqi@0 | 981 | error ("V-039", null); |
aoqi@0 | 982 | |
aoqi@0 | 983 | |
aoqi@0 | 984 | // NMTOKEN(S) ... values are Nmtoken(s) |
aoqi@0 | 985 | } else if (Attribute.NMTOKEN == attr.type()) { |
aoqi@0 | 986 | if (!XmlNames.isNmtoken (value)) |
aoqi@0 | 987 | error ("V-028", new Object [] { value }); |
aoqi@0 | 988 | |
aoqi@0 | 989 | } else if (Attribute.NMTOKENS == attr.type()) { |
aoqi@0 | 990 | StringTokenizer tokenizer = new StringTokenizer (value); |
aoqi@0 | 991 | boolean sawValue = false; |
aoqi@0 | 992 | |
aoqi@0 | 993 | while (tokenizer.hasMoreTokens ()) { |
aoqi@0 | 994 | value = tokenizer.nextToken (); |
aoqi@0 | 995 | if (!XmlNames.isNmtoken (value)) |
aoqi@0 | 996 | error ("V-028", new Object [] { value }); |
aoqi@0 | 997 | sawValue = true; |
aoqi@0 | 998 | } |
aoqi@0 | 999 | if (!sawValue) |
aoqi@0 | 1000 | error ("V-032", null); |
aoqi@0 | 1001 | |
aoqi@0 | 1002 | // ENUMERATION ... values match one of the tokens |
aoqi@0 | 1003 | } else if (Attribute.ENUMERATION == attr.type()) { |
aoqi@0 | 1004 | for (int i = 0; i < attr.values().length; i++) |
aoqi@0 | 1005 | if (value.equals (attr.values()[i])) |
aoqi@0 | 1006 | return; |
aoqi@0 | 1007 | error ("V-029", new Object [] { value }); |
aoqi@0 | 1008 | |
aoqi@0 | 1009 | // NOTATION values match a notation name |
aoqi@0 | 1010 | } else if (Attribute.NOTATION == attr.type()) { |
aoqi@0 | 1011 | // |
aoqi@0 | 1012 | // XXX XML 1.0 spec should probably list references to |
aoqi@0 | 1013 | // externally defined notations in standalone docs as |
aoqi@0 | 1014 | // validity errors. Ditto externally defined unparsed |
aoqi@0 | 1015 | // entities; neither should show up in attributes, else |
aoqi@0 | 1016 | // one needs to read the external declarations in order |
aoqi@0 | 1017 | // to make sense of the document (exactly what tagging |
aoqi@0 | 1018 | // a doc as "standalone" intends you won't need to do). |
aoqi@0 | 1019 | // |
aoqi@0 | 1020 | for (int i = 0; i < attr.values().length; i++) |
aoqi@0 | 1021 | if (value.equals (attr.values()[i])) |
aoqi@0 | 1022 | return; |
aoqi@0 | 1023 | error ("V-030", new Object [] { value }); |
aoqi@0 | 1024 | |
aoqi@0 | 1025 | // ENTITY(IES) values match an unparsed entity(ies) |
aoqi@0 | 1026 | } else if (Attribute.ENTITY == attr.type()) { |
aoqi@0 | 1027 | // see note above re standalone |
aoqi@0 | 1028 | if (!isUnparsedEntity (value)) |
aoqi@0 | 1029 | error ("V-031", new Object [] { value }); |
aoqi@0 | 1030 | |
aoqi@0 | 1031 | } else if (Attribute.ENTITIES == attr.type()) { |
aoqi@0 | 1032 | StringTokenizer tokenizer = new StringTokenizer (value); |
aoqi@0 | 1033 | boolean sawValue = false; |
aoqi@0 | 1034 | |
aoqi@0 | 1035 | while (tokenizer.hasMoreTokens ()) { |
aoqi@0 | 1036 | value = tokenizer.nextToken (); |
aoqi@0 | 1037 | // see note above re standalone |
aoqi@0 | 1038 | if (!isUnparsedEntity (value)) |
aoqi@0 | 1039 | error ("V-031", new Object [] { value }); |
aoqi@0 | 1040 | sawValue = true; |
aoqi@0 | 1041 | } |
aoqi@0 | 1042 | if (!sawValue) |
aoqi@0 | 1043 | error ("V-040", null); |
aoqi@0 | 1044 | |
aoqi@0 | 1045 | } else if (Attribute.CDATA != attr.type()) |
aoqi@0 | 1046 | throw new InternalError (attr.type()); |
aoqi@0 | 1047 | } |
aoqi@0 | 1048 | */ |
aoqi@0 | 1049 | /* |
aoqi@0 | 1050 | private boolean isUnparsedEntity (String name) |
aoqi@0 | 1051 | { |
aoqi@0 | 1052 | Object e = entities.getNonInterned (name); |
aoqi@0 | 1053 | if (e == null || !(e instanceof ExternalEntity)) |
aoqi@0 | 1054 | return false; |
aoqi@0 | 1055 | return ((ExternalEntity)e).notation != null; |
aoqi@0 | 1056 | } |
aoqi@0 | 1057 | */ |
aoqi@0 | 1058 | private boolean maybeElementDecl() |
aoqi@0 | 1059 | throws IOException, SAXException { |
aoqi@0 | 1060 | |
aoqi@0 | 1061 | // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>' |
aoqi@0 | 1062 | // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children |
aoqi@0 | 1063 | InputEntity start = peekDeclaration("!ELEMENT"); |
aoqi@0 | 1064 | |
aoqi@0 | 1065 | if (start == null) |
aoqi@0 | 1066 | return false; |
aoqi@0 | 1067 | |
aoqi@0 | 1068 | // n.b. for content models where inter-element whitespace is |
aoqi@0 | 1069 | // ignorable, we mark that fact here. |
aoqi@0 | 1070 | String name = getMarkupDeclname("F-015", true); |
aoqi@0 | 1071 | // Element element = (Element) elements.get (name); |
aoqi@0 | 1072 | // boolean declEffective = false; |
aoqi@0 | 1073 | |
aoqi@0 | 1074 | /* |
aoqi@0 | 1075 | if (element != null) { |
aoqi@0 | 1076 | if (element.contentModel() != null) { |
aoqi@0 | 1077 | error ("V-012", new Object [] { name }); |
aoqi@0 | 1078 | } // else <!ATTLIST name ...> came first |
aoqi@0 | 1079 | } else { |
aoqi@0 | 1080 | element = new Element(name); |
aoqi@0 | 1081 | elements.put (element.name(), element); |
aoqi@0 | 1082 | declEffective = true; |
aoqi@0 | 1083 | } |
aoqi@0 | 1084 | */ |
aoqi@0 | 1085 | if (declaredElements.contains(name)) |
aoqi@0 | 1086 | error("V-012", new Object[]{name}); |
aoqi@0 | 1087 | else { |
aoqi@0 | 1088 | declaredElements.add(name); |
aoqi@0 | 1089 | // declEffective = true; |
aoqi@0 | 1090 | } |
aoqi@0 | 1091 | |
aoqi@0 | 1092 | short modelType; |
aoqi@0 | 1093 | whitespace("F-000"); |
aoqi@0 | 1094 | if (peek(strEMPTY)) { |
aoqi@0 | 1095 | /// // leave element.contentModel as null for this case. |
aoqi@0 | 1096 | dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY); |
aoqi@0 | 1097 | } else if (peek(strANY)) { |
aoqi@0 | 1098 | /// element.setContentModel(new StringModel(StringModelType.ANY)); |
aoqi@0 | 1099 | dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY); |
aoqi@0 | 1100 | } else { |
aoqi@0 | 1101 | modelType = getMixedOrChildren(name); |
aoqi@0 | 1102 | } |
aoqi@0 | 1103 | |
aoqi@0 | 1104 | dtdHandler.endContentModel(name, modelType); |
aoqi@0 | 1105 | |
aoqi@0 | 1106 | maybeWhitespace(); |
aoqi@0 | 1107 | char c = getc(); |
aoqi@0 | 1108 | if (c != '>') |
aoqi@0 | 1109 | fatal("P-036", new Object[]{name, new Character(c)}); |
aoqi@0 | 1110 | if (start != in) |
aoqi@0 | 1111 | error("V-013", null); |
aoqi@0 | 1112 | |
aoqi@0 | 1113 | /// dtdHandler.elementDecl(element); |
aoqi@0 | 1114 | |
aoqi@0 | 1115 | return true; |
aoqi@0 | 1116 | } |
aoqi@0 | 1117 | |
aoqi@0 | 1118 | // We're leaving the content model as a regular expression; |
aoqi@0 | 1119 | // it's an efficient natural way to express such things, and |
aoqi@0 | 1120 | // libraries often interpret them. No whitespace in the |
aoqi@0 | 1121 | // model we store, though! |
aoqi@0 | 1122 | |
aoqi@0 | 1123 | /** |
aoqi@0 | 1124 | * returns content model type. |
aoqi@0 | 1125 | */ |
aoqi@0 | 1126 | private short getMixedOrChildren(String elementName/*Element element*/) |
aoqi@0 | 1127 | throws IOException, SAXException { |
aoqi@0 | 1128 | |
aoqi@0 | 1129 | InputEntity start; |
aoqi@0 | 1130 | |
aoqi@0 | 1131 | // [47] children ::= (choice|seq) ('?'|'*'|'+')? |
aoqi@0 | 1132 | strTmp = new StringBuffer(); |
aoqi@0 | 1133 | |
aoqi@0 | 1134 | nextChar('(', "F-028", elementName); |
aoqi@0 | 1135 | start = in; |
aoqi@0 | 1136 | maybeWhitespace(); |
aoqi@0 | 1137 | strTmp.append('('); |
aoqi@0 | 1138 | |
aoqi@0 | 1139 | short modelType; |
aoqi@0 | 1140 | if (peek("#PCDATA")) { |
aoqi@0 | 1141 | strTmp.append("#PCDATA"); |
aoqi@0 | 1142 | dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED); |
aoqi@0 | 1143 | getMixed(elementName, start); |
aoqi@0 | 1144 | } else { |
aoqi@0 | 1145 | dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN); |
aoqi@0 | 1146 | getcps(elementName, start); |
aoqi@0 | 1147 | } |
aoqi@0 | 1148 | |
aoqi@0 | 1149 | return modelType; |
aoqi@0 | 1150 | } |
aoqi@0 | 1151 | |
aoqi@0 | 1152 | // '(' S? already consumed |
aoqi@0 | 1153 | // matching ')' must be in "start" entity if validating |
aoqi@0 | 1154 | private void getcps(/*Element element,*/String elementName, InputEntity start) |
aoqi@0 | 1155 | throws IOException, SAXException { |
aoqi@0 | 1156 | |
aoqi@0 | 1157 | // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')? |
aoqi@0 | 1158 | // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')' |
aoqi@0 | 1159 | // [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')' |
aoqi@0 | 1160 | boolean decided = false; |
aoqi@0 | 1161 | char type = 0; |
aoqi@0 | 1162 | // ContentModel retval, temp, current; |
aoqi@0 | 1163 | |
aoqi@0 | 1164 | // retval = temp = current = null; |
aoqi@0 | 1165 | |
aoqi@0 | 1166 | dtdHandler.startModelGroup(); |
aoqi@0 | 1167 | |
aoqi@0 | 1168 | do { |
aoqi@0 | 1169 | String tag; |
aoqi@0 | 1170 | |
aoqi@0 | 1171 | tag = maybeGetName(); |
aoqi@0 | 1172 | if (tag != null) { |
aoqi@0 | 1173 | strTmp.append(tag); |
aoqi@0 | 1174 | // temp = new ElementModel(tag); |
aoqi@0 | 1175 | // getFrequency((RepeatableContent)temp); |
aoqi@0 | 1176 | ///-> |
aoqi@0 | 1177 | dtdHandler.childElement(tag, getFrequency()); |
aoqi@0 | 1178 | ///<- |
aoqi@0 | 1179 | } else if (peek("(")) { |
aoqi@0 | 1180 | InputEntity next = in; |
aoqi@0 | 1181 | strTmp.append('('); |
aoqi@0 | 1182 | maybeWhitespace(); |
aoqi@0 | 1183 | // temp = getcps(element, next); |
aoqi@0 | 1184 | // getFrequency(temp); |
aoqi@0 | 1185 | ///-> |
aoqi@0 | 1186 | getcps(elementName, next); |
aoqi@0 | 1187 | /// getFrequency(); <- this looks like a bug |
aoqi@0 | 1188 | ///<- |
aoqi@0 | 1189 | } else |
aoqi@0 | 1190 | fatal((type == 0) ? "P-039" : |
aoqi@0 | 1191 | ((type == ',') ? "P-037" : "P-038"), |
aoqi@0 | 1192 | new Object[]{new Character(getc())}); |
aoqi@0 | 1193 | |
aoqi@0 | 1194 | maybeWhitespace(); |
aoqi@0 | 1195 | if (decided) { |
aoqi@0 | 1196 | char c = getc(); |
aoqi@0 | 1197 | |
aoqi@0 | 1198 | // if (current != null) { |
aoqi@0 | 1199 | // current.addChild(temp); |
aoqi@0 | 1200 | // } |
aoqi@0 | 1201 | if (c == type) { |
aoqi@0 | 1202 | strTmp.append(type); |
aoqi@0 | 1203 | maybeWhitespace(); |
aoqi@0 | 1204 | reportConnector(type); |
aoqi@0 | 1205 | continue; |
aoqi@0 | 1206 | } else if (c == '\u0029') { // rparen |
aoqi@0 | 1207 | ungetc(); |
aoqi@0 | 1208 | continue; |
aoqi@0 | 1209 | } else { |
aoqi@0 | 1210 | fatal((type == 0) ? "P-041" : "P-040", |
aoqi@0 | 1211 | new Object[]{ |
aoqi@0 | 1212 | new Character(c), |
aoqi@0 | 1213 | new Character(type) |
aoqi@0 | 1214 | }); |
aoqi@0 | 1215 | } |
aoqi@0 | 1216 | } else { |
aoqi@0 | 1217 | type = getc(); |
aoqi@0 | 1218 | switch (type) { |
aoqi@0 | 1219 | case '|': |
aoqi@0 | 1220 | case ',': |
aoqi@0 | 1221 | reportConnector(type); |
aoqi@0 | 1222 | break; |
aoqi@0 | 1223 | default: |
aoqi@0 | 1224 | // retval = temp; |
aoqi@0 | 1225 | ungetc(); |
aoqi@0 | 1226 | continue; |
aoqi@0 | 1227 | } |
aoqi@0 | 1228 | // retval = (ContentModel)current; |
aoqi@0 | 1229 | decided = true; |
aoqi@0 | 1230 | // current.addChild(temp); |
aoqi@0 | 1231 | strTmp.append(type); |
aoqi@0 | 1232 | } |
aoqi@0 | 1233 | maybeWhitespace(); |
aoqi@0 | 1234 | } while (!peek(")")); |
aoqi@0 | 1235 | |
aoqi@0 | 1236 | if (in != start) |
aoqi@0 | 1237 | error("V-014", new Object[]{elementName}); |
aoqi@0 | 1238 | strTmp.append(')'); |
aoqi@0 | 1239 | |
aoqi@0 | 1240 | dtdHandler.endModelGroup(getFrequency()); |
aoqi@0 | 1241 | // return retval; |
aoqi@0 | 1242 | } |
aoqi@0 | 1243 | |
aoqi@0 | 1244 | private void reportConnector(char type) throws SAXException { |
aoqi@0 | 1245 | switch (type) { |
aoqi@0 | 1246 | case '|': |
aoqi@0 | 1247 | dtdHandler.connector(DTDEventListener.CHOICE); ///<- |
aoqi@0 | 1248 | return; |
aoqi@0 | 1249 | case ',': |
aoqi@0 | 1250 | dtdHandler.connector(DTDEventListener.SEQUENCE); ///<- |
aoqi@0 | 1251 | return; |
aoqi@0 | 1252 | default: |
aoqi@0 | 1253 | throw new Error(); //assertion failed. |
aoqi@0 | 1254 | } |
aoqi@0 | 1255 | } |
aoqi@0 | 1256 | |
aoqi@0 | 1257 | private short getFrequency() |
aoqi@0 | 1258 | throws IOException, SAXException { |
aoqi@0 | 1259 | |
aoqi@0 | 1260 | final char c = getc(); |
aoqi@0 | 1261 | |
aoqi@0 | 1262 | if (c == '?') { |
aoqi@0 | 1263 | strTmp.append(c); |
aoqi@0 | 1264 | return DTDEventListener.OCCURENCE_ZERO_OR_ONE; |
aoqi@0 | 1265 | // original.setRepeat(Repeat.ZERO_OR_ONE); |
aoqi@0 | 1266 | } else if (c == '+') { |
aoqi@0 | 1267 | strTmp.append(c); |
aoqi@0 | 1268 | return DTDEventListener.OCCURENCE_ONE_OR_MORE; |
aoqi@0 | 1269 | // original.setRepeat(Repeat.ONE_OR_MORE); |
aoqi@0 | 1270 | } else if (c == '*') { |
aoqi@0 | 1271 | strTmp.append(c); |
aoqi@0 | 1272 | return DTDEventListener.OCCURENCE_ZERO_OR_MORE; |
aoqi@0 | 1273 | // original.setRepeat(Repeat.ZERO_OR_MORE); |
aoqi@0 | 1274 | } else { |
aoqi@0 | 1275 | ungetc(); |
aoqi@0 | 1276 | return DTDEventListener.OCCURENCE_ONCE; |
aoqi@0 | 1277 | } |
aoqi@0 | 1278 | } |
aoqi@0 | 1279 | |
aoqi@0 | 1280 | // '(' S? '#PCDATA' already consumed |
aoqi@0 | 1281 | // matching ')' must be in "start" entity if validating |
aoqi@0 | 1282 | private void getMixed(String elementName, /*Element element,*/ InputEntity start) |
aoqi@0 | 1283 | throws IOException, SAXException { |
aoqi@0 | 1284 | |
aoqi@0 | 1285 | // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
aoqi@0 | 1286 | // | '(' S? '#PCDATA' S? ')' |
aoqi@0 | 1287 | maybeWhitespace(); |
aoqi@0 | 1288 | if (peek("\u0029*") || peek("\u0029")) { |
aoqi@0 | 1289 | if (in != start) |
aoqi@0 | 1290 | error("V-014", new Object[]{elementName}); |
aoqi@0 | 1291 | strTmp.append(')'); |
aoqi@0 | 1292 | // element.setContentModel(new StringModel(StringModelType.PCDATA)); |
aoqi@0 | 1293 | return; |
aoqi@0 | 1294 | } |
aoqi@0 | 1295 | |
aoqi@0 | 1296 | ArrayList l = new ArrayList(); |
aoqi@0 | 1297 | // l.add(new StringModel(StringModelType.PCDATA)); |
aoqi@0 | 1298 | |
aoqi@0 | 1299 | |
aoqi@0 | 1300 | while (peek("|")) { |
aoqi@0 | 1301 | String name; |
aoqi@0 | 1302 | |
aoqi@0 | 1303 | strTmp.append('|'); |
aoqi@0 | 1304 | maybeWhitespace(); |
aoqi@0 | 1305 | |
aoqi@0 | 1306 | doLexicalPE = true; |
aoqi@0 | 1307 | name = maybeGetName(); |
aoqi@0 | 1308 | if (name == null) |
aoqi@0 | 1309 | fatal("P-042", new Object[] |
aoqi@0 | 1310 | {elementName, Integer.toHexString(getc())}); |
aoqi@0 | 1311 | if (l.contains(name)) { |
aoqi@0 | 1312 | error("V-015", new Object[]{name}); |
aoqi@0 | 1313 | } else { |
aoqi@0 | 1314 | l.add(name); |
aoqi@0 | 1315 | dtdHandler.mixedElement(name); |
aoqi@0 | 1316 | } |
aoqi@0 | 1317 | strTmp.append(name); |
aoqi@0 | 1318 | maybeWhitespace(); |
aoqi@0 | 1319 | } |
aoqi@0 | 1320 | |
aoqi@0 | 1321 | if (!peek("\u0029*")) // right paren |
aoqi@0 | 1322 | fatal("P-043", new Object[] |
aoqi@0 | 1323 | {elementName, new Character(getc())}); |
aoqi@0 | 1324 | if (in != start) |
aoqi@0 | 1325 | error("V-014", new Object[]{elementName}); |
aoqi@0 | 1326 | strTmp.append(')'); |
aoqi@0 | 1327 | // ChoiceModel cm = new ChoiceModel((Collection)l); |
aoqi@0 | 1328 | // cm.setRepeat(Repeat.ZERO_OR_MORE); |
aoqi@0 | 1329 | // element.setContentModel(cm); |
aoqi@0 | 1330 | } |
aoqi@0 | 1331 | |
aoqi@0 | 1332 | private boolean maybeAttlistDecl() |
aoqi@0 | 1333 | throws IOException, SAXException { |
aoqi@0 | 1334 | |
aoqi@0 | 1335 | // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' |
aoqi@0 | 1336 | InputEntity start = peekDeclaration("!ATTLIST"); |
aoqi@0 | 1337 | |
aoqi@0 | 1338 | if (start == null) |
aoqi@0 | 1339 | return false; |
aoqi@0 | 1340 | |
aoqi@0 | 1341 | String elementName = getMarkupDeclname("F-016", true); |
aoqi@0 | 1342 | // Element element = (Element) elements.get (name); |
aoqi@0 | 1343 | |
aoqi@0 | 1344 | // if (element == null) { |
aoqi@0 | 1345 | // // not yet declared -- no problem. |
aoqi@0 | 1346 | // element = new Element(name); |
aoqi@0 | 1347 | // elements.put(name, element); |
aoqi@0 | 1348 | // } |
aoqi@0 | 1349 | |
aoqi@0 | 1350 | while (!peek(">")) { |
aoqi@0 | 1351 | |
aoqi@0 | 1352 | // [53] AttDef ::= S Name S AttType S DefaultDecl |
aoqi@0 | 1353 | // [54] AttType ::= StringType | TokenizedType | EnumeratedType |
aoqi@0 | 1354 | |
aoqi@0 | 1355 | // look for global attribute definitions, don't expand for now... |
aoqi@0 | 1356 | maybeWhitespace(); |
aoqi@0 | 1357 | char c = getc(); |
aoqi@0 | 1358 | if (c == '%') { |
aoqi@0 | 1359 | String entityName = maybeGetName(); |
aoqi@0 | 1360 | if (entityName != null) { |
aoqi@0 | 1361 | nextChar(';', "F-021", entityName); |
aoqi@0 | 1362 | whitespace("F-021"); |
aoqi@0 | 1363 | continue; |
aoqi@0 | 1364 | } else |
aoqi@0 | 1365 | fatal("P-011"); |
aoqi@0 | 1366 | } |
aoqi@0 | 1367 | |
aoqi@0 | 1368 | ungetc(); |
aoqi@0 | 1369 | // look for attribute name otherwise |
aoqi@0 | 1370 | String attName = maybeGetName(); |
aoqi@0 | 1371 | if (attName == null) { |
aoqi@0 | 1372 | fatal("P-044", new Object[]{new Character(getc())}); |
aoqi@0 | 1373 | } |
aoqi@0 | 1374 | whitespace("F-001"); |
aoqi@0 | 1375 | |
aoqi@0 | 1376 | /// Attribute a = new Attribute (name); |
aoqi@0 | 1377 | |
aoqi@0 | 1378 | String typeName; |
aoqi@0 | 1379 | Vector values = null; // notation/enumeration values |
aoqi@0 | 1380 | |
aoqi@0 | 1381 | // Note: use the type constants from Attribute |
aoqi@0 | 1382 | // so that "==" may be used (faster) |
aoqi@0 | 1383 | |
aoqi@0 | 1384 | // [55] StringType ::= 'CDATA' |
aoqi@0 | 1385 | if (peek(TYPE_CDATA)) |
aoqi@0 | 1386 | /// a.setType(Attribute.CDATA); |
aoqi@0 | 1387 | typeName = TYPE_CDATA; |
aoqi@0 | 1388 | |
aoqi@0 | 1389 | // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' |
aoqi@0 | 1390 | // | 'ENTITY' | 'ENTITIES' |
aoqi@0 | 1391 | // | 'NMTOKEN' | 'NMTOKENS' |
aoqi@0 | 1392 | // n.b. if "IDREFS" is there, both "ID" and "IDREF" |
aoqi@0 | 1393 | // match peekahead ... so this order matters! |
aoqi@0 | 1394 | else if (peek(TYPE_IDREFS)) |
aoqi@0 | 1395 | typeName = TYPE_IDREFS; |
aoqi@0 | 1396 | else if (peek(TYPE_IDREF)) |
aoqi@0 | 1397 | typeName = TYPE_IDREF; |
aoqi@0 | 1398 | else if (peek(TYPE_ID)) { |
aoqi@0 | 1399 | typeName = TYPE_ID; |
aoqi@0 | 1400 | // TODO: should implement this error check? |
aoqi@0 | 1401 | /// if (element.id() != null) { |
aoqi@0 | 1402 | /// error ("V-016", new Object [] { element.id() }); |
aoqi@0 | 1403 | /// } else |
aoqi@0 | 1404 | /// element.setId(name); |
aoqi@0 | 1405 | } else if (peek(TYPE_ENTITY)) |
aoqi@0 | 1406 | typeName = TYPE_ENTITY; |
aoqi@0 | 1407 | else if (peek(TYPE_ENTITIES)) |
aoqi@0 | 1408 | typeName = TYPE_ENTITIES; |
aoqi@0 | 1409 | else if (peek(TYPE_NMTOKENS)) |
aoqi@0 | 1410 | typeName = TYPE_NMTOKENS; |
aoqi@0 | 1411 | else if (peek(TYPE_NMTOKEN)) |
aoqi@0 | 1412 | typeName = TYPE_NMTOKEN; |
aoqi@0 | 1413 | |
aoqi@0 | 1414 | // [57] EnumeratedType ::= NotationType | Enumeration |
aoqi@0 | 1415 | // [58] NotationType ::= 'NOTATION' S '(' S? Name |
aoqi@0 | 1416 | // (S? '|' S? Name)* S? ')' |
aoqi@0 | 1417 | else if (peek(TYPE_NOTATION)) { |
aoqi@0 | 1418 | typeName = TYPE_NOTATION; |
aoqi@0 | 1419 | whitespace("F-002"); |
aoqi@0 | 1420 | nextChar('(', "F-029", null); |
aoqi@0 | 1421 | maybeWhitespace(); |
aoqi@0 | 1422 | |
aoqi@0 | 1423 | values = new Vector(); |
aoqi@0 | 1424 | do { |
aoqi@0 | 1425 | String name; |
aoqi@0 | 1426 | if ((name = maybeGetName()) == null) |
aoqi@0 | 1427 | fatal("P-068"); |
aoqi@0 | 1428 | // permit deferred declarations |
aoqi@0 | 1429 | if (notations.get(name) == null) |
aoqi@0 | 1430 | notations.put(name, name); |
aoqi@0 | 1431 | values.addElement(name); |
aoqi@0 | 1432 | maybeWhitespace(); |
aoqi@0 | 1433 | if (peek("|")) |
aoqi@0 | 1434 | maybeWhitespace(); |
aoqi@0 | 1435 | } while (!peek(")")); |
aoqi@0 | 1436 | /// a.setValues(new String [v.size ()]); |
aoqi@0 | 1437 | /// for (int i = 0; i < v.size (); i++) |
aoqi@0 | 1438 | /// a.setValue(i, (String)v.elementAt(i)); |
aoqi@0 | 1439 | |
aoqi@0 | 1440 | // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')' |
aoqi@0 | 1441 | } else if (peek("(")) { |
aoqi@0 | 1442 | /// a.setType(Attribute.ENUMERATION); |
aoqi@0 | 1443 | typeName = TYPE_ENUMERATION; |
aoqi@0 | 1444 | |
aoqi@0 | 1445 | maybeWhitespace(); |
aoqi@0 | 1446 | |
aoqi@0 | 1447 | /// Vector v = new Vector (); |
aoqi@0 | 1448 | values = new Vector(); |
aoqi@0 | 1449 | do { |
aoqi@0 | 1450 | String name = getNmtoken(); |
aoqi@0 | 1451 | /// v.addElement (name); |
aoqi@0 | 1452 | values.addElement(name); |
aoqi@0 | 1453 | maybeWhitespace(); |
aoqi@0 | 1454 | if (peek("|")) |
aoqi@0 | 1455 | maybeWhitespace(); |
aoqi@0 | 1456 | } while (!peek(")")); |
aoqi@0 | 1457 | /// a.setValues(new String [v.size ()]); |
aoqi@0 | 1458 | /// for (int i = 0; i < v.size (); i++) |
aoqi@0 | 1459 | /// a.setValue(i, (String)v.elementAt(i)); |
aoqi@0 | 1460 | } else { |
aoqi@0 | 1461 | fatal("P-045", |
aoqi@0 | 1462 | new Object[]{attName, new Character(getc())}); |
aoqi@0 | 1463 | typeName = null; |
aoqi@0 | 1464 | } |
aoqi@0 | 1465 | |
aoqi@0 | 1466 | short attributeUse; |
aoqi@0 | 1467 | String defaultValue = null; |
aoqi@0 | 1468 | |
aoqi@0 | 1469 | // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' |
aoqi@0 | 1470 | // | (('#FIXED' S)? AttValue) |
aoqi@0 | 1471 | whitespace("F-003"); |
aoqi@0 | 1472 | if (peek("#REQUIRED")) |
aoqi@0 | 1473 | attributeUse = DTDEventListener.USE_REQUIRED; |
aoqi@0 | 1474 | /// a.setIsRequired(true); |
aoqi@0 | 1475 | else if (peek("#FIXED")) { |
aoqi@0 | 1476 | /// if (a.type() == Attribute.ID) |
aoqi@0 | 1477 | if (typeName == TYPE_ID) |
aoqi@0 | 1478 | error("V-017", new Object[]{attName}); |
aoqi@0 | 1479 | /// a.setIsFixed(true); |
aoqi@0 | 1480 | attributeUse = DTDEventListener.USE_FIXED; |
aoqi@0 | 1481 | whitespace("F-004"); |
aoqi@0 | 1482 | parseLiteral(false); |
aoqi@0 | 1483 | /// if (a.type() != Attribute.CDATA) |
aoqi@0 | 1484 | /// a.setDefaultValue(normalize(false)); |
aoqi@0 | 1485 | /// else |
aoqi@0 | 1486 | /// a.setDefaultValue(strTmp.toString()); |
aoqi@0 | 1487 | |
aoqi@0 | 1488 | if (typeName == TYPE_CDATA) |
aoqi@0 | 1489 | defaultValue = normalize(false); |
aoqi@0 | 1490 | else |
aoqi@0 | 1491 | defaultValue = strTmp.toString(); |
aoqi@0 | 1492 | |
aoqi@0 | 1493 | // TODO: implement this check |
aoqi@0 | 1494 | /// if (a.type() != Attribute.CDATA) |
aoqi@0 | 1495 | /// validateAttributeSyntax (a, a.defaultValue()); |
aoqi@0 | 1496 | } else if (!peek("#IMPLIED")) { |
aoqi@0 | 1497 | attributeUse = DTDEventListener.USE_IMPLIED; |
aoqi@0 | 1498 | |
aoqi@0 | 1499 | /// if (a.type() == Attribute.ID) |
aoqi@0 | 1500 | if (typeName == TYPE_ID) |
aoqi@0 | 1501 | error("V-018", new Object[]{attName}); |
aoqi@0 | 1502 | parseLiteral(false); |
aoqi@0 | 1503 | /// if (a.type() != Attribute.CDATA) |
aoqi@0 | 1504 | /// a.setDefaultValue(normalize(false)); |
aoqi@0 | 1505 | /// else |
aoqi@0 | 1506 | /// a.setDefaultValue(strTmp.toString()); |
aoqi@0 | 1507 | if (typeName == TYPE_CDATA) |
aoqi@0 | 1508 | defaultValue = normalize(false); |
aoqi@0 | 1509 | else |
aoqi@0 | 1510 | defaultValue = strTmp.toString(); |
aoqi@0 | 1511 | |
aoqi@0 | 1512 | // TODO: implement this check |
aoqi@0 | 1513 | /// if (a.type() != Attribute.CDATA) |
aoqi@0 | 1514 | /// validateAttributeSyntax (a, a.defaultValue()); |
aoqi@0 | 1515 | } else { |
aoqi@0 | 1516 | // TODO: this looks like an fatal error. |
aoqi@0 | 1517 | attributeUse = DTDEventListener.USE_NORMAL; |
aoqi@0 | 1518 | } |
aoqi@0 | 1519 | |
aoqi@0 | 1520 | if (XmlLang.equals(attName) |
aoqi@0 | 1521 | && defaultValue/* a.defaultValue()*/ != null |
aoqi@0 | 1522 | && !isXmlLang(defaultValue/*a.defaultValue()*/)) |
aoqi@0 | 1523 | error("P-033", new Object[]{defaultValue /*a.defaultValue()*/}); |
aoqi@0 | 1524 | |
aoqi@0 | 1525 | // TODO: isn't it an error to specify the same attribute twice? |
aoqi@0 | 1526 | /// if (!element.attributes().contains(a)) { |
aoqi@0 | 1527 | /// element.addAttribute(a); |
aoqi@0 | 1528 | /// dtdHandler.attributeDecl(a); |
aoqi@0 | 1529 | /// } |
aoqi@0 | 1530 | |
aoqi@0 | 1531 | String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null; |
aoqi@0 | 1532 | dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue); |
aoqi@0 | 1533 | maybeWhitespace(); |
aoqi@0 | 1534 | } |
aoqi@0 | 1535 | if (start != in) |
aoqi@0 | 1536 | error("V-013", null); |
aoqi@0 | 1537 | return true; |
aoqi@0 | 1538 | } |
aoqi@0 | 1539 | |
aoqi@0 | 1540 | // used when parsing literal attribute values, |
aoqi@0 | 1541 | // or public identifiers. |
aoqi@0 | 1542 | // |
aoqi@0 | 1543 | // input in strTmp |
aoqi@0 | 1544 | private String normalize(boolean invalidIfNeeded) { |
aoqi@0 | 1545 | |
aoqi@0 | 1546 | // this can allocate an extra string... |
aoqi@0 | 1547 | |
aoqi@0 | 1548 | String s = strTmp.toString(); |
aoqi@0 | 1549 | String s2 = s.trim(); |
aoqi@0 | 1550 | boolean didStrip = false; |
aoqi@0 | 1551 | |
aoqi@0 | 1552 | if (s != s2) { |
aoqi@0 | 1553 | s = s2; |
aoqi@0 | 1554 | s2 = null; |
aoqi@0 | 1555 | didStrip = true; |
aoqi@0 | 1556 | } |
aoqi@0 | 1557 | strTmp = new StringBuffer(); |
aoqi@0 | 1558 | for (int i = 0; i < s.length(); i++) { |
aoqi@0 | 1559 | char c = s.charAt(i); |
aoqi@0 | 1560 | if (!XmlChars.isSpace(c)) { |
aoqi@0 | 1561 | strTmp.append(c); |
aoqi@0 | 1562 | continue; |
aoqi@0 | 1563 | } |
aoqi@0 | 1564 | strTmp.append(' '); |
aoqi@0 | 1565 | while (++i < s.length() && XmlChars.isSpace(s.charAt(i))) |
aoqi@0 | 1566 | didStrip = true; |
aoqi@0 | 1567 | i--; |
aoqi@0 | 1568 | } |
aoqi@0 | 1569 | if (didStrip) |
aoqi@0 | 1570 | return strTmp.toString(); |
aoqi@0 | 1571 | else |
aoqi@0 | 1572 | return s; |
aoqi@0 | 1573 | } |
aoqi@0 | 1574 | |
aoqi@0 | 1575 | private boolean maybeConditionalSect() |
aoqi@0 | 1576 | throws IOException, SAXException { |
aoqi@0 | 1577 | |
aoqi@0 | 1578 | // [61] conditionalSect ::= includeSect | ignoreSect |
aoqi@0 | 1579 | |
aoqi@0 | 1580 | if (!peek("<![")) |
aoqi@0 | 1581 | return false; |
aoqi@0 | 1582 | |
aoqi@0 | 1583 | String keyword; |
aoqi@0 | 1584 | InputEntity start = in; |
aoqi@0 | 1585 | |
aoqi@0 | 1586 | maybeWhitespace(); |
aoqi@0 | 1587 | |
aoqi@0 | 1588 | if ((keyword = maybeGetName()) == null) |
aoqi@0 | 1589 | fatal("P-046"); |
aoqi@0 | 1590 | maybeWhitespace(); |
aoqi@0 | 1591 | nextChar('[', "F-030", null); |
aoqi@0 | 1592 | |
aoqi@0 | 1593 | // [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' |
aoqi@0 | 1594 | // extSubsetDecl ']]>' |
aoqi@0 | 1595 | if ("INCLUDE".equals(keyword)) { |
aoqi@0 | 1596 | for (; ;) { |
aoqi@0 | 1597 | while (in.isEOF() && in != start) |
aoqi@0 | 1598 | in = in.pop(); |
aoqi@0 | 1599 | if (in.isEOF()) { |
aoqi@0 | 1600 | error("V-020", null); |
aoqi@0 | 1601 | } |
aoqi@0 | 1602 | if (peek("]]>")) |
aoqi@0 | 1603 | break; |
aoqi@0 | 1604 | |
aoqi@0 | 1605 | doLexicalPE = false; |
aoqi@0 | 1606 | if (maybeWhitespace()) |
aoqi@0 | 1607 | continue; |
aoqi@0 | 1608 | if (maybePEReference()) |
aoqi@0 | 1609 | continue; |
aoqi@0 | 1610 | doLexicalPE = true; |
aoqi@0 | 1611 | if (maybeMarkupDecl() || maybeConditionalSect()) |
aoqi@0 | 1612 | continue; |
aoqi@0 | 1613 | |
aoqi@0 | 1614 | fatal("P-047"); |
aoqi@0 | 1615 | } |
aoqi@0 | 1616 | |
aoqi@0 | 1617 | // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' |
aoqi@0 | 1618 | // ignoreSectcontents ']]>' |
aoqi@0 | 1619 | // [64] ignoreSectcontents ::= Ignore ('<![' |
aoqi@0 | 1620 | // ignoreSectcontents ']]>' Ignore)* |
aoqi@0 | 1621 | // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) |
aoqi@0 | 1622 | } else if ("IGNORE".equals(keyword)) { |
aoqi@0 | 1623 | int nestlevel = 1; |
aoqi@0 | 1624 | // ignoreSectcontents |
aoqi@0 | 1625 | doLexicalPE = false; |
aoqi@0 | 1626 | while (nestlevel > 0) { |
aoqi@0 | 1627 | char c = getc(); // will pop input entities |
aoqi@0 | 1628 | if (c == '<') { |
aoqi@0 | 1629 | if (peek("![")) |
aoqi@0 | 1630 | nestlevel++; |
aoqi@0 | 1631 | } else if (c == ']') { |
aoqi@0 | 1632 | if (peek("]>")) |
aoqi@0 | 1633 | nestlevel--; |
aoqi@0 | 1634 | } else |
aoqi@0 | 1635 | continue; |
aoqi@0 | 1636 | } |
aoqi@0 | 1637 | } else |
aoqi@0 | 1638 | fatal("P-048", new Object[]{keyword}); |
aoqi@0 | 1639 | return true; |
aoqi@0 | 1640 | } |
aoqi@0 | 1641 | |
aoqi@0 | 1642 | |
aoqi@0 | 1643 | // |
aoqi@0 | 1644 | // CHAPTER 4: Physical Structures |
aoqi@0 | 1645 | // |
aoqi@0 | 1646 | |
aoqi@0 | 1647 | // parse decimal or hex numeric character reference |
aoqi@0 | 1648 | private int parseCharNumber() |
aoqi@0 | 1649 | throws IOException, SAXException { |
aoqi@0 | 1650 | |
aoqi@0 | 1651 | char c; |
aoqi@0 | 1652 | int retval = 0; |
aoqi@0 | 1653 | |
aoqi@0 | 1654 | // n.b. we ignore overflow ... |
aoqi@0 | 1655 | if (getc() != 'x') { |
aoqi@0 | 1656 | ungetc(); |
aoqi@0 | 1657 | for (; ;) { |
aoqi@0 | 1658 | c = getc(); |
aoqi@0 | 1659 | if (c >= '0' && c <= '9') { |
aoqi@0 | 1660 | retval *= 10; |
aoqi@0 | 1661 | retval += (c - '0'); |
aoqi@0 | 1662 | continue; |
aoqi@0 | 1663 | } |
aoqi@0 | 1664 | if (c == ';') |
aoqi@0 | 1665 | return retval; |
aoqi@0 | 1666 | fatal("P-049"); |
aoqi@0 | 1667 | } |
aoqi@0 | 1668 | } else |
aoqi@0 | 1669 | for (; ;) { |
aoqi@0 | 1670 | c = getc(); |
aoqi@0 | 1671 | if (c >= '0' && c <= '9') { |
aoqi@0 | 1672 | retval <<= 4; |
aoqi@0 | 1673 | retval += (c - '0'); |
aoqi@0 | 1674 | continue; |
aoqi@0 | 1675 | } |
aoqi@0 | 1676 | if (c >= 'a' && c <= 'f') { |
aoqi@0 | 1677 | retval <<= 4; |
aoqi@0 | 1678 | retval += 10 + (c - 'a'); |
aoqi@0 | 1679 | continue; |
aoqi@0 | 1680 | } |
aoqi@0 | 1681 | if (c >= 'A' && c <= 'F') { |
aoqi@0 | 1682 | retval <<= 4; |
aoqi@0 | 1683 | retval += 10 + (c - 'A'); |
aoqi@0 | 1684 | continue; |
aoqi@0 | 1685 | } |
aoqi@0 | 1686 | if (c == ';') |
aoqi@0 | 1687 | return retval; |
aoqi@0 | 1688 | fatal("P-050"); |
aoqi@0 | 1689 | } |
aoqi@0 | 1690 | } |
aoqi@0 | 1691 | |
aoqi@0 | 1692 | // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE, |
aoqi@0 | 1693 | // though still subject to the 'Char' construct in XML |
aoqi@0 | 1694 | private int surrogatesToCharTmp(int ucs4) |
aoqi@0 | 1695 | throws SAXException { |
aoqi@0 | 1696 | |
aoqi@0 | 1697 | if (ucs4 <= 0xffff) { |
aoqi@0 | 1698 | if (XmlChars.isChar(ucs4)) { |
aoqi@0 | 1699 | charTmp[0] = (char) ucs4; |
aoqi@0 | 1700 | return 1; |
aoqi@0 | 1701 | } |
aoqi@0 | 1702 | } else if (ucs4 <= 0x0010ffff) { |
aoqi@0 | 1703 | // we represent these as UNICODE surrogate pairs |
aoqi@0 | 1704 | ucs4 -= 0x10000; |
aoqi@0 | 1705 | charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff)); |
aoqi@0 | 1706 | charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff)); |
aoqi@0 | 1707 | return 2; |
aoqi@0 | 1708 | } |
aoqi@0 | 1709 | fatal("P-051", new Object[]{Integer.toHexString(ucs4)}); |
aoqi@0 | 1710 | // NOTREACHED |
aoqi@0 | 1711 | return -1; |
aoqi@0 | 1712 | } |
aoqi@0 | 1713 | |
aoqi@0 | 1714 | private boolean maybePEReference() |
aoqi@0 | 1715 | throws IOException, SAXException { |
aoqi@0 | 1716 | |
aoqi@0 | 1717 | // This is the SYNTACTIC version of this construct. |
aoqi@0 | 1718 | // When processing external entities, there is also |
aoqi@0 | 1719 | // a LEXICAL version; see getc() and doLexicalPE. |
aoqi@0 | 1720 | |
aoqi@0 | 1721 | // [69] PEReference ::= '%' Name ';' |
aoqi@0 | 1722 | if (!in.peekc('%')) |
aoqi@0 | 1723 | return false; |
aoqi@0 | 1724 | |
aoqi@0 | 1725 | String name = maybeGetName(); |
aoqi@0 | 1726 | Object entity; |
aoqi@0 | 1727 | |
aoqi@0 | 1728 | if (name == null) |
aoqi@0 | 1729 | fatal("P-011"); |
aoqi@0 | 1730 | nextChar(';', "F-021", name); |
aoqi@0 | 1731 | entity = params.get(name); |
aoqi@0 | 1732 | |
aoqi@0 | 1733 | if (entity instanceof InternalEntity) { |
aoqi@0 | 1734 | InternalEntity value = (InternalEntity) entity; |
aoqi@0 | 1735 | pushReader(value.buf, name, false); |
aoqi@0 | 1736 | |
aoqi@0 | 1737 | } else if (entity instanceof ExternalEntity) { |
aoqi@0 | 1738 | pushReader((ExternalEntity) entity); |
aoqi@0 | 1739 | externalParameterEntity((ExternalEntity) entity); |
aoqi@0 | 1740 | |
aoqi@0 | 1741 | } else if (entity == null) { |
aoqi@0 | 1742 | error("V-022", new Object[]{name}); |
aoqi@0 | 1743 | } |
aoqi@0 | 1744 | return true; |
aoqi@0 | 1745 | } |
aoqi@0 | 1746 | |
aoqi@0 | 1747 | private boolean maybeEntityDecl() |
aoqi@0 | 1748 | throws IOException, SAXException { |
aoqi@0 | 1749 | |
aoqi@0 | 1750 | // [70] EntityDecl ::= GEDecl | PEDecl |
aoqi@0 | 1751 | // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' |
aoqi@0 | 1752 | // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>' |
aoqi@0 | 1753 | // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) |
aoqi@0 | 1754 | // [74] PEDef ::= EntityValue | ExternalID |
aoqi@0 | 1755 | // |
aoqi@0 | 1756 | InputEntity start = peekDeclaration("!ENTITY"); |
aoqi@0 | 1757 | |
aoqi@0 | 1758 | if (start == null) |
aoqi@0 | 1759 | return false; |
aoqi@0 | 1760 | |
aoqi@0 | 1761 | String entityName; |
aoqi@0 | 1762 | SimpleHashtable defns; |
aoqi@0 | 1763 | ExternalEntity externalId; |
aoqi@0 | 1764 | boolean doStore; |
aoqi@0 | 1765 | |
aoqi@0 | 1766 | // PE expansion gets selectively turned off several places: |
aoqi@0 | 1767 | // in ENTITY declarations (here), in comments, in PIs. |
aoqi@0 | 1768 | |
aoqi@0 | 1769 | // Here, we allow PE entities to be declared, and allows |
aoqi@0 | 1770 | // literals to include PE refs without the added spaces |
aoqi@0 | 1771 | // required with their expansion in markup decls. |
aoqi@0 | 1772 | |
aoqi@0 | 1773 | doLexicalPE = false; |
aoqi@0 | 1774 | whitespace("F-005"); |
aoqi@0 | 1775 | if (in.peekc('%')) { |
aoqi@0 | 1776 | whitespace("F-006"); |
aoqi@0 | 1777 | defns = params; |
aoqi@0 | 1778 | } else |
aoqi@0 | 1779 | defns = entities; |
aoqi@0 | 1780 | |
aoqi@0 | 1781 | ungetc(); // leave some whitespace |
aoqi@0 | 1782 | doLexicalPE = true; |
aoqi@0 | 1783 | entityName = getMarkupDeclname("F-017", false); |
aoqi@0 | 1784 | whitespace("F-007"); |
aoqi@0 | 1785 | externalId = maybeExternalID(); |
aoqi@0 | 1786 | |
aoqi@0 | 1787 | // |
aoqi@0 | 1788 | // first definition sticks ... e.g. internal subset PEs are used |
aoqi@0 | 1789 | // to override DTD defaults. It's also an "error" to incorrectly |
aoqi@0 | 1790 | // redefine builtin internal entities, but since reporting such |
aoqi@0 | 1791 | // errors is optional we only give warnings ("just in case") for |
aoqi@0 | 1792 | // non-parameter entities. |
aoqi@0 | 1793 | // |
aoqi@0 | 1794 | doStore = (defns.get(entityName) == null); |
aoqi@0 | 1795 | if (!doStore && defns == entities) |
aoqi@0 | 1796 | warning("P-054", new Object[]{entityName}); |
aoqi@0 | 1797 | |
aoqi@0 | 1798 | // internal entities |
aoqi@0 | 1799 | if (externalId == null) { |
aoqi@0 | 1800 | char value []; |
aoqi@0 | 1801 | InternalEntity entity; |
aoqi@0 | 1802 | |
aoqi@0 | 1803 | doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd" |
aoqi@0 | 1804 | parseLiteral(true); |
aoqi@0 | 1805 | doLexicalPE = true; |
aoqi@0 | 1806 | if (doStore) { |
aoqi@0 | 1807 | value = new char[strTmp.length()]; |
aoqi@0 | 1808 | if (value.length != 0) |
aoqi@0 | 1809 | strTmp.getChars(0, value.length, value, 0); |
aoqi@0 | 1810 | entity = new InternalEntity(entityName, value); |
aoqi@0 | 1811 | entity.isPE = (defns == params); |
aoqi@0 | 1812 | entity.isFromInternalSubset = false; |
aoqi@0 | 1813 | defns.put(entityName, entity); |
aoqi@0 | 1814 | if (defns == entities) |
aoqi@0 | 1815 | dtdHandler.internalGeneralEntityDecl(entityName, |
aoqi@0 | 1816 | new String(value)); |
aoqi@0 | 1817 | } |
aoqi@0 | 1818 | |
aoqi@0 | 1819 | // external entities (including unparsed) |
aoqi@0 | 1820 | } else { |
aoqi@0 | 1821 | // [76] NDataDecl ::= S 'NDATA' S Name |
aoqi@0 | 1822 | if (defns == entities && maybeWhitespace() |
aoqi@0 | 1823 | && peek("NDATA")) { |
aoqi@0 | 1824 | externalId.notation = getMarkupDeclname("F-018", false); |
aoqi@0 | 1825 | |
aoqi@0 | 1826 | // flag undeclared notation for checking after |
aoqi@0 | 1827 | // the DTD is fully processed |
aoqi@0 | 1828 | if (notations.get(externalId.notation) == null) |
aoqi@0 | 1829 | notations.put(externalId.notation, Boolean.TRUE); |
aoqi@0 | 1830 | } |
aoqi@0 | 1831 | externalId.name = entityName; |
aoqi@0 | 1832 | externalId.isPE = (defns == params); |
aoqi@0 | 1833 | externalId.isFromInternalSubset = false; |
aoqi@0 | 1834 | if (doStore) { |
aoqi@0 | 1835 | defns.put(entityName, externalId); |
aoqi@0 | 1836 | if (externalId.notation != null) |
aoqi@0 | 1837 | dtdHandler.unparsedEntityDecl(entityName, |
aoqi@0 | 1838 | externalId.publicId, externalId.systemId, |
aoqi@0 | 1839 | externalId.notation); |
aoqi@0 | 1840 | else if (defns == entities) |
aoqi@0 | 1841 | dtdHandler.externalGeneralEntityDecl(entityName, |
aoqi@0 | 1842 | externalId.publicId, externalId.systemId); |
aoqi@0 | 1843 | } |
aoqi@0 | 1844 | } |
aoqi@0 | 1845 | maybeWhitespace(); |
aoqi@0 | 1846 | nextChar('>', "F-031", entityName); |
aoqi@0 | 1847 | if (start != in) |
aoqi@0 | 1848 | error("V-013", null); |
aoqi@0 | 1849 | return true; |
aoqi@0 | 1850 | } |
aoqi@0 | 1851 | |
aoqi@0 | 1852 | private ExternalEntity maybeExternalID() |
aoqi@0 | 1853 | throws IOException, SAXException { |
aoqi@0 | 1854 | |
aoqi@0 | 1855 | // [75] ExternalID ::= 'SYSTEM' S SystemLiteral |
aoqi@0 | 1856 | // | 'PUBLIC' S' PubidLiteral S Systemliteral |
aoqi@0 | 1857 | String temp = null; |
aoqi@0 | 1858 | ExternalEntity retval; |
aoqi@0 | 1859 | |
aoqi@0 | 1860 | if (peek("PUBLIC")) { |
aoqi@0 | 1861 | whitespace("F-009"); |
aoqi@0 | 1862 | temp = parsePublicId(); |
aoqi@0 | 1863 | } else if (!peek("SYSTEM")) |
aoqi@0 | 1864 | return null; |
aoqi@0 | 1865 | |
aoqi@0 | 1866 | retval = new ExternalEntity(in); |
aoqi@0 | 1867 | retval.publicId = temp; |
aoqi@0 | 1868 | whitespace("F-008"); |
aoqi@0 | 1869 | retval.systemId = parseSystemId(); |
aoqi@0 | 1870 | return retval; |
aoqi@0 | 1871 | } |
aoqi@0 | 1872 | |
aoqi@0 | 1873 | private String parseSystemId() |
aoqi@0 | 1874 | throws IOException, SAXException { |
aoqi@0 | 1875 | |
aoqi@0 | 1876 | String uri = getQuotedString("F-034", null); |
aoqi@0 | 1877 | int temp = uri.indexOf(':'); |
aoqi@0 | 1878 | |
aoqi@0 | 1879 | // resolve relative URIs ... must do it here since |
aoqi@0 | 1880 | // it's relative to the source file holding the URI! |
aoqi@0 | 1881 | |
aoqi@0 | 1882 | // "new java.net.URL (URL, string)" conforms to RFC 1630, |
aoqi@0 | 1883 | // but we can't use that except when the URI is a URL. |
aoqi@0 | 1884 | // The entity resolver is allowed to handle URIs that are |
aoqi@0 | 1885 | // not URLs, so we pass URIs through with scheme intact |
aoqi@0 | 1886 | if (temp == -1 || uri.indexOf('/') < temp) { |
aoqi@0 | 1887 | String baseURI; |
aoqi@0 | 1888 | |
aoqi@0 | 1889 | baseURI = in.getSystemId(); |
aoqi@0 | 1890 | if (baseURI == null) |
aoqi@0 | 1891 | fatal("P-055", new Object[]{uri}); |
aoqi@0 | 1892 | if (uri.length() == 0) |
aoqi@0 | 1893 | uri = "."; |
aoqi@0 | 1894 | baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1); |
aoqi@0 | 1895 | if (uri.charAt(0) != '/') |
aoqi@0 | 1896 | uri = baseURI + uri; |
aoqi@0 | 1897 | else { |
aoqi@0 | 1898 | // XXX slashes at the beginning of a relative URI are |
aoqi@0 | 1899 | // a special case we don't handle. |
aoqi@0 | 1900 | throw new InternalError(); |
aoqi@0 | 1901 | } |
aoqi@0 | 1902 | |
aoqi@0 | 1903 | // letting other code map any "/xxx/../" or "/./" to "/", |
aoqi@0 | 1904 | // since all URIs must handle it the same. |
aoqi@0 | 1905 | } |
aoqi@0 | 1906 | // check for fragment ID in URI |
aoqi@0 | 1907 | if (uri.indexOf('#') != -1) |
aoqi@0 | 1908 | error("P-056", new Object[]{uri}); |
aoqi@0 | 1909 | return uri; |
aoqi@0 | 1910 | } |
aoqi@0 | 1911 | |
aoqi@0 | 1912 | private void maybeTextDecl() |
aoqi@0 | 1913 | throws IOException, SAXException { |
aoqi@0 | 1914 | |
aoqi@0 | 1915 | // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' |
aoqi@0 | 1916 | if (peek("<?xml")) { |
aoqi@0 | 1917 | readVersion(false, "1.0"); |
aoqi@0 | 1918 | readEncoding(true); |
aoqi@0 | 1919 | maybeWhitespace(); |
aoqi@0 | 1920 | if (!peek("?>")) |
aoqi@0 | 1921 | fatal("P-057"); |
aoqi@0 | 1922 | } |
aoqi@0 | 1923 | } |
aoqi@0 | 1924 | |
aoqi@0 | 1925 | private void externalParameterEntity(ExternalEntity next) |
aoqi@0 | 1926 | throws IOException, SAXException { |
aoqi@0 | 1927 | |
aoqi@0 | 1928 | // |
aoqi@0 | 1929 | // Reap the intended benefits of standalone declarations: |
aoqi@0 | 1930 | // don't deal with external parameter entities, except to |
aoqi@0 | 1931 | // validate the standalone declaration. |
aoqi@0 | 1932 | // |
aoqi@0 | 1933 | |
aoqi@0 | 1934 | // n.b. "in external parameter entities" (and external |
aoqi@0 | 1935 | // DTD subset, same grammar) parameter references can |
aoqi@0 | 1936 | // occur "within" markup declarations ... expansions can |
aoqi@0 | 1937 | // cross syntax rules. Flagged here; affects getc(). |
aoqi@0 | 1938 | |
aoqi@0 | 1939 | // [79] ExtPE ::= TextDecl? extSubsetDecl |
aoqi@0 | 1940 | // [31] extSubsetDecl ::= ( markupdecl | conditionalSect |
aoqi@0 | 1941 | // | PEReference | S )* |
aoqi@0 | 1942 | InputEntity pe; |
aoqi@0 | 1943 | |
aoqi@0 | 1944 | // XXX if this returns false ... |
aoqi@0 | 1945 | |
aoqi@0 | 1946 | pe = in; |
aoqi@0 | 1947 | maybeTextDecl(); |
aoqi@0 | 1948 | while (!pe.isEOF()) { |
aoqi@0 | 1949 | // pop internal PEs (and whitespace before/after) |
aoqi@0 | 1950 | if (in.isEOF()) { |
aoqi@0 | 1951 | in = in.pop(); |
aoqi@0 | 1952 | continue; |
aoqi@0 | 1953 | } |
aoqi@0 | 1954 | doLexicalPE = false; |
aoqi@0 | 1955 | if (maybeWhitespace()) |
aoqi@0 | 1956 | continue; |
aoqi@0 | 1957 | if (maybePEReference()) |
aoqi@0 | 1958 | continue; |
aoqi@0 | 1959 | doLexicalPE = true; |
aoqi@0 | 1960 | if (maybeMarkupDecl() || maybeConditionalSect()) |
aoqi@0 | 1961 | continue; |
aoqi@0 | 1962 | break; |
aoqi@0 | 1963 | } |
aoqi@0 | 1964 | // if (in != pe) throw new InternalError("who popped my PE?"); |
aoqi@0 | 1965 | if (!pe.isEOF()) |
aoqi@0 | 1966 | fatal("P-059", new Object[]{in.getName()}); |
aoqi@0 | 1967 | } |
aoqi@0 | 1968 | |
aoqi@0 | 1969 | private void readEncoding(boolean must) |
aoqi@0 | 1970 | throws IOException, SAXException { |
aoqi@0 | 1971 | |
aoqi@0 | 1972 | // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* |
aoqi@0 | 1973 | String name = maybeReadAttribute("encoding", must); |
aoqi@0 | 1974 | |
aoqi@0 | 1975 | if (name == null) |
aoqi@0 | 1976 | return; |
aoqi@0 | 1977 | for (int i = 0; i < name.length(); i++) { |
aoqi@0 | 1978 | char c = name.charAt(i); |
aoqi@0 | 1979 | if ((c >= 'A' && c <= 'Z') |
aoqi@0 | 1980 | || (c >= 'a' && c <= 'z')) |
aoqi@0 | 1981 | continue; |
aoqi@0 | 1982 | if (i != 0 |
aoqi@0 | 1983 | && ((c >= '0' && c <= '9') |
aoqi@0 | 1984 | || c == '-' |
aoqi@0 | 1985 | || c == '_' |
aoqi@0 | 1986 | || c == '.' |
aoqi@0 | 1987 | )) |
aoqi@0 | 1988 | continue; |
aoqi@0 | 1989 | fatal("P-060", new Object[]{new Character(c)}); |
aoqi@0 | 1990 | } |
aoqi@0 | 1991 | |
aoqi@0 | 1992 | // |
aoqi@0 | 1993 | // This should be the encoding in use, and it's even an error for |
aoqi@0 | 1994 | // it to be anything else (in certain cases that are impractical to |
aoqi@0 | 1995 | // to test, and may even be insufficient). So, we do the best we |
aoqi@0 | 1996 | // can, and warn if things look suspicious. Note that Java doesn't |
aoqi@0 | 1997 | // uniformly expose the encodings, and that the names it uses |
aoqi@0 | 1998 | // internally are nonstandard. Also, that the XML spec allows |
aoqi@0 | 1999 | // such "errors" not to be reported at all. |
aoqi@0 | 2000 | // |
aoqi@0 | 2001 | String currentEncoding = in.getEncoding(); |
aoqi@0 | 2002 | |
aoqi@0 | 2003 | if (currentEncoding != null |
aoqi@0 | 2004 | && !name.equalsIgnoreCase(currentEncoding)) |
aoqi@0 | 2005 | warning("P-061", new Object[]{name, currentEncoding}); |
aoqi@0 | 2006 | } |
aoqi@0 | 2007 | |
aoqi@0 | 2008 | private boolean maybeNotationDecl() |
aoqi@0 | 2009 | throws IOException, SAXException { |
aoqi@0 | 2010 | |
aoqi@0 | 2011 | // [82] NotationDecl ::= '<!NOTATION' S Name S |
aoqi@0 | 2012 | // (ExternalID | PublicID) S? '>' |
aoqi@0 | 2013 | // [83] PublicID ::= 'PUBLIC' S PubidLiteral |
aoqi@0 | 2014 | InputEntity start = peekDeclaration("!NOTATION"); |
aoqi@0 | 2015 | |
aoqi@0 | 2016 | if (start == null) |
aoqi@0 | 2017 | return false; |
aoqi@0 | 2018 | |
aoqi@0 | 2019 | String name = getMarkupDeclname("F-019", false); |
aoqi@0 | 2020 | ExternalEntity entity = new ExternalEntity(in); |
aoqi@0 | 2021 | |
aoqi@0 | 2022 | whitespace("F-011"); |
aoqi@0 | 2023 | if (peek("PUBLIC")) { |
aoqi@0 | 2024 | whitespace("F-009"); |
aoqi@0 | 2025 | entity.publicId = parsePublicId(); |
aoqi@0 | 2026 | if (maybeWhitespace()) { |
aoqi@0 | 2027 | if (!peek(">")) |
aoqi@0 | 2028 | entity.systemId = parseSystemId(); |
aoqi@0 | 2029 | else |
aoqi@0 | 2030 | ungetc(); |
aoqi@0 | 2031 | } |
aoqi@0 | 2032 | } else if (peek("SYSTEM")) { |
aoqi@0 | 2033 | whitespace("F-008"); |
aoqi@0 | 2034 | entity.systemId = parseSystemId(); |
aoqi@0 | 2035 | } else |
aoqi@0 | 2036 | fatal("P-062"); |
aoqi@0 | 2037 | maybeWhitespace(); |
aoqi@0 | 2038 | nextChar('>', "F-032", name); |
aoqi@0 | 2039 | if (start != in) |
aoqi@0 | 2040 | error("V-013", null); |
aoqi@0 | 2041 | if (entity.systemId != null && entity.systemId.indexOf('#') != -1) |
aoqi@0 | 2042 | error("P-056", new Object[]{entity.systemId}); |
aoqi@0 | 2043 | |
aoqi@0 | 2044 | Object value = notations.get(name); |
aoqi@0 | 2045 | if (value != null && value instanceof ExternalEntity) |
aoqi@0 | 2046 | warning("P-063", new Object[]{name}); |
aoqi@0 | 2047 | |
aoqi@0 | 2048 | else { |
aoqi@0 | 2049 | notations.put(name, entity); |
aoqi@0 | 2050 | dtdHandler.notationDecl(name, entity.publicId, |
aoqi@0 | 2051 | entity.systemId); |
aoqi@0 | 2052 | } |
aoqi@0 | 2053 | return true; |
aoqi@0 | 2054 | } |
aoqi@0 | 2055 | |
aoqi@0 | 2056 | |
aoqi@0 | 2057 | //////////////////////////////////////////////////////////////// |
aoqi@0 | 2058 | // |
aoqi@0 | 2059 | // UTILITIES |
aoqi@0 | 2060 | // |
aoqi@0 | 2061 | //////////////////////////////////////////////////////////////// |
aoqi@0 | 2062 | |
aoqi@0 | 2063 | private char getc() throws IOException, SAXException { |
aoqi@0 | 2064 | |
aoqi@0 | 2065 | if (!doLexicalPE) { |
aoqi@0 | 2066 | char c = in.getc(); |
aoqi@0 | 2067 | return c; |
aoqi@0 | 2068 | } |
aoqi@0 | 2069 | |
aoqi@0 | 2070 | // |
aoqi@0 | 2071 | // External parameter entities get funky processing of '%param;' |
aoqi@0 | 2072 | // references. It's not clearly defined in the XML spec; but it |
aoqi@0 | 2073 | // boils down to having those refs be _lexical_ in most cases to |
aoqi@0 | 2074 | // include partial syntax productions. It also needs selective |
aoqi@0 | 2075 | // enabling; "<!ENTITY % foo ...>" must work, for example, and |
aoqi@0 | 2076 | // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd" |
aoqi@0 | 2077 | // if it's expanded in a literal, else "ab cd". PEs also do |
aoqi@0 | 2078 | // not expand within comments or PIs, and external PEs are only |
aoqi@0 | 2079 | // allowed to have markup decls (and so aren't handled lexically). |
aoqi@0 | 2080 | // |
aoqi@0 | 2081 | // This PE handling should be merged into maybeWhitespace, where |
aoqi@0 | 2082 | // it can be dealt with more consistently. |
aoqi@0 | 2083 | // |
aoqi@0 | 2084 | // Also, there are some validity constraints in this area. |
aoqi@0 | 2085 | // |
aoqi@0 | 2086 | char c; |
aoqi@0 | 2087 | |
aoqi@0 | 2088 | while (in.isEOF()) { |
aoqi@0 | 2089 | if (in.isInternal() || (doLexicalPE && !in.isDocument())) |
aoqi@0 | 2090 | in = in.pop(); |
aoqi@0 | 2091 | else { |
aoqi@0 | 2092 | fatal("P-064", new Object[]{in.getName()}); |
aoqi@0 | 2093 | } |
aoqi@0 | 2094 | } |
aoqi@0 | 2095 | if ((c = in.getc()) == '%' && doLexicalPE) { |
aoqi@0 | 2096 | // PE ref ::= '%' name ';' |
aoqi@0 | 2097 | String name = maybeGetName(); |
aoqi@0 | 2098 | Object entity; |
aoqi@0 | 2099 | |
aoqi@0 | 2100 | if (name == null) |
aoqi@0 | 2101 | fatal("P-011"); |
aoqi@0 | 2102 | nextChar(';', "F-021", name); |
aoqi@0 | 2103 | entity = params.get(name); |
aoqi@0 | 2104 | |
aoqi@0 | 2105 | // push a magic "entity" before and after the |
aoqi@0 | 2106 | // real one, so ungetc() behaves uniformly |
aoqi@0 | 2107 | pushReader(" ".toCharArray(), null, false); |
aoqi@0 | 2108 | if (entity instanceof InternalEntity) |
aoqi@0 | 2109 | pushReader(((InternalEntity) entity).buf, name, false); |
aoqi@0 | 2110 | else if (entity instanceof ExternalEntity) |
aoqi@0 | 2111 | // PEs can't be unparsed! |
aoqi@0 | 2112 | // XXX if this returns false ... |
aoqi@0 | 2113 | pushReader((ExternalEntity) entity); |
aoqi@0 | 2114 | else if (entity == null) |
aoqi@0 | 2115 | // see note in maybePEReference re making this be nonfatal. |
aoqi@0 | 2116 | fatal("V-022"); |
aoqi@0 | 2117 | else |
aoqi@0 | 2118 | throw new InternalError(); |
aoqi@0 | 2119 | pushReader(" ".toCharArray(), null, false); |
aoqi@0 | 2120 | return in.getc(); |
aoqi@0 | 2121 | } |
aoqi@0 | 2122 | return c; |
aoqi@0 | 2123 | } |
aoqi@0 | 2124 | |
aoqi@0 | 2125 | private void ungetc() { |
aoqi@0 | 2126 | |
aoqi@0 | 2127 | in.ungetc(); |
aoqi@0 | 2128 | } |
aoqi@0 | 2129 | |
aoqi@0 | 2130 | private boolean peek(String s) |
aoqi@0 | 2131 | throws IOException, SAXException { |
aoqi@0 | 2132 | |
aoqi@0 | 2133 | return in.peek(s, null); |
aoqi@0 | 2134 | } |
aoqi@0 | 2135 | |
aoqi@0 | 2136 | // Return the entity starting the specified declaration |
aoqi@0 | 2137 | // (for validating declaration nesting) else null. |
aoqi@0 | 2138 | |
aoqi@0 | 2139 | private InputEntity peekDeclaration(String s) |
aoqi@0 | 2140 | throws IOException, SAXException { |
aoqi@0 | 2141 | |
aoqi@0 | 2142 | InputEntity start; |
aoqi@0 | 2143 | |
aoqi@0 | 2144 | if (!in.peekc('<')) |
aoqi@0 | 2145 | return null; |
aoqi@0 | 2146 | start = in; |
aoqi@0 | 2147 | if (in.peek(s, null)) |
aoqi@0 | 2148 | return start; |
aoqi@0 | 2149 | in.ungetc(); |
aoqi@0 | 2150 | return null; |
aoqi@0 | 2151 | } |
aoqi@0 | 2152 | |
aoqi@0 | 2153 | private void nextChar(char c, String location, String near) |
aoqi@0 | 2154 | throws IOException, SAXException { |
aoqi@0 | 2155 | |
aoqi@0 | 2156 | while (in.isEOF() && !in.isDocument()) |
aoqi@0 | 2157 | in = in.pop(); |
aoqi@0 | 2158 | if (!in.peekc(c)) |
aoqi@0 | 2159 | fatal("P-008", new Object[] |
aoqi@0 | 2160 | {new Character(c), |
aoqi@0 | 2161 | messages.getMessage(locale, location), |
aoqi@0 | 2162 | (near == null ? "" : ('"' + near + '"'))}); |
aoqi@0 | 2163 | } |
aoqi@0 | 2164 | |
aoqi@0 | 2165 | |
aoqi@0 | 2166 | private void pushReader(char buf [], String name, boolean isGeneral) |
aoqi@0 | 2167 | throws SAXException { |
aoqi@0 | 2168 | |
aoqi@0 | 2169 | InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); |
aoqi@0 | 2170 | r.init(buf, name, in, !isGeneral); |
aoqi@0 | 2171 | in = r; |
aoqi@0 | 2172 | } |
aoqi@0 | 2173 | |
aoqi@0 | 2174 | private boolean pushReader(ExternalEntity next) |
aoqi@0 | 2175 | throws IOException, SAXException { |
aoqi@0 | 2176 | |
aoqi@0 | 2177 | InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); |
aoqi@0 | 2178 | InputSource s; |
aoqi@0 | 2179 | try { |
aoqi@0 | 2180 | s = next.getInputSource(resolver); |
aoqi@0 | 2181 | } catch (IOException e) { |
aoqi@0 | 2182 | String msg = |
aoqi@0 | 2183 | "unable to open the external entity from :" + next.systemId; |
aoqi@0 | 2184 | if (next.publicId != null) |
aoqi@0 | 2185 | msg += " (public id:" + next.publicId + ")"; |
aoqi@0 | 2186 | |
aoqi@0 | 2187 | SAXParseException spe = new SAXParseException(msg, |
aoqi@0 | 2188 | getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e); |
aoqi@0 | 2189 | dtdHandler.fatalError(spe); |
aoqi@0 | 2190 | throw e; |
aoqi@0 | 2191 | } |
aoqi@0 | 2192 | |
aoqi@0 | 2193 | r.init(s, next.name, in, next.isPE); |
aoqi@0 | 2194 | in = r; |
aoqi@0 | 2195 | return true; |
aoqi@0 | 2196 | } |
aoqi@0 | 2197 | |
aoqi@0 | 2198 | public String getPublicId() { |
aoqi@0 | 2199 | |
aoqi@0 | 2200 | return (in == null) ? null : in.getPublicId(); |
aoqi@0 | 2201 | } |
aoqi@0 | 2202 | |
aoqi@0 | 2203 | public String getSystemId() { |
aoqi@0 | 2204 | |
aoqi@0 | 2205 | return (in == null) ? null : in.getSystemId(); |
aoqi@0 | 2206 | } |
aoqi@0 | 2207 | |
aoqi@0 | 2208 | public int getLineNumber() { |
aoqi@0 | 2209 | |
aoqi@0 | 2210 | return (in == null) ? -1 : in.getLineNumber(); |
aoqi@0 | 2211 | } |
aoqi@0 | 2212 | |
aoqi@0 | 2213 | public int getColumnNumber() { |
aoqi@0 | 2214 | |
aoqi@0 | 2215 | return (in == null) ? -1 : in.getColumnNumber(); |
aoqi@0 | 2216 | } |
aoqi@0 | 2217 | |
aoqi@0 | 2218 | // error handling convenience routines |
aoqi@0 | 2219 | |
aoqi@0 | 2220 | private void warning(String messageId, Object parameters []) |
aoqi@0 | 2221 | throws SAXException { |
aoqi@0 | 2222 | |
aoqi@0 | 2223 | SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), |
aoqi@0 | 2224 | getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); |
aoqi@0 | 2225 | |
aoqi@0 | 2226 | dtdHandler.warning(e); |
aoqi@0 | 2227 | } |
aoqi@0 | 2228 | |
aoqi@0 | 2229 | void error(String messageId, Object parameters []) |
aoqi@0 | 2230 | throws SAXException { |
aoqi@0 | 2231 | |
aoqi@0 | 2232 | SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), |
aoqi@0 | 2233 | getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); |
aoqi@0 | 2234 | |
aoqi@0 | 2235 | dtdHandler.error(e); |
aoqi@0 | 2236 | } |
aoqi@0 | 2237 | |
aoqi@0 | 2238 | private void fatal(String messageId) throws SAXException { |
aoqi@0 | 2239 | |
aoqi@0 | 2240 | fatal(messageId, null); |
aoqi@0 | 2241 | } |
aoqi@0 | 2242 | |
aoqi@0 | 2243 | private void fatal(String messageId, Object parameters []) |
aoqi@0 | 2244 | throws SAXException { |
aoqi@0 | 2245 | |
aoqi@0 | 2246 | SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), |
aoqi@0 | 2247 | getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); |
aoqi@0 | 2248 | |
aoqi@0 | 2249 | dtdHandler.fatalError(e); |
aoqi@0 | 2250 | |
aoqi@0 | 2251 | throw e; |
aoqi@0 | 2252 | } |
aoqi@0 | 2253 | |
aoqi@0 | 2254 | // |
aoqi@0 | 2255 | // Map char arrays to strings ... cuts down both on memory and |
aoqi@0 | 2256 | // CPU usage for element/attribute/other names that are reused. |
aoqi@0 | 2257 | // |
aoqi@0 | 2258 | // Documents typically repeat names a lot, so we more or less |
aoqi@0 | 2259 | // intern all the strings within the document; since some strings |
aoqi@0 | 2260 | // are repeated in multiple documents (e.g. stylesheets) we go |
aoqi@0 | 2261 | // a bit further, and intern globally. |
aoqi@0 | 2262 | // |
aoqi@0 | 2263 | static class NameCache { |
aoqi@0 | 2264 | // |
aoqi@0 | 2265 | // Unless we auto-grow this, the default size should be a |
aoqi@0 | 2266 | // reasonable bit larger than needed for most XML files |
aoqi@0 | 2267 | // we've yet seen (and be prime). If it's too small, the |
aoqi@0 | 2268 | // penalty is just excess cache collisions. |
aoqi@0 | 2269 | // |
aoqi@0 | 2270 | NameCacheEntry hashtable [] = new NameCacheEntry[541]; |
aoqi@0 | 2271 | |
aoqi@0 | 2272 | // |
aoqi@0 | 2273 | // Usually we just want to get the 'symbol' for these chars |
aoqi@0 | 2274 | // |
aoqi@0 | 2275 | String lookup(char value [], int len) { |
aoqi@0 | 2276 | |
aoqi@0 | 2277 | return lookupEntry(value, len).name; |
aoqi@0 | 2278 | } |
aoqi@0 | 2279 | |
aoqi@0 | 2280 | // |
aoqi@0 | 2281 | // Sometimes we need to scan the chars in the resulting |
aoqi@0 | 2282 | // string, so there's an accessor which exposes them. |
aoqi@0 | 2283 | // (Mostly for element end tags.) |
aoqi@0 | 2284 | // |
aoqi@0 | 2285 | NameCacheEntry lookupEntry(char value [], int len) { |
aoqi@0 | 2286 | |
aoqi@0 | 2287 | int index = 0; |
aoqi@0 | 2288 | NameCacheEntry entry; |
aoqi@0 | 2289 | |
aoqi@0 | 2290 | // hashing to get index |
aoqi@0 | 2291 | for (int i = 0; i < len; i++) |
aoqi@0 | 2292 | index = index * 31 + value[i]; |
aoqi@0 | 2293 | index &= 0x7fffffff; |
aoqi@0 | 2294 | index %= hashtable.length; |
aoqi@0 | 2295 | |
aoqi@0 | 2296 | // return entry if one's there ... |
aoqi@0 | 2297 | for (entry = hashtable[index]; |
aoqi@0 | 2298 | entry != null; |
aoqi@0 | 2299 | entry = entry.next) { |
aoqi@0 | 2300 | if (entry.matches(value, len)) |
aoqi@0 | 2301 | return entry; |
aoqi@0 | 2302 | } |
aoqi@0 | 2303 | |
aoqi@0 | 2304 | // else create new one |
aoqi@0 | 2305 | entry = new NameCacheEntry(); |
aoqi@0 | 2306 | entry.chars = new char[len]; |
aoqi@0 | 2307 | System.arraycopy(value, 0, entry.chars, 0, len); |
aoqi@0 | 2308 | entry.name = new String(entry.chars); |
aoqi@0 | 2309 | // |
aoqi@0 | 2310 | // NOTE: JDK 1.1 has a fixed size string intern table, |
aoqi@0 | 2311 | // with non-GC'd entries. It can panic here; that's a |
aoqi@0 | 2312 | // JDK problem, use 1.2 or later with many identifiers. |
aoqi@0 | 2313 | // |
aoqi@0 | 2314 | entry.name = entry.name.intern(); // "global" intern |
aoqi@0 | 2315 | entry.next = hashtable[index]; |
aoqi@0 | 2316 | hashtable[index] = entry; |
aoqi@0 | 2317 | return entry; |
aoqi@0 | 2318 | } |
aoqi@0 | 2319 | } |
aoqi@0 | 2320 | |
aoqi@0 | 2321 | static class NameCacheEntry { |
aoqi@0 | 2322 | |
aoqi@0 | 2323 | String name; |
aoqi@0 | 2324 | char chars []; |
aoqi@0 | 2325 | NameCacheEntry next; |
aoqi@0 | 2326 | |
aoqi@0 | 2327 | boolean matches(char value [], int len) { |
aoqi@0 | 2328 | |
aoqi@0 | 2329 | if (chars.length != len) |
aoqi@0 | 2330 | return false; |
aoqi@0 | 2331 | for (int i = 0; i < len; i++) |
aoqi@0 | 2332 | if (value[i] != chars[i]) |
aoqi@0 | 2333 | return false; |
aoqi@0 | 2334 | return true; |
aoqi@0 | 2335 | } |
aoqi@0 | 2336 | } |
aoqi@0 | 2337 | |
//
// Message catalog for diagnostics.  This single shared instance is what
// the error-reporting code asks for message text, via
// messages.getMessage(locale, messageId, parameters).
//
static final Catalog messages = new Catalog();
aoqi@0 | 2342 | |
aoqi@0 | 2343 | static final class Catalog extends MessageCatalog { |
aoqi@0 | 2344 | |
aoqi@0 | 2345 | Catalog() { |
aoqi@0 | 2346 | super(DTDParser.class); |
aoqi@0 | 2347 | } |
aoqi@0 | 2348 | } |
aoqi@0 | 2349 | |
aoqi@0 | 2350 | } |