src/share/jaxws_classes/com/sun/xml/internal/dtdparser/InputEntity.java

Tue, 06 Mar 2012 16:09:35 -0800

author
ohair
date
Tue, 06 Mar 2012 16:09:35 -0800
changeset 286
f50545b5e2f1
child 397
b99d7e355d4b
permissions
-rw-r--r--

7150322: Stop using drop source bundles in jaxws
Reviewed-by: darcy, ohrstrom

ohair@286 1 /*
ohair@286 2 * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
ohair@286 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
ohair@286 4 *
ohair@286 5 * This code is free software; you can redistribute it and/or modify it
ohair@286 6 * under the terms of the GNU General Public License version 2 only, as
ohair@286 7 * published by the Free Software Foundation. Oracle designates this
ohair@286 8 * particular file as subject to the "Classpath" exception as provided
ohair@286 9 * by Oracle in the LICENSE file that accompanied this code.
ohair@286 10 *
ohair@286 11 * This code is distributed in the hope that it will be useful, but WITHOUT
ohair@286 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
ohair@286 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
ohair@286 14 * version 2 for more details (a copy is included in the LICENSE file that
ohair@286 15 * accompanied this code).
ohair@286 16 *
ohair@286 17 * You should have received a copy of the GNU General Public License version
ohair@286 18 * 2 along with this work; if not, write to the Free Software Foundation,
ohair@286 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
ohair@286 20 *
ohair@286 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
ohair@286 22 * or visit www.oracle.com if you need additional information or have any
ohair@286 23 * questions.
ohair@286 24 */
ohair@286 25
ohair@286 26 package com.sun.xml.internal.dtdparser;
ohair@286 27
ohair@286 28 import org.xml.sax.InputSource;
ohair@286 29 import org.xml.sax.SAXException;
ohair@286 30 import org.xml.sax.SAXParseException;
ohair@286 31
ohair@286 32 import java.io.CharConversionException;
ohair@286 33 import java.io.IOException;
ohair@286 34 import java.io.InputStream;
ohair@286 35 import java.io.InputStreamReader;
ohair@286 36 import java.io.Reader;
ohair@286 37 import java.io.UnsupportedEncodingException;
ohair@286 38 import java.net.URL;
ohair@286 39 import java.util.Locale;
ohair@286 40
ohair@286 41 /**
ohair@286 42 * This is how the parser talks to its input entities, of all kinds.
ohair@286 43 * The entities are in a stack.
ohair@286 44 * <p/>
ohair@286 45 * <P> For internal entities, the character arrays are referenced here,
ohair@286 46 * and read from as needed (they're read-only). External entities have
ohair@286 47 * mutable buffers, that are read into as needed.
ohair@286 48 * <p/>
ohair@286 49 * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
ohair@286 50 * whether it's in an external (parsed) entity or not. The XML 1.0 spec
ohair@286 51 * is inconsistent in explaining EOL handling; this is the sensible way.
ohair@286 52 *
ohair@286 53 * @author David Brownell
ohair@286 54 * @author Janet Koenig
ohair@286 55 * @version 1.4 00/08/05
ohair@286 56 */
ohair@286 57 public class InputEntity {
ohair@286 58 private int start, finish;
ohair@286 59 private char buf [];
ohair@286 60 private int lineNumber = 1;
ohair@286 61 private boolean returnedFirstHalf = false;
ohair@286 62 private boolean maybeInCRLF = false;
ohair@286 63
ohair@286 64 // name of entity (never main document or unnamed DTD PE)
ohair@286 65 private String name;
ohair@286 66
ohair@286 67 private InputEntity next;
ohair@286 68
ohair@286 69 // for system and public IDs in diagnostics
ohair@286 70 private InputSource input;
ohair@286 71
ohair@286 72 // this is a buffer; some buffers can be replenished.
ohair@286 73 private Reader reader;
ohair@286 74 private boolean isClosed;
ohair@286 75
ohair@286 76 private DTDEventListener errHandler;
ohair@286 77 private Locale locale;
ohair@286 78
ohair@286 79 private StringBuffer rememberedText;
ohair@286 80 private int startRemember;
ohair@286 81
ohair@286 82 // record if this is a PE, so endParsedEntity won't be called
ohair@286 83 private boolean isPE;
ohair@286 84
ohair@286 85 // InputStreamReader throws an internal per-read exception, so
ohair@286 86 // we minimize reads. We also add a byte to compensate for the
ohair@286 87 // "ungetc" byte we keep, so that our downstream reads are as
ohair@286 88 // nicely sized as we can make them.
ohair@286 89 final private static int BUFSIZ = 8 * 1024 + 1;
ohair@286 90
ohair@286 91 final private static char newline [] = {'\n'};
ohair@286 92
ohair@286 93 public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
ohair@286 94 InputEntity retval = new InputEntity();
ohair@286 95 retval.errHandler = h;
ohair@286 96 retval.locale = l;
ohair@286 97 return retval;
ohair@286 98 }
ohair@286 99
/** Instances are only created through {@link #getInputEntity}. */
private InputEntity() {
    super();
}
ohair@286 102
ohair@286 103 //
ohair@286 104 // predicate: return true iff this is an internal entity reader,
ohair@286 105 // and so may safely be "popped" as needed. external entities have
ohair@286 106 // syntax to uphold; internal parameter entities have at most validity
ohair@286 107 // constraints to monitor. also, only external entities get decent
ohair@286 108 // location diagnostics.
ohair@286 109 //
ohair@286 110 public boolean isInternal() {
ohair@286 111 return reader == null;
ohair@286 112 }
ohair@286 113
ohair@286 114 //
ohair@286 115 // predicate: return true iff this is the toplevel document
ohair@286 116 //
ohair@286 117 public boolean isDocument() {
ohair@286 118 return next == null;
ohair@286 119 }
ohair@286 120
ohair@286 121 //
ohair@286 122 // predicate: return true iff this is a PE expansion (so that
ohair@286 123 // LexicalEventListner.endParsedEntity won't be called)
ohair@286 124 //
ohair@286 125 public boolean isParameterEntity() {
ohair@286 126 return isPE;
ohair@286 127 }
ohair@286 128
ohair@286 129 //
ohair@286 130 // return name of current entity
ohair@286 131 //
ohair@286 132 public String getName() {
ohair@286 133 return name;
ohair@286 134 }
ohair@286 135
ohair@286 136 //
ohair@286 137 // use this for an external parsed entity
ohair@286 138 //
ohair@286 139 public void init(InputSource in, String name, InputEntity stack,
ohair@286 140 boolean isPE)
ohair@286 141 throws IOException, SAXException {
ohair@286 142
ohair@286 143 input = in;
ohair@286 144 this.isPE = isPE;
ohair@286 145 reader = in.getCharacterStream();
ohair@286 146
ohair@286 147 if (reader == null) {
ohair@286 148 InputStream bytes = in.getByteStream();
ohair@286 149
ohair@286 150 if (bytes == null)
ohair@286 151 reader = XmlReader.createReader(new URL(in.getSystemId())
ohair@286 152 .openStream());
ohair@286 153 else if (in.getEncoding() != null)
ohair@286 154 reader = XmlReader.createReader(in.getByteStream(),
ohair@286 155 in.getEncoding());
ohair@286 156 else
ohair@286 157 reader = XmlReader.createReader(in.getByteStream());
ohair@286 158 }
ohair@286 159 next = stack;
ohair@286 160 buf = new char[BUFSIZ];
ohair@286 161 this.name = name;
ohair@286 162 checkRecursion(stack);
ohair@286 163 }
ohair@286 164
ohair@286 165 //
ohair@286 166 // use this for an internal parsed entity; buffer is readonly
ohair@286 167 //
ohair@286 168 public void init(char b [], String name, InputEntity stack, boolean isPE)
ohair@286 169 throws SAXException {
ohair@286 170
ohair@286 171 next = stack;
ohair@286 172 buf = b;
ohair@286 173 finish = b.length;
ohair@286 174 this.name = name;
ohair@286 175 this.isPE = isPE;
ohair@286 176 checkRecursion(stack);
ohair@286 177 }
ohair@286 178
ohair@286 179 private void checkRecursion(InputEntity stack)
ohair@286 180 throws SAXException {
ohair@286 181
ohair@286 182 if (stack == null)
ohair@286 183 return;
ohair@286 184 for (stack = stack.next; stack != null; stack = stack.next) {
ohair@286 185 if (stack.name != null && stack.name.equals(name))
ohair@286 186 fatal("P-069", new Object[]{name});
ohair@286 187 }
ohair@286 188 }
ohair@286 189
ohair@286 190 public InputEntity pop() throws IOException {
ohair@286 191
ohair@286 192 // caller has ensured there's nothing left to read
ohair@286 193 close();
ohair@286 194 return next;
ohair@286 195 }
ohair@286 196
ohair@286 197 /**
ohair@286 198 * returns true iff there's no more data to consume ...
ohair@286 199 */
ohair@286 200 public boolean isEOF() throws IOException, SAXException {
ohair@286 201
ohair@286 202 // called to ensure WF-ness of included entities and to pop
ohair@286 203 // input entities appropriately ... EOF is not always legal.
ohair@286 204 if (start >= finish) {
ohair@286 205 fillbuf();
ohair@286 206 return start >= finish;
ohair@286 207 } else
ohair@286 208 return false;
ohair@286 209 }
ohair@286 210
ohair@286 211 /**
ohair@286 212 * Returns the name of the encoding in use, else null; the name
ohair@286 213 * returned is in as standard a form as we can get.
ohair@286 214 */
ohair@286 215 public String getEncoding() {
ohair@286 216
ohair@286 217 if (reader == null)
ohair@286 218 return null;
ohair@286 219 if (reader instanceof XmlReader)
ohair@286 220 return ((XmlReader) reader).getEncoding();
ohair@286 221
ohair@286 222 // XXX prefer a java2std() call to normalize names...
ohair@286 223
ohair@286 224 if (reader instanceof InputStreamReader)
ohair@286 225 return ((InputStreamReader) reader).getEncoding();
ohair@286 226 return null;
ohair@286 227 }
ohair@286 228
ohair@286 229
ohair@286 230 /**
ohair@286 231 * returns the next name char, or NUL ... faster than getc(),
ohair@286 232 * and the common "name or nmtoken must be next" case won't
ohair@286 233 * need ungetc().
ohair@286 234 */
ohair@286 235 public char getNameChar() throws IOException, SAXException {
ohair@286 236
ohair@286 237 if (finish <= start)
ohair@286 238 fillbuf();
ohair@286 239 if (finish > start) {
ohair@286 240 char c = buf[start++];
ohair@286 241 if (XmlChars.isNameChar(c))
ohair@286 242 return c;
ohair@286 243 start--;
ohair@286 244 }
ohair@286 245 return 0;
ohair@286 246 }
ohair@286 247
ohair@286 248 /**
ohair@286 249 * gets the next Java character -- might be part of an XML
ohair@286 250 * text character represented by a surrogate pair, or be
ohair@286 251 * the end of the entity.
ohair@286 252 */
ohair@286 253 public char getc() throws IOException, SAXException {
ohair@286 254
ohair@286 255 if (finish <= start)
ohair@286 256 fillbuf();
ohair@286 257 if (finish > start) {
ohair@286 258 char c = buf[start++];
ohair@286 259
ohair@286 260 // [2] Char ::= #x0009 | #x000A | #x000D
ohair@286 261 // | [#x0020-#xD7FF]
ohair@286 262 // | [#xE000-#xFFFD]
ohair@286 263 // plus surrogate _pairs_ representing [#x10000-#x10ffff]
ohair@286 264 if (returnedFirstHalf) {
ohair@286 265 if (c >= 0xdc00 && c <= 0xdfff) {
ohair@286 266 returnedFirstHalf = false;
ohair@286 267 return c;
ohair@286 268 } else
ohair@286 269 fatal("P-070", new Object[]{Integer.toHexString(c)});
ohair@286 270 }
ohair@286 271 if ((c >= 0x0020 && c <= 0xD7FF)
ohair@286 272 || c == 0x0009
ohair@286 273 // no surrogates!
ohair@286 274 || (c >= 0xE000 && c <= 0xFFFD))
ohair@286 275 return c;
ohair@286 276
ohair@286 277 //
ohair@286 278 // CRLF and CR are both line ends; map both to LF, and
ohair@286 279 // keep line count correct.
ohair@286 280 //
ohair@286 281 else if (c == '\r' && !isInternal()) {
ohair@286 282 maybeInCRLF = true;
ohair@286 283 c = getc();
ohair@286 284 if (c != '\n')
ohair@286 285 ungetc();
ohair@286 286 maybeInCRLF = false;
ohair@286 287
ohair@286 288 lineNumber++;
ohair@286 289 return '\n';
ohair@286 290
ohair@286 291 } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
ohair@286 292 if (!isInternal() && !maybeInCRLF)
ohair@286 293 lineNumber++;
ohair@286 294 return c;
ohair@286 295 }
ohair@286 296
ohair@286 297 // surrogates...
ohair@286 298 if (c >= 0xd800 && c < 0xdc00) {
ohair@286 299 returnedFirstHalf = true;
ohair@286 300 return c;
ohair@286 301 }
ohair@286 302
ohair@286 303 fatal("P-071", new Object[]{Integer.toHexString(c)});
ohair@286 304 }
ohair@286 305 throw new EndOfInputException();
ohair@286 306 }
ohair@286 307
ohair@286 308
ohair@286 309 /**
ohair@286 310 * lookahead one character
ohair@286 311 */
ohair@286 312 public boolean peekc(char c) throws IOException, SAXException {
ohair@286 313
ohair@286 314 if (finish <= start)
ohair@286 315 fillbuf();
ohair@286 316 if (finish > start) {
ohair@286 317 if (buf[start] == c) {
ohair@286 318 start++;
ohair@286 319 return true;
ohair@286 320 } else
ohair@286 321 return false;
ohair@286 322 }
ohair@286 323 return false;
ohair@286 324 }
ohair@286 325
ohair@286 326
ohair@286 327 /**
ohair@286 328 * two character pushback is guaranteed
ohair@286 329 */
ohair@286 330 public void ungetc() {
ohair@286 331
ohair@286 332 if (start == 0)
ohair@286 333 throw new InternalError("ungetc");
ohair@286 334 start--;
ohair@286 335
ohair@286 336 if (buf[start] == '\n' || buf[start] == '\r') {
ohair@286 337 if (!isInternal())
ohair@286 338 lineNumber--;
ohair@286 339 } else if (returnedFirstHalf)
ohair@286 340 returnedFirstHalf = false;
ohair@286 341 }
ohair@286 342
ohair@286 343
ohair@286 344 /**
ohair@286 345 * optional grammatical whitespace (discarded)
ohair@286 346 */
ohair@286 347 public boolean maybeWhitespace()
ohair@286 348 throws IOException, SAXException {
ohair@286 349
ohair@286 350 char c;
ohair@286 351 boolean isSpace = false;
ohair@286 352 boolean sawCR = false;
ohair@286 353
ohair@286 354 // [3] S ::= #20 | #09 | #0D | #0A
ohair@286 355 for (; ;) {
ohair@286 356 if (finish <= start)
ohair@286 357 fillbuf();
ohair@286 358 if (finish <= start)
ohair@286 359 return isSpace;
ohair@286 360
ohair@286 361 c = buf[start++];
ohair@286 362 if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') {
ohair@286 363 isSpace = true;
ohair@286 364
ohair@286 365 //
ohair@286 366 // CR, LF are line endings ... CLRF is one, not two!
ohair@286 367 //
ohair@286 368 if ((c == '\n' || c == '\r') && !isInternal()) {
ohair@286 369 if (!(c == '\n' && sawCR)) {
ohair@286 370 lineNumber++;
ohair@286 371 sawCR = false;
ohair@286 372 }
ohair@286 373 if (c == '\r')
ohair@286 374 sawCR = true;
ohair@286 375 }
ohair@286 376 } else {
ohair@286 377 start--;
ohair@286 378 return isSpace;
ohair@286 379 }
ohair@286 380 }
ohair@286 381 }
ohair@286 382
ohair@286 383
ohair@286 384 /**
ohair@286 385 * normal content; whitespace in markup may be handled
ohair@286 386 * specially if the parser uses the content model.
ohair@286 387 * <p/>
ohair@286 388 * <P> content terminates with markup delimiter characters,
ohair@286 389 * namely ampersand (&amp;amp;) and left angle bracket (&amp;lt;).
ohair@286 390 * <p/>
ohair@286 391 * <P> the document handler's characters() method is called
ohair@286 392 * on all the content found
ohair@286 393 */
ohair@286 394 public boolean parsedContent(DTDEventListener docHandler
ohair@286 395 /*ElementValidator validator*/)
ohair@286 396 throws IOException, SAXException {
ohair@286 397
ohair@286 398 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
ohair@286 399
ohair@286 400 int first; // first char to return
ohair@286 401 int last; // last char to return
ohair@286 402 boolean sawContent; // sent any chars?
ohair@286 403 char c;
ohair@286 404
ohair@286 405 // deliver right out of the buffer, until delimiter, EOF,
ohair@286 406 // or error, refilling as we go
ohair@286 407 for (first = last = start, sawContent = false; ; last++) {
ohair@286 408
ohair@286 409 // buffer empty?
ohair@286 410 if (last >= finish) {
ohair@286 411 if (last > first) {
ohair@286 412 // validator.text ();
ohair@286 413 docHandler.characters(buf, first, last - first);
ohair@286 414 sawContent = true;
ohair@286 415 start = last;
ohair@286 416 }
ohair@286 417 if (isEOF()) // calls fillbuf
ohair@286 418 return sawContent;
ohair@286 419 first = start;
ohair@286 420 last = first - 1; // incremented in loop
ohair@286 421 continue;
ohair@286 422 }
ohair@286 423
ohair@286 424 c = buf[last];
ohair@286 425
ohair@286 426 //
ohair@286 427 // pass most chars through ASAP; this inlines the code of
ohair@286 428 // [2] !XmlChars.isChar(c) leaving only characters needing
ohair@286 429 // special treatment ... line ends, surrogates, and:
ohair@286 430 // 0x0026 == '&'
ohair@286 431 // 0x003C == '<'
ohair@286 432 // 0x005D == ']'
ohair@286 433 // Comparisons ordered for speed on 'typical' text
ohair@286 434 //
ohair@286 435 if ((c > 0x005D && c <= 0xD7FF) // a-z and more
ohair@286 436 || (c < 0x0026 && c >= 0x0020) // space & punct
ohair@286 437 || (c > 0x003C && c < 0x005D) // A-Z & punct
ohair@286 438 || (c > 0x0026 && c < 0x003C) // 0-9 & punct
ohair@286 439 || c == 0x0009
ohair@286 440 || (c >= 0xE000 && c <= 0xFFFD)
ohair@286 441 )
ohair@286 442 continue;
ohair@286 443
ohair@286 444 // terminate on markup delimiters
ohair@286 445 if (c == '<' || c == '&')
ohair@286 446 break;
ohair@286 447
ohair@286 448 // count lines
ohair@286 449 if (c == '\n') {
ohair@286 450 if (!isInternal())
ohair@286 451 lineNumber++;
ohair@286 452 continue;
ohair@286 453 }
ohair@286 454
ohair@286 455 // External entities get CR, CRLF --> LF mapping
ohair@286 456 // Internal ones got it already, and we can't repeat
ohair@286 457 // else we break char ref handling!!
ohair@286 458 if (c == '\r') {
ohair@286 459 if (isInternal())
ohair@286 460 continue;
ohair@286 461
ohair@286 462 docHandler.characters(buf, first, last - first);
ohair@286 463 docHandler.characters(newline, 0, 1);
ohair@286 464 sawContent = true;
ohair@286 465 lineNumber++;
ohair@286 466 if (finish > (last + 1)) {
ohair@286 467 if (buf[last + 1] == '\n')
ohair@286 468 last++;
ohair@286 469 } else { // CR at end of buffer
ohair@286 470 // XXX case not yet handled: CRLF here will look like two lines
ohair@286 471 }
ohair@286 472 first = start = last + 1;
ohair@286 473 continue;
ohair@286 474 }
ohair@286 475
ohair@286 476 // ']]>' is a WF error -- must fail if we see it
ohair@286 477 if (c == ']') {
ohair@286 478 switch (finish - last) {
ohair@286 479 // for suspicious end-of-buffer cases, get more data
ohair@286 480 // into the buffer to rule out this sequence.
ohair@286 481 case 2:
ohair@286 482 if (buf[last + 1] != ']')
ohair@286 483 continue;
ohair@286 484 // FALLTHROUGH
ohair@286 485
ohair@286 486 case 1:
ohair@286 487 if (reader == null || isClosed)
ohair@286 488 continue;
ohair@286 489 if (last == first)
ohair@286 490 throw new InternalError("fillbuf");
ohair@286 491 last--;
ohair@286 492 if (last > first) {
ohair@286 493 // validator.text ();
ohair@286 494 docHandler.characters(buf, first, last - first);
ohair@286 495 sawContent = true;
ohair@286 496 start = last;
ohair@286 497 }
ohair@286 498 fillbuf();
ohair@286 499 first = last = start;
ohair@286 500 continue;
ohair@286 501
ohair@286 502 // otherwise any "]]>" would be buffered, and we can
ohair@286 503 // see right away if that's what we have
ohair@286 504 default:
ohair@286 505 if (buf[last + 1] == ']' && buf[last + 2] == '>')
ohair@286 506 fatal("P-072", null);
ohair@286 507 continue;
ohair@286 508 }
ohair@286 509 }
ohair@286 510
ohair@286 511 // correctly paired surrogates are OK
ohair@286 512 if (c >= 0xd800 && c <= 0xdfff) {
ohair@286 513 if ((last + 1) >= finish) {
ohair@286 514 if (last > first) {
ohair@286 515 // validator.text ();
ohair@286 516 docHandler.characters(buf, first, last - first);
ohair@286 517 sawContent = true;
ohair@286 518 start = last + 1;
ohair@286 519 }
ohair@286 520 if (isEOF()) { // calls fillbuf
ohair@286 521 fatal("P-081",
ohair@286 522 new Object[]{Integer.toHexString(c)});
ohair@286 523 }
ohair@286 524 first = start;
ohair@286 525 last = first;
ohair@286 526 continue;
ohair@286 527 }
ohair@286 528 if (checkSurrogatePair(last))
ohair@286 529 last++;
ohair@286 530 else {
ohair@286 531 last--;
ohair@286 532 // also terminate on surrogate pair oddities
ohair@286 533 break;
ohair@286 534 }
ohair@286 535 continue;
ohair@286 536 }
ohair@286 537
ohair@286 538 fatal("P-071", new Object[]{Integer.toHexString(c)});
ohair@286 539 }
ohair@286 540 if (last == first)
ohair@286 541 return sawContent;
ohair@286 542 // validator.text ();
ohair@286 543 docHandler.characters(buf, first, last - first);
ohair@286 544 start = last;
ohair@286 545 return true;
ohair@286 546 }
ohair@286 547
ohair@286 548
ohair@286 549 /**
ohair@286 550 * CDATA -- character data, terminated by "]]>" and optionally
ohair@286 551 * including unescaped markup delimiters (ampersand and left angle
ohair@286 552 * bracket). This should otherwise be exactly like character data,
ohair@286 553 * modulo differences in error report details.
ohair@286 554 * <p/>
ohair@286 555 * <P> The document handler's characters() or ignorableWhitespace()
ohair@286 556 * methods are invoked on all the character data found
ohair@286 557 *
ohair@286 558 * @param docHandler gets callbacks for character data
ohair@286 559 * @param ignorableWhitespace if true, whitespace characters will
ohair@286 560 * be reported using docHandler.ignorableWhitespace(); implicitly,
ohair@286 561 * non-whitespace characters will cause validation errors
ohair@286 562 * @param whitespaceInvalidMessage if true, ignorable whitespace
ohair@286 563 * causes a validity error report as well as a callback
ohair@286 564 */
ohair@286 565 public boolean unparsedContent(DTDEventListener docHandler,
ohair@286 566 /*ElementValidator validator,*/
ohair@286 567 boolean ignorableWhitespace,
ohair@286 568 String whitespaceInvalidMessage)
ohair@286 569 throws IOException, SAXException {
ohair@286 570
ohair@286 571 // [18] CDSect ::= CDStart CData CDEnd
ohair@286 572 // [19] CDStart ::= '<![CDATA['
ohair@286 573 // [20] CData ::= (Char* - (Char* ']]>' Char*))
ohair@286 574 // [21] CDEnd ::= ']]>'
ohair@286 575
ohair@286 576 // caller peeked the leading '<' ...
ohair@286 577 if (!peek("![CDATA[", null))
ohair@286 578 return false;
ohair@286 579 docHandler.startCDATA();
ohair@286 580
ohair@286 581 // only a literal ']]>' stops this ...
ohair@286 582 int last;
ohair@286 583
ohair@286 584 for (; ;) { // until ']]>' seen
ohair@286 585 boolean done = false;
ohair@286 586 char c;
ohair@286 587
ohair@286 588 // don't report ignorable whitespace as "text" for
ohair@286 589 // validation purposes.
ohair@286 590 boolean white = ignorableWhitespace;
ohair@286 591
ohair@286 592 for (last = start; last < finish; last++) {
ohair@286 593 c = buf[last];
ohair@286 594
ohair@286 595 //
ohair@286 596 // Reject illegal characters.
ohair@286 597 //
ohair@286 598 if (!XmlChars.isChar(c)) {
ohair@286 599 white = false;
ohair@286 600 if (c >= 0xd800 && c <= 0xdfff) {
ohair@286 601 if (checkSurrogatePair(last)) {
ohair@286 602 last++;
ohair@286 603 continue;
ohair@286 604 } else {
ohair@286 605 last--;
ohair@286 606 break;
ohair@286 607 }
ohair@286 608 }
ohair@286 609 fatal("P-071", new Object[]
ohair@286 610 {Integer.toHexString(buf[last])});
ohair@286 611 }
ohair@286 612 if (c == '\n') {
ohair@286 613 if (!isInternal())
ohair@286 614 lineNumber++;
ohair@286 615 continue;
ohair@286 616 }
ohair@286 617 if (c == '\r') {
ohair@286 618 // As above, we can't repeat CR/CRLF --> LF mapping
ohair@286 619 if (isInternal())
ohair@286 620 continue;
ohair@286 621
ohair@286 622 if (white) {
ohair@286 623 if (whitespaceInvalidMessage != null)
ohair@286 624 errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
ohair@286 625 whitespaceInvalidMessage), null));
ohair@286 626 docHandler.ignorableWhitespace(buf, start,
ohair@286 627 last - start);
ohair@286 628 docHandler.ignorableWhitespace(newline, 0, 1);
ohair@286 629 } else {
ohair@286 630 // validator.text ();
ohair@286 631 docHandler.characters(buf, start, last - start);
ohair@286 632 docHandler.characters(newline, 0, 1);
ohair@286 633 }
ohair@286 634 lineNumber++;
ohair@286 635 if (finish > (last + 1)) {
ohair@286 636 if (buf[last + 1] == '\n')
ohair@286 637 last++;
ohair@286 638 } else { // CR at end of buffer
ohair@286 639 // XXX case not yet handled ... as above
ohair@286 640 }
ohair@286 641 start = last + 1;
ohair@286 642 continue;
ohair@286 643 }
ohair@286 644 if (c != ']') {
ohair@286 645 if (c != ' ' && c != '\t')
ohair@286 646 white = false;
ohair@286 647 continue;
ohair@286 648 }
ohair@286 649 if ((last + 2) < finish) {
ohair@286 650 if (buf[last + 1] == ']' && buf[last + 2] == '>') {
ohair@286 651 done = true;
ohair@286 652 break;
ohair@286 653 }
ohair@286 654 white = false;
ohair@286 655 continue;
ohair@286 656 } else {
ohair@286 657 //last--;
ohair@286 658 break;
ohair@286 659 }
ohair@286 660 }
ohair@286 661 if (white) {
ohair@286 662 if (whitespaceInvalidMessage != null)
ohair@286 663 errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
ohair@286 664 whitespaceInvalidMessage), null));
ohair@286 665 docHandler.ignorableWhitespace(buf, start, last - start);
ohair@286 666 } else {
ohair@286 667 // validator.text ();
ohair@286 668 docHandler.characters(buf, start, last - start);
ohair@286 669 }
ohair@286 670 if (done) {
ohair@286 671 start = last + 3;
ohair@286 672 break;
ohair@286 673 }
ohair@286 674 start = last;
ohair@286 675 if (isEOF())
ohair@286 676 fatal("P-073", null);
ohair@286 677 }
ohair@286 678 docHandler.endCDATA();
ohair@286 679 return true;
ohair@286 680 }
ohair@286 681
ohair@286 682 // return false to backstep at end of buffer)
ohair@286 683 private boolean checkSurrogatePair(int offset)
ohair@286 684 throws SAXException {
ohair@286 685
ohair@286 686 if ((offset + 1) >= finish)
ohair@286 687 return false;
ohair@286 688
ohair@286 689 char c1 = buf[offset++];
ohair@286 690 char c2 = buf[offset];
ohair@286 691
ohair@286 692 if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff))
ohair@286 693 return true;
ohair@286 694 fatal("P-074", new Object[]{
ohair@286 695 Integer.toHexString(c1 & 0x0ffff),
ohair@286 696 Integer.toHexString(c2 & 0x0ffff)
ohair@286 697 });
ohair@286 698 return false;
ohair@286 699 }
ohair@286 700
ohair@286 701
ohair@286 702 /**
ohair@286 703 * whitespace in markup (flagged to app, discardable)
ohair@286 704 * <p/>
ohair@286 705 * <P> the document handler's ignorableWhitespace() method
ohair@286 706 * is called on all the whitespace found
ohair@286 707 */
ohair@286 708 public boolean ignorableWhitespace(DTDEventListener handler)
ohair@286 709 throws IOException, SAXException {
ohair@286 710
ohair@286 711 char c;
ohair@286 712 boolean isSpace = false;
ohair@286 713 int first;
ohair@286 714
ohair@286 715 // [3] S ::= #20 | #09 | #0D | #0A
ohair@286 716 for (first = start; ;) {
ohair@286 717 if (finish <= start) {
ohair@286 718 if (isSpace)
ohair@286 719 handler.ignorableWhitespace(buf, first, start - first);
ohair@286 720 fillbuf();
ohair@286 721 first = start;
ohair@286 722 }
ohair@286 723 if (finish <= start)
ohair@286 724 return isSpace;
ohair@286 725
ohair@286 726 c = buf[start++];
ohair@286 727 switch (c) {
ohair@286 728 case '\n':
ohair@286 729 if (!isInternal())
ohair@286 730 lineNumber++;
ohair@286 731 // XXX handles Macintosh line endings wrong
ohair@286 732 // fallthrough
ohair@286 733 case 0x09:
ohair@286 734 case 0x20:
ohair@286 735 isSpace = true;
ohair@286 736 continue;
ohair@286 737
ohair@286 738 case '\r':
ohair@286 739 isSpace = true;
ohair@286 740 if (!isInternal())
ohair@286 741 lineNumber++;
ohair@286 742 handler.ignorableWhitespace(buf, first,
ohair@286 743 (start - 1) - first);
ohair@286 744 handler.ignorableWhitespace(newline, 0, 1);
ohair@286 745 if (start < finish && buf[start] == '\n')
ohair@286 746 ++start;
ohair@286 747 first = start;
ohair@286 748 continue;
ohair@286 749
ohair@286 750 default:
ohair@286 751 ungetc();
ohair@286 752 if (isSpace)
ohair@286 753 handler.ignorableWhitespace(buf, first, start - first);
ohair@286 754 return isSpace;
ohair@286 755 }
ohair@286 756 }
ohair@286 757 }
ohair@286 758
ohair@286 759 /**
ohair@286 760 * returns false iff 'next' string isn't as provided,
ohair@286 761 * else skips that text and returns true.
ohair@286 762 * <p/>
ohair@286 763 * <P> NOTE: two alternative string representations are
ohair@286 764 * both passed in, since one is faster.
ohair@286 765 */
ohair@286 766 public boolean peek(String next, char chars [])
ohair@286 767 throws IOException, SAXException {
ohair@286 768
ohair@286 769 int len;
ohair@286 770 int i;
ohair@286 771
ohair@286 772 if (chars != null)
ohair@286 773 len = chars.length;
ohair@286 774 else
ohair@286 775 len = next.length();
ohair@286 776
ohair@286 777 // buffer should hold the whole thing ... give it a
ohair@286 778 // chance for the end-of-buffer case and cope with EOF
ohair@286 779 // by letting fillbuf compact and fill
ohair@286 780 if (finish <= start || (finish - start) < len)
ohair@286 781 fillbuf();
ohair@286 782
ohair@286 783 // can't peek past EOF
ohair@286 784 if (finish <= start)
ohair@286 785 return false;
ohair@286 786
ohair@286 787 // compare the string; consume iff it matches
ohair@286 788 if (chars != null) {
ohair@286 789 for (i = 0; i < len && (start + i) < finish; i++) {
ohair@286 790 if (buf[start + i] != chars[i])
ohair@286 791 return false;
ohair@286 792 }
ohair@286 793 } else {
ohair@286 794 for (i = 0; i < len && (start + i) < finish; i++) {
ohair@286 795 if (buf[start + i] != next.charAt(i))
ohair@286 796 return false;
ohair@286 797 }
ohair@286 798 }
ohair@286 799
ohair@286 800 // if the first fillbuf didn't get enough data, give
ohair@286 801 // fillbuf another chance to read
ohair@286 802 if (i < len) {
ohair@286 803 if (reader == null || isClosed)
ohair@286 804 return false;
ohair@286 805
ohair@286 806 //
ohair@286 807 // This diagnostic "knows" that the only way big strings would
ohair@286 808 // fail to be peeked is where it's a symbol ... e.g. for an
ohair@286 809 // </EndTag> construct. That knowledge could also be applied
ohair@286 810 // to get rid of the symbol length constraint, since having
ohair@286 811 // the wrong symbol is a fatal error anyway ...
ohair@286 812 //
ohair@286 813 if (len > buf.length)
ohair@286 814 fatal("P-077", new Object[]{new Integer(buf.length)});
ohair@286 815
ohair@286 816 fillbuf();
ohair@286 817 return peek(next, chars);
ohair@286 818 }
ohair@286 819
ohair@286 820 start += len;
ohair@286 821 return true;
ohair@286 822 }
ohair@286 823
ohair@286 824
ohair@286 825 //
ohair@286 826 // Support for reporting the internal DTD subset, so <!DOCTYPE...>
ohair@286 827 // declarations can be recreated. This is collected as a single
ohair@286 828 // string; such subsets are normally small, and many applications
ohair@286 829 // don't even care about this.
ohair@286 830 //
ohair@286 831 public void startRemembering() {
ohair@286 832
ohair@286 833 if (startRemember != 0)
ohair@286 834 throw new InternalError();
ohair@286 835 startRemember = start;
ohair@286 836 }
ohair@286 837
ohair@286 838 public String rememberText() {
ohair@286 839
ohair@286 840 String retval;
ohair@286 841
ohair@286 842 // If the internal subset crossed a buffer boundary, we
ohair@286 843 // created a temporary buffer.
ohair@286 844 if (rememberedText != null) {
ohair@286 845 rememberedText.append(buf, startRemember,
ohair@286 846 start - startRemember);
ohair@286 847 retval = rememberedText.toString();
ohair@286 848 } else
ohair@286 849 retval = new String(buf, startRemember,
ohair@286 850 start - startRemember);
ohair@286 851
ohair@286 852 startRemember = 0;
ohair@286 853 rememberedText = null;
ohair@286 854 return retval;
ohair@286 855 }
ohair@286 856
ohair@286 857 private InputEntity getTopEntity() {
ohair@286 858
ohair@286 859 InputEntity current = this;
ohair@286 860
ohair@286 861 // don't report locations within internal entities!
ohair@286 862
ohair@286 863 while (current != null && current.input == null)
ohair@286 864 current = current.next;
ohair@286 865 return current == null ? this : current;
ohair@286 866 }
ohair@286 867
ohair@286 868 /**
ohair@286 869 * Returns the public ID of this input source, if known
ohair@286 870 */
ohair@286 871 public String getPublicId() {
ohair@286 872
ohair@286 873 InputEntity where = getTopEntity();
ohair@286 874 if (where == this)
ohair@286 875 return input.getPublicId();
ohair@286 876 return where.getPublicId();
ohair@286 877 }
ohair@286 878
ohair@286 879 /**
ohair@286 880 * Returns the system ID of this input source, if known
ohair@286 881 */
ohair@286 882 public String getSystemId() {
ohair@286 883
ohair@286 884 InputEntity where = getTopEntity();
ohair@286 885 if (where == this)
ohair@286 886 return input.getSystemId();
ohair@286 887 return where.getSystemId();
ohair@286 888 }
ohair@286 889
ohair@286 890 /**
ohair@286 891 * Returns the current line number in this input source
ohair@286 892 */
ohair@286 893 public int getLineNumber() {
ohair@286 894
ohair@286 895 InputEntity where = getTopEntity();
ohair@286 896 if (where == this)
ohair@286 897 return lineNumber;
ohair@286 898 return where.getLineNumber();
ohair@286 899 }
ohair@286 900
ohair@286 901 /**
ohair@286 902 * returns -1; maintaining column numbers hurts performance
ohair@286 903 */
ohair@286 904 public int getColumnNumber() {
ohair@286 905
ohair@286 906 return -1; // not maintained (speed)
ohair@286 907 }
ohair@286 908
ohair@286 909
ohair@286 910 //
ohair@286 911 // n.b. for non-EOF end-of-buffer cases, reader should return
ohair@286 912 // at least a handful of bytes so various lookaheads behave.
ohair@286 913 //
ohair@286 914 // two character pushback exists except at first; characters
ohair@286 915 // represented by surrogate pairs can't be pushed back (they'd
ohair@286 916 // only be in character data anyway).
ohair@286 917 //
ohair@286 918 // DTD exception thrown on char conversion problems; line number
ohair@286 919 // will be low, as a rule.
ohair@286 920 //
ohair@286 921 private void fillbuf() throws IOException, SAXException {
ohair@286 922
ohair@286 923 // don't touched fixed buffers, that'll usually
ohair@286 924 // change entity values (and isn't needed anyway)
ohair@286 925 // likewise, ignore closed streams
ohair@286 926 if (reader == null || isClosed)
ohair@286 927 return;
ohair@286 928
ohair@286 929 // if remembering DTD text, copy!
ohair@286 930 if (startRemember != 0) {
ohair@286 931 if (rememberedText == null)
ohair@286 932 rememberedText = new StringBuffer(buf.length);
ohair@286 933 rememberedText.append(buf, startRemember,
ohair@286 934 start - startRemember);
ohair@286 935 }
ohair@286 936
ohair@286 937 boolean extra = (finish > 0) && (start > 0);
ohair@286 938 int len;
ohair@286 939
ohair@286 940 if (extra) // extra pushback
ohair@286 941 start--;
ohair@286 942 len = finish - start;
ohair@286 943
ohair@286 944 System.arraycopy(buf, start, buf, 0, len);
ohair@286 945 start = 0;
ohair@286 946 finish = len;
ohair@286 947
ohair@286 948 try {
ohair@286 949 len = buf.length - len;
ohair@286 950 len = reader.read(buf, finish, len);
ohair@286 951 } catch (UnsupportedEncodingException e) {
ohair@286 952 fatal("P-075", new Object[]{e.getMessage()});
ohair@286 953 } catch (CharConversionException e) {
ohair@286 954 fatal("P-076", new Object[]{e.getMessage()});
ohair@286 955 }
ohair@286 956 if (len >= 0)
ohair@286 957 finish += len;
ohair@286 958 else
ohair@286 959 close();
ohair@286 960 if (extra) // extra pushback
ohair@286 961 start++;
ohair@286 962
ohair@286 963 if (startRemember != 0)
ohair@286 964 // assert extra == true
ohair@286 965 startRemember = 1;
ohair@286 966 }
ohair@286 967
ohair@286 968 public void close() {
ohair@286 969
ohair@286 970 try {
ohair@286 971 if (reader != null && !isClosed)
ohair@286 972 reader.close();
ohair@286 973 isClosed = true;
ohair@286 974 } catch (IOException e) {
ohair@286 975 /* NOTHING */
ohair@286 976 }
ohair@286 977 }
ohair@286 978
ohair@286 979
ohair@286 980 private void fatal(String messageId, Object params [])
ohair@286 981 throws SAXException {
ohair@286 982
ohair@286 983 SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null);
ohair@286 984
ohair@286 985 // not continuable ... e.g. WF errors
ohair@286 986 close();
ohair@286 987 errHandler.fatalError(x);
ohair@286 988 throw x;
ohair@286 989 }
ohair@286 990 }

mercurial