src/share/jaxws_classes/com/sun/xml/internal/dtdparser/InputEntity.java

Thu, 12 Oct 2017 19:44:07 +0800

author
aoqi
date
Thu, 12 Oct 2017 19:44:07 +0800
changeset 760
e530533619ec
parent 637
9c07ef4934dd
permissions
-rw-r--r--

merge

aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation. Oracle designates this
aoqi@0 8 * particular file as subject to the "Classpath" exception as provided
aoqi@0 9 * by Oracle in the LICENSE file that accompanied this code.
aoqi@0 10 *
aoqi@0 11 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 14 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 15 * accompanied this code).
aoqi@0 16 *
aoqi@0 17 * You should have received a copy of the GNU General Public License version
aoqi@0 18 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 20 *
aoqi@0 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 22 * or visit www.oracle.com if you need additional information or have any
aoqi@0 23 * questions.
aoqi@0 24 */
aoqi@0 25
aoqi@0 26 package com.sun.xml.internal.dtdparser;
aoqi@0 27
aoqi@0 28 import org.xml.sax.InputSource;
aoqi@0 29 import org.xml.sax.SAXException;
aoqi@0 30 import org.xml.sax.SAXParseException;
aoqi@0 31
aoqi@0 32 import java.io.CharConversionException;
aoqi@0 33 import java.io.IOException;
aoqi@0 34 import java.io.InputStream;
aoqi@0 35 import java.io.InputStreamReader;
aoqi@0 36 import java.io.Reader;
aoqi@0 37 import java.io.UnsupportedEncodingException;
aoqi@0 38 import java.net.URL;
aoqi@0 39 import java.util.Locale;
aoqi@0 40
aoqi@0 41 /**
aoqi@0 42 * This is how the parser talks to its input entities, of all kinds.
aoqi@0 43 * The entities are in a stack.
aoqi@0 44 * <p/>
aoqi@0 45 * <P> For internal entities, the character arrays are referenced here,
aoqi@0 46 * and read from as needed (they're read-only). External entities have
aoqi@0 47 * mutable buffers, that are read into as needed.
aoqi@0 48 * <p/>
aoqi@0 49 * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
aoqi@0 50 * whether it's in an external (parsed) entity or not. The XML 1.0 spec
aoqi@0 51 * is inconsistent in explaining EOL handling; this is the sensible way.
aoqi@0 52 *
aoqi@0 53 * @author David Brownell
aoqi@0 54 * @author Janet Koenig
aoqi@0 55 * @version 1.4 00/08/05
aoqi@0 56 */
aoqi@0 57 public class InputEntity {
aoqi@0 58 private int start, finish;
aoqi@0 59 private char buf [];
aoqi@0 60 private int lineNumber = 1;
aoqi@0 61 private boolean returnedFirstHalf = false;
aoqi@0 62 private boolean maybeInCRLF = false;
aoqi@0 63
aoqi@0 64 // name of entity (never main document or unnamed DTD PE)
aoqi@0 65 private String name;
aoqi@0 66
aoqi@0 67 private InputEntity next;
aoqi@0 68
aoqi@0 69 // for system and public IDs in diagnostics
aoqi@0 70 private InputSource input;
aoqi@0 71
aoqi@0 72 // this is a buffer; some buffers can be replenished.
aoqi@0 73 private Reader reader;
aoqi@0 74 private boolean isClosed;
aoqi@0 75
aoqi@0 76 private DTDEventListener errHandler;
aoqi@0 77 private Locale locale;
aoqi@0 78
aoqi@0 79 private StringBuffer rememberedText;
aoqi@0 80 private int startRemember;
aoqi@0 81
aoqi@0 82 // record if this is a PE, so endParsedEntity won't be called
aoqi@0 83 private boolean isPE;
aoqi@0 84
aoqi@0 85 // InputStreamReader throws an internal per-read exception, so
aoqi@0 86 // we minimize reads. We also add a byte to compensate for the
aoqi@0 87 // "ungetc" byte we keep, so that our downstream reads are as
aoqi@0 88 // nicely sized as we can make them.
aoqi@0 89 final private static int BUFSIZ = 8 * 1024 + 1;
aoqi@0 90
aoqi@0 91 final private static char newline [] = {'\n'};
aoqi@0 92
aoqi@0 93 public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
aoqi@0 94 InputEntity retval = new InputEntity();
aoqi@0 95 retval.errHandler = h;
aoqi@0 96 retval.locale = l;
aoqi@0 97 return retval;
aoqi@0 98 }
aoqi@0 99
aoqi@0 100 private InputEntity() {
aoqi@0 101 }
aoqi@0 102
aoqi@0 103 //
aoqi@0 104 // predicate: return true iff this is an internal entity reader,
aoqi@0 105 // and so may safely be "popped" as needed. external entities have
aoqi@0 106 // syntax to uphold; internal parameter entities have at most validity
aoqi@0 107 // constraints to monitor. also, only external entities get decent
aoqi@0 108 // location diagnostics.
aoqi@0 109 //
aoqi@0 110 public boolean isInternal() {
aoqi@0 111 return reader == null;
aoqi@0 112 }
aoqi@0 113
aoqi@0 114 //
aoqi@0 115 // predicate: return true iff this is the toplevel document
aoqi@0 116 //
aoqi@0 117 public boolean isDocument() {
aoqi@0 118 return next == null;
aoqi@0 119 }
aoqi@0 120
aoqi@0 121 //
aoqi@0 122 // predicate: return true iff this is a PE expansion (so that
aoqi@0 123 // LexicalEventListner.endParsedEntity won't be called)
aoqi@0 124 //
aoqi@0 125 public boolean isParameterEntity() {
aoqi@0 126 return isPE;
aoqi@0 127 }
aoqi@0 128
aoqi@0 129 //
aoqi@0 130 // return name of current entity
aoqi@0 131 //
aoqi@0 132 public String getName() {
aoqi@0 133 return name;
aoqi@0 134 }
aoqi@0 135
aoqi@0 136 //
aoqi@0 137 // use this for an external parsed entity
aoqi@0 138 //
aoqi@0 139 public void init(InputSource in, String name, InputEntity stack,
aoqi@0 140 boolean isPE)
aoqi@0 141 throws IOException, SAXException {
aoqi@0 142
aoqi@0 143 input = in;
aoqi@0 144 this.isPE = isPE;
aoqi@0 145 reader = in.getCharacterStream();
aoqi@0 146
aoqi@0 147 if (reader == null) {
aoqi@0 148 InputStream bytes = in.getByteStream();
aoqi@0 149
aoqi@0 150 if (bytes == null)
aoqi@0 151 reader = XmlReader.createReader(new URL(in.getSystemId())
aoqi@0 152 .openStream());
aoqi@0 153 else if (in.getEncoding() != null)
aoqi@0 154 reader = XmlReader.createReader(in.getByteStream(),
aoqi@0 155 in.getEncoding());
aoqi@0 156 else
aoqi@0 157 reader = XmlReader.createReader(in.getByteStream());
aoqi@0 158 }
aoqi@0 159 next = stack;
aoqi@0 160 buf = new char[BUFSIZ];
aoqi@0 161 this.name = name;
aoqi@0 162 checkRecursion(stack);
aoqi@0 163 }
aoqi@0 164
aoqi@0 165 //
aoqi@0 166 // use this for an internal parsed entity; buffer is readonly
aoqi@0 167 //
aoqi@0 168 public void init(char b [], String name, InputEntity stack, boolean isPE)
aoqi@0 169 throws SAXException {
aoqi@0 170
aoqi@0 171 next = stack;
aoqi@0 172 buf = b;
aoqi@0 173 finish = b.length;
aoqi@0 174 this.name = name;
aoqi@0 175 this.isPE = isPE;
aoqi@0 176 checkRecursion(stack);
aoqi@0 177 }
aoqi@0 178
aoqi@0 179 private void checkRecursion(InputEntity stack)
aoqi@0 180 throws SAXException {
aoqi@0 181
aoqi@0 182 if (stack == null)
aoqi@0 183 return;
aoqi@0 184 for (stack = stack.next; stack != null; stack = stack.next) {
aoqi@0 185 if (stack.name != null && stack.name.equals(name))
aoqi@0 186 fatal("P-069", new Object[]{name});
aoqi@0 187 }
aoqi@0 188 }
aoqi@0 189
aoqi@0 190 public InputEntity pop() throws IOException {
aoqi@0 191
aoqi@0 192 // caller has ensured there's nothing left to read
aoqi@0 193 close();
aoqi@0 194 return next;
aoqi@0 195 }
aoqi@0 196
aoqi@0 197 /**
aoqi@0 198 * returns true iff there's no more data to consume ...
aoqi@0 199 */
aoqi@0 200 public boolean isEOF() throws IOException, SAXException {
aoqi@0 201
aoqi@0 202 // called to ensure WF-ness of included entities and to pop
aoqi@0 203 // input entities appropriately ... EOF is not always legal.
aoqi@0 204 if (start >= finish) {
aoqi@0 205 fillbuf();
aoqi@0 206 return start >= finish;
aoqi@0 207 } else
aoqi@0 208 return false;
aoqi@0 209 }
aoqi@0 210
aoqi@0 211 /**
aoqi@0 212 * Returns the name of the encoding in use, else null; the name
aoqi@0 213 * returned is in as standard a form as we can get.
aoqi@0 214 */
aoqi@0 215 public String getEncoding() {
aoqi@0 216
aoqi@0 217 if (reader == null)
aoqi@0 218 return null;
aoqi@0 219 if (reader instanceof XmlReader)
aoqi@0 220 return ((XmlReader) reader).getEncoding();
aoqi@0 221
aoqi@0 222 // XXX prefer a java2std() call to normalize names...
aoqi@0 223
aoqi@0 224 if (reader instanceof InputStreamReader)
aoqi@0 225 return ((InputStreamReader) reader).getEncoding();
aoqi@0 226 return null;
aoqi@0 227 }
aoqi@0 228
aoqi@0 229
aoqi@0 230 /**
aoqi@0 231 * returns the next name char, or NUL ... faster than getc(),
aoqi@0 232 * and the common "name or nmtoken must be next" case won't
aoqi@0 233 * need ungetc().
aoqi@0 234 */
aoqi@0 235 public char getNameChar() throws IOException, SAXException {
aoqi@0 236
aoqi@0 237 if (finish <= start)
aoqi@0 238 fillbuf();
aoqi@0 239 if (finish > start) {
aoqi@0 240 char c = buf[start++];
aoqi@0 241 if (XmlChars.isNameChar(c))
aoqi@0 242 return c;
aoqi@0 243 start--;
aoqi@0 244 }
aoqi@0 245 return 0;
aoqi@0 246 }
aoqi@0 247
aoqi@0 248 /**
aoqi@0 249 * gets the next Java character -- might be part of an XML
aoqi@0 250 * text character represented by a surrogate pair, or be
aoqi@0 251 * the end of the entity.
aoqi@0 252 */
aoqi@0 253 public char getc() throws IOException, SAXException {
aoqi@0 254
aoqi@0 255 if (finish <= start)
aoqi@0 256 fillbuf();
aoqi@0 257 if (finish > start) {
aoqi@0 258 char c = buf[start++];
aoqi@0 259
aoqi@0 260 // [2] Char ::= #x0009 | #x000A | #x000D
aoqi@0 261 // | [#x0020-#xD7FF]
aoqi@0 262 // | [#xE000-#xFFFD]
aoqi@0 263 // plus surrogate _pairs_ representing [#x10000-#x10ffff]
aoqi@0 264 if (returnedFirstHalf) {
aoqi@0 265 if (c >= 0xdc00 && c <= 0xdfff) {
aoqi@0 266 returnedFirstHalf = false;
aoqi@0 267 return c;
aoqi@0 268 } else
aoqi@0 269 fatal("P-070", new Object[]{Integer.toHexString(c)});
aoqi@0 270 }
aoqi@0 271 if ((c >= 0x0020 && c <= 0xD7FF)
aoqi@0 272 || c == 0x0009
aoqi@0 273 // no surrogates!
aoqi@0 274 || (c >= 0xE000 && c <= 0xFFFD))
aoqi@0 275 return c;
aoqi@0 276
aoqi@0 277 //
aoqi@0 278 // CRLF and CR are both line ends; map both to LF, and
aoqi@0 279 // keep line count correct.
aoqi@0 280 //
aoqi@0 281 else if (c == '\r' && !isInternal()) {
aoqi@0 282 maybeInCRLF = true;
aoqi@0 283 c = getc();
aoqi@0 284 if (c != '\n')
aoqi@0 285 ungetc();
aoqi@0 286 maybeInCRLF = false;
aoqi@0 287
aoqi@0 288 lineNumber++;
aoqi@0 289 return '\n';
aoqi@0 290
aoqi@0 291 } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
aoqi@0 292 if (!isInternal() && !maybeInCRLF)
aoqi@0 293 lineNumber++;
aoqi@0 294 return c;
aoqi@0 295 }
aoqi@0 296
aoqi@0 297 // surrogates...
aoqi@0 298 if (c >= 0xd800 && c < 0xdc00) {
aoqi@0 299 returnedFirstHalf = true;
aoqi@0 300 return c;
aoqi@0 301 }
aoqi@0 302
aoqi@0 303 fatal("P-071", new Object[]{Integer.toHexString(c)});
aoqi@0 304 }
aoqi@0 305 throw new EndOfInputException();
aoqi@0 306 }
aoqi@0 307
aoqi@0 308
aoqi@0 309 /**
aoqi@0 310 * lookahead one character
aoqi@0 311 */
aoqi@0 312 public boolean peekc(char c) throws IOException, SAXException {
aoqi@0 313
aoqi@0 314 if (finish <= start)
aoqi@0 315 fillbuf();
aoqi@0 316 if (finish > start) {
aoqi@0 317 if (buf[start] == c) {
aoqi@0 318 start++;
aoqi@0 319 return true;
aoqi@0 320 } else
aoqi@0 321 return false;
aoqi@0 322 }
aoqi@0 323 return false;
aoqi@0 324 }
aoqi@0 325
aoqi@0 326
aoqi@0 327 /**
aoqi@0 328 * two character pushback is guaranteed
aoqi@0 329 */
aoqi@0 330 public void ungetc() {
aoqi@0 331
aoqi@0 332 if (start == 0)
aoqi@0 333 throw new InternalError("ungetc");
aoqi@0 334 start--;
aoqi@0 335
aoqi@0 336 if (buf[start] == '\n' || buf[start] == '\r') {
aoqi@0 337 if (!isInternal())
aoqi@0 338 lineNumber--;
aoqi@0 339 } else if (returnedFirstHalf)
aoqi@0 340 returnedFirstHalf = false;
aoqi@0 341 }
aoqi@0 342
aoqi@0 343
aoqi@0 344 /**
aoqi@0 345 * optional grammatical whitespace (discarded)
aoqi@0 346 */
aoqi@0 347 public boolean maybeWhitespace()
aoqi@0 348 throws IOException, SAXException {
aoqi@0 349
aoqi@0 350 char c;
aoqi@0 351 boolean isSpace = false;
aoqi@0 352 boolean sawCR = false;
aoqi@0 353
aoqi@0 354 // [3] S ::= #20 | #09 | #0D | #0A
aoqi@0 355 for (; ;) {
aoqi@0 356 if (finish <= start)
aoqi@0 357 fillbuf();
aoqi@0 358 if (finish <= start)
aoqi@0 359 return isSpace;
aoqi@0 360
aoqi@0 361 c = buf[start++];
aoqi@0 362 if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') {
aoqi@0 363 isSpace = true;
aoqi@0 364
aoqi@0 365 //
aoqi@0 366 // CR, LF are line endings ... CLRF is one, not two!
aoqi@0 367 //
aoqi@0 368 if ((c == '\n' || c == '\r') && !isInternal()) {
aoqi@0 369 if (!(c == '\n' && sawCR)) {
aoqi@0 370 lineNumber++;
aoqi@0 371 sawCR = false;
aoqi@0 372 }
aoqi@0 373 if (c == '\r')
aoqi@0 374 sawCR = true;
aoqi@0 375 }
aoqi@0 376 } else {
aoqi@0 377 start--;
aoqi@0 378 return isSpace;
aoqi@0 379 }
aoqi@0 380 }
aoqi@0 381 }
aoqi@0 382
aoqi@0 383
aoqi@0 384 /**
aoqi@0 385 * normal content; whitespace in markup may be handled
aoqi@0 386 * specially if the parser uses the content model.
aoqi@0 387 * <p/>
aoqi@0 388 * <P> content terminates with markup delimiter characters,
aoqi@0 389 * namely ampersand (&amp;amp;) and left angle bracket (&amp;lt;).
aoqi@0 390 * <p/>
aoqi@0 391 * <P> the document handler's characters() method is called
aoqi@0 392 * on all the content found
aoqi@0 393 */
aoqi@0 394 public boolean parsedContent(DTDEventListener docHandler
aoqi@0 395 /*ElementValidator validator*/)
aoqi@0 396 throws IOException, SAXException {
aoqi@0 397
aoqi@0 398 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
aoqi@0 399
aoqi@0 400 int first; // first char to return
aoqi@0 401 int last; // last char to return
aoqi@0 402 boolean sawContent; // sent any chars?
aoqi@0 403 char c;
aoqi@0 404
aoqi@0 405 // deliver right out of the buffer, until delimiter, EOF,
aoqi@0 406 // or error, refilling as we go
aoqi@0 407 for (first = last = start, sawContent = false; ; last++) {
aoqi@0 408
aoqi@0 409 // buffer empty?
aoqi@0 410 if (last >= finish) {
aoqi@0 411 if (last > first) {
aoqi@0 412 // validator.text ();
aoqi@0 413 docHandler.characters(buf, first, last - first);
aoqi@0 414 sawContent = true;
aoqi@0 415 start = last;
aoqi@0 416 }
aoqi@0 417 if (isEOF()) // calls fillbuf
aoqi@0 418 return sawContent;
aoqi@0 419 first = start;
aoqi@0 420 last = first - 1; // incremented in loop
aoqi@0 421 continue;
aoqi@0 422 }
aoqi@0 423
aoqi@0 424 c = buf[last];
aoqi@0 425
aoqi@0 426 //
aoqi@0 427 // pass most chars through ASAP; this inlines the code of
aoqi@0 428 // [2] !XmlChars.isChar(c) leaving only characters needing
aoqi@0 429 // special treatment ... line ends, surrogates, and:
aoqi@0 430 // 0x0026 == '&'
aoqi@0 431 // 0x003C == '<'
aoqi@0 432 // 0x005D == ']'
aoqi@0 433 // Comparisons ordered for speed on 'typical' text
aoqi@0 434 //
aoqi@0 435 if ((c > 0x005D && c <= 0xD7FF) // a-z and more
aoqi@0 436 || (c < 0x0026 && c >= 0x0020) // space & punct
aoqi@0 437 || (c > 0x003C && c < 0x005D) // A-Z & punct
aoqi@0 438 || (c > 0x0026 && c < 0x003C) // 0-9 & punct
aoqi@0 439 || c == 0x0009
aoqi@0 440 || (c >= 0xE000 && c <= 0xFFFD)
aoqi@0 441 )
aoqi@0 442 continue;
aoqi@0 443
aoqi@0 444 // terminate on markup delimiters
aoqi@0 445 if (c == '<' || c == '&')
aoqi@0 446 break;
aoqi@0 447
aoqi@0 448 // count lines
aoqi@0 449 if (c == '\n') {
aoqi@0 450 if (!isInternal())
aoqi@0 451 lineNumber++;
aoqi@0 452 continue;
aoqi@0 453 }
aoqi@0 454
aoqi@0 455 // External entities get CR, CRLF --> LF mapping
aoqi@0 456 // Internal ones got it already, and we can't repeat
aoqi@0 457 // else we break char ref handling!!
aoqi@0 458 if (c == '\r') {
aoqi@0 459 if (isInternal())
aoqi@0 460 continue;
aoqi@0 461
aoqi@0 462 docHandler.characters(buf, first, last - first);
aoqi@0 463 docHandler.characters(newline, 0, 1);
aoqi@0 464 sawContent = true;
aoqi@0 465 lineNumber++;
aoqi@0 466 if (finish > (last + 1)) {
aoqi@0 467 if (buf[last + 1] == '\n')
aoqi@0 468 last++;
aoqi@0 469 } else { // CR at end of buffer
aoqi@0 470 // XXX case not yet handled: CRLF here will look like two lines
aoqi@0 471 }
aoqi@0 472 first = start = last + 1;
aoqi@0 473 continue;
aoqi@0 474 }
aoqi@0 475
aoqi@0 476 // ']]>' is a WF error -- must fail if we see it
aoqi@0 477 if (c == ']') {
aoqi@0 478 switch (finish - last) {
aoqi@0 479 // for suspicious end-of-buffer cases, get more data
aoqi@0 480 // into the buffer to rule out this sequence.
aoqi@0 481 case 2:
aoqi@0 482 if (buf[last + 1] != ']')
aoqi@0 483 continue;
aoqi@0 484 // FALLTHROUGH
aoqi@0 485
aoqi@0 486 case 1:
aoqi@0 487 if (reader == null || isClosed)
aoqi@0 488 continue;
aoqi@0 489 if (last == first)
aoqi@0 490 throw new InternalError("fillbuf");
aoqi@0 491 last--;
aoqi@0 492 if (last > first) {
aoqi@0 493 // validator.text ();
aoqi@0 494 docHandler.characters(buf, first, last - first);
aoqi@0 495 sawContent = true;
aoqi@0 496 start = last;
aoqi@0 497 }
aoqi@0 498 fillbuf();
aoqi@0 499 first = last = start;
aoqi@0 500 continue;
aoqi@0 501
aoqi@0 502 // otherwise any "]]>" would be buffered, and we can
aoqi@0 503 // see right away if that's what we have
aoqi@0 504 default:
aoqi@0 505 if (buf[last + 1] == ']' && buf[last + 2] == '>')
aoqi@0 506 fatal("P-072", null);
aoqi@0 507 continue;
aoqi@0 508 }
aoqi@0 509 }
aoqi@0 510
aoqi@0 511 // correctly paired surrogates are OK
aoqi@0 512 if (c >= 0xd800 && c <= 0xdfff) {
aoqi@0 513 if ((last + 1) >= finish) {
aoqi@0 514 if (last > first) {
aoqi@0 515 // validator.text ();
aoqi@0 516 docHandler.characters(buf, first, last - first);
aoqi@0 517 sawContent = true;
aoqi@0 518 start = last + 1;
aoqi@0 519 }
aoqi@0 520 if (isEOF()) { // calls fillbuf
aoqi@0 521 fatal("P-081",
aoqi@0 522 new Object[]{Integer.toHexString(c)});
aoqi@0 523 }
aoqi@0 524 first = start;
aoqi@0 525 last = first;
aoqi@0 526 continue;
aoqi@0 527 }
aoqi@0 528 if (checkSurrogatePair(last))
aoqi@0 529 last++;
aoqi@0 530 else {
aoqi@0 531 last--;
aoqi@0 532 // also terminate on surrogate pair oddities
aoqi@0 533 break;
aoqi@0 534 }
aoqi@0 535 continue;
aoqi@0 536 }
aoqi@0 537
aoqi@0 538 fatal("P-071", new Object[]{Integer.toHexString(c)});
aoqi@0 539 }
aoqi@0 540 if (last == first)
aoqi@0 541 return sawContent;
aoqi@0 542 // validator.text ();
aoqi@0 543 docHandler.characters(buf, first, last - first);
aoqi@0 544 start = last;
aoqi@0 545 return true;
aoqi@0 546 }
aoqi@0 547
aoqi@0 548
aoqi@0 549 /**
aoqi@0 550 * CDATA -- character data, terminated by "]]>" and optionally
aoqi@0 551 * including unescaped markup delimiters (ampersand and left angle
aoqi@0 552 * bracket). This should otherwise be exactly like character data,
aoqi@0 553 * modulo differences in error report details.
aoqi@0 554 * <p/>
aoqi@0 555 * <P> The document handler's characters() or ignorableWhitespace()
aoqi@0 556 * methods are invoked on all the character data found
aoqi@0 557 *
aoqi@0 558 * @param docHandler gets callbacks for character data
aoqi@0 559 * @param ignorableWhitespace if true, whitespace characters will
aoqi@0 560 * be reported using docHandler.ignorableWhitespace(); implicitly,
aoqi@0 561 * non-whitespace characters will cause validation errors
aoqi@0 562 * @param whitespaceInvalidMessage if true, ignorable whitespace
aoqi@0 563 * causes a validity error report as well as a callback
aoqi@0 564 */
aoqi@0 565 public boolean unparsedContent(DTDEventListener docHandler,
aoqi@0 566 /*ElementValidator validator,*/
aoqi@0 567 boolean ignorableWhitespace,
aoqi@0 568 String whitespaceInvalidMessage)
aoqi@0 569 throws IOException, SAXException {
aoqi@0 570
aoqi@0 571 // [18] CDSect ::= CDStart CData CDEnd
aoqi@0 572 // [19] CDStart ::= '<![CDATA['
aoqi@0 573 // [20] CData ::= (Char* - (Char* ']]>' Char*))
aoqi@0 574 // [21] CDEnd ::= ']]>'
aoqi@0 575
aoqi@0 576 // caller peeked the leading '<' ...
aoqi@0 577 if (!peek("![CDATA[", null))
aoqi@0 578 return false;
aoqi@0 579 docHandler.startCDATA();
aoqi@0 580
aoqi@0 581 // only a literal ']]>' stops this ...
aoqi@0 582 int last;
aoqi@0 583
aoqi@0 584 for (; ;) { // until ']]>' seen
aoqi@0 585 boolean done = false;
aoqi@0 586 char c;
aoqi@0 587
aoqi@0 588 // don't report ignorable whitespace as "text" for
aoqi@0 589 // validation purposes.
aoqi@0 590 boolean white = ignorableWhitespace;
aoqi@0 591
aoqi@0 592 for (last = start; last < finish; last++) {
aoqi@0 593 c = buf[last];
aoqi@0 594
aoqi@0 595 //
aoqi@0 596 // Reject illegal characters.
aoqi@0 597 //
aoqi@0 598 if (!XmlChars.isChar(c)) {
aoqi@0 599 white = false;
aoqi@0 600 if (c >= 0xd800 && c <= 0xdfff) {
aoqi@0 601 if (checkSurrogatePair(last)) {
aoqi@0 602 last++;
aoqi@0 603 continue;
aoqi@0 604 } else {
aoqi@0 605 last--;
aoqi@0 606 break;
aoqi@0 607 }
aoqi@0 608 }
aoqi@0 609 fatal("P-071", new Object[]
aoqi@0 610 {Integer.toHexString(buf[last])});
aoqi@0 611 }
aoqi@0 612 if (c == '\n') {
aoqi@0 613 if (!isInternal())
aoqi@0 614 lineNumber++;
aoqi@0 615 continue;
aoqi@0 616 }
aoqi@0 617 if (c == '\r') {
aoqi@0 618 // As above, we can't repeat CR/CRLF --> LF mapping
aoqi@0 619 if (isInternal())
aoqi@0 620 continue;
aoqi@0 621
aoqi@0 622 if (white) {
aoqi@0 623 if (whitespaceInvalidMessage != null)
aoqi@0 624 errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
aoqi@0 625 whitespaceInvalidMessage), null));
aoqi@0 626 docHandler.ignorableWhitespace(buf, start,
aoqi@0 627 last - start);
aoqi@0 628 docHandler.ignorableWhitespace(newline, 0, 1);
aoqi@0 629 } else {
aoqi@0 630 // validator.text ();
aoqi@0 631 docHandler.characters(buf, start, last - start);
aoqi@0 632 docHandler.characters(newline, 0, 1);
aoqi@0 633 }
aoqi@0 634 lineNumber++;
aoqi@0 635 if (finish > (last + 1)) {
aoqi@0 636 if (buf[last + 1] == '\n')
aoqi@0 637 last++;
aoqi@0 638 } else { // CR at end of buffer
aoqi@0 639 // XXX case not yet handled ... as above
aoqi@0 640 }
aoqi@0 641 start = last + 1;
aoqi@0 642 continue;
aoqi@0 643 }
aoqi@0 644 if (c != ']') {
aoqi@0 645 if (c != ' ' && c != '\t')
aoqi@0 646 white = false;
aoqi@0 647 continue;
aoqi@0 648 }
aoqi@0 649 if ((last + 2) < finish) {
aoqi@0 650 if (buf[last + 1] == ']' && buf[last + 2] == '>') {
aoqi@0 651 done = true;
aoqi@0 652 break;
aoqi@0 653 }
aoqi@0 654 white = false;
aoqi@0 655 continue;
aoqi@0 656 } else {
aoqi@0 657 //last--;
aoqi@0 658 break;
aoqi@0 659 }
aoqi@0 660 }
aoqi@0 661 if (white) {
aoqi@0 662 if (whitespaceInvalidMessage != null)
aoqi@0 663 errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
aoqi@0 664 whitespaceInvalidMessage), null));
aoqi@0 665 docHandler.ignorableWhitespace(buf, start, last - start);
aoqi@0 666 } else {
aoqi@0 667 // validator.text ();
aoqi@0 668 docHandler.characters(buf, start, last - start);
aoqi@0 669 }
aoqi@0 670 if (done) {
aoqi@0 671 start = last + 3;
aoqi@0 672 break;
aoqi@0 673 }
aoqi@0 674 start = last;
aoqi@0 675 if (isEOF())
aoqi@0 676 fatal("P-073", null);
aoqi@0 677 }
aoqi@0 678 docHandler.endCDATA();
aoqi@0 679 return true;
aoqi@0 680 }
aoqi@0 681
aoqi@0 682 // return false to backstep at end of buffer)
aoqi@0 683 private boolean checkSurrogatePair(int offset)
aoqi@0 684 throws SAXException {
aoqi@0 685
aoqi@0 686 if ((offset + 1) >= finish)
aoqi@0 687 return false;
aoqi@0 688
aoqi@0 689 char c1 = buf[offset++];
aoqi@0 690 char c2 = buf[offset];
aoqi@0 691
aoqi@0 692 if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff))
aoqi@0 693 return true;
aoqi@0 694 fatal("P-074", new Object[]{
aoqi@0 695 Integer.toHexString(c1 & 0x0ffff),
aoqi@0 696 Integer.toHexString(c2 & 0x0ffff)
aoqi@0 697 });
aoqi@0 698 return false;
aoqi@0 699 }
aoqi@0 700
aoqi@0 701
aoqi@0 702 /**
aoqi@0 703 * whitespace in markup (flagged to app, discardable)
aoqi@0 704 * <p/>
aoqi@0 705 * <P> the document handler's ignorableWhitespace() method
aoqi@0 706 * is called on all the whitespace found
aoqi@0 707 */
aoqi@0 708 public boolean ignorableWhitespace(DTDEventListener handler)
aoqi@0 709 throws IOException, SAXException {
aoqi@0 710
aoqi@0 711 char c;
aoqi@0 712 boolean isSpace = false;
aoqi@0 713 int first;
aoqi@0 714
aoqi@0 715 // [3] S ::= #20 | #09 | #0D | #0A
aoqi@0 716 for (first = start; ;) {
aoqi@0 717 if (finish <= start) {
aoqi@0 718 if (isSpace)
aoqi@0 719 handler.ignorableWhitespace(buf, first, start - first);
aoqi@0 720 fillbuf();
aoqi@0 721 first = start;
aoqi@0 722 }
aoqi@0 723 if (finish <= start)
aoqi@0 724 return isSpace;
aoqi@0 725
aoqi@0 726 c = buf[start++];
aoqi@0 727 switch (c) {
aoqi@0 728 case '\n':
aoqi@0 729 if (!isInternal())
aoqi@0 730 lineNumber++;
aoqi@0 731 // XXX handles Macintosh line endings wrong
aoqi@0 732 // fallthrough
aoqi@0 733 case 0x09:
aoqi@0 734 case 0x20:
aoqi@0 735 isSpace = true;
aoqi@0 736 continue;
aoqi@0 737
aoqi@0 738 case '\r':
aoqi@0 739 isSpace = true;
aoqi@0 740 if (!isInternal())
aoqi@0 741 lineNumber++;
aoqi@0 742 handler.ignorableWhitespace(buf, first,
aoqi@0 743 (start - 1) - first);
aoqi@0 744 handler.ignorableWhitespace(newline, 0, 1);
aoqi@0 745 if (start < finish && buf[start] == '\n')
aoqi@0 746 ++start;
aoqi@0 747 first = start;
aoqi@0 748 continue;
aoqi@0 749
aoqi@0 750 default:
aoqi@0 751 ungetc();
aoqi@0 752 if (isSpace)
aoqi@0 753 handler.ignorableWhitespace(buf, first, start - first);
aoqi@0 754 return isSpace;
aoqi@0 755 }
aoqi@0 756 }
aoqi@0 757 }
aoqi@0 758
aoqi@0 759 /**
aoqi@0 760 * returns false iff 'next' string isn't as provided,
aoqi@0 761 * else skips that text and returns true.
aoqi@0 762 * <p/>
aoqi@0 763 * <P> NOTE: two alternative string representations are
aoqi@0 764 * both passed in, since one is faster.
aoqi@0 765 */
aoqi@0 766 public boolean peek(String next, char chars [])
aoqi@0 767 throws IOException, SAXException {
aoqi@0 768
aoqi@0 769 int len;
aoqi@0 770 int i;
aoqi@0 771
aoqi@0 772 if (chars != null)
aoqi@0 773 len = chars.length;
aoqi@0 774 else
aoqi@0 775 len = next.length();
aoqi@0 776
aoqi@0 777 // buffer should hold the whole thing ... give it a
aoqi@0 778 // chance for the end-of-buffer case and cope with EOF
aoqi@0 779 // by letting fillbuf compact and fill
aoqi@0 780 if (finish <= start || (finish - start) < len)
aoqi@0 781 fillbuf();
aoqi@0 782
aoqi@0 783 // can't peek past EOF
aoqi@0 784 if (finish <= start)
aoqi@0 785 return false;
aoqi@0 786
aoqi@0 787 // compare the string; consume iff it matches
aoqi@0 788 if (chars != null) {
aoqi@0 789 for (i = 0; i < len && (start + i) < finish; i++) {
aoqi@0 790 if (buf[start + i] != chars[i])
aoqi@0 791 return false;
aoqi@0 792 }
aoqi@0 793 } else {
aoqi@0 794 for (i = 0; i < len && (start + i) < finish; i++) {
aoqi@0 795 if (buf[start + i] != next.charAt(i))
aoqi@0 796 return false;
aoqi@0 797 }
aoqi@0 798 }
aoqi@0 799
aoqi@0 800 // if the first fillbuf didn't get enough data, give
aoqi@0 801 // fillbuf another chance to read
aoqi@0 802 if (i < len) {
aoqi@0 803 if (reader == null || isClosed)
aoqi@0 804 return false;
aoqi@0 805
aoqi@0 806 //
aoqi@0 807 // This diagnostic "knows" that the only way big strings would
aoqi@0 808 // fail to be peeked is where it's a symbol ... e.g. for an
aoqi@0 809 // </EndTag> construct. That knowledge could also be applied
aoqi@0 810 // to get rid of the symbol length constraint, since having
aoqi@0 811 // the wrong symbol is a fatal error anyway ...
aoqi@0 812 //
aoqi@0 813 if (len > buf.length)
aoqi@0 814 fatal("P-077", new Object[]{new Integer(buf.length)});
aoqi@0 815
aoqi@0 816 fillbuf();
aoqi@0 817 return peek(next, chars);
aoqi@0 818 }
aoqi@0 819
aoqi@0 820 start += len;
aoqi@0 821 return true;
aoqi@0 822 }
aoqi@0 823
aoqi@0 824
aoqi@0 825 //
aoqi@0 826 // Support for reporting the internal DTD subset, so <!DOCTYPE...>
aoqi@0 827 // declarations can be recreated. This is collected as a single
aoqi@0 828 // string; such subsets are normally small, and many applications
aoqi@0 829 // don't even care about this.
aoqi@0 830 //
aoqi@0 831 public void startRemembering() {
aoqi@0 832
aoqi@0 833 if (startRemember != 0)
aoqi@0 834 throw new InternalError();
aoqi@0 835 startRemember = start;
aoqi@0 836 }
aoqi@0 837
aoqi@0 838 public String rememberText() {
aoqi@0 839
aoqi@0 840 String retval;
aoqi@0 841
aoqi@0 842 // If the internal subset crossed a buffer boundary, we
aoqi@0 843 // created a temporary buffer.
aoqi@0 844 if (rememberedText != null) {
aoqi@0 845 rememberedText.append(buf, startRemember,
aoqi@0 846 start - startRemember);
aoqi@0 847 retval = rememberedText.toString();
aoqi@0 848 } else
aoqi@0 849 retval = new String(buf, startRemember,
aoqi@0 850 start - startRemember);
aoqi@0 851
aoqi@0 852 startRemember = 0;
aoqi@0 853 rememberedText = null;
aoqi@0 854 return retval;
aoqi@0 855 }
aoqi@0 856
aoqi@0 857 private InputEntity getTopEntity() {
aoqi@0 858
aoqi@0 859 InputEntity current = this;
aoqi@0 860
aoqi@0 861 // don't report locations within internal entities!
aoqi@0 862
aoqi@0 863 while (current != null && current.input == null)
aoqi@0 864 current = current.next;
aoqi@0 865 return current == null ? this : current;
aoqi@0 866 }
aoqi@0 867
aoqi@0 868 /**
aoqi@0 869 * Returns the public ID of this input source, if known
aoqi@0 870 */
aoqi@0 871 public String getPublicId() {
aoqi@0 872
aoqi@0 873 InputEntity where = getTopEntity();
aoqi@0 874 if (where == this)
aoqi@0 875 return input.getPublicId();
aoqi@0 876 return where.getPublicId();
aoqi@0 877 }
aoqi@0 878
aoqi@0 879 /**
aoqi@0 880 * Returns the system ID of this input source, if known
aoqi@0 881 */
aoqi@0 882 public String getSystemId() {
aoqi@0 883
aoqi@0 884 InputEntity where = getTopEntity();
aoqi@0 885 if (where == this)
aoqi@0 886 return input.getSystemId();
aoqi@0 887 return where.getSystemId();
aoqi@0 888 }
aoqi@0 889
aoqi@0 890 /**
aoqi@0 891 * Returns the current line number in this input source
aoqi@0 892 */
aoqi@0 893 public int getLineNumber() {
aoqi@0 894
aoqi@0 895 InputEntity where = getTopEntity();
aoqi@0 896 if (where == this)
aoqi@0 897 return lineNumber;
aoqi@0 898 return where.getLineNumber();
aoqi@0 899 }
aoqi@0 900
aoqi@0 901 /**
aoqi@0 902 * returns -1; maintaining column numbers hurts performance
aoqi@0 903 */
aoqi@0 904 public int getColumnNumber() {
aoqi@0 905
aoqi@0 906 return -1; // not maintained (speed)
aoqi@0 907 }
aoqi@0 908
aoqi@0 909
aoqi@0 910 //
aoqi@0 911 // n.b. for non-EOF end-of-buffer cases, reader should return
aoqi@0 912 // at least a handful of bytes so various lookaheads behave.
aoqi@0 913 //
aoqi@0 914 // two character pushback exists except at first; characters
aoqi@0 915 // represented by surrogate pairs can't be pushed back (they'd
aoqi@0 916 // only be in character data anyway).
aoqi@0 917 //
aoqi@0 918 // DTD exception thrown on char conversion problems; line number
aoqi@0 919 // will be low, as a rule.
aoqi@0 920 //
aoqi@0 921 private void fillbuf() throws IOException, SAXException {
aoqi@0 922
aoqi@0 923 // don't touched fixed buffers, that'll usually
aoqi@0 924 // change entity values (and isn't needed anyway)
aoqi@0 925 // likewise, ignore closed streams
aoqi@0 926 if (reader == null || isClosed)
aoqi@0 927 return;
aoqi@0 928
aoqi@0 929 // if remembering DTD text, copy!
aoqi@0 930 if (startRemember != 0) {
aoqi@0 931 if (rememberedText == null)
aoqi@0 932 rememberedText = new StringBuffer(buf.length);
aoqi@0 933 rememberedText.append(buf, startRemember,
aoqi@0 934 start - startRemember);
aoqi@0 935 }
aoqi@0 936
aoqi@0 937 boolean extra = (finish > 0) && (start > 0);
aoqi@0 938 int len;
aoqi@0 939
aoqi@0 940 if (extra) // extra pushback
aoqi@0 941 start--;
aoqi@0 942 len = finish - start;
aoqi@0 943
aoqi@0 944 System.arraycopy(buf, start, buf, 0, len);
aoqi@0 945 start = 0;
aoqi@0 946 finish = len;
aoqi@0 947
aoqi@0 948 try {
aoqi@0 949 len = buf.length - len;
aoqi@0 950 len = reader.read(buf, finish, len);
aoqi@0 951 } catch (UnsupportedEncodingException e) {
aoqi@0 952 fatal("P-075", new Object[]{e.getMessage()});
aoqi@0 953 } catch (CharConversionException e) {
aoqi@0 954 fatal("P-076", new Object[]{e.getMessage()});
aoqi@0 955 }
aoqi@0 956 if (len >= 0)
aoqi@0 957 finish += len;
aoqi@0 958 else
aoqi@0 959 close();
aoqi@0 960 if (extra) // extra pushback
aoqi@0 961 start++;
aoqi@0 962
aoqi@0 963 if (startRemember != 0)
aoqi@0 964 // assert extra == true
aoqi@0 965 startRemember = 1;
aoqi@0 966 }
aoqi@0 967
aoqi@0 968 public void close() {
aoqi@0 969
aoqi@0 970 try {
aoqi@0 971 if (reader != null && !isClosed)
aoqi@0 972 reader.close();
aoqi@0 973 isClosed = true;
aoqi@0 974 } catch (IOException e) {
aoqi@0 975 /* NOTHING */
aoqi@0 976 }
aoqi@0 977 }
aoqi@0 978
aoqi@0 979
aoqi@0 980 private void fatal(String messageId, Object params [])
aoqi@0 981 throws SAXException {
aoqi@0 982
aoqi@0 983 SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null);
aoqi@0 984
aoqi@0 985 // not continuable ... e.g. WF errors
aoqi@0 986 close();
aoqi@0 987 errHandler.fatalError(x);
aoqi@0 988 throw x;
aoqi@0 989 }
aoqi@0 990 }

mercurial