1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/jaxws_classes/com/sun/xml/internal/dtdparser/InputEntity.java Wed Apr 27 01:27:09 2016 +0800 1.3 @@ -0,0 +1,990 @@ 1.4 +/* 1.5 + * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. Oracle designates this 1.11 + * particular file as subject to the "Classpath" exception as provided 1.12 + * by Oracle in the LICENSE file that accompanied this code. 1.13 + * 1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.17 + * version 2 for more details (a copy is included in the LICENSE file that 1.18 + * accompanied this code). 1.19 + * 1.20 + * You should have received a copy of the GNU General Public License version 1.21 + * 2 along with this work; if not, write to the Free Software Foundation, 1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.23 + * 1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.25 + * or visit www.oracle.com if you need additional information or have any 1.26 + * questions. 1.27 + */ 1.28 + 1.29 +package com.sun.xml.internal.dtdparser; 1.30 + 1.31 +import org.xml.sax.InputSource; 1.32 +import org.xml.sax.SAXException; 1.33 +import org.xml.sax.SAXParseException; 1.34 + 1.35 +import java.io.CharConversionException; 1.36 +import java.io.IOException; 1.37 +import java.io.InputStream; 1.38 +import java.io.InputStreamReader; 1.39 +import java.io.Reader; 1.40 +import java.io.UnsupportedEncodingException; 1.41 +import java.net.URL; 1.42 +import java.util.Locale; 1.43 + 1.44 +/** 1.45 + * This is how the parser talks to its input entities, of all kinds. 1.46 + * The entities are in a stack. 1.47 + * <p/> 1.48 + * <P> For internal entities, the character arrays are referenced here, 1.49 + * and read from as needed (they're read-only). External entities have 1.50 + * mutable buffers, that are read into as needed. 1.51 + * <p/> 1.52 + * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for 1.53 + * whether it's in an external (parsed) entity or not. The XML 1.0 spec 1.54 + * is inconsistent in explaining EOL handling; this is the sensible way. 1.55 + * 1.56 + * @author David Brownell 1.57 + * @author Janet Koenig 1.58 + * @version 1.4 00/08/05 1.59 + */ 1.60 +public class InputEntity { 1.61 + private int start, finish; 1.62 + private char buf []; 1.63 + private int lineNumber = 1; 1.64 + private boolean returnedFirstHalf = false; 1.65 + private boolean maybeInCRLF = false; 1.66 + 1.67 + // name of entity (never main document or unnamed DTD PE) 1.68 + private String name; 1.69 + 1.70 + private InputEntity next; 1.71 + 1.72 + // for system and public IDs in diagnostics 1.73 + private InputSource input; 1.74 + 1.75 + // this is a buffer; some buffers can be replenished. 1.76 + private Reader reader; 1.77 + private boolean isClosed; 1.78 + 1.79 + private DTDEventListener errHandler; 1.80 + private Locale locale; 1.81 + 1.82 + private StringBuffer rememberedText; 1.83 + private int startRemember; 1.84 + 1.85 + // record if this is a PE, so endParsedEntity won't be called 1.86 + private boolean isPE; 1.87 + 1.88 + // InputStreamReader throws an internal per-read exception, so 1.89 + // we minimize reads. We also add a byte to compensate for the 1.90 + // "ungetc" byte we keep, so that our downstream reads are as 1.91 + // nicely sized as we can make them. 1.92 + final private static int BUFSIZ = 8 * 1024 + 1; 1.93 + 1.94 + final private static char newline [] = {'\n'}; 1.95 + 1.96 + public static InputEntity getInputEntity(DTDEventListener h, Locale l) { 1.97 + InputEntity retval = new InputEntity(); 1.98 + retval.errHandler = h; 1.99 + retval.locale = l; 1.100 + return retval; 1.101 + } 1.102 + 1.103 + private InputEntity() { 1.104 + } 1.105 + 1.106 + // 1.107 + // predicate: return true iff this is an internal entity reader, 1.108 + // and so may safely be "popped" as needed. external entities have 1.109 + // syntax to uphold; internal parameter entities have at most validity 1.110 + // constraints to monitor. also, only external entities get decent 1.111 + // location diagnostics. 1.112 + // 1.113 + public boolean isInternal() { 1.114 + return reader == null; 1.115 + } 1.116 + 1.117 + // 1.118 + // predicate: return true iff this is the toplevel document 1.119 + // 1.120 + public boolean isDocument() { 1.121 + return next == null; 1.122 + } 1.123 + 1.124 + // 1.125 + // predicate: return true iff this is a PE expansion (so that 1.126 + // LexicalEventListner.endParsedEntity won't be called) 1.127 + // 1.128 + public boolean isParameterEntity() { 1.129 + return isPE; 1.130 + } 1.131 + 1.132 + // 1.133 + // return name of current entity 1.134 + // 1.135 + public String getName() { 1.136 + return name; 1.137 + } 1.138 + 1.139 + // 1.140 + // use this for an external parsed entity 1.141 + // 1.142 + public void init(InputSource in, String name, InputEntity stack, 1.143 + boolean isPE) 1.144 + throws IOException, SAXException { 1.145 + 1.146 + input = in; 1.147 + this.isPE = isPE; 1.148 + reader = in.getCharacterStream(); 1.149 + 1.150 + if (reader == null) { 1.151 + InputStream bytes = in.getByteStream(); 1.152 + 1.153 + if (bytes == null) 1.154 + reader = XmlReader.createReader(new URL(in.getSystemId()) 1.155 + .openStream()); 1.156 + else if (in.getEncoding() != null) 1.157 + reader = XmlReader.createReader(in.getByteStream(), 1.158 + in.getEncoding()); 1.159 + else 1.160 + reader = XmlReader.createReader(in.getByteStream()); 1.161 + } 1.162 + next = stack; 1.163 + buf = new char[BUFSIZ]; 1.164 + this.name = name; 1.165 + checkRecursion(stack); 1.166 + } 1.167 + 1.168 + // 1.169 + // use this for an internal parsed entity; buffer is readonly 1.170 + // 1.171 + public void init(char b [], String name, InputEntity stack, boolean isPE) 1.172 + throws SAXException { 1.173 + 1.174 + next = stack; 1.175 + buf = b; 1.176 + finish = b.length; 1.177 + this.name = name; 1.178 + this.isPE = isPE; 1.179 + checkRecursion(stack); 1.180 + } 1.181 + 1.182 + private void checkRecursion(InputEntity stack) 1.183 + throws SAXException { 1.184 + 1.185 + if (stack == null) 1.186 + return; 1.187 + for (stack = stack.next; stack != null; stack = stack.next) { 1.188 + if (stack.name != null && stack.name.equals(name)) 1.189 + fatal("P-069", new Object[]{name}); 1.190 + } 1.191 + } 1.192 + 1.193 + public InputEntity pop() throws IOException { 1.194 + 1.195 + // caller has ensured there's nothing left to read 1.196 + close(); 1.197 + return next; 1.198 + } 1.199 + 1.200 + /** 1.201 + * returns true iff there's no more data to consume ... 1.202 + */ 1.203 + public boolean isEOF() throws IOException, SAXException { 1.204 + 1.205 + // called to ensure WF-ness of included entities and to pop 1.206 + // input entities appropriately ... EOF is not always legal. 1.207 + if (start >= finish) { 1.208 + fillbuf(); 1.209 + return start >= finish; 1.210 + } else 1.211 + return false; 1.212 + } 1.213 + 1.214 + /** 1.215 + * Returns the name of the encoding in use, else null; the name 1.216 + * returned is in as standard a form as we can get. 1.217 + */ 1.218 + public String getEncoding() { 1.219 + 1.220 + if (reader == null) 1.221 + return null; 1.222 + if (reader instanceof XmlReader) 1.223 + return ((XmlReader) reader).getEncoding(); 1.224 + 1.225 + // XXX prefer a java2std() call to normalize names... 1.226 + 1.227 + if (reader instanceof InputStreamReader) 1.228 + return ((InputStreamReader) reader).getEncoding(); 1.229 + return null; 1.230 + } 1.231 + 1.232 + 1.233 + /** 1.234 + * returns the next name char, or NUL ... faster than getc(), 1.235 + * and the common "name or nmtoken must be next" case won't 1.236 + * need ungetc(). 1.237 + */ 1.238 + public char getNameChar() throws IOException, SAXException { 1.239 + 1.240 + if (finish <= start) 1.241 + fillbuf(); 1.242 + if (finish > start) { 1.243 + char c = buf[start++]; 1.244 + if (XmlChars.isNameChar(c)) 1.245 + return c; 1.246 + start--; 1.247 + } 1.248 + return 0; 1.249 + } 1.250 + 1.251 + /** 1.252 + * gets the next Java character -- might be part of an XML 1.253 + * text character represented by a surrogate pair, or be 1.254 + * the end of the entity. 1.255 + */ 1.256 + public char getc() throws IOException, SAXException { 1.257 + 1.258 + if (finish <= start) 1.259 + fillbuf(); 1.260 + if (finish > start) { 1.261 + char c = buf[start++]; 1.262 + 1.263 + // [2] Char ::= #x0009 | #x000A | #x000D 1.264 + // | [#x0020-#xD7FF] 1.265 + // | [#xE000-#xFFFD] 1.266 + // plus surrogate _pairs_ representing [#x10000-#x10ffff] 1.267 + if (returnedFirstHalf) { 1.268 + if (c >= 0xdc00 && c <= 0xdfff) { 1.269 + returnedFirstHalf = false; 1.270 + return c; 1.271 + } else 1.272 + fatal("P-070", new Object[]{Integer.toHexString(c)}); 1.273 + } 1.274 + if ((c >= 0x0020 && c <= 0xD7FF) 1.275 + || c == 0x0009 1.276 + // no surrogates! 1.277 + || (c >= 0xE000 && c <= 0xFFFD)) 1.278 + return c; 1.279 + 1.280 + // 1.281 + // CRLF and CR are both line ends; map both to LF, and 1.282 + // keep line count correct. 1.283 + // 1.284 + else if (c == '\r' && !isInternal()) { 1.285 + maybeInCRLF = true; 1.286 + c = getc(); 1.287 + if (c != '\n') 1.288 + ungetc(); 1.289 + maybeInCRLF = false; 1.290 + 1.291 + lineNumber++; 1.292 + return '\n'; 1.293 + 1.294 + } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF 1.295 + if (!isInternal() && !maybeInCRLF) 1.296 + lineNumber++; 1.297 + return c; 1.298 + } 1.299 + 1.300 + // surrogates... 1.301 + if (c >= 0xd800 && c < 0xdc00) { 1.302 + returnedFirstHalf = true; 1.303 + return c; 1.304 + } 1.305 + 1.306 + fatal("P-071", new Object[]{Integer.toHexString(c)}); 1.307 + } 1.308 + throw new EndOfInputException(); 1.309 + } 1.310 + 1.311 + 1.312 + /** 1.313 + * lookahead one character 1.314 + */ 1.315 + public boolean peekc(char c) throws IOException, SAXException { 1.316 + 1.317 + if (finish <= start) 1.318 + fillbuf(); 1.319 + if (finish > start) { 1.320 + if (buf[start] == c) { 1.321 + start++; 1.322 + return true; 1.323 + } else 1.324 + return false; 1.325 + } 1.326 + return false; 1.327 + } 1.328 + 1.329 + 1.330 + /** 1.331 + * two character pushback is guaranteed 1.332 + */ 1.333 + public void ungetc() { 1.334 + 1.335 + if (start == 0) 1.336 + throw new InternalError("ungetc"); 1.337 + start--; 1.338 + 1.339 + if (buf[start] == '\n' || buf[start] == '\r') { 1.340 + if (!isInternal()) 1.341 + lineNumber--; 1.342 + } else if (returnedFirstHalf) 1.343 + returnedFirstHalf = false; 1.344 + } 1.345 + 1.346 + 1.347 + /** 1.348 + * optional grammatical whitespace (discarded) 1.349 + */ 1.350 + public boolean maybeWhitespace() 1.351 + throws IOException, SAXException { 1.352 + 1.353 + char c; 1.354 + boolean isSpace = false; 1.355 + boolean sawCR = false; 1.356 + 1.357 + // [3] S ::= #20 | #09 | #0D | #0A 1.358 + for (; ;) { 1.359 + if (finish <= start) 1.360 + fillbuf(); 1.361 + if (finish <= start) 1.362 + return isSpace; 1.363 + 1.364 + c = buf[start++]; 1.365 + if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') { 1.366 + isSpace = true; 1.367 + 1.368 + // 1.369 + // CR, LF are line endings ... CLRF is one, not two! 1.370 + // 1.371 + if ((c == '\n' || c == '\r') && !isInternal()) { 1.372 + if (!(c == '\n' && sawCR)) { 1.373 + lineNumber++; 1.374 + sawCR = false; 1.375 + } 1.376 + if (c == '\r') 1.377 + sawCR = true; 1.378 + } 1.379 + } else { 1.380 + start--; 1.381 + return isSpace; 1.382 + } 1.383 + } 1.384 + } 1.385 + 1.386 + 1.387 + /** 1.388 + * normal content; whitespace in markup may be handled 1.389 + * specially if the parser uses the content model. 1.390 + * <p/> 1.391 + * <P> content terminates with markup delimiter characters, 1.392 + * namely ampersand (&amp;) and left angle bracket (&lt;). 1.393 + * <p/> 1.394 + * <P> the document handler's characters() method is called 1.395 + * on all the content found 1.396 + */ 1.397 + public boolean parsedContent(DTDEventListener docHandler 1.398 + /*ElementValidator validator*/) 1.399 + throws IOException, SAXException { 1.400 + 1.401 + // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 1.402 + 1.403 + int first; // first char to return 1.404 + int last; // last char to return 1.405 + boolean sawContent; // sent any chars? 1.406 + char c; 1.407 + 1.408 + // deliver right out of the buffer, until delimiter, EOF, 1.409 + // or error, refilling as we go 1.410 + for (first = last = start, sawContent = false; ; last++) { 1.411 + 1.412 + // buffer empty? 1.413 + if (last >= finish) { 1.414 + if (last > first) { 1.415 +// validator.text (); 1.416 + docHandler.characters(buf, first, last - first); 1.417 + sawContent = true; 1.418 + start = last; 1.419 + } 1.420 + if (isEOF()) // calls fillbuf 1.421 + return sawContent; 1.422 + first = start; 1.423 + last = first - 1; // incremented in loop 1.424 + continue; 1.425 + } 1.426 + 1.427 + c = buf[last]; 1.428 + 1.429 + // 1.430 + // pass most chars through ASAP; this inlines the code of 1.431 + // [2] !XmlChars.isChar(c) leaving only characters needing 1.432 + // special treatment ... line ends, surrogates, and: 1.433 + // 0x0026 == '&' 1.434 + // 0x003C == '<' 1.435 + // 0x005D == ']' 1.436 + // Comparisons ordered for speed on 'typical' text 1.437 + // 1.438 + if ((c > 0x005D && c <= 0xD7FF) // a-z and more 1.439 + || (c < 0x0026 && c >= 0x0020) // space & punct 1.440 + || (c > 0x003C && c < 0x005D) // A-Z & punct 1.441 + || (c > 0x0026 && c < 0x003C) // 0-9 & punct 1.442 + || c == 0x0009 1.443 + || (c >= 0xE000 && c <= 0xFFFD) 1.444 + ) 1.445 + continue; 1.446 + 1.447 + // terminate on markup delimiters 1.448 + if (c == '<' || c == '&') 1.449 + break; 1.450 + 1.451 + // count lines 1.452 + if (c == '\n') { 1.453 + if (!isInternal()) 1.454 + lineNumber++; 1.455 + continue; 1.456 + } 1.457 + 1.458 + // External entities get CR, CRLF --> LF mapping 1.459 + // Internal ones got it already, and we can't repeat 1.460 + // else we break char ref handling!! 1.461 + if (c == '\r') { 1.462 + if (isInternal()) 1.463 + continue; 1.464 + 1.465 + docHandler.characters(buf, first, last - first); 1.466 + docHandler.characters(newline, 0, 1); 1.467 + sawContent = true; 1.468 + lineNumber++; 1.469 + if (finish > (last + 1)) { 1.470 + if (buf[last + 1] == '\n') 1.471 + last++; 1.472 + } else { // CR at end of buffer 1.473 +// XXX case not yet handled: CRLF here will look like two lines 1.474 + } 1.475 + first = start = last + 1; 1.476 + continue; 1.477 + } 1.478 + 1.479 + // ']]>' is a WF error -- must fail if we see it 1.480 + if (c == ']') { 1.481 + switch (finish - last) { 1.482 + // for suspicious end-of-buffer cases, get more data 1.483 + // into the buffer to rule out this sequence. 1.484 + case 2: 1.485 + if (buf[last + 1] != ']') 1.486 + continue; 1.487 + // FALLTHROUGH 1.488 + 1.489 + case 1: 1.490 + if (reader == null || isClosed) 1.491 + continue; 1.492 + if (last == first) 1.493 + throw new InternalError("fillbuf"); 1.494 + last--; 1.495 + if (last > first) { 1.496 +// validator.text (); 1.497 + docHandler.characters(buf, first, last - first); 1.498 + sawContent = true; 1.499 + start = last; 1.500 + } 1.501 + fillbuf(); 1.502 + first = last = start; 1.503 + continue; 1.504 + 1.505 + // otherwise any "]]>" would be buffered, and we can 1.506 + // see right away if that's what we have 1.507 + default: 1.508 + if (buf[last + 1] == ']' && buf[last + 2] == '>') 1.509 + fatal("P-072", null); 1.510 + continue; 1.511 + } 1.512 + } 1.513 + 1.514 + // correctly paired surrogates are OK 1.515 + if (c >= 0xd800 && c <= 0xdfff) { 1.516 + if ((last + 1) >= finish) { 1.517 + if (last > first) { 1.518 +// validator.text (); 1.519 + docHandler.characters(buf, first, last - first); 1.520 + sawContent = true; 1.521 + start = last + 1; 1.522 + } 1.523 + if (isEOF()) { // calls fillbuf 1.524 + fatal("P-081", 1.525 + new Object[]{Integer.toHexString(c)}); 1.526 + } 1.527 + first = start; 1.528 + last = first; 1.529 + continue; 1.530 + } 1.531 + if (checkSurrogatePair(last)) 1.532 + last++; 1.533 + else { 1.534 + last--; 1.535 + // also terminate on surrogate pair oddities 1.536 + break; 1.537 + } 1.538 + continue; 1.539 + } 1.540 + 1.541 + fatal("P-071", new Object[]{Integer.toHexString(c)}); 1.542 + } 1.543 + if (last == first) 1.544 + return sawContent; 1.545 +// validator.text (); 1.546 + docHandler.characters(buf, first, last - first); 1.547 + start = last; 1.548 + return true; 1.549 + } 1.550 + 1.551 + 1.552 + /** 1.553 + * CDATA -- character data, terminated by "]]>" and optionally 1.554 + * including unescaped markup delimiters (ampersand and left angle 1.555 + * bracket). This should otherwise be exactly like character data, 1.556 + * modulo differences in error report details. 1.557 + * <p/> 1.558 + * <P> The document handler's characters() or ignorableWhitespace() 1.559 + * methods are invoked on all the character data found 1.560 + * 1.561 + * @param docHandler gets callbacks for character data 1.562 + * @param ignorableWhitespace if true, whitespace characters will 1.563 + * be reported using docHandler.ignorableWhitespace(); implicitly, 1.564 + * non-whitespace characters will cause validation errors 1.565 + * @param whitespaceInvalidMessage if true, ignorable whitespace 1.566 + * causes a validity error report as well as a callback 1.567 + */ 1.568 + public boolean unparsedContent(DTDEventListener docHandler, 1.569 + /*ElementValidator validator,*/ 1.570 + boolean ignorableWhitespace, 1.571 + String whitespaceInvalidMessage) 1.572 + throws IOException, SAXException { 1.573 + 1.574 + // [18] CDSect ::= CDStart CData CDEnd 1.575 + // [19] CDStart ::= '<![CDATA[' 1.576 + // [20] CData ::= (Char* - (Char* ']]>' Char*)) 1.577 + // [21] CDEnd ::= ']]>' 1.578 + 1.579 + // caller peeked the leading '<' ... 1.580 + if (!peek("![CDATA[", null)) 1.581 + return false; 1.582 + docHandler.startCDATA(); 1.583 + 1.584 + // only a literal ']]>' stops this ... 1.585 + int last; 1.586 + 1.587 + for (; ;) { // until ']]>' seen 1.588 + boolean done = false; 1.589 + char c; 1.590 + 1.591 + // don't report ignorable whitespace as "text" for 1.592 + // validation purposes. 1.593 + boolean white = ignorableWhitespace; 1.594 + 1.595 + for (last = start; last < finish; last++) { 1.596 + c = buf[last]; 1.597 + 1.598 + // 1.599 + // Reject illegal characters. 1.600 + // 1.601 + if (!XmlChars.isChar(c)) { 1.602 + white = false; 1.603 + if (c >= 0xd800 && c <= 0xdfff) { 1.604 + if (checkSurrogatePair(last)) { 1.605 + last++; 1.606 + continue; 1.607 + } else { 1.608 + last--; 1.609 + break; 1.610 + } 1.611 + } 1.612 + fatal("P-071", new Object[] 1.613 + {Integer.toHexString(buf[last])}); 1.614 + } 1.615 + if (c == '\n') { 1.616 + if (!isInternal()) 1.617 + lineNumber++; 1.618 + continue; 1.619 + } 1.620 + if (c == '\r') { 1.621 + // As above, we can't repeat CR/CRLF --> LF mapping 1.622 + if (isInternal()) 1.623 + continue; 1.624 + 1.625 + if (white) { 1.626 + if (whitespaceInvalidMessage != null) 1.627 + errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, 1.628 + whitespaceInvalidMessage), null)); 1.629 + docHandler.ignorableWhitespace(buf, start, 1.630 + last - start); 1.631 + docHandler.ignorableWhitespace(newline, 0, 1); 1.632 + } else { 1.633 +// validator.text (); 1.634 + docHandler.characters(buf, start, last - start); 1.635 + docHandler.characters(newline, 0, 1); 1.636 + } 1.637 + lineNumber++; 1.638 + if (finish > (last + 1)) { 1.639 + if (buf[last + 1] == '\n') 1.640 + last++; 1.641 + } else { // CR at end of buffer 1.642 +// XXX case not yet handled ... as above 1.643 + } 1.644 + start = last + 1; 1.645 + continue; 1.646 + } 1.647 + if (c != ']') { 1.648 + if (c != ' ' && c != '\t') 1.649 + white = false; 1.650 + continue; 1.651 + } 1.652 + if ((last + 2) < finish) { 1.653 + if (buf[last + 1] == ']' && buf[last + 2] == '>') { 1.654 + done = true; 1.655 + break; 1.656 + } 1.657 + white = false; 1.658 + continue; 1.659 + } else { 1.660 + //last--; 1.661 + break; 1.662 + } 1.663 + } 1.664 + if (white) { 1.665 + if (whitespaceInvalidMessage != null) 1.666 + errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, 1.667 + whitespaceInvalidMessage), null)); 1.668 + docHandler.ignorableWhitespace(buf, start, last - start); 1.669 + } else { 1.670 +// validator.text (); 1.671 + docHandler.characters(buf, start, last - start); 1.672 + } 1.673 + if (done) { 1.674 + start = last + 3; 1.675 + break; 1.676 + } 1.677 + start = last; 1.678 + if (isEOF()) 1.679 + fatal("P-073", null); 1.680 + } 1.681 + docHandler.endCDATA(); 1.682 + return true; 1.683 + } 1.684 + 1.685 + // return false to backstep at end of buffer) 1.686 + private boolean checkSurrogatePair(int offset) 1.687 + throws SAXException { 1.688 + 1.689 + if ((offset + 1) >= finish) 1.690 + return false; 1.691 + 1.692 + char c1 = buf[offset++]; 1.693 + char c2 = buf[offset]; 1.694 + 1.695 + if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff)) 1.696 + return true; 1.697 + fatal("P-074", new Object[]{ 1.698 + Integer.toHexString(c1 & 0x0ffff), 1.699 + Integer.toHexString(c2 & 0x0ffff) 1.700 + }); 1.701 + return false; 1.702 + } 1.703 + 1.704 + 1.705 + /** 1.706 + * whitespace in markup (flagged to app, discardable) 1.707 + * <p/> 1.708 + * <P> the document handler's ignorableWhitespace() method 1.709 + * is called on all the whitespace found 1.710 + */ 1.711 + public boolean ignorableWhitespace(DTDEventListener handler) 1.712 + throws IOException, SAXException { 1.713 + 1.714 + char c; 1.715 + boolean isSpace = false; 1.716 + int first; 1.717 + 1.718 + // [3] S ::= #20 | #09 | #0D | #0A 1.719 + for (first = start; ;) { 1.720 + if (finish <= start) { 1.721 + if (isSpace) 1.722 + handler.ignorableWhitespace(buf, first, start - first); 1.723 + fillbuf(); 1.724 + first = start; 1.725 + } 1.726 + if (finish <= start) 1.727 + return isSpace; 1.728 + 1.729 + c = buf[start++]; 1.730 + switch (c) { 1.731 + case '\n': 1.732 + if (!isInternal()) 1.733 + lineNumber++; 1.734 +// XXX handles Macintosh line endings wrong 1.735 + // fallthrough 1.736 + case 0x09: 1.737 + case 0x20: 1.738 + isSpace = true; 1.739 + continue; 1.740 + 1.741 + case '\r': 1.742 + isSpace = true; 1.743 + if (!isInternal()) 1.744 + lineNumber++; 1.745 + handler.ignorableWhitespace(buf, first, 1.746 + (start - 1) - first); 1.747 + handler.ignorableWhitespace(newline, 0, 1); 1.748 + if (start < finish && buf[start] == '\n') 1.749 + ++start; 1.750 + first = start; 1.751 + continue; 1.752 + 1.753 + default: 1.754 + ungetc(); 1.755 + if (isSpace) 1.756 + handler.ignorableWhitespace(buf, first, start - first); 1.757 + return isSpace; 1.758 + } 1.759 + } 1.760 + } 1.761 + 1.762 + /** 1.763 + * returns false iff 'next' string isn't as provided, 1.764 + * else skips that text and returns true. 1.765 + * <p/> 1.766 + * <P> NOTE: two alternative string representations are 1.767 + * both passed in, since one is faster. 1.768 + */ 1.769 + public boolean peek(String next, char chars []) 1.770 + throws IOException, SAXException { 1.771 + 1.772 + int len; 1.773 + int i; 1.774 + 1.775 + if (chars != null) 1.776 + len = chars.length; 1.777 + else 1.778 + len = next.length(); 1.779 + 1.780 + // buffer should hold the whole thing ... give it a 1.781 + // chance for the end-of-buffer case and cope with EOF 1.782 + // by letting fillbuf compact and fill 1.783 + if (finish <= start || (finish - start) < len) 1.784 + fillbuf(); 1.785 + 1.786 + // can't peek past EOF 1.787 + if (finish <= start) 1.788 + return false; 1.789 + 1.790 + // compare the string; consume iff it matches 1.791 + if (chars != null) { 1.792 + for (i = 0; i < len && (start + i) < finish; i++) { 1.793 + if (buf[start + i] != chars[i]) 1.794 + return false; 1.795 + } 1.796 + } else { 1.797 + for (i = 0; i < len && (start + i) < finish; i++) { 1.798 + if (buf[start + i] != next.charAt(i)) 1.799 + return false; 1.800 + } 1.801 + } 1.802 + 1.803 + // if the first fillbuf didn't get enough data, give 1.804 + // fillbuf another chance to read 1.805 + if (i < len) { 1.806 + if (reader == null || isClosed) 1.807 + return false; 1.808 + 1.809 + // 1.810 + // This diagnostic "knows" that the only way big strings would 1.811 + // fail to be peeked is where it's a symbol ... e.g. for an 1.812 + // </EndTag> construct. That knowledge could also be applied 1.813 + // to get rid of the symbol length constraint, since having 1.814 + // the wrong symbol is a fatal error anyway ... 1.815 + // 1.816 + if (len > buf.length) 1.817 + fatal("P-077", new Object[]{new Integer(buf.length)}); 1.818 + 1.819 + fillbuf(); 1.820 + return peek(next, chars); 1.821 + } 1.822 + 1.823 + start += len; 1.824 + return true; 1.825 + } 1.826 + 1.827 + 1.828 + // 1.829 + // Support for reporting the internal DTD subset, so <!DOCTYPE...> 1.830 + // declarations can be recreated. This is collected as a single 1.831 + // string; such subsets are normally small, and many applications 1.832 + // don't even care about this. 1.833 + // 1.834 + public void startRemembering() { 1.835 + 1.836 + if (startRemember != 0) 1.837 + throw new InternalError(); 1.838 + startRemember = start; 1.839 + } 1.840 + 1.841 + public String rememberText() { 1.842 + 1.843 + String retval; 1.844 + 1.845 + // If the internal subset crossed a buffer boundary, we 1.846 + // created a temporary buffer. 1.847 + if (rememberedText != null) { 1.848 + rememberedText.append(buf, startRemember, 1.849 + start - startRemember); 1.850 + retval = rememberedText.toString(); 1.851 + } else 1.852 + retval = new String(buf, startRemember, 1.853 + start - startRemember); 1.854 + 1.855 + startRemember = 0; 1.856 + rememberedText = null; 1.857 + return retval; 1.858 + } 1.859 + 1.860 + private InputEntity getTopEntity() { 1.861 + 1.862 + InputEntity current = this; 1.863 + 1.864 + // don't report locations within internal entities! 1.865 + 1.866 + while (current != null && current.input == null) 1.867 + current = current.next; 1.868 + return current == null ? this : current; 1.869 + } 1.870 + 1.871 + /** 1.872 + * Returns the public ID of this input source, if known 1.873 + */ 1.874 + public String getPublicId() { 1.875 + 1.876 + InputEntity where = getTopEntity(); 1.877 + if (where == this) 1.878 + return input.getPublicId(); 1.879 + return where.getPublicId(); 1.880 + } 1.881 + 1.882 + /** 1.883 + * Returns the system ID of this input source, if known 1.884 + */ 1.885 + public String getSystemId() { 1.886 + 1.887 + InputEntity where = getTopEntity(); 1.888 + if (where == this) 1.889 + return input.getSystemId(); 1.890 + return where.getSystemId(); 1.891 + } 1.892 + 1.893 + /** 1.894 + * Returns the current line number in this input source 1.895 + */ 1.896 + public int getLineNumber() { 1.897 + 1.898 + InputEntity where = getTopEntity(); 1.899 + if (where == this) 1.900 + return lineNumber; 1.901 + return where.getLineNumber(); 1.902 + } 1.903 + 1.904 + /** 1.905 + * returns -1; maintaining column numbers hurts performance 1.906 + */ 1.907 + public int getColumnNumber() { 1.908 + 1.909 + return -1; // not maintained (speed) 1.910 + } 1.911 + 1.912 + 1.913 + // 1.914 + // n.b. for non-EOF end-of-buffer cases, reader should return 1.915 + // at least a handful of bytes so various lookaheads behave. 1.916 + // 1.917 + // two character pushback exists except at first; characters 1.918 + // represented by surrogate pairs can't be pushed back (they'd 1.919 + // only be in character data anyway). 1.920 + // 1.921 + // DTD exception thrown on char conversion problems; line number 1.922 + // will be low, as a rule. 1.923 + // 1.924 + private void fillbuf() throws IOException, SAXException { 1.925 + 1.926 + // don't touched fixed buffers, that'll usually 1.927 + // change entity values (and isn't needed anyway) 1.928 + // likewise, ignore closed streams 1.929 + if (reader == null || isClosed) 1.930 + return; 1.931 + 1.932 + // if remembering DTD text, copy! 1.933 + if (startRemember != 0) { 1.934 + if (rememberedText == null) 1.935 + rememberedText = new StringBuffer(buf.length); 1.936 + rememberedText.append(buf, startRemember, 1.937 + start - startRemember); 1.938 + } 1.939 + 1.940 + boolean extra = (finish > 0) && (start > 0); 1.941 + int len; 1.942 + 1.943 + if (extra) // extra pushback 1.944 + start--; 1.945 + len = finish - start; 1.946 + 1.947 + System.arraycopy(buf, start, buf, 0, len); 1.948 + start = 0; 1.949 + finish = len; 1.950 + 1.951 + try { 1.952 + len = buf.length - len; 1.953 + len = reader.read(buf, finish, len); 1.954 + } catch (UnsupportedEncodingException e) { 1.955 + fatal("P-075", new Object[]{e.getMessage()}); 1.956 + } catch (CharConversionException e) { 1.957 + fatal("P-076", new Object[]{e.getMessage()}); 1.958 + } 1.959 + if (len >= 0) 1.960 + finish += len; 1.961 + else 1.962 + close(); 1.963 + if (extra) // extra pushback 1.964 + start++; 1.965 + 1.966 + if (startRemember != 0) 1.967 + // assert extra == true 1.968 + startRemember = 1; 1.969 + } 1.970 + 1.971 + public void close() { 1.972 + 1.973 + try { 1.974 + if (reader != null && !isClosed) 1.975 + reader.close(); 1.976 + isClosed = true; 1.977 + } catch (IOException e) { 1.978 + /* NOTHING */ 1.979 + } 1.980 + } 1.981 + 1.982 + 1.983 + private void fatal(String messageId, Object params []) 1.984 + throws SAXException { 1.985 + 1.986 + SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null); 1.987 + 1.988 + // not continuable ... e.g. WF errors 1.989 + close(); 1.990 + errHandler.fatalError(x); 1.991 + throw x; 1.992 + } 1.993 +}