Wed, 27 Apr 2016 01:27:09 +0800
Initial load
http://hg.openjdk.java.net/jdk8u/jdk8u/jaxws/
changeset: 657:d47a47f961ee
tag: jdk8u25-b17
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. Oracle designates this |
aoqi@0 | 8 | * particular file as subject to the "Classpath" exception as provided |
aoqi@0 | 9 | * by Oracle in the LICENSE file that accompanied this code. |
aoqi@0 | 10 | * |
aoqi@0 | 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 14 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 15 | * accompanied this code). |
aoqi@0 | 16 | * |
aoqi@0 | 17 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 20 | * |
aoqi@0 | 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 22 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 23 | * questions. |
aoqi@0 | 24 | */ |
aoqi@0 | 25 | |
aoqi@0 | 26 | package com.sun.xml.internal.dtdparser; |
aoqi@0 | 27 | |
aoqi@0 | 28 | import org.xml.sax.InputSource; |
aoqi@0 | 29 | import org.xml.sax.SAXException; |
aoqi@0 | 30 | import org.xml.sax.SAXParseException; |
aoqi@0 | 31 | |
aoqi@0 | 32 | import java.io.CharConversionException; |
aoqi@0 | 33 | import java.io.IOException; |
aoqi@0 | 34 | import java.io.InputStream; |
aoqi@0 | 35 | import java.io.InputStreamReader; |
aoqi@0 | 36 | import java.io.Reader; |
aoqi@0 | 37 | import java.io.UnsupportedEncodingException; |
aoqi@0 | 38 | import java.net.URL; |
aoqi@0 | 39 | import java.util.Locale; |
aoqi@0 | 40 | |
/**
 * This is how the parser talks to its input entities, of all kinds.
 * The entities are in a stack.
 *
 * <p> For internal entities, the character arrays are referenced here,
 * and read from as needed (they're read-only).  External entities have
 * mutable buffers, that are read into as needed.
 *
 * <p> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
 * whether it's in an external (parsed) entity or not.  The XML 1.0 spec
 * is inconsistent in explaining EOL handling; this is the sensible way.
 *
 * @author David Brownell
 * @author Janet Koenig
 * @version 1.4 00/08/05
 */
aoqi@0 | 57 | public class InputEntity { |
aoqi@0 | 58 | private int start, finish; |
aoqi@0 | 59 | private char buf []; |
aoqi@0 | 60 | private int lineNumber = 1; |
aoqi@0 | 61 | private boolean returnedFirstHalf = false; |
aoqi@0 | 62 | private boolean maybeInCRLF = false; |
aoqi@0 | 63 | |
aoqi@0 | 64 | // name of entity (never main document or unnamed DTD PE) |
aoqi@0 | 65 | private String name; |
aoqi@0 | 66 | |
aoqi@0 | 67 | private InputEntity next; |
aoqi@0 | 68 | |
aoqi@0 | 69 | // for system and public IDs in diagnostics |
aoqi@0 | 70 | private InputSource input; |
aoqi@0 | 71 | |
aoqi@0 | 72 | // this is a buffer; some buffers can be replenished. |
aoqi@0 | 73 | private Reader reader; |
aoqi@0 | 74 | private boolean isClosed; |
aoqi@0 | 75 | |
aoqi@0 | 76 | private DTDEventListener errHandler; |
aoqi@0 | 77 | private Locale locale; |
aoqi@0 | 78 | |
aoqi@0 | 79 | private StringBuffer rememberedText; |
aoqi@0 | 80 | private int startRemember; |
aoqi@0 | 81 | |
aoqi@0 | 82 | // record if this is a PE, so endParsedEntity won't be called |
aoqi@0 | 83 | private boolean isPE; |
aoqi@0 | 84 | |
aoqi@0 | 85 | // InputStreamReader throws an internal per-read exception, so |
aoqi@0 | 86 | // we minimize reads. We also add a byte to compensate for the |
aoqi@0 | 87 | // "ungetc" byte we keep, so that our downstream reads are as |
aoqi@0 | 88 | // nicely sized as we can make them. |
aoqi@0 | 89 | final private static int BUFSIZ = 8 * 1024 + 1; |
aoqi@0 | 90 | |
aoqi@0 | 91 | final private static char newline [] = {'\n'}; |
aoqi@0 | 92 | |
aoqi@0 | 93 | public static InputEntity getInputEntity(DTDEventListener h, Locale l) { |
aoqi@0 | 94 | InputEntity retval = new InputEntity(); |
aoqi@0 | 95 | retval.errHandler = h; |
aoqi@0 | 96 | retval.locale = l; |
aoqi@0 | 97 | return retval; |
aoqi@0 | 98 | } |
aoqi@0 | 99 | |
/** Instances are obtained through {@link #getInputEntity}. */
private InputEntity() {
    super();
}
aoqi@0 | 102 | |
aoqi@0 | 103 | // |
aoqi@0 | 104 | // predicate: return true iff this is an internal entity reader, |
aoqi@0 | 105 | // and so may safely be "popped" as needed. external entities have |
aoqi@0 | 106 | // syntax to uphold; internal parameter entities have at most validity |
aoqi@0 | 107 | // constraints to monitor. also, only external entities get decent |
aoqi@0 | 108 | // location diagnostics. |
aoqi@0 | 109 | // |
aoqi@0 | 110 | public boolean isInternal() { |
aoqi@0 | 111 | return reader == null; |
aoqi@0 | 112 | } |
aoqi@0 | 113 | |
aoqi@0 | 114 | // |
aoqi@0 | 115 | // predicate: return true iff this is the toplevel document |
aoqi@0 | 116 | // |
aoqi@0 | 117 | public boolean isDocument() { |
aoqi@0 | 118 | return next == null; |
aoqi@0 | 119 | } |
aoqi@0 | 120 | |
aoqi@0 | 121 | // |
aoqi@0 | 122 | // predicate: return true iff this is a PE expansion (so that |
aoqi@0 | 123 | // LexicalEventListner.endParsedEntity won't be called) |
aoqi@0 | 124 | // |
aoqi@0 | 125 | public boolean isParameterEntity() { |
aoqi@0 | 126 | return isPE; |
aoqi@0 | 127 | } |
aoqi@0 | 128 | |
aoqi@0 | 129 | // |
aoqi@0 | 130 | // return name of current entity |
aoqi@0 | 131 | // |
aoqi@0 | 132 | public String getName() { |
aoqi@0 | 133 | return name; |
aoqi@0 | 134 | } |
aoqi@0 | 135 | |
aoqi@0 | 136 | // |
aoqi@0 | 137 | // use this for an external parsed entity |
aoqi@0 | 138 | // |
aoqi@0 | 139 | public void init(InputSource in, String name, InputEntity stack, |
aoqi@0 | 140 | boolean isPE) |
aoqi@0 | 141 | throws IOException, SAXException { |
aoqi@0 | 142 | |
aoqi@0 | 143 | input = in; |
aoqi@0 | 144 | this.isPE = isPE; |
aoqi@0 | 145 | reader = in.getCharacterStream(); |
aoqi@0 | 146 | |
aoqi@0 | 147 | if (reader == null) { |
aoqi@0 | 148 | InputStream bytes = in.getByteStream(); |
aoqi@0 | 149 | |
aoqi@0 | 150 | if (bytes == null) |
aoqi@0 | 151 | reader = XmlReader.createReader(new URL(in.getSystemId()) |
aoqi@0 | 152 | .openStream()); |
aoqi@0 | 153 | else if (in.getEncoding() != null) |
aoqi@0 | 154 | reader = XmlReader.createReader(in.getByteStream(), |
aoqi@0 | 155 | in.getEncoding()); |
aoqi@0 | 156 | else |
aoqi@0 | 157 | reader = XmlReader.createReader(in.getByteStream()); |
aoqi@0 | 158 | } |
aoqi@0 | 159 | next = stack; |
aoqi@0 | 160 | buf = new char[BUFSIZ]; |
aoqi@0 | 161 | this.name = name; |
aoqi@0 | 162 | checkRecursion(stack); |
aoqi@0 | 163 | } |
aoqi@0 | 164 | |
aoqi@0 | 165 | // |
aoqi@0 | 166 | // use this for an internal parsed entity; buffer is readonly |
aoqi@0 | 167 | // |
aoqi@0 | 168 | public void init(char b [], String name, InputEntity stack, boolean isPE) |
aoqi@0 | 169 | throws SAXException { |
aoqi@0 | 170 | |
aoqi@0 | 171 | next = stack; |
aoqi@0 | 172 | buf = b; |
aoqi@0 | 173 | finish = b.length; |
aoqi@0 | 174 | this.name = name; |
aoqi@0 | 175 | this.isPE = isPE; |
aoqi@0 | 176 | checkRecursion(stack); |
aoqi@0 | 177 | } |
aoqi@0 | 178 | |
aoqi@0 | 179 | private void checkRecursion(InputEntity stack) |
aoqi@0 | 180 | throws SAXException { |
aoqi@0 | 181 | |
aoqi@0 | 182 | if (stack == null) |
aoqi@0 | 183 | return; |
aoqi@0 | 184 | for (stack = stack.next; stack != null; stack = stack.next) { |
aoqi@0 | 185 | if (stack.name != null && stack.name.equals(name)) |
aoqi@0 | 186 | fatal("P-069", new Object[]{name}); |
aoqi@0 | 187 | } |
aoqi@0 | 188 | } |
aoqi@0 | 189 | |
aoqi@0 | 190 | public InputEntity pop() throws IOException { |
aoqi@0 | 191 | |
aoqi@0 | 192 | // caller has ensured there's nothing left to read |
aoqi@0 | 193 | close(); |
aoqi@0 | 194 | return next; |
aoqi@0 | 195 | } |
aoqi@0 | 196 | |
aoqi@0 | 197 | /** |
aoqi@0 | 198 | * returns true iff there's no more data to consume ... |
aoqi@0 | 199 | */ |
aoqi@0 | 200 | public boolean isEOF() throws IOException, SAXException { |
aoqi@0 | 201 | |
aoqi@0 | 202 | // called to ensure WF-ness of included entities and to pop |
aoqi@0 | 203 | // input entities appropriately ... EOF is not always legal. |
aoqi@0 | 204 | if (start >= finish) { |
aoqi@0 | 205 | fillbuf(); |
aoqi@0 | 206 | return start >= finish; |
aoqi@0 | 207 | } else |
aoqi@0 | 208 | return false; |
aoqi@0 | 209 | } |
aoqi@0 | 210 | |
aoqi@0 | 211 | /** |
aoqi@0 | 212 | * Returns the name of the encoding in use, else null; the name |
aoqi@0 | 213 | * returned is in as standard a form as we can get. |
aoqi@0 | 214 | */ |
aoqi@0 | 215 | public String getEncoding() { |
aoqi@0 | 216 | |
aoqi@0 | 217 | if (reader == null) |
aoqi@0 | 218 | return null; |
aoqi@0 | 219 | if (reader instanceof XmlReader) |
aoqi@0 | 220 | return ((XmlReader) reader).getEncoding(); |
aoqi@0 | 221 | |
aoqi@0 | 222 | // XXX prefer a java2std() call to normalize names... |
aoqi@0 | 223 | |
aoqi@0 | 224 | if (reader instanceof InputStreamReader) |
aoqi@0 | 225 | return ((InputStreamReader) reader).getEncoding(); |
aoqi@0 | 226 | return null; |
aoqi@0 | 227 | } |
aoqi@0 | 228 | |
aoqi@0 | 229 | |
aoqi@0 | 230 | /** |
aoqi@0 | 231 | * returns the next name char, or NUL ... faster than getc(), |
aoqi@0 | 232 | * and the common "name or nmtoken must be next" case won't |
aoqi@0 | 233 | * need ungetc(). |
aoqi@0 | 234 | */ |
aoqi@0 | 235 | public char getNameChar() throws IOException, SAXException { |
aoqi@0 | 236 | |
aoqi@0 | 237 | if (finish <= start) |
aoqi@0 | 238 | fillbuf(); |
aoqi@0 | 239 | if (finish > start) { |
aoqi@0 | 240 | char c = buf[start++]; |
aoqi@0 | 241 | if (XmlChars.isNameChar(c)) |
aoqi@0 | 242 | return c; |
aoqi@0 | 243 | start--; |
aoqi@0 | 244 | } |
aoqi@0 | 245 | return 0; |
aoqi@0 | 246 | } |
aoqi@0 | 247 | |
aoqi@0 | 248 | /** |
aoqi@0 | 249 | * gets the next Java character -- might be part of an XML |
aoqi@0 | 250 | * text character represented by a surrogate pair, or be |
aoqi@0 | 251 | * the end of the entity. |
aoqi@0 | 252 | */ |
aoqi@0 | 253 | public char getc() throws IOException, SAXException { |
aoqi@0 | 254 | |
aoqi@0 | 255 | if (finish <= start) |
aoqi@0 | 256 | fillbuf(); |
aoqi@0 | 257 | if (finish > start) { |
aoqi@0 | 258 | char c = buf[start++]; |
aoqi@0 | 259 | |
aoqi@0 | 260 | // [2] Char ::= #x0009 | #x000A | #x000D |
aoqi@0 | 261 | // | [#x0020-#xD7FF] |
aoqi@0 | 262 | // | [#xE000-#xFFFD] |
aoqi@0 | 263 | // plus surrogate _pairs_ representing [#x10000-#x10ffff] |
aoqi@0 | 264 | if (returnedFirstHalf) { |
aoqi@0 | 265 | if (c >= 0xdc00 && c <= 0xdfff) { |
aoqi@0 | 266 | returnedFirstHalf = false; |
aoqi@0 | 267 | return c; |
aoqi@0 | 268 | } else |
aoqi@0 | 269 | fatal("P-070", new Object[]{Integer.toHexString(c)}); |
aoqi@0 | 270 | } |
aoqi@0 | 271 | if ((c >= 0x0020 && c <= 0xD7FF) |
aoqi@0 | 272 | || c == 0x0009 |
aoqi@0 | 273 | // no surrogates! |
aoqi@0 | 274 | || (c >= 0xE000 && c <= 0xFFFD)) |
aoqi@0 | 275 | return c; |
aoqi@0 | 276 | |
aoqi@0 | 277 | // |
aoqi@0 | 278 | // CRLF and CR are both line ends; map both to LF, and |
aoqi@0 | 279 | // keep line count correct. |
aoqi@0 | 280 | // |
aoqi@0 | 281 | else if (c == '\r' && !isInternal()) { |
aoqi@0 | 282 | maybeInCRLF = true; |
aoqi@0 | 283 | c = getc(); |
aoqi@0 | 284 | if (c != '\n') |
aoqi@0 | 285 | ungetc(); |
aoqi@0 | 286 | maybeInCRLF = false; |
aoqi@0 | 287 | |
aoqi@0 | 288 | lineNumber++; |
aoqi@0 | 289 | return '\n'; |
aoqi@0 | 290 | |
aoqi@0 | 291 | } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF |
aoqi@0 | 292 | if (!isInternal() && !maybeInCRLF) |
aoqi@0 | 293 | lineNumber++; |
aoqi@0 | 294 | return c; |
aoqi@0 | 295 | } |
aoqi@0 | 296 | |
aoqi@0 | 297 | // surrogates... |
aoqi@0 | 298 | if (c >= 0xd800 && c < 0xdc00) { |
aoqi@0 | 299 | returnedFirstHalf = true; |
aoqi@0 | 300 | return c; |
aoqi@0 | 301 | } |
aoqi@0 | 302 | |
aoqi@0 | 303 | fatal("P-071", new Object[]{Integer.toHexString(c)}); |
aoqi@0 | 304 | } |
aoqi@0 | 305 | throw new EndOfInputException(); |
aoqi@0 | 306 | } |
aoqi@0 | 307 | |
aoqi@0 | 308 | |
aoqi@0 | 309 | /** |
aoqi@0 | 310 | * lookahead one character |
aoqi@0 | 311 | */ |
aoqi@0 | 312 | public boolean peekc(char c) throws IOException, SAXException { |
aoqi@0 | 313 | |
aoqi@0 | 314 | if (finish <= start) |
aoqi@0 | 315 | fillbuf(); |
aoqi@0 | 316 | if (finish > start) { |
aoqi@0 | 317 | if (buf[start] == c) { |
aoqi@0 | 318 | start++; |
aoqi@0 | 319 | return true; |
aoqi@0 | 320 | } else |
aoqi@0 | 321 | return false; |
aoqi@0 | 322 | } |
aoqi@0 | 323 | return false; |
aoqi@0 | 324 | } |
aoqi@0 | 325 | |
aoqi@0 | 326 | |
aoqi@0 | 327 | /** |
aoqi@0 | 328 | * two character pushback is guaranteed |
aoqi@0 | 329 | */ |
aoqi@0 | 330 | public void ungetc() { |
aoqi@0 | 331 | |
aoqi@0 | 332 | if (start == 0) |
aoqi@0 | 333 | throw new InternalError("ungetc"); |
aoqi@0 | 334 | start--; |
aoqi@0 | 335 | |
aoqi@0 | 336 | if (buf[start] == '\n' || buf[start] == '\r') { |
aoqi@0 | 337 | if (!isInternal()) |
aoqi@0 | 338 | lineNumber--; |
aoqi@0 | 339 | } else if (returnedFirstHalf) |
aoqi@0 | 340 | returnedFirstHalf = false; |
aoqi@0 | 341 | } |
aoqi@0 | 342 | |
aoqi@0 | 343 | |
aoqi@0 | 344 | /** |
aoqi@0 | 345 | * optional grammatical whitespace (discarded) |
aoqi@0 | 346 | */ |
aoqi@0 | 347 | public boolean maybeWhitespace() |
aoqi@0 | 348 | throws IOException, SAXException { |
aoqi@0 | 349 | |
aoqi@0 | 350 | char c; |
aoqi@0 | 351 | boolean isSpace = false; |
aoqi@0 | 352 | boolean sawCR = false; |
aoqi@0 | 353 | |
aoqi@0 | 354 | // [3] S ::= #20 | #09 | #0D | #0A |
aoqi@0 | 355 | for (; ;) { |
aoqi@0 | 356 | if (finish <= start) |
aoqi@0 | 357 | fillbuf(); |
aoqi@0 | 358 | if (finish <= start) |
aoqi@0 | 359 | return isSpace; |
aoqi@0 | 360 | |
aoqi@0 | 361 | c = buf[start++]; |
aoqi@0 | 362 | if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') { |
aoqi@0 | 363 | isSpace = true; |
aoqi@0 | 364 | |
aoqi@0 | 365 | // |
aoqi@0 | 366 | // CR, LF are line endings ... CLRF is one, not two! |
aoqi@0 | 367 | // |
aoqi@0 | 368 | if ((c == '\n' || c == '\r') && !isInternal()) { |
aoqi@0 | 369 | if (!(c == '\n' && sawCR)) { |
aoqi@0 | 370 | lineNumber++; |
aoqi@0 | 371 | sawCR = false; |
aoqi@0 | 372 | } |
aoqi@0 | 373 | if (c == '\r') |
aoqi@0 | 374 | sawCR = true; |
aoqi@0 | 375 | } |
aoqi@0 | 376 | } else { |
aoqi@0 | 377 | start--; |
aoqi@0 | 378 | return isSpace; |
aoqi@0 | 379 | } |
aoqi@0 | 380 | } |
aoqi@0 | 381 | } |
aoqi@0 | 382 | |
aoqi@0 | 383 | |
/**
 * Normal content; whitespace in markup may be handled
 * specially if the parser uses the content model.
 *
 * <P> Content terminates with markup delimiter characters,
 * namely ampersand (&amp;) and left angle bracket (&lt;).
 *
 * <P> The document handler's characters() method is called
 * on all the content found.  Text is delivered straight out of the
 * internal buffer, in as many calls as buffer refills and line-end
 * translation require.
 *
 * @param docHandler receives the character data
 * @return true iff any characters were delivered to the handler
 * @throws SAXException on an illegal character, an unpaired surrogate,
 *         or a literal "]]&gt;" in the content (reported through
 *         {@code fatal})
 */
public boolean parsedContent(DTDEventListener docHandler
                             /*ElementValidator validator*/)
        throws IOException, SAXException {

    // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

    int first;        // first char to return
    int last;        // last char to return
    boolean sawContent;    // sent any chars?
    char c;

    // deliver right out of the buffer, until delimiter, EOF,
    // or error, refilling as we go
    for (first = last = start, sawContent = false; ; last++) {

        // buffer empty?
        if (last >= finish) {
            // flush what has been scanned so far before refilling
            if (last > first) {
                // validator.text ();
                docHandler.characters(buf, first, last - first);
                sawContent = true;
                start = last;
            }
            if (isEOF())    // calls fillbuf
                return sawContent;
            first = start;
            last = first - 1;    // incremented in loop
            continue;
        }

        c = buf[last];

        //
        // pass most chars through ASAP; this inlines the code of
        // [2] !XmlChars.isChar(c) leaving only characters needing
        // special treatment ... line ends, surrogates, and:
        //    0x0026 == '&'
        //    0x003C == '<'
        //    0x005D == ']'
        // Comparisons ordered for speed on 'typical' text
        //
        if ((c > 0x005D && c <= 0xD7FF)    // a-z and more
                || (c < 0x0026 && c >= 0x0020)    // space & punct
                || (c > 0x003C && c < 0x005D)    // A-Z & punct
                || (c > 0x0026 && c < 0x003C)    // 0-9 & punct
                || c == 0x0009
                || (c >= 0xE000 && c <= 0xFFFD)
        )
            continue;

        // terminate on markup delimiters
        if (c == '<' || c == '&')
            break;

        // count lines
        if (c == '\n') {
            if (!isInternal())
                lineNumber++;
            continue;
        }

        // External entities get CR, CRLF --> LF mapping
        // Internal ones got it already, and we can't repeat
        // else we break char ref handling!!
        if (c == '\r') {
            if (isInternal())
                continue;

            // deliver the text before the CR, then a single LF in
            // place of the CR (or CRLF)
            docHandler.characters(buf, first, last - first);
            docHandler.characters(newline, 0, 1);
            sawContent = true;
            lineNumber++;
            if (finish > (last + 1)) {
                if (buf[last + 1] == '\n')
                    last++;
            } else {    // CR at end of buffer
                // XXX case not yet handled:  CRLF here will look like two lines
            }
            first = start = last + 1;
            continue;
        }

        // ']]>' is a WF error -- must fail if we see it
        if (c == ']') {
            // finish - last is the number of buffered chars from ']' on
            switch (finish - last) {
                // for suspicious end-of-buffer cases, get more data
                // into the buffer to rule out this sequence.
                case 2:
                    if (buf[last + 1] != ']')
                        continue;
                    // FALLTHROUGH

                case 1:
                    // nothing more can arrive: the sequence cannot complete
                    if (reader == null || isClosed)
                        continue;
                    if (last == first)
                        throw new InternalError("fillbuf");
                    // hold back one char before the ']' as well, flush
                    // the rest, then refill and rescan from there
                    last--;
                    if (last > first) {
                        // validator.text ();
                        docHandler.characters(buf, first, last - first);
                        sawContent = true;
                        start = last;
                    }
                    fillbuf();
                    first = last = start;
                    continue;

                    // otherwise any "]]>" would be buffered, and we can
                    // see right away if that's what we have
                default:
                    if (buf[last + 1] == ']' && buf[last + 2] == '>')
                        fatal("P-072", null);
                    continue;
            }
        }

        // correctly paired surrogates are OK
        if (c >= 0xd800 && c <= 0xdfff) {
            // partner not buffered yet: flush, refill, and rescan
            if ((last + 1) >= finish) {
                if (last > first) {
                    // validator.text ();
                    docHandler.characters(buf, first, last - first);
                    sawContent = true;
                    start = last + 1;
                }
                if (isEOF()) {    // calls fillbuf
                    fatal("P-081",
                            new Object[]{Integer.toHexString(c)});
                }
                first = start;
                last = first;
                continue;
            }
            if (checkSurrogatePair(last))
                last++;
            else {
                // NOTE(review): checkSurrogatePair() returns false only
                // when the pair runs past the buffer end, which the test
                // above has already excluded -- presumably unreachable
                // as long as fatal() always throws; confirm.
                last--;
                // also terminate on surrogate pair oddities
                break;
            }
            continue;
        }

        fatal("P-071", new Object[]{Integer.toHexString(c)});
    }
    // stopped at a delimiter: deliver whatever precedes it
    if (last == first)
        return sawContent;
    // validator.text ();
    docHandler.characters(buf, first, last - first);
    start = last;
    return true;
}
aoqi@0 | 547 | |
aoqi@0 | 548 | |
/**
 * CDATA -- character data, terminated by "]]>" and optionally
 * including unescaped markup delimiters (ampersand and left angle
 * bracket).  This should otherwise be exactly like character data,
 * modulo differences in error report details.
 *
 * <P> The document handler's characters() or ignorableWhitespace()
 * methods are invoked on all the character data found, bracketed by
 * startCDATA() and endCDATA().
 *
 * @param docHandler               gets callbacks for character data
 * @param ignorableWhitespace      if true, whitespace characters will
 *                                 be reported using docHandler.ignorableWhitespace(); implicitly,
 *                                 non-whitespace characters will cause validation errors
 * @param whitespaceInvalidMessage if non-null, the message key that is
 *                                 looked up and reported through the error handler each time
 *                                 ignorable whitespace is reported, in addition to the callback
 * @return false if the input does not continue with "![CDATA[" (nothing
 *         is consumed in that case); true once the closing "]]>" has
 *         been consumed
 * @throws SAXException on an illegal character, or if input ends before
 *         "]]>" is seen (reported through {@code fatal})
 */
public boolean unparsedContent(DTDEventListener docHandler,
                               /*ElementValidator validator,*/
                               boolean ignorableWhitespace,
                               String whitespaceInvalidMessage)
        throws IOException, SAXException {

    // [18] CDSect ::= CDStart CData CDEnd
    // [19] CDStart ::= '<![CDATA['
    // [20] CData ::= (Char* - (Char* ']]>' Char*))
    // [21] CDEnd ::= ']]>'

    // caller peeked the leading '<' ...
    if (!peek("![CDATA[", null))
        return false;
    docHandler.startCDATA();

    // only a literal ']]>' stops this ...
    int last;

    for (; ;) {        // until ']]>' seen
        boolean done = false;
        char c;

        // don't report ignorable whitespace as "text" for
        // validation purposes.
        boolean white = ignorableWhitespace;

        // scan the buffered characters; buf[start .. last) is the
        // pending, not yet reported run
        for (last = start; last < finish; last++) {
            c = buf[last];

            //
            // Reject illegal characters.
            //
            if (!XmlChars.isChar(c)) {
                white = false;
                if (c >= 0xd800 && c <= 0xdfff) {
                    if (checkSurrogatePair(last)) {
                        last++;
                        continue;
                    } else {
                        // pair runs past the buffered data
                        last--;
                        break;
                    }
                }
                fatal("P-071", new Object[]
                {Integer.toHexString(buf[last])});
            }
            if (c == '\n') {
                if (!isInternal())
                    lineNumber++;
                continue;
            }
            if (c == '\r') {
                // As above, we can't repeat CR/CRLF --> LF mapping
                if (isInternal())
                    continue;

                // report the run before the CR, then one LF for the
                // CR (or CRLF)
                if (white) {
                    if (whitespaceInvalidMessage != null)
                        errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
                                whitespaceInvalidMessage), null));
                    docHandler.ignorableWhitespace(buf, start,
                            last - start);
                    docHandler.ignorableWhitespace(newline, 0, 1);
                } else {
                    // validator.text ();
                    docHandler.characters(buf, start, last - start);
                    docHandler.characters(newline, 0, 1);
                }
                lineNumber++;
                if (finish > (last + 1)) {
                    if (buf[last + 1] == '\n')
                        last++;
                } else {    // CR at end of buffer
                    // XXX case not yet handled ... as above
                }
                start = last + 1;
                continue;
            }
            if (c != ']') {
                if (c != ' ' && c != '\t')
                    white = false;
                continue;
            }
            // c is ']': need two more chars to test for "]]>"
            if ((last + 2) < finish) {
                if (buf[last + 1] == ']' && buf[last + 2] == '>') {
                    done = true;
                    break;
                }
                white = false;
                continue;
            } else {
                // not enough lookahead buffered; stop scanning here
                //last--;
                break;
            }
        }
        // report the pending run buf[start .. last)
        if (white) {
            if (whitespaceInvalidMessage != null)
                errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
                        whitespaceInvalidMessage), null));
            docHandler.ignorableWhitespace(buf, start, last - start);
        } else {
            // validator.text ();
            docHandler.characters(buf, start, last - start);
        }
        if (done) {
            start = last + 3;   // skip the "]]>" itself
            break;
        }
        start = last;
        // NOTE(review): when the scan stopped early for lack of
        // lookahead (']' or a high surrogate near the buffer end),
        // start < finish here, so isEOF() returns false without calling
        // fillbuf() and the next pass resumes at the same position.
        // This looks like it cannot make progress -- confirm, and
        // consider forcing a refill in that case.
        if (isEOF())
            fatal("P-073", null);
    }
    docHandler.endCDATA();
    return true;
}
aoqi@0 | 681 | |
aoqi@0 | 682 | // return false to backstep at end of buffer) |
aoqi@0 | 683 | private boolean checkSurrogatePair(int offset) |
aoqi@0 | 684 | throws SAXException { |
aoqi@0 | 685 | |
aoqi@0 | 686 | if ((offset + 1) >= finish) |
aoqi@0 | 687 | return false; |
aoqi@0 | 688 | |
aoqi@0 | 689 | char c1 = buf[offset++]; |
aoqi@0 | 690 | char c2 = buf[offset]; |
aoqi@0 | 691 | |
aoqi@0 | 692 | if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff)) |
aoqi@0 | 693 | return true; |
aoqi@0 | 694 | fatal("P-074", new Object[]{ |
aoqi@0 | 695 | Integer.toHexString(c1 & 0x0ffff), |
aoqi@0 | 696 | Integer.toHexString(c2 & 0x0ffff) |
aoqi@0 | 697 | }); |
aoqi@0 | 698 | return false; |
aoqi@0 | 699 | } |
aoqi@0 | 700 | |
aoqi@0 | 701 | |
/**
 * Whitespace in markup (flagged to app, discardable).
 *
 * <P> The document handler's ignorableWhitespace() method
 * is called on all the whitespace found.  A CR, or a CR directly
 * followed by LF within the same buffer, is reported as a single LF.
 *
 * @param handler receives the whitespace
 * @return true iff at least one whitespace character was consumed
 */
public boolean ignorableWhitespace(DTDEventListener handler)
        throws IOException, SAXException {

    char c;
    boolean isSpace = false;
    int first;          // start of the run not yet reported

    // [3] S ::= #20 | #09 | #0D | #0A
    for (first = start; ;) {
        if (finish <= start) {
            // report the pending run before the buffer is refilled
            if (isSpace)
                handler.ignorableWhitespace(buf, first, start - first);
            fillbuf();
            first = start;
        }
        if (finish <= start)
            return isSpace;

        c = buf[start++];
        switch (c) {
            case '\n':
                if (!isInternal())
                    lineNumber++;
// XXX handles Macintosh line endings wrong
                // fallthrough
            case 0x09:
            case 0x20:
                isSpace = true;
                continue;

            case '\r':
                isSpace = true;
                if (!isInternal())
                    lineNumber++;
                // report the run before the CR, then one LF in its place
                handler.ignorableWhitespace(buf, first,
                        (start - 1) - first);
                handler.ignorableWhitespace(newline, 0, 1);
                // swallow the LF of a CRLF; only checked when the LF
                // is already in the buffer
                if (start < finish && buf[start] == '\n')
                    ++start;
                first = start;
                continue;

            default:
                // not whitespace: push it back and report the run
                ungetc();
                if (isSpace)
                    handler.ignorableWhitespace(buf, first, start - first);
                return isSpace;
        }
    }
}
aoqi@0 | 758 | |
aoqi@0 | 759 | /** |
aoqi@0 | 760 | * returns false iff 'next' string isn't as provided, |
aoqi@0 | 761 | * else skips that text and returns true. |
aoqi@0 | 762 | * <p/> |
aoqi@0 | 763 | * <P> NOTE: two alternative string representations are |
aoqi@0 | 764 | * both passed in, since one is faster. |
aoqi@0 | 765 | */ |
aoqi@0 | 766 | public boolean peek(String next, char chars []) |
aoqi@0 | 767 | throws IOException, SAXException { |
aoqi@0 | 768 | |
aoqi@0 | 769 | int len; |
aoqi@0 | 770 | int i; |
aoqi@0 | 771 | |
aoqi@0 | 772 | if (chars != null) |
aoqi@0 | 773 | len = chars.length; |
aoqi@0 | 774 | else |
aoqi@0 | 775 | len = next.length(); |
aoqi@0 | 776 | |
aoqi@0 | 777 | // buffer should hold the whole thing ... give it a |
aoqi@0 | 778 | // chance for the end-of-buffer case and cope with EOF |
aoqi@0 | 779 | // by letting fillbuf compact and fill |
aoqi@0 | 780 | if (finish <= start || (finish - start) < len) |
aoqi@0 | 781 | fillbuf(); |
aoqi@0 | 782 | |
aoqi@0 | 783 | // can't peek past EOF |
aoqi@0 | 784 | if (finish <= start) |
aoqi@0 | 785 | return false; |
aoqi@0 | 786 | |
aoqi@0 | 787 | // compare the string; consume iff it matches |
aoqi@0 | 788 | if (chars != null) { |
aoqi@0 | 789 | for (i = 0; i < len && (start + i) < finish; i++) { |
aoqi@0 | 790 | if (buf[start + i] != chars[i]) |
aoqi@0 | 791 | return false; |
aoqi@0 | 792 | } |
aoqi@0 | 793 | } else { |
aoqi@0 | 794 | for (i = 0; i < len && (start + i) < finish; i++) { |
aoqi@0 | 795 | if (buf[start + i] != next.charAt(i)) |
aoqi@0 | 796 | return false; |
aoqi@0 | 797 | } |
aoqi@0 | 798 | } |
aoqi@0 | 799 | |
aoqi@0 | 800 | // if the first fillbuf didn't get enough data, give |
aoqi@0 | 801 | // fillbuf another chance to read |
aoqi@0 | 802 | if (i < len) { |
aoqi@0 | 803 | if (reader == null || isClosed) |
aoqi@0 | 804 | return false; |
aoqi@0 | 805 | |
aoqi@0 | 806 | // |
aoqi@0 | 807 | // This diagnostic "knows" that the only way big strings would |
aoqi@0 | 808 | // fail to be peeked is where it's a symbol ... e.g. for an |
aoqi@0 | 809 | // </EndTag> construct. That knowledge could also be applied |
aoqi@0 | 810 | // to get rid of the symbol length constraint, since having |
aoqi@0 | 811 | // the wrong symbol is a fatal error anyway ... |
aoqi@0 | 812 | // |
aoqi@0 | 813 | if (len > buf.length) |
aoqi@0 | 814 | fatal("P-077", new Object[]{new Integer(buf.length)}); |
aoqi@0 | 815 | |
aoqi@0 | 816 | fillbuf(); |
aoqi@0 | 817 | return peek(next, chars); |
aoqi@0 | 818 | } |
aoqi@0 | 819 | |
aoqi@0 | 820 | start += len; |
aoqi@0 | 821 | return true; |
aoqi@0 | 822 | } |
aoqi@0 | 823 | |
aoqi@0 | 824 | |
aoqi@0 | 825 | // |
aoqi@0 | 826 | // Support for reporting the internal DTD subset, so <!DOCTYPE...> |
aoqi@0 | 827 | // declarations can be recreated. This is collected as a single |
aoqi@0 | 828 | // string; such subsets are normally small, and many applications |
aoqi@0 | 829 | // don't even care about this. |
aoqi@0 | 830 | // |
aoqi@0 | 831 | public void startRemembering() { |
aoqi@0 | 832 | |
aoqi@0 | 833 | if (startRemember != 0) |
aoqi@0 | 834 | throw new InternalError(); |
aoqi@0 | 835 | startRemember = start; |
aoqi@0 | 836 | } |
aoqi@0 | 837 | |
aoqi@0 | 838 | public String rememberText() { |
aoqi@0 | 839 | |
aoqi@0 | 840 | String retval; |
aoqi@0 | 841 | |
aoqi@0 | 842 | // If the internal subset crossed a buffer boundary, we |
aoqi@0 | 843 | // created a temporary buffer. |
aoqi@0 | 844 | if (rememberedText != null) { |
aoqi@0 | 845 | rememberedText.append(buf, startRemember, |
aoqi@0 | 846 | start - startRemember); |
aoqi@0 | 847 | retval = rememberedText.toString(); |
aoqi@0 | 848 | } else |
aoqi@0 | 849 | retval = new String(buf, startRemember, |
aoqi@0 | 850 | start - startRemember); |
aoqi@0 | 851 | |
aoqi@0 | 852 | startRemember = 0; |
aoqi@0 | 853 | rememberedText = null; |
aoqi@0 | 854 | return retval; |
aoqi@0 | 855 | } |
aoqi@0 | 856 | |
aoqi@0 | 857 | private InputEntity getTopEntity() { |
aoqi@0 | 858 | |
aoqi@0 | 859 | InputEntity current = this; |
aoqi@0 | 860 | |
aoqi@0 | 861 | // don't report locations within internal entities! |
aoqi@0 | 862 | |
aoqi@0 | 863 | while (current != null && current.input == null) |
aoqi@0 | 864 | current = current.next; |
aoqi@0 | 865 | return current == null ? this : current; |
aoqi@0 | 866 | } |
aoqi@0 | 867 | |
aoqi@0 | 868 | /** |
aoqi@0 | 869 | * Returns the public ID of this input source, if known |
aoqi@0 | 870 | */ |
aoqi@0 | 871 | public String getPublicId() { |
aoqi@0 | 872 | |
aoqi@0 | 873 | InputEntity where = getTopEntity(); |
aoqi@0 | 874 | if (where == this) |
aoqi@0 | 875 | return input.getPublicId(); |
aoqi@0 | 876 | return where.getPublicId(); |
aoqi@0 | 877 | } |
aoqi@0 | 878 | |
aoqi@0 | 879 | /** |
aoqi@0 | 880 | * Returns the system ID of this input source, if known |
aoqi@0 | 881 | */ |
aoqi@0 | 882 | public String getSystemId() { |
aoqi@0 | 883 | |
aoqi@0 | 884 | InputEntity where = getTopEntity(); |
aoqi@0 | 885 | if (where == this) |
aoqi@0 | 886 | return input.getSystemId(); |
aoqi@0 | 887 | return where.getSystemId(); |
aoqi@0 | 888 | } |
aoqi@0 | 889 | |
aoqi@0 | 890 | /** |
aoqi@0 | 891 | * Returns the current line number in this input source |
aoqi@0 | 892 | */ |
aoqi@0 | 893 | public int getLineNumber() { |
aoqi@0 | 894 | |
aoqi@0 | 895 | InputEntity where = getTopEntity(); |
aoqi@0 | 896 | if (where == this) |
aoqi@0 | 897 | return lineNumber; |
aoqi@0 | 898 | return where.getLineNumber(); |
aoqi@0 | 899 | } |
aoqi@0 | 900 | |
aoqi@0 | 901 | /** |
aoqi@0 | 902 | * returns -1; maintaining column numbers hurts performance |
aoqi@0 | 903 | */ |
aoqi@0 | 904 | public int getColumnNumber() { |
aoqi@0 | 905 | |
aoqi@0 | 906 | return -1; // not maintained (speed) |
aoqi@0 | 907 | } |
aoqi@0 | 908 | |
aoqi@0 | 909 | |
aoqi@0 | 910 | // |
aoqi@0 | 911 | // n.b. for non-EOF end-of-buffer cases, reader should return |
aoqi@0 | 912 | // at least a handful of bytes so various lookaheads behave. |
aoqi@0 | 913 | // |
aoqi@0 | 914 | // two character pushback exists except at first; characters |
aoqi@0 | 915 | // represented by surrogate pairs can't be pushed back (they'd |
aoqi@0 | 916 | // only be in character data anyway). |
aoqi@0 | 917 | // |
aoqi@0 | 918 | // DTD exception thrown on char conversion problems; line number |
aoqi@0 | 919 | // will be low, as a rule. |
aoqi@0 | 920 | // |
aoqi@0 | 921 | private void fillbuf() throws IOException, SAXException { |
aoqi@0 | 922 | |
aoqi@0 | 923 | // don't touched fixed buffers, that'll usually |
aoqi@0 | 924 | // change entity values (and isn't needed anyway) |
aoqi@0 | 925 | // likewise, ignore closed streams |
aoqi@0 | 926 | if (reader == null || isClosed) |
aoqi@0 | 927 | return; |
aoqi@0 | 928 | |
aoqi@0 | 929 | // if remembering DTD text, copy! |
aoqi@0 | 930 | if (startRemember != 0) { |
aoqi@0 | 931 | if (rememberedText == null) |
aoqi@0 | 932 | rememberedText = new StringBuffer(buf.length); |
aoqi@0 | 933 | rememberedText.append(buf, startRemember, |
aoqi@0 | 934 | start - startRemember); |
aoqi@0 | 935 | } |
aoqi@0 | 936 | |
aoqi@0 | 937 | boolean extra = (finish > 0) && (start > 0); |
aoqi@0 | 938 | int len; |
aoqi@0 | 939 | |
aoqi@0 | 940 | if (extra) // extra pushback |
aoqi@0 | 941 | start--; |
aoqi@0 | 942 | len = finish - start; |
aoqi@0 | 943 | |
aoqi@0 | 944 | System.arraycopy(buf, start, buf, 0, len); |
aoqi@0 | 945 | start = 0; |
aoqi@0 | 946 | finish = len; |
aoqi@0 | 947 | |
aoqi@0 | 948 | try { |
aoqi@0 | 949 | len = buf.length - len; |
aoqi@0 | 950 | len = reader.read(buf, finish, len); |
aoqi@0 | 951 | } catch (UnsupportedEncodingException e) { |
aoqi@0 | 952 | fatal("P-075", new Object[]{e.getMessage()}); |
aoqi@0 | 953 | } catch (CharConversionException e) { |
aoqi@0 | 954 | fatal("P-076", new Object[]{e.getMessage()}); |
aoqi@0 | 955 | } |
aoqi@0 | 956 | if (len >= 0) |
aoqi@0 | 957 | finish += len; |
aoqi@0 | 958 | else |
aoqi@0 | 959 | close(); |
aoqi@0 | 960 | if (extra) // extra pushback |
aoqi@0 | 961 | start++; |
aoqi@0 | 962 | |
aoqi@0 | 963 | if (startRemember != 0) |
aoqi@0 | 964 | // assert extra == true |
aoqi@0 | 965 | startRemember = 1; |
aoqi@0 | 966 | } |
aoqi@0 | 967 | |
aoqi@0 | 968 | public void close() { |
aoqi@0 | 969 | |
aoqi@0 | 970 | try { |
aoqi@0 | 971 | if (reader != null && !isClosed) |
aoqi@0 | 972 | reader.close(); |
aoqi@0 | 973 | isClosed = true; |
aoqi@0 | 974 | } catch (IOException e) { |
aoqi@0 | 975 | /* NOTHING */ |
aoqi@0 | 976 | } |
aoqi@0 | 977 | } |
aoqi@0 | 978 | |
aoqi@0 | 979 | |
aoqi@0 | 980 | private void fatal(String messageId, Object params []) |
aoqi@0 | 981 | throws SAXException { |
aoqi@0 | 982 | |
aoqi@0 | 983 | SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null); |
aoqi@0 | 984 | |
aoqi@0 | 985 | // not continuable ... e.g. WF errors |
aoqi@0 | 986 | close(); |
aoqi@0 | 987 | errHandler.fatalError(x); |
aoqi@0 | 988 | throw x; |
aoqi@0 | 989 | } |
aoqi@0 | 990 | } |