src/share/jaxws_classes/com/sun/xml/internal/dtdparser/InputEntity.java

changeset 286
f50545b5e2f1
child 397
b99d7e355d4b
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/jaxws_classes/com/sun/xml/internal/dtdparser/InputEntity.java	Tue Mar 06 16:09:35 2012 -0800
     1.3 @@ -0,0 +1,990 @@
     1.4 +/*
     1.5 + * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +package com.sun.xml.internal.dtdparser;
    1.30 +
    1.31 +import org.xml.sax.InputSource;
    1.32 +import org.xml.sax.SAXException;
    1.33 +import org.xml.sax.SAXParseException;
    1.34 +
    1.35 +import java.io.CharConversionException;
    1.36 +import java.io.IOException;
    1.37 +import java.io.InputStream;
    1.38 +import java.io.InputStreamReader;
    1.39 +import java.io.Reader;
    1.40 +import java.io.UnsupportedEncodingException;
    1.41 +import java.net.URL;
    1.42 +import java.util.Locale;
    1.43 +
    1.44 +/**
    1.45 + * This is how the parser talks to its input entities, of all kinds.
    1.46 + * The entities are in a stack.
    1.47 + * <p/>
    1.48 + * <P> For internal entities, the character arrays are referenced here,
    1.49 + * and read from as needed (they're read-only).  External entities have
    1.50 + * mutable buffers, that are read into as needed.
    1.51 + * <p/>
    1.52 + * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
    1.53 + * whether it's in an external (parsed) entity or not.  The XML 1.0 spec
    1.54 + * is inconsistent in explaining EOL handling; this is the sensible way.
    1.55 + *
    1.56 + * @author David Brownell
    1.57 + * @author Janet Koenig
    1.58 + * @version 1.4 00/08/05
    1.59 + */
    1.60 +public class InputEntity {
    1.61 +    private int start, finish;
    1.62 +    private char buf [];
    1.63 +    private int lineNumber = 1;
    1.64 +    private boolean returnedFirstHalf = false;
    1.65 +    private boolean maybeInCRLF = false;
    1.66 +
    1.67 +    // name of entity (never main document or unnamed DTD PE)
    1.68 +    private String name;
    1.69 +
    1.70 +    private InputEntity next;
    1.71 +
    1.72 +    // for system and public IDs in diagnostics
    1.73 +    private InputSource input;
    1.74 +
    1.75 +    // this is a buffer; some buffers can be replenished.
    1.76 +    private Reader reader;
    1.77 +    private boolean isClosed;
    1.78 +
    1.79 +    private DTDEventListener errHandler;
    1.80 +    private Locale locale;
    1.81 +
    1.82 +    private StringBuffer rememberedText;
    1.83 +    private int startRemember;
    1.84 +
    1.85 +    // record if this is a PE, so endParsedEntity won't be called
    1.86 +    private boolean isPE;
    1.87 +
    1.88 +    // InputStreamReader throws an internal per-read exception, so
    1.89 +    // we minimize reads.  We also add a byte to compensate for the
    1.90 +    // "ungetc" byte we keep, so that our downstream reads are as
    1.91 +    // nicely sized as we can make them.
    1.92 +    final private static int BUFSIZ = 8 * 1024 + 1;
    1.93 +
    1.94 +    final private static char newline [] = {'\n'};
    1.95 +
    1.96 +    public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
    1.97 +        InputEntity retval = new InputEntity();
    1.98 +        retval.errHandler = h;
    1.99 +        retval.locale = l;
   1.100 +        return retval;
   1.101 +    }
   1.102 +
   1.103 +    private InputEntity() {
   1.104 +    }
   1.105 +
   1.106 +    //
   1.107 +    // predicate:  return true iff this is an internal entity reader,
   1.108 +    // and so may safely be "popped" as needed.  external entities have
   1.109 +    // syntax to uphold; internal parameter entities have at most validity
   1.110 +    // constraints to monitor.  also, only external entities get decent
   1.111 +    // location diagnostics.
   1.112 +    //
   1.113 +    public boolean isInternal() {
   1.114 +        return reader == null;
   1.115 +    }
   1.116 +
   1.117 +    //
   1.118 +    // predicate:  return true iff this is the toplevel document
   1.119 +    //
   1.120 +    public boolean isDocument() {
   1.121 +        return next == null;
   1.122 +    }
   1.123 +
   1.124 +    //
   1.125 +    // predicate:  return true iff this is a PE expansion (so that
   1.126 +    // LexicalEventListner.endParsedEntity won't be called)
   1.127 +    //
   1.128 +    public boolean isParameterEntity() {
   1.129 +        return isPE;
   1.130 +    }
   1.131 +
   1.132 +    //
   1.133 +    // return name of current entity
   1.134 +    //
   1.135 +    public String getName() {
   1.136 +        return name;
   1.137 +    }
   1.138 +
   1.139 +    //
   1.140 +    // use this for an external parsed entity
   1.141 +    //
   1.142 +    public void init(InputSource in, String name, InputEntity stack,
   1.143 +                     boolean isPE)
   1.144 +            throws IOException, SAXException {
   1.145 +
   1.146 +        input = in;
   1.147 +        this.isPE = isPE;
   1.148 +        reader = in.getCharacterStream();
   1.149 +
   1.150 +        if (reader == null) {
   1.151 +            InputStream bytes = in.getByteStream();
   1.152 +
   1.153 +            if (bytes == null)
   1.154 +                reader = XmlReader.createReader(new URL(in.getSystemId())
   1.155 +                        .openStream());
   1.156 +            else if (in.getEncoding() != null)
   1.157 +                reader = XmlReader.createReader(in.getByteStream(),
   1.158 +                        in.getEncoding());
   1.159 +            else
   1.160 +                reader = XmlReader.createReader(in.getByteStream());
   1.161 +        }
   1.162 +        next = stack;
   1.163 +        buf = new char[BUFSIZ];
   1.164 +        this.name = name;
   1.165 +        checkRecursion(stack);
   1.166 +    }
   1.167 +
   1.168 +    //
   1.169 +    // use this for an internal parsed entity; buffer is readonly
   1.170 +    //
   1.171 +    public void init(char b [], String name, InputEntity stack, boolean isPE)
   1.172 +            throws SAXException {
   1.173 +
   1.174 +        next = stack;
   1.175 +        buf = b;
   1.176 +        finish = b.length;
   1.177 +        this.name = name;
   1.178 +        this.isPE = isPE;
   1.179 +        checkRecursion(stack);
   1.180 +    }
   1.181 +
   1.182 +    private void checkRecursion(InputEntity stack)
   1.183 +            throws SAXException {
   1.184 +
   1.185 +        if (stack == null)
   1.186 +            return;
   1.187 +        for (stack = stack.next; stack != null; stack = stack.next) {
   1.188 +            if (stack.name != null && stack.name.equals(name))
   1.189 +                fatal("P-069", new Object[]{name});
   1.190 +        }
   1.191 +    }
   1.192 +
   1.193 +    public InputEntity pop() throws IOException {
   1.194 +
   1.195 +        // caller has ensured there's nothing left to read
   1.196 +        close();
   1.197 +        return next;
   1.198 +    }
   1.199 +
   1.200 +    /**
   1.201 +     * returns true iff there's no more data to consume ...
   1.202 +     */
   1.203 +    public boolean isEOF() throws IOException, SAXException {
   1.204 +
   1.205 +        // called to ensure WF-ness of included entities and to pop
   1.206 +        // input entities appropriately ... EOF is not always legal.
   1.207 +        if (start >= finish) {
   1.208 +            fillbuf();
   1.209 +            return start >= finish;
   1.210 +        } else
   1.211 +            return false;
   1.212 +    }
   1.213 +
   1.214 +    /**
   1.215 +     * Returns the name of the encoding in use, else null; the name
   1.216 +     * returned is in as standard a form as we can get.
   1.217 +     */
   1.218 +    public String getEncoding() {
   1.219 +
   1.220 +        if (reader == null)
   1.221 +            return null;
   1.222 +        if (reader instanceof XmlReader)
   1.223 +            return ((XmlReader) reader).getEncoding();
   1.224 +
   1.225 +        // XXX prefer a java2std() call to normalize names...
   1.226 +
   1.227 +        if (reader instanceof InputStreamReader)
   1.228 +            return ((InputStreamReader) reader).getEncoding();
   1.229 +        return null;
   1.230 +    }
   1.231 +
   1.232 +
   1.233 +    /**
   1.234 +     * returns the next name char, or NUL ... faster than getc(),
   1.235 +     * and the common "name or nmtoken must be next" case won't
   1.236 +     * need ungetc().
   1.237 +     */
   1.238 +    public char getNameChar() throws IOException, SAXException {
   1.239 +
   1.240 +        if (finish <= start)
   1.241 +            fillbuf();
   1.242 +        if (finish > start) {
   1.243 +            char c = buf[start++];
   1.244 +            if (XmlChars.isNameChar(c))
   1.245 +                return c;
   1.246 +            start--;
   1.247 +        }
   1.248 +        return 0;
   1.249 +    }
   1.250 +
    /**
     * gets the next Java character -- might be part of an XML
     * text character represented by a surrogate pair, or be
     * the end of the entity.
     * <p>
     * Characters outside the XML Char production are reported through
     * fatal().  For external entities a CR or CRLF is returned as a single
     * LF, and the line counter is advanced once per line end.
     *
     * @return the next character
     * @throws EndOfInputException when the buffer is still empty after a
     *                             refill attempt
     */
    public char getc() throws IOException, SAXException {

        if (finish <= start)
            fillbuf();
        if (finish > start) {
            char c = buf[start++];

            // [2] Char ::= #x0009 | #x000A | #x000D
            //            | [#x0020-#xD7FF]
            //            | [#xE000-#xFFFD]
            // plus surrogate _pairs_ representing [#x10000-#x10ffff]
            if (returnedFirstHalf) {
                // the previous call handed out a high surrogate, so this
                // character has to be the matching low surrogate
                if (c >= 0xdc00 && c <= 0xdfff) {
                    returnedFirstHalf = false;
                    return c;
                } else
                    fatal("P-070", new Object[]{Integer.toHexString(c)});
            }
            if ((c >= 0x0020 && c <= 0xD7FF)
                    || c == 0x0009
                    // no surrogates!
                    || (c >= 0xE000 && c <= 0xFFFD))
                return c;

            //
            // CRLF and CR are both line ends; map both to LF, and
            // keep line count correct.
            //
            else if (c == '\r' && !isInternal()) {
                // look at the following character with maybeInCRLF set, so
                // the nested call does not count an LF as a second line
                maybeInCRLF = true;
                c = getc();
                if (c != '\n')
                    ungetc();
                maybeInCRLF = false;

                lineNumber++;
                return '\n';

            } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
                if (!isInternal() && !maybeInCRLF)
                    lineNumber++;
                return c;
            }

            // surrogates...
            if (c >= 0xd800 && c < 0xdc00) {
                // high surrogate: remember that the low half must follow
                returnedFirstHalf = true;
                return c;
            }

            fatal("P-071", new Object[]{Integer.toHexString(c)});
        }
        // NOTE(review): also reached after the fatal() calls above should
        // fatal() ever return -- presumably it always throws; confirm.
        throw new EndOfInputException();
    }
   1.310 +
   1.311 +
   1.312 +    /**
   1.313 +     * lookahead one character
   1.314 +     */
   1.315 +    public boolean peekc(char c) throws IOException, SAXException {
   1.316 +
   1.317 +        if (finish <= start)
   1.318 +            fillbuf();
   1.319 +        if (finish > start) {
   1.320 +            if (buf[start] == c) {
   1.321 +                start++;
   1.322 +                return true;
   1.323 +            } else
   1.324 +                return false;
   1.325 +        }
   1.326 +        return false;
   1.327 +    }
   1.328 +
   1.329 +
   1.330 +    /**
   1.331 +     * two character pushback is guaranteed
   1.332 +     */
   1.333 +    public void ungetc() {
   1.334 +
   1.335 +        if (start == 0)
   1.336 +            throw new InternalError("ungetc");
   1.337 +        start--;
   1.338 +
   1.339 +        if (buf[start] == '\n' || buf[start] == '\r') {
   1.340 +            if (!isInternal())
   1.341 +                lineNumber--;
   1.342 +        } else if (returnedFirstHalf)
   1.343 +            returnedFirstHalf = false;
   1.344 +    }
   1.345 +
   1.346 +
   1.347 +    /**
   1.348 +     * optional grammatical whitespace (discarded)
   1.349 +     */
   1.350 +    public boolean maybeWhitespace()
   1.351 +            throws IOException, SAXException {
   1.352 +
   1.353 +        char c;
   1.354 +        boolean isSpace = false;
   1.355 +        boolean sawCR = false;
   1.356 +
   1.357 +        // [3] S ::= #20 | #09 | #0D | #0A
   1.358 +        for (; ;) {
   1.359 +            if (finish <= start)
   1.360 +                fillbuf();
   1.361 +            if (finish <= start)
   1.362 +                return isSpace;
   1.363 +
   1.364 +            c = buf[start++];
   1.365 +            if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') {
   1.366 +                isSpace = true;
   1.367 +
   1.368 +                //
   1.369 +                // CR, LF are line endings ... CLRF is one, not two!
   1.370 +                //
   1.371 +                if ((c == '\n' || c == '\r') && !isInternal()) {
   1.372 +                    if (!(c == '\n' && sawCR)) {
   1.373 +                        lineNumber++;
   1.374 +                        sawCR = false;
   1.375 +                    }
   1.376 +                    if (c == '\r')
   1.377 +                        sawCR = true;
   1.378 +                }
   1.379 +            } else {
   1.380 +                start--;
   1.381 +                return isSpace;
   1.382 +            }
   1.383 +        }
   1.384 +    }
   1.385 +
   1.386 +
    /**
     * normal content; whitespace in markup may be handled
     * specially if the parser uses the content model.
     * <p/>
     * <P> content terminates with markup delimiter characters,
     * namely ampersand (&amp;amp;) and left angle bracket (&amp;lt;).
     * The delimiter itself is left unread.
     * <p/>
     * <P> the document handler's characters() method is called
     * on all the content found, straight out of the read buffer.  For
     * external entities a CR or CRLF is delivered as a single LF.
     * <p/>
     * <P> A literal "]]&gt;" is reported as fatal error P-072, and a
     * character that is not a legal XML character as P-071.
     *
     * @param docHandler receives the characters() callbacks
     * @return true iff at least one characters() callback was made
     */
    public boolean parsedContent(DTDEventListener docHandler
                                 /*ElementValidator validator*/)
            throws IOException, SAXException {

        // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

        int first;        // first char to return
        int last;        // last char to return
        boolean sawContent;    // sent any chars?
        char c;

        // deliver right out of the buffer, until delimiter, EOF,
        // or error, refilling as we go
        for (first = last = start, sawContent = false; ; last++) {

            // buffer empty?
            if (last >= finish) {
                // flush what has been scanned so far before the refill
                if (last > first) {
//            validator.text ();
                    docHandler.characters(buf, first, last - first);
                    sawContent = true;
                    start = last;
                }
                if (isEOF())    // calls fillbuf
                    return sawContent;
                first = start;
                last = first - 1;    // incremented in loop
                continue;
            }

            c = buf[last];

            //
            // pass most chars through ASAP; this inlines the code of
            // [2] !XmlChars.isChar(c) leaving only characters needing
            // special treatment ... line ends, surrogates, and:
            //    0x0026 == '&'
            //    0x003C == '<'
            //    0x005D == ']'
            // Comparisons ordered for speed on 'typical' text
            //
            if ((c > 0x005D && c <= 0xD7FF)    // a-z and more
                    || (c < 0x0026 && c >= 0x0020)    // space & punct
                    || (c > 0x003C && c < 0x005D)    // A-Z & punct
                    || (c > 0x0026 && c < 0x003C)    // 0-9 & punct
                    || c == 0x0009
                    || (c >= 0xE000 && c <= 0xFFFD)
            )
                continue;

            // terminate on markup delimiters
            if (c == '<' || c == '&')
                break;

            // count lines
            if (c == '\n') {
                if (!isInternal())
                    lineNumber++;
                continue;
            }

            // External entities get CR, CRLF --> LF mapping
            // Internal ones got it already, and we can't repeat
            // else we break char ref handling!!
            if (c == '\r') {
                if (isInternal())
                    continue;

                // deliver the run before the CR (possibly empty), then a
                // single LF in place of the CR / CRLF
                docHandler.characters(buf, first, last - first);
                docHandler.characters(newline, 0, 1);
                sawContent = true;
                lineNumber++;
                if (finish > (last + 1)) {
                    // swallow the LF of a CRLF when it is already buffered
                    if (buf[last + 1] == '\n')
                        last++;
                } else {    // CR at end of buffer
// XXX case not yet handled:  CRLF here will look like two lines
                }
                first = start = last + 1;
                continue;
            }

            // ']]>' is a WF error -- must fail if we see it
            if (c == ']') {
                // finish - last is the number of buffered chars from this
                // ']' onwards (1 == it is the very last one)
                switch (finish - last) {
                // for suspicious end-of-buffer cases, get more data
                // into the buffer to rule out this sequence.
                case 2:
                    if (buf[last + 1] != ']')
                        continue;
                    // FALLTHROUGH

                case 1:
                    // nothing more can arrive: treat as ordinary text
                    if (reader == null || isClosed)
                        continue;
                    if (last == first)
                        throw new InternalError("fillbuf");
                    last--;
                    if (last > first) {
//            validator.text ();
                        docHandler.characters(buf, first, last - first);
                        sawContent = true;
                        start = last;
                    }
                    fillbuf();
                    first = last = start;
                    continue;

                    // otherwise any "]]>" would be buffered, and we can
                    // see right away if that's what we have
                default:
                    if (buf[last + 1] == ']' && buf[last + 2] == '>')
                        fatal("P-072", null);
                    continue;
                }
            }

            // correctly paired surrogates are OK
            if (c >= 0xd800 && c <= 0xdfff) {
                // surrogate is the last buffered char: flush and refill so
                // that its partner can be examined
                if ((last + 1) >= finish) {
                    if (last > first) {
//            validator.text ();
                        docHandler.characters(buf, first, last - first);
                        sawContent = true;
                        // NOTE(review): start moves past the surrogate at
                        // 'last', which was not part of the characters()
                        // call above -- looks like it is dropped; confirm.
                        start = last + 1;
                    }
                    if (isEOF()) {    // calls fillbuf
                        fatal("P-081",
                                new Object[]{Integer.toHexString(c)});
                    }
                    first = start;
                    last = first;
                    continue;
                }
                if (checkSurrogatePair(last))
                    last++;
                else {
                    last--;
                    // also terminate on surrogate pair oddities
                    break;
                }
                continue;
            }

            fatal("P-071", new Object[]{Integer.toHexString(c)});
        }
        // reached via 'break': deliver whatever precedes the stop point
        if (last == first)
            return sawContent;
//    validator.text ();
        docHandler.characters(buf, first, last - first);
        start = last;
        return true;
    }
   1.550 +
   1.551 +
   1.552 +    /**
   1.553 +     * CDATA -- character data, terminated by "]]>" and optionally
   1.554 +     * including unescaped markup delimiters (ampersand and left angle
   1.555 +     * bracket).  This should otherwise be exactly like character data,
   1.556 +     * modulo differences in error report details.
   1.557 +     * <p/>
   1.558 +     * <P> The document handler's characters() or ignorableWhitespace()
   1.559 +     * methods are invoked on all the character data found
   1.560 +     *
   1.561 +     * @param docHandler               gets callbacks for character data
   1.562 +     * @param ignorableWhitespace      if true, whitespace characters will
   1.563 +     *                                 be reported using docHandler.ignorableWhitespace(); implicitly,
   1.564 +     *                                 non-whitespace characters will cause validation errors
   1.565 +     * @param whitespaceInvalidMessage if true, ignorable whitespace
   1.566 +     *                                 causes a validity error report as well as a callback
   1.567 +     */
   1.568 +    public boolean unparsedContent(DTDEventListener docHandler,
   1.569 +                                   /*ElementValidator validator,*/
   1.570 +                                   boolean ignorableWhitespace,
   1.571 +                                   String whitespaceInvalidMessage)
   1.572 +            throws IOException, SAXException {
   1.573 +
   1.574 +        // [18] CDSect ::= CDStart CData CDEnd
   1.575 +        // [19] CDStart ::= '<![CDATA['
   1.576 +        // [20] CData ::= (Char* - (Char* ']]>' Char*))
   1.577 +        // [21] CDEnd ::= ']]>'
   1.578 +
   1.579 +        // caller peeked the leading '<' ...
   1.580 +        if (!peek("![CDATA[", null))
   1.581 +            return false;
   1.582 +        docHandler.startCDATA();
   1.583 +
   1.584 +        // only a literal ']]>' stops this ...
   1.585 +        int last;
   1.586 +
   1.587 +        for (; ;) {        // until ']]>' seen
   1.588 +            boolean done = false;
   1.589 +            char c;
   1.590 +
   1.591 +            // don't report ignorable whitespace as "text" for
   1.592 +            // validation purposes.
   1.593 +            boolean white = ignorableWhitespace;
   1.594 +
   1.595 +            for (last = start; last < finish; last++) {
   1.596 +                c = buf[last];
   1.597 +
   1.598 +                //
   1.599 +                // Reject illegal characters.
   1.600 +                //
   1.601 +                if (!XmlChars.isChar(c)) {
   1.602 +                    white = false;
   1.603 +                    if (c >= 0xd800 && c <= 0xdfff) {
   1.604 +                        if (checkSurrogatePair(last)) {
   1.605 +                            last++;
   1.606 +                            continue;
   1.607 +                        } else {
   1.608 +                            last--;
   1.609 +                            break;
   1.610 +                        }
   1.611 +                    }
   1.612 +                    fatal("P-071", new Object[]
   1.613 +                    {Integer.toHexString(buf[last])});
   1.614 +                }
   1.615 +                if (c == '\n') {
   1.616 +                    if (!isInternal())
   1.617 +                        lineNumber++;
   1.618 +                    continue;
   1.619 +                }
   1.620 +                if (c == '\r') {
   1.621 +                    // As above, we can't repeat CR/CRLF --> LF mapping
   1.622 +                    if (isInternal())
   1.623 +                        continue;
   1.624 +
   1.625 +                    if (white) {
   1.626 +                        if (whitespaceInvalidMessage != null)
   1.627 +                            errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
   1.628 +                                    whitespaceInvalidMessage), null));
   1.629 +                        docHandler.ignorableWhitespace(buf, start,
   1.630 +                                last - start);
   1.631 +                        docHandler.ignorableWhitespace(newline, 0, 1);
   1.632 +                    } else {
   1.633 +//            validator.text ();
   1.634 +                        docHandler.characters(buf, start, last - start);
   1.635 +                        docHandler.characters(newline, 0, 1);
   1.636 +                    }
   1.637 +                    lineNumber++;
   1.638 +                    if (finish > (last + 1)) {
   1.639 +                        if (buf[last + 1] == '\n')
   1.640 +                            last++;
   1.641 +                    } else {    // CR at end of buffer
   1.642 +// XXX case not yet handled ... as above
   1.643 +                    }
   1.644 +                    start = last + 1;
   1.645 +                    continue;
   1.646 +                }
   1.647 +                if (c != ']') {
   1.648 +                    if (c != ' ' && c != '\t')
   1.649 +                        white = false;
   1.650 +                    continue;
   1.651 +                }
   1.652 +                if ((last + 2) < finish) {
   1.653 +                    if (buf[last + 1] == ']' && buf[last + 2] == '>') {
   1.654 +                        done = true;
   1.655 +                        break;
   1.656 +                    }
   1.657 +                    white = false;
   1.658 +                    continue;
   1.659 +                } else {
   1.660 +                    //last--;
   1.661 +                    break;
   1.662 +                }
   1.663 +            }
   1.664 +            if (white) {
   1.665 +                if (whitespaceInvalidMessage != null)
   1.666 +                    errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
   1.667 +                            whitespaceInvalidMessage), null));
   1.668 +                docHandler.ignorableWhitespace(buf, start, last - start);
   1.669 +            } else {
   1.670 +//        validator.text ();
   1.671 +                docHandler.characters(buf, start, last - start);
   1.672 +            }
   1.673 +            if (done) {
   1.674 +                start = last + 3;
   1.675 +                break;
   1.676 +            }
   1.677 +            start = last;
   1.678 +            if (isEOF())
   1.679 +                fatal("P-073", null);
   1.680 +        }
   1.681 +        docHandler.endCDATA();
   1.682 +        return true;
   1.683 +    }
   1.684 +
   1.685 +    // return false to backstep at end of buffer)
   1.686 +    private boolean checkSurrogatePair(int offset)
   1.687 +            throws SAXException {
   1.688 +
   1.689 +        if ((offset + 1) >= finish)
   1.690 +            return false;
   1.691 +
   1.692 +        char c1 = buf[offset++];
   1.693 +        char c2 = buf[offset];
   1.694 +
   1.695 +        if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff))
   1.696 +            return true;
   1.697 +        fatal("P-074", new Object[]{
   1.698 +            Integer.toHexString(c1 & 0x0ffff),
   1.699 +            Integer.toHexString(c2 & 0x0ffff)
   1.700 +        });
   1.701 +        return false;
   1.702 +    }
   1.703 +
   1.704 +
   1.705 +    /**
   1.706 +     * whitespace in markup (flagged to app, discardable)
   1.707 +     * <p/>
   1.708 +     * <P> the document handler's ignorableWhitespace() method
   1.709 +     * is called on all the whitespace found
   1.710 +     */
   1.711 +    public boolean ignorableWhitespace(DTDEventListener handler)
   1.712 +            throws IOException, SAXException {
   1.713 +
   1.714 +        char c;
   1.715 +        boolean isSpace = false;
   1.716 +        int first;
   1.717 +
   1.718 +        // [3] S ::= #20 | #09 | #0D | #0A
   1.719 +        for (first = start; ;) {
   1.720 +            if (finish <= start) {
   1.721 +                if (isSpace)
   1.722 +                    handler.ignorableWhitespace(buf, first, start - first);
   1.723 +                fillbuf();
   1.724 +                first = start;
   1.725 +            }
   1.726 +            if (finish <= start)
   1.727 +                return isSpace;
   1.728 +
   1.729 +            c = buf[start++];
   1.730 +            switch (c) {
   1.731 +            case '\n':
   1.732 +                if (!isInternal())
   1.733 +                    lineNumber++;
   1.734 +// XXX handles Macintosh line endings wrong
   1.735 +                // fallthrough
   1.736 +            case 0x09:
   1.737 +            case 0x20:
   1.738 +                isSpace = true;
   1.739 +                continue;
   1.740 +
   1.741 +            case '\r':
   1.742 +                isSpace = true;
   1.743 +                if (!isInternal())
   1.744 +                    lineNumber++;
   1.745 +                handler.ignorableWhitespace(buf, first,
   1.746 +                        (start - 1) - first);
   1.747 +                handler.ignorableWhitespace(newline, 0, 1);
   1.748 +                if (start < finish && buf[start] == '\n')
   1.749 +                    ++start;
   1.750 +                first = start;
   1.751 +                continue;
   1.752 +
   1.753 +            default:
   1.754 +                ungetc();
   1.755 +                if (isSpace)
   1.756 +                    handler.ignorableWhitespace(buf, first, start - first);
   1.757 +                return isSpace;
   1.758 +            }
   1.759 +        }
   1.760 +    }
   1.761 +
   1.762 +    /**
   1.763 +     * returns false iff 'next' string isn't as provided,
   1.764 +     * else skips that text and returns true.
   1.765 +     * <p/>
   1.766 +     * <P> NOTE:  two alternative string representations are
   1.767 +     * both passed in, since one is faster.
   1.768 +     */
   1.769 +    public boolean peek(String next, char chars [])
   1.770 +            throws IOException, SAXException {
   1.771 +
   1.772 +        int len;
   1.773 +        int i;
   1.774 +
   1.775 +        if (chars != null)
   1.776 +            len = chars.length;
   1.777 +        else
   1.778 +            len = next.length();
   1.779 +
   1.780 +        // buffer should hold the whole thing ... give it a
   1.781 +        // chance for the end-of-buffer case and cope with EOF
   1.782 +        // by letting fillbuf compact and fill
   1.783 +        if (finish <= start || (finish - start) < len)
   1.784 +            fillbuf();
   1.785 +
   1.786 +        // can't peek past EOF
   1.787 +        if (finish <= start)
   1.788 +            return false;
   1.789 +
   1.790 +        // compare the string; consume iff it matches
   1.791 +        if (chars != null) {
   1.792 +            for (i = 0; i < len && (start + i) < finish; i++) {
   1.793 +                if (buf[start + i] != chars[i])
   1.794 +                    return false;
   1.795 +            }
   1.796 +        } else {
   1.797 +            for (i = 0; i < len && (start + i) < finish; i++) {
   1.798 +                if (buf[start + i] != next.charAt(i))
   1.799 +                    return false;
   1.800 +            }
   1.801 +        }
   1.802 +
   1.803 +        // if the first fillbuf didn't get enough data, give
   1.804 +        // fillbuf another chance to read
   1.805 +        if (i < len) {
   1.806 +            if (reader == null || isClosed)
   1.807 +                return false;
   1.808 +
   1.809 +            //
   1.810 +            // This diagnostic "knows" that the only way big strings would
   1.811 +            // fail to be peeked is where it's a symbol ... e.g. for an
   1.812 +            // </EndTag> construct.  That knowledge could also be applied
   1.813 +            // to get rid of the symbol length constraint, since having
   1.814 +            // the wrong symbol is a fatal error anyway ...
   1.815 +            //
   1.816 +            if (len > buf.length)
   1.817 +                fatal("P-077", new Object[]{new Integer(buf.length)});
   1.818 +
   1.819 +            fillbuf();
   1.820 +            return peek(next, chars);
   1.821 +        }
   1.822 +
   1.823 +        start += len;
   1.824 +        return true;
   1.825 +    }
   1.826 +
   1.827 +
   1.828 +    //
   1.829 +    // Support for reporting the internal DTD subset, so <!DOCTYPE...>
   1.830 +    // declarations can be recreated.  This is collected as a single
   1.831 +    // string; such subsets are normally small, and many applications
   1.832 +    // don't even care about this.
   1.833 +    //
   1.834 +    public void startRemembering() {
   1.835 +
   1.836 +        if (startRemember != 0)
   1.837 +            throw new InternalError();
   1.838 +        startRemember = start;
   1.839 +    }
   1.840 +
   1.841 +    public String rememberText() {
   1.842 +
   1.843 +        String retval;
   1.844 +
   1.845 +        // If the internal subset crossed a buffer boundary, we
   1.846 +        // created a temporary buffer.
   1.847 +        if (rememberedText != null) {
   1.848 +            rememberedText.append(buf, startRemember,
   1.849 +                    start - startRemember);
   1.850 +            retval = rememberedText.toString();
   1.851 +        } else
   1.852 +            retval = new String(buf, startRemember,
   1.853 +                    start - startRemember);
   1.854 +
   1.855 +        startRemember = 0;
   1.856 +        rememberedText = null;
   1.857 +        return retval;
   1.858 +    }
   1.859 +
   1.860 +    private InputEntity getTopEntity() {
   1.861 +
   1.862 +        InputEntity current = this;
   1.863 +
   1.864 +        // don't report locations within internal entities!
   1.865 +
   1.866 +        while (current != null && current.input == null)
   1.867 +            current = current.next;
   1.868 +        return current == null ? this : current;
   1.869 +    }
   1.870 +
   1.871 +    /**
   1.872 +     * Returns the public ID of this input source, if known
   1.873 +     */
   1.874 +    public String getPublicId() {
   1.875 +
   1.876 +        InputEntity where = getTopEntity();
   1.877 +        if (where == this)
   1.878 +            return input.getPublicId();
   1.879 +        return where.getPublicId();
   1.880 +    }
   1.881 +
   1.882 +    /**
   1.883 +     * Returns the system ID of this input source, if known
   1.884 +     */
   1.885 +    public String getSystemId() {
   1.886 +
   1.887 +        InputEntity where = getTopEntity();
   1.888 +        if (where == this)
   1.889 +            return input.getSystemId();
   1.890 +        return where.getSystemId();
   1.891 +    }
   1.892 +
   1.893 +    /**
   1.894 +     * Returns the current line number in this input source
   1.895 +     */
   1.896 +    public int getLineNumber() {
   1.897 +
   1.898 +        InputEntity where = getTopEntity();
   1.899 +        if (where == this)
   1.900 +            return lineNumber;
   1.901 +        return where.getLineNumber();
   1.902 +    }
   1.903 +
   1.904 +    /**
   1.905 +     * returns -1; maintaining column numbers hurts performance
   1.906 +     */
   1.907 +    public int getColumnNumber() {
   1.908 +
   1.909 +        return -1;        // not maintained (speed)
   1.910 +    }
   1.911 +
   1.912 +
   1.913 +    //
   1.914 +    // n.b. for non-EOF end-of-buffer cases, reader should return
   1.915 +    // at least a handful of bytes so various lookaheads behave.
   1.916 +    //
   1.917 +    // two character pushback exists except at first; characters
   1.918 +    // represented by surrogate pairs can't be pushed back (they'd
   1.919 +    // only be in character data anyway).
   1.920 +    //
   1.921 +    // DTD exception thrown on char conversion problems; line number
   1.922 +    // will be low, as a rule.
   1.923 +    //
   1.924 +    private void fillbuf() throws IOException, SAXException {
   1.925 +
   1.926 +        // don't touched fixed buffers, that'll usually
   1.927 +        // change entity values (and isn't needed anyway)
   1.928 +        // likewise, ignore closed streams
   1.929 +        if (reader == null || isClosed)
   1.930 +            return;
   1.931 +
   1.932 +        // if remembering DTD text, copy!
   1.933 +        if (startRemember != 0) {
   1.934 +            if (rememberedText == null)
   1.935 +                rememberedText = new StringBuffer(buf.length);
   1.936 +            rememberedText.append(buf, startRemember,
   1.937 +                    start - startRemember);
   1.938 +        }
   1.939 +
   1.940 +        boolean extra = (finish > 0) && (start > 0);
   1.941 +        int len;
   1.942 +
   1.943 +        if (extra)        // extra pushback
   1.944 +            start--;
   1.945 +        len = finish - start;
   1.946 +
   1.947 +        System.arraycopy(buf, start, buf, 0, len);
   1.948 +        start = 0;
   1.949 +        finish = len;
   1.950 +
   1.951 +        try {
   1.952 +            len = buf.length - len;
   1.953 +            len = reader.read(buf, finish, len);
   1.954 +        } catch (UnsupportedEncodingException e) {
   1.955 +            fatal("P-075", new Object[]{e.getMessage()});
   1.956 +        } catch (CharConversionException e) {
   1.957 +            fatal("P-076", new Object[]{e.getMessage()});
   1.958 +        }
   1.959 +        if (len >= 0)
   1.960 +            finish += len;
   1.961 +        else
   1.962 +            close();
   1.963 +        if (extra)        // extra pushback
   1.964 +            start++;
   1.965 +
   1.966 +        if (startRemember != 0)
   1.967 +        // assert extra == true
   1.968 +            startRemember = 1;
   1.969 +    }
   1.970 +
   1.971 +    public void close() {
   1.972 +
   1.973 +        try {
   1.974 +            if (reader != null && !isClosed)
   1.975 +                reader.close();
   1.976 +            isClosed = true;
   1.977 +        } catch (IOException e) {
   1.978 +            /* NOTHING */
   1.979 +        }
   1.980 +    }
   1.981 +
   1.982 +
   1.983 +    private void fatal(String messageId, Object params [])
   1.984 +            throws SAXException {
   1.985 +
   1.986 +        SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null);
   1.987 +
   1.988 +        // not continuable ... e.g. WF errors
   1.989 +        close();
   1.990 +        errHandler.fatalError(x);
   1.991 +        throw x;
   1.992 +    }
   1.993 +}

mercurial