aoqi@0: /* aoqi@0: * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. aoqi@0: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. aoqi@0: * aoqi@0: * This code is free software; you can redistribute it and/or modify it aoqi@0: * under the terms of the GNU General Public License version 2 only, as aoqi@0: * published by the Free Software Foundation. Oracle designates this aoqi@0: * particular file as subject to the "Classpath" exception as provided aoqi@0: * by Oracle in the LICENSE file that accompanied this code. aoqi@0: * aoqi@0: * This code is distributed in the hope that it will be useful, but WITHOUT aoqi@0: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or aoqi@0: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License aoqi@0: * version 2 for more details (a copy is included in the LICENSE file that aoqi@0: * accompanied this code). aoqi@0: * aoqi@0: * You should have received a copy of the GNU General Public License version aoqi@0: * 2 along with this work; if not, write to the Free Software Foundation, aoqi@0: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. aoqi@0: * aoqi@0: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA aoqi@0: * or visit www.oracle.com if you need additional information or have any aoqi@0: * questions. aoqi@0: */ aoqi@0: aoqi@0: package com.sun.xml.internal.dtdparser; aoqi@0: aoqi@0: import org.xml.sax.InputSource; aoqi@0: import org.xml.sax.SAXException; aoqi@0: import org.xml.sax.SAXParseException; aoqi@0: aoqi@0: import java.io.CharConversionException; aoqi@0: import java.io.IOException; aoqi@0: import java.io.InputStream; aoqi@0: import java.io.InputStreamReader; aoqi@0: import java.io.Reader; aoqi@0: import java.io.UnsupportedEncodingException; aoqi@0: import java.net.URL; aoqi@0: import java.util.Locale; aoqi@0: aoqi@0: /** aoqi@0: * This is how the parser talks to its input entities, of all kinds. aoqi@0: * The entities are in a stack. aoqi@0: *

aoqi@0: *

For internal entities, the character arrays are referenced here, aoqi@0: * and read from as needed (they're read-only). External entities have aoqi@0: * mutable buffers, that are read into as needed. aoqi@0: *

aoqi@0: *

Note: This maps CRLF (and CR) to LF without regard for aoqi@0: * whether it's in an external (parsed) entity or not. The XML 1.0 spec aoqi@0: * is inconsistent in explaining EOL handling; this is the sensible way. aoqi@0: * aoqi@0: * @author David Brownell aoqi@0: * @author Janet Koenig aoqi@0: * @version 1.4 00/08/05 aoqi@0: */ aoqi@0: public class InputEntity { aoqi@0: private int start, finish; aoqi@0: private char buf []; aoqi@0: private int lineNumber = 1; aoqi@0: private boolean returnedFirstHalf = false; aoqi@0: private boolean maybeInCRLF = false; aoqi@0: aoqi@0: // name of entity (never main document or unnamed DTD PE) aoqi@0: private String name; aoqi@0: aoqi@0: private InputEntity next; aoqi@0: aoqi@0: // for system and public IDs in diagnostics aoqi@0: private InputSource input; aoqi@0: aoqi@0: // this is a buffer; some buffers can be replenished. aoqi@0: private Reader reader; aoqi@0: private boolean isClosed; aoqi@0: aoqi@0: private DTDEventListener errHandler; aoqi@0: private Locale locale; aoqi@0: aoqi@0: private StringBuffer rememberedText; aoqi@0: private int startRemember; aoqi@0: aoqi@0: // record if this is a PE, so endParsedEntity won't be called aoqi@0: private boolean isPE; aoqi@0: aoqi@0: // InputStreamReader throws an internal per-read exception, so aoqi@0: // we minimize reads. We also add a byte to compensate for the aoqi@0: // "ungetc" byte we keep, so that our downstream reads are as aoqi@0: // nicely sized as we can make them. aoqi@0: final private static int BUFSIZ = 8 * 1024 + 1; aoqi@0: aoqi@0: final private static char newline [] = {'\n'}; aoqi@0: aoqi@0: public static InputEntity getInputEntity(DTDEventListener h, Locale l) { aoqi@0: InputEntity retval = new InputEntity(); aoqi@0: retval.errHandler = h; aoqi@0: retval.locale = l; aoqi@0: return retval; aoqi@0: } aoqi@0: aoqi@0: private InputEntity() { aoqi@0: } aoqi@0: aoqi@0: // aoqi@0: // predicate: return true iff this is an internal entity reader, aoqi@0: // and so may safely be "popped" as needed. external entities have aoqi@0: // syntax to uphold; internal parameter entities have at most validity aoqi@0: // constraints to monitor. also, only external entities get decent aoqi@0: // location diagnostics. aoqi@0: // aoqi@0: public boolean isInternal() { aoqi@0: return reader == null; aoqi@0: } aoqi@0: aoqi@0: // aoqi@0: // predicate: return true iff this is the toplevel document aoqi@0: // aoqi@0: public boolean isDocument() { aoqi@0: return next == null; aoqi@0: } aoqi@0: aoqi@0: // aoqi@0: // predicate: return true iff this is a PE expansion (so that aoqi@0: // LexicalEventListner.endParsedEntity won't be called) aoqi@0: // aoqi@0: public boolean isParameterEntity() { aoqi@0: return isPE; aoqi@0: } aoqi@0: aoqi@0: // aoqi@0: // return name of current entity aoqi@0: // aoqi@0: public String getName() { aoqi@0: return name; aoqi@0: } aoqi@0: aoqi@0: // aoqi@0: // use this for an external parsed entity aoqi@0: // aoqi@0: public void init(InputSource in, String name, InputEntity stack, aoqi@0: boolean isPE) aoqi@0: throws IOException, SAXException { aoqi@0: aoqi@0: input = in; aoqi@0: this.isPE = isPE; aoqi@0: reader = in.getCharacterStream(); aoqi@0: aoqi@0: if (reader == null) { aoqi@0: InputStream bytes = in.getByteStream(); aoqi@0: aoqi@0: if (bytes == null) aoqi@0: reader = XmlReader.createReader(new URL(in.getSystemId()) aoqi@0: .openStream()); aoqi@0: else if (in.getEncoding() != null) aoqi@0: reader = XmlReader.createReader(in.getByteStream(), aoqi@0: in.getEncoding()); aoqi@0: else aoqi@0: reader = XmlReader.createReader(in.getByteStream()); aoqi@0: } aoqi@0: next = stack; aoqi@0: buf = new char[BUFSIZ]; aoqi@0: this.name = name; aoqi@0: checkRecursion(stack); aoqi@0: } aoqi@0: aoqi@0: // aoqi@0: // use this for an internal parsed entity; buffer is readonly aoqi@0: // aoqi@0: public void init(char b [], String name, InputEntity stack, boolean isPE) aoqi@0: throws SAXException { aoqi@0: aoqi@0: next = stack; aoqi@0: buf = b; aoqi@0: finish = b.length; aoqi@0: this.name = name; aoqi@0: this.isPE = isPE; aoqi@0: checkRecursion(stack); aoqi@0: } aoqi@0: aoqi@0: private void checkRecursion(InputEntity stack) aoqi@0: throws SAXException { aoqi@0: aoqi@0: if (stack == null) aoqi@0: return; aoqi@0: for (stack = stack.next; stack != null; stack = stack.next) { aoqi@0: if (stack.name != null && stack.name.equals(name)) aoqi@0: fatal("P-069", new Object[]{name}); aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: public InputEntity pop() throws IOException { aoqi@0: aoqi@0: // caller has ensured there's nothing left to read aoqi@0: close(); aoqi@0: return next; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * returns true iff there's no more data to consume ... aoqi@0: */ aoqi@0: public boolean isEOF() throws IOException, SAXException { aoqi@0: aoqi@0: // called to ensure WF-ness of included entities and to pop aoqi@0: // input entities appropriately ... EOF is not always legal. aoqi@0: if (start >= finish) { aoqi@0: fillbuf(); aoqi@0: return start >= finish; aoqi@0: } else aoqi@0: return false; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Returns the name of the encoding in use, else null; the name aoqi@0: * returned is in as standard a form as we can get. aoqi@0: */ aoqi@0: public String getEncoding() { aoqi@0: aoqi@0: if (reader == null) aoqi@0: return null; aoqi@0: if (reader instanceof XmlReader) aoqi@0: return ((XmlReader) reader).getEncoding(); aoqi@0: aoqi@0: // XXX prefer a java2std() call to normalize names... aoqi@0: aoqi@0: if (reader instanceof InputStreamReader) aoqi@0: return ((InputStreamReader) reader).getEncoding(); aoqi@0: return null; aoqi@0: } aoqi@0: aoqi@0: aoqi@0: /** aoqi@0: * returns the next name char, or NUL ... faster than getc(), aoqi@0: * and the common "name or nmtoken must be next" case won't aoqi@0: * need ungetc(). aoqi@0: */ aoqi@0: public char getNameChar() throws IOException, SAXException { aoqi@0: aoqi@0: if (finish <= start) aoqi@0: fillbuf(); aoqi@0: if (finish > start) { aoqi@0: char c = buf[start++]; aoqi@0: if (XmlChars.isNameChar(c)) aoqi@0: return c; aoqi@0: start--; aoqi@0: } aoqi@0: return 0; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * gets the next Java character -- might be part of an XML aoqi@0: * text character represented by a surrogate pair, or be aoqi@0: * the end of the entity. aoqi@0: */ aoqi@0: public char getc() throws IOException, SAXException { aoqi@0: aoqi@0: if (finish <= start) aoqi@0: fillbuf(); aoqi@0: if (finish > start) { aoqi@0: char c = buf[start++]; aoqi@0: aoqi@0: // [2] Char ::= #x0009 | #x000A | #x000D aoqi@0: // | [#x0020-#xD7FF] aoqi@0: // | [#xE000-#xFFFD] aoqi@0: // plus surrogate _pairs_ representing [#x10000-#x10ffff] aoqi@0: if (returnedFirstHalf) { aoqi@0: if (c >= 0xdc00 && c <= 0xdfff) { aoqi@0: returnedFirstHalf = false; aoqi@0: return c; aoqi@0: } else aoqi@0: fatal("P-070", new Object[]{Integer.toHexString(c)}); aoqi@0: } aoqi@0: if ((c >= 0x0020 && c <= 0xD7FF) aoqi@0: || c == 0x0009 aoqi@0: // no surrogates! aoqi@0: || (c >= 0xE000 && c <= 0xFFFD)) aoqi@0: return c; aoqi@0: aoqi@0: // aoqi@0: // CRLF and CR are both line ends; map both to LF, and aoqi@0: // keep line count correct. aoqi@0: // aoqi@0: else if (c == '\r' && !isInternal()) { aoqi@0: maybeInCRLF = true; aoqi@0: c = getc(); aoqi@0: if (c != '\n') aoqi@0: ungetc(); aoqi@0: maybeInCRLF = false; aoqi@0: aoqi@0: lineNumber++; aoqi@0: return '\n'; aoqi@0: aoqi@0: } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF aoqi@0: if (!isInternal() && !maybeInCRLF) aoqi@0: lineNumber++; aoqi@0: return c; aoqi@0: } aoqi@0: aoqi@0: // surrogates... aoqi@0: if (c >= 0xd800 && c < 0xdc00) { aoqi@0: returnedFirstHalf = true; aoqi@0: return c; aoqi@0: } aoqi@0: aoqi@0: fatal("P-071", new Object[]{Integer.toHexString(c)}); aoqi@0: } aoqi@0: throw new EndOfInputException(); aoqi@0: } aoqi@0: aoqi@0: aoqi@0: /** aoqi@0: * lookahead one character aoqi@0: */ aoqi@0: public boolean peekc(char c) throws IOException, SAXException { aoqi@0: aoqi@0: if (finish <= start) aoqi@0: fillbuf(); aoqi@0: if (finish > start) { aoqi@0: if (buf[start] == c) { aoqi@0: start++; aoqi@0: return true; aoqi@0: } else aoqi@0: return false; aoqi@0: } aoqi@0: return false; aoqi@0: } aoqi@0: aoqi@0: aoqi@0: /** aoqi@0: * two character pushback is guaranteed aoqi@0: */ aoqi@0: public void ungetc() { aoqi@0: aoqi@0: if (start == 0) aoqi@0: throw new InternalError("ungetc"); aoqi@0: start--; aoqi@0: aoqi@0: if (buf[start] == '\n' || buf[start] == '\r') { aoqi@0: if (!isInternal()) aoqi@0: lineNumber--; aoqi@0: } else if (returnedFirstHalf) aoqi@0: returnedFirstHalf = false; aoqi@0: } aoqi@0: aoqi@0: aoqi@0: /** aoqi@0: * optional grammatical whitespace (discarded) aoqi@0: */ aoqi@0: public boolean maybeWhitespace() aoqi@0: throws IOException, SAXException { aoqi@0: aoqi@0: char c; aoqi@0: boolean isSpace = false; aoqi@0: boolean sawCR = false; aoqi@0: aoqi@0: // [3] S ::= #20 | #09 | #0D | #0A aoqi@0: for (; ;) { aoqi@0: if (finish <= start) aoqi@0: fillbuf(); aoqi@0: if (finish <= start) aoqi@0: return isSpace; aoqi@0: aoqi@0: c = buf[start++]; aoqi@0: if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') { aoqi@0: isSpace = true; aoqi@0: aoqi@0: // aoqi@0: // CR, LF are line endings ... CLRF is one, not two! aoqi@0: // aoqi@0: if ((c == '\n' || c == '\r') && !isInternal()) { aoqi@0: if (!(c == '\n' && sawCR)) { aoqi@0: lineNumber++; aoqi@0: sawCR = false; aoqi@0: } aoqi@0: if (c == '\r') aoqi@0: sawCR = true; aoqi@0: } aoqi@0: } else { aoqi@0: start--; aoqi@0: return isSpace; aoqi@0: } aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: aoqi@0: /** aoqi@0: * normal content; whitespace in markup may be handled aoqi@0: * specially if the parser uses the content model. aoqi@0: *

aoqi@0: *

content terminates with markup delimiter characters, aoqi@0: * namely ampersand (&) and left angle bracket (<). aoqi@0: *

aoqi@0: *

the document handler's characters() method is called aoqi@0: * on all the content found aoqi@0: */ aoqi@0: public boolean parsedContent(DTDEventListener docHandler aoqi@0: /*ElementValidator validator*/) aoqi@0: throws IOException, SAXException { aoqi@0: aoqi@0: // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) aoqi@0: aoqi@0: int first; // first char to return aoqi@0: int last; // last char to return aoqi@0: boolean sawContent; // sent any chars? aoqi@0: char c; aoqi@0: aoqi@0: // deliver right out of the buffer, until delimiter, EOF, aoqi@0: // or error, refilling as we go aoqi@0: for (first = last = start, sawContent = false; ; last++) { aoqi@0: aoqi@0: // buffer empty? aoqi@0: if (last >= finish) { aoqi@0: if (last > first) { aoqi@0: // validator.text (); aoqi@0: docHandler.characters(buf, first, last - first); aoqi@0: sawContent = true; aoqi@0: start = last; aoqi@0: } aoqi@0: if (isEOF()) // calls fillbuf aoqi@0: return sawContent; aoqi@0: first = start; aoqi@0: last = first - 1; // incremented in loop aoqi@0: continue; aoqi@0: } aoqi@0: aoqi@0: c = buf[last]; aoqi@0: aoqi@0: // aoqi@0: // pass most chars through ASAP; this inlines the code of aoqi@0: // [2] !XmlChars.isChar(c) leaving only characters needing aoqi@0: // special treatment ... line ends, surrogates, and: aoqi@0: // 0x0026 == '&' aoqi@0: // 0x003C == '<' aoqi@0: // 0x005D == ']' aoqi@0: // Comparisons ordered for speed on 'typical' text aoqi@0: // aoqi@0: if ((c > 0x005D && c <= 0xD7FF) // a-z and more aoqi@0: || (c < 0x0026 && c >= 0x0020) // space & punct aoqi@0: || (c > 0x003C && c < 0x005D) // A-Z & punct aoqi@0: || (c > 0x0026 && c < 0x003C) // 0-9 & punct aoqi@0: || c == 0x0009 aoqi@0: || (c >= 0xE000 && c <= 0xFFFD) aoqi@0: ) aoqi@0: continue; aoqi@0: aoqi@0: // terminate on markup delimiters aoqi@0: if (c == '<' || c == '&') aoqi@0: break; aoqi@0: aoqi@0: // count lines aoqi@0: if (c == '\n') { aoqi@0: if (!isInternal()) aoqi@0: lineNumber++; aoqi@0: continue; aoqi@0: } aoqi@0: aoqi@0: // External entities get CR, CRLF --> LF mapping aoqi@0: // Internal ones got it already, and we can't repeat aoqi@0: // else we break char ref handling!! aoqi@0: if (c == '\r') { aoqi@0: if (isInternal()) aoqi@0: continue; aoqi@0: aoqi@0: docHandler.characters(buf, first, last - first); aoqi@0: docHandler.characters(newline, 0, 1); aoqi@0: sawContent = true; aoqi@0: lineNumber++; aoqi@0: if (finish > (last + 1)) { aoqi@0: if (buf[last + 1] == '\n') aoqi@0: last++; aoqi@0: } else { // CR at end of buffer aoqi@0: // XXX case not yet handled: CRLF here will look like two lines aoqi@0: } aoqi@0: first = start = last + 1; aoqi@0: continue; aoqi@0: } aoqi@0: aoqi@0: // ']]>' is a WF error -- must fail if we see it aoqi@0: if (c == ']') { aoqi@0: switch (finish - last) { aoqi@0: // for suspicious end-of-buffer cases, get more data aoqi@0: // into the buffer to rule out this sequence. aoqi@0: case 2: aoqi@0: if (buf[last + 1] != ']') aoqi@0: continue; aoqi@0: // FALLTHROUGH aoqi@0: aoqi@0: case 1: aoqi@0: if (reader == null || isClosed) aoqi@0: continue; aoqi@0: if (last == first) aoqi@0: throw new InternalError("fillbuf"); aoqi@0: last--; aoqi@0: if (last > first) { aoqi@0: // validator.text (); aoqi@0: docHandler.characters(buf, first, last - first); aoqi@0: sawContent = true; aoqi@0: start = last; aoqi@0: } aoqi@0: fillbuf(); aoqi@0: first = last = start; aoqi@0: continue; aoqi@0: aoqi@0: // otherwise any "]]>" would be buffered, and we can aoqi@0: // see right away if that's what we have aoqi@0: default: aoqi@0: if (buf[last + 1] == ']' && buf[last + 2] == '>') aoqi@0: fatal("P-072", null); aoqi@0: continue; aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: // correctly paired surrogates are OK aoqi@0: if (c >= 0xd800 && c <= 0xdfff) { aoqi@0: if ((last + 1) >= finish) { aoqi@0: if (last > first) { aoqi@0: // validator.text (); aoqi@0: docHandler.characters(buf, first, last - first); aoqi@0: sawContent = true; aoqi@0: start = last + 1; aoqi@0: } aoqi@0: if (isEOF()) { // calls fillbuf aoqi@0: fatal("P-081", aoqi@0: new Object[]{Integer.toHexString(c)}); aoqi@0: } aoqi@0: first = start; aoqi@0: last = first; aoqi@0: continue; aoqi@0: } aoqi@0: if (checkSurrogatePair(last)) aoqi@0: last++; aoqi@0: else { aoqi@0: last--; aoqi@0: // also terminate on surrogate pair oddities aoqi@0: break; aoqi@0: } aoqi@0: continue; aoqi@0: } aoqi@0: aoqi@0: fatal("P-071", new Object[]{Integer.toHexString(c)}); aoqi@0: } aoqi@0: if (last == first) aoqi@0: return sawContent; aoqi@0: // validator.text (); aoqi@0: docHandler.characters(buf, first, last - first); aoqi@0: start = last; aoqi@0: return true; aoqi@0: } aoqi@0: aoqi@0: aoqi@0: /** aoqi@0: * CDATA -- character data, terminated by "]]>" and optionally aoqi@0: * including unescaped markup delimiters (ampersand and left angle aoqi@0: * bracket). This should otherwise be exactly like character data, aoqi@0: * modulo differences in error report details. aoqi@0: *

aoqi@0: *

The document handler's characters() or ignorableWhitespace() aoqi@0: * methods are invoked on all the character data found aoqi@0: * aoqi@0: * @param docHandler gets callbacks for character data aoqi@0: * @param ignorableWhitespace if true, whitespace characters will aoqi@0: * be reported using docHandler.ignorableWhitespace(); implicitly, aoqi@0: * non-whitespace characters will cause validation errors aoqi@0: * @param whitespaceInvalidMessage if true, ignorable whitespace aoqi@0: * causes a validity error report as well as a callback aoqi@0: */ aoqi@0: public boolean unparsedContent(DTDEventListener docHandler, aoqi@0: /*ElementValidator validator,*/ aoqi@0: boolean ignorableWhitespace, aoqi@0: String whitespaceInvalidMessage) aoqi@0: throws IOException, SAXException { aoqi@0: aoqi@0: // [18] CDSect ::= CDStart CData CDEnd aoqi@0: // [19] CDStart ::= '' Char*)) aoqi@0: // [21] CDEnd ::= ']]>' aoqi@0: aoqi@0: // caller peeked the leading '<' ... aoqi@0: if (!peek("![CDATA[", null)) aoqi@0: return false; aoqi@0: docHandler.startCDATA(); aoqi@0: aoqi@0: // only a literal ']]>' stops this ... aoqi@0: int last; aoqi@0: aoqi@0: for (; ;) { // until ']]>' seen aoqi@0: boolean done = false; aoqi@0: char c; aoqi@0: aoqi@0: // don't report ignorable whitespace as "text" for aoqi@0: // validation purposes. aoqi@0: boolean white = ignorableWhitespace; aoqi@0: aoqi@0: for (last = start; last < finish; last++) { aoqi@0: c = buf[last]; aoqi@0: aoqi@0: // aoqi@0: // Reject illegal characters. aoqi@0: // aoqi@0: if (!XmlChars.isChar(c)) { aoqi@0: white = false; aoqi@0: if (c >= 0xd800 && c <= 0xdfff) { aoqi@0: if (checkSurrogatePair(last)) { aoqi@0: last++; aoqi@0: continue; aoqi@0: } else { aoqi@0: last--; aoqi@0: break; aoqi@0: } aoqi@0: } aoqi@0: fatal("P-071", new Object[] aoqi@0: {Integer.toHexString(buf[last])}); aoqi@0: } aoqi@0: if (c == '\n') { aoqi@0: if (!isInternal()) aoqi@0: lineNumber++; aoqi@0: continue; aoqi@0: } aoqi@0: if (c == '\r') { aoqi@0: // As above, we can't repeat CR/CRLF --> LF mapping aoqi@0: if (isInternal()) aoqi@0: continue; aoqi@0: aoqi@0: if (white) { aoqi@0: if (whitespaceInvalidMessage != null) aoqi@0: errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, aoqi@0: whitespaceInvalidMessage), null)); aoqi@0: docHandler.ignorableWhitespace(buf, start, aoqi@0: last - start); aoqi@0: docHandler.ignorableWhitespace(newline, 0, 1); aoqi@0: } else { aoqi@0: // validator.text (); aoqi@0: docHandler.characters(buf, start, last - start); aoqi@0: docHandler.characters(newline, 0, 1); aoqi@0: } aoqi@0: lineNumber++; aoqi@0: if (finish > (last + 1)) { aoqi@0: if (buf[last + 1] == '\n') aoqi@0: last++; aoqi@0: } else { // CR at end of buffer aoqi@0: // XXX case not yet handled ... as above aoqi@0: } aoqi@0: start = last + 1; aoqi@0: continue; aoqi@0: } aoqi@0: if (c != ']') { aoqi@0: if (c != ' ' && c != '\t') aoqi@0: white = false; aoqi@0: continue; aoqi@0: } aoqi@0: if ((last + 2) < finish) { aoqi@0: if (buf[last + 1] == ']' && buf[last + 2] == '>') { aoqi@0: done = true; aoqi@0: break; aoqi@0: } aoqi@0: white = false; aoqi@0: continue; aoqi@0: } else { aoqi@0: //last--; aoqi@0: break; aoqi@0: } aoqi@0: } aoqi@0: if (white) { aoqi@0: if (whitespaceInvalidMessage != null) aoqi@0: errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, aoqi@0: whitespaceInvalidMessage), null)); aoqi@0: docHandler.ignorableWhitespace(buf, start, last - start); aoqi@0: } else { aoqi@0: // validator.text (); aoqi@0: docHandler.characters(buf, start, last - start); aoqi@0: } aoqi@0: if (done) { aoqi@0: start = last + 3; aoqi@0: break; aoqi@0: } aoqi@0: start = last; aoqi@0: if (isEOF()) aoqi@0: fatal("P-073", null); aoqi@0: } aoqi@0: docHandler.endCDATA(); aoqi@0: return true; aoqi@0: } aoqi@0: aoqi@0: // return false to backstep at end of buffer) aoqi@0: private boolean checkSurrogatePair(int offset) aoqi@0: throws SAXException { aoqi@0: aoqi@0: if ((offset + 1) >= finish) aoqi@0: return false; aoqi@0: aoqi@0: char c1 = buf[offset++]; aoqi@0: char c2 = buf[offset]; aoqi@0: aoqi@0: if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff)) aoqi@0: return true; aoqi@0: fatal("P-074", new Object[]{ aoqi@0: Integer.toHexString(c1 & 0x0ffff), aoqi@0: Integer.toHexString(c2 & 0x0ffff) aoqi@0: }); aoqi@0: return false; aoqi@0: } aoqi@0: aoqi@0: aoqi@0: /** aoqi@0: * whitespace in markup (flagged to app, discardable) aoqi@0: *

aoqi@0: *

the document handler's ignorableWhitespace() method aoqi@0: * is called on all the whitespace found aoqi@0: */ aoqi@0: public boolean ignorableWhitespace(DTDEventListener handler) aoqi@0: throws IOException, SAXException { aoqi@0: aoqi@0: char c; aoqi@0: boolean isSpace = false; aoqi@0: int first; aoqi@0: aoqi@0: // [3] S ::= #20 | #09 | #0D | #0A aoqi@0: for (first = start; ;) { aoqi@0: if (finish <= start) { aoqi@0: if (isSpace) aoqi@0: handler.ignorableWhitespace(buf, first, start - first); aoqi@0: fillbuf(); aoqi@0: first = start; aoqi@0: } aoqi@0: if (finish <= start) aoqi@0: return isSpace; aoqi@0: aoqi@0: c = buf[start++]; aoqi@0: switch (c) { aoqi@0: case '\n': aoqi@0: if (!isInternal()) aoqi@0: lineNumber++; aoqi@0: // XXX handles Macintosh line endings wrong aoqi@0: // fallthrough aoqi@0: case 0x09: aoqi@0: case 0x20: aoqi@0: isSpace = true; aoqi@0: continue; aoqi@0: aoqi@0: case '\r': aoqi@0: isSpace = true; aoqi@0: if (!isInternal()) aoqi@0: lineNumber++; aoqi@0: handler.ignorableWhitespace(buf, first, aoqi@0: (start - 1) - first); aoqi@0: handler.ignorableWhitespace(newline, 0, 1); aoqi@0: if (start < finish && buf[start] == '\n') aoqi@0: ++start; aoqi@0: first = start; aoqi@0: continue; aoqi@0: aoqi@0: default: aoqi@0: ungetc(); aoqi@0: if (isSpace) aoqi@0: handler.ignorableWhitespace(buf, first, start - first); aoqi@0: return isSpace; aoqi@0: } aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * returns false iff 'next' string isn't as provided, aoqi@0: * else skips that text and returns true. aoqi@0: *

aoqi@0: *

NOTE: two alternative string representations are aoqi@0: * both passed in, since one is faster. aoqi@0: */ aoqi@0: public boolean peek(String next, char chars []) aoqi@0: throws IOException, SAXException { aoqi@0: aoqi@0: int len; aoqi@0: int i; aoqi@0: aoqi@0: if (chars != null) aoqi@0: len = chars.length; aoqi@0: else aoqi@0: len = next.length(); aoqi@0: aoqi@0: // buffer should hold the whole thing ... give it a aoqi@0: // chance for the end-of-buffer case and cope with EOF aoqi@0: // by letting fillbuf compact and fill aoqi@0: if (finish <= start || (finish - start) < len) aoqi@0: fillbuf(); aoqi@0: aoqi@0: // can't peek past EOF aoqi@0: if (finish <= start) aoqi@0: return false; aoqi@0: aoqi@0: // compare the string; consume iff it matches aoqi@0: if (chars != null) { aoqi@0: for (i = 0; i < len && (start + i) < finish; i++) { aoqi@0: if (buf[start + i] != chars[i]) aoqi@0: return false; aoqi@0: } aoqi@0: } else { aoqi@0: for (i = 0; i < len && (start + i) < finish; i++) { aoqi@0: if (buf[start + i] != next.charAt(i)) aoqi@0: return false; aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: // if the first fillbuf didn't get enough data, give aoqi@0: // fillbuf another chance to read aoqi@0: if (i < len) { aoqi@0: if (reader == null || isClosed) aoqi@0: return false; aoqi@0: aoqi@0: // aoqi@0: // This diagnostic "knows" that the only way big strings would aoqi@0: // fail to be peeked is where it's a symbol ... e.g. for an aoqi@0: // construct. That knowledge could also be applied aoqi@0: // to get rid of the symbol length constraint, since having aoqi@0: // the wrong symbol is a fatal error anyway ... aoqi@0: // aoqi@0: if (len > buf.length) aoqi@0: fatal("P-077", new Object[]{new Integer(buf.length)}); aoqi@0: aoqi@0: fillbuf(); aoqi@0: return peek(next, chars); aoqi@0: } aoqi@0: aoqi@0: start += len; aoqi@0: return true; aoqi@0: } aoqi@0: aoqi@0: aoqi@0: // aoqi@0: // Support for reporting the internal DTD subset, so aoqi@0: // declarations can be recreated. This is collected as a single aoqi@0: // string; such subsets are normally small, and many applications aoqi@0: // don't even care about this. aoqi@0: // aoqi@0: public void startRemembering() { aoqi@0: aoqi@0: if (startRemember != 0) aoqi@0: throw new InternalError(); aoqi@0: startRemember = start; aoqi@0: } aoqi@0: aoqi@0: public String rememberText() { aoqi@0: aoqi@0: String retval; aoqi@0: aoqi@0: // If the internal subset crossed a buffer boundary, we aoqi@0: // created a temporary buffer. aoqi@0: if (rememberedText != null) { aoqi@0: rememberedText.append(buf, startRemember, aoqi@0: start - startRemember); aoqi@0: retval = rememberedText.toString(); aoqi@0: } else aoqi@0: retval = new String(buf, startRemember, aoqi@0: start - startRemember); aoqi@0: aoqi@0: startRemember = 0; aoqi@0: rememberedText = null; aoqi@0: return retval; aoqi@0: } aoqi@0: aoqi@0: private InputEntity getTopEntity() { aoqi@0: aoqi@0: InputEntity current = this; aoqi@0: aoqi@0: // don't report locations within internal entities! aoqi@0: aoqi@0: while (current != null && current.input == null) aoqi@0: current = current.next; aoqi@0: return current == null ? this : current; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Returns the public ID of this input source, if known aoqi@0: */ aoqi@0: public String getPublicId() { aoqi@0: aoqi@0: InputEntity where = getTopEntity(); aoqi@0: if (where == this) aoqi@0: return input.getPublicId(); aoqi@0: return where.getPublicId(); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Returns the system ID of this input source, if known aoqi@0: */ aoqi@0: public String getSystemId() { aoqi@0: aoqi@0: InputEntity where = getTopEntity(); aoqi@0: if (where == this) aoqi@0: return input.getSystemId(); aoqi@0: return where.getSystemId(); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Returns the current line number in this input source aoqi@0: */ aoqi@0: public int getLineNumber() { aoqi@0: aoqi@0: InputEntity where = getTopEntity(); aoqi@0: if (where == this) aoqi@0: return lineNumber; aoqi@0: return where.getLineNumber(); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * returns -1; maintaining column numbers hurts performance aoqi@0: */ aoqi@0: public int getColumnNumber() { aoqi@0: aoqi@0: return -1; // not maintained (speed) aoqi@0: } aoqi@0: aoqi@0: aoqi@0: // aoqi@0: // n.b. for non-EOF end-of-buffer cases, reader should return aoqi@0: // at least a handful of bytes so various lookaheads behave. aoqi@0: // aoqi@0: // two character pushback exists except at first; characters aoqi@0: // represented by surrogate pairs can't be pushed back (they'd aoqi@0: // only be in character data anyway). aoqi@0: // aoqi@0: // DTD exception thrown on char conversion problems; line number aoqi@0: // will be low, as a rule. aoqi@0: // aoqi@0: private void fillbuf() throws IOException, SAXException { aoqi@0: aoqi@0: // don't touched fixed buffers, that'll usually aoqi@0: // change entity values (and isn't needed anyway) aoqi@0: // likewise, ignore closed streams aoqi@0: if (reader == null || isClosed) aoqi@0: return; aoqi@0: aoqi@0: // if remembering DTD text, copy! aoqi@0: if (startRemember != 0) { aoqi@0: if (rememberedText == null) aoqi@0: rememberedText = new StringBuffer(buf.length); aoqi@0: rememberedText.append(buf, startRemember, aoqi@0: start - startRemember); aoqi@0: } aoqi@0: aoqi@0: boolean extra = (finish > 0) && (start > 0); aoqi@0: int len; aoqi@0: aoqi@0: if (extra) // extra pushback aoqi@0: start--; aoqi@0: len = finish - start; aoqi@0: aoqi@0: System.arraycopy(buf, start, buf, 0, len); aoqi@0: start = 0; aoqi@0: finish = len; aoqi@0: aoqi@0: try { aoqi@0: len = buf.length - len; aoqi@0: len = reader.read(buf, finish, len); aoqi@0: } catch (UnsupportedEncodingException e) { aoqi@0: fatal("P-075", new Object[]{e.getMessage()}); aoqi@0: } catch (CharConversionException e) { aoqi@0: fatal("P-076", new Object[]{e.getMessage()}); aoqi@0: } aoqi@0: if (len >= 0) aoqi@0: finish += len; aoqi@0: else aoqi@0: close(); aoqi@0: if (extra) // extra pushback aoqi@0: start++; aoqi@0: aoqi@0: if (startRemember != 0) aoqi@0: // assert extra == true aoqi@0: startRemember = 1; aoqi@0: } aoqi@0: aoqi@0: public void close() { aoqi@0: aoqi@0: try { aoqi@0: if (reader != null && !isClosed) aoqi@0: reader.close(); aoqi@0: isClosed = true; aoqi@0: } catch (IOException e) { aoqi@0: /* NOTHING */ aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: aoqi@0: private void fatal(String messageId, Object params []) aoqi@0: throws SAXException { aoqi@0: aoqi@0: SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null); aoqi@0: aoqi@0: // not continuable ... e.g. WF errors aoqi@0: close(); aoqi@0: errHandler.fatalError(x); aoqi@0: throw x; aoqi@0: } aoqi@0: }