ohair@286: /* ohair@286: * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. ohair@286: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ohair@286: * ohair@286: * This code is free software; you can redistribute it and/or modify it ohair@286: * under the terms of the GNU General Public License version 2 only, as ohair@286: * published by the Free Software Foundation. Oracle designates this ohair@286: * particular file as subject to the "Classpath" exception as provided ohair@286: * by Oracle in the LICENSE file that accompanied this code. ohair@286: * ohair@286: * This code is distributed in the hope that it will be useful, but WITHOUT ohair@286: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ohair@286: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ohair@286: * version 2 for more details (a copy is included in the LICENSE file that ohair@286: * accompanied this code). ohair@286: * ohair@286: * You should have received a copy of the GNU General Public License version ohair@286: * 2 along with this work; if not, write to the Free Software Foundation, ohair@286: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ohair@286: * ohair@286: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ohair@286: * or visit www.oracle.com if you need additional information or have any ohair@286: * questions. ohair@286: */ ohair@286: ohair@286: package com.sun.xml.internal.dtdparser; ohair@286: ohair@286: import org.xml.sax.InputSource; ohair@286: import org.xml.sax.SAXException; ohair@286: import org.xml.sax.SAXParseException; ohair@286: ohair@286: import java.io.CharConversionException; ohair@286: import java.io.IOException; ohair@286: import java.io.InputStream; ohair@286: import java.io.InputStreamReader; ohair@286: import java.io.Reader; ohair@286: import java.io.UnsupportedEncodingException; ohair@286: import java.net.URL; ohair@286: import java.util.Locale; ohair@286: ohair@286: /** ohair@286: * This is how the parser talks to its input entities, of all kinds. ohair@286: * The entities are in a stack. ohair@286: *

ohair@286: *

For internal entities, the character arrays are referenced here, ohair@286: * and read from as needed (they're read-only). External entities have ohair@286: * mutable buffers, that are read into as needed. ohair@286: *

ohair@286: *

Note: This maps CRLF (and CR) to LF without regard for ohair@286: * whether it's in an external (parsed) entity or not. The XML 1.0 spec ohair@286: * is inconsistent in explaining EOL handling; this is the sensible way. ohair@286: * ohair@286: * @author David Brownell ohair@286: * @author Janet Koenig ohair@286: * @version 1.4 00/08/05 ohair@286: */ ohair@286: public class InputEntity { ohair@286: private int start, finish; ohair@286: private char buf []; ohair@286: private int lineNumber = 1; ohair@286: private boolean returnedFirstHalf = false; ohair@286: private boolean maybeInCRLF = false; ohair@286: ohair@286: // name of entity (never main document or unnamed DTD PE) ohair@286: private String name; ohair@286: ohair@286: private InputEntity next; ohair@286: ohair@286: // for system and public IDs in diagnostics ohair@286: private InputSource input; ohair@286: ohair@286: // this is a buffer; some buffers can be replenished. ohair@286: private Reader reader; ohair@286: private boolean isClosed; ohair@286: ohair@286: private DTDEventListener errHandler; ohair@286: private Locale locale; ohair@286: ohair@286: private StringBuffer rememberedText; ohair@286: private int startRemember; ohair@286: ohair@286: // record if this is a PE, so endParsedEntity won't be called ohair@286: private boolean isPE; ohair@286: ohair@286: // InputStreamReader throws an internal per-read exception, so ohair@286: // we minimize reads. We also add a byte to compensate for the ohair@286: // "ungetc" byte we keep, so that our downstream reads are as ohair@286: // nicely sized as we can make them. ohair@286: final private static int BUFSIZ = 8 * 1024 + 1; ohair@286: ohair@286: final private static char newline [] = {'\n'}; ohair@286: ohair@286: public static InputEntity getInputEntity(DTDEventListener h, Locale l) { ohair@286: InputEntity retval = new InputEntity(); ohair@286: retval.errHandler = h; ohair@286: retval.locale = l; ohair@286: return retval; ohair@286: } ohair@286: ohair@286: private InputEntity() { ohair@286: } ohair@286: ohair@286: // ohair@286: // predicate: return true iff this is an internal entity reader, ohair@286: // and so may safely be "popped" as needed. external entities have ohair@286: // syntax to uphold; internal parameter entities have at most validity ohair@286: // constraints to monitor. also, only external entities get decent ohair@286: // location diagnostics. ohair@286: // ohair@286: public boolean isInternal() { ohair@286: return reader == null; ohair@286: } ohair@286: ohair@286: // ohair@286: // predicate: return true iff this is the toplevel document ohair@286: // ohair@286: public boolean isDocument() { ohair@286: return next == null; ohair@286: } ohair@286: ohair@286: // ohair@286: // predicate: return true iff this is a PE expansion (so that ohair@286: // LexicalEventListner.endParsedEntity won't be called) ohair@286: // ohair@286: public boolean isParameterEntity() { ohair@286: return isPE; ohair@286: } ohair@286: ohair@286: // ohair@286: // return name of current entity ohair@286: // ohair@286: public String getName() { ohair@286: return name; ohair@286: } ohair@286: ohair@286: // ohair@286: // use this for an external parsed entity ohair@286: // ohair@286: public void init(InputSource in, String name, InputEntity stack, ohair@286: boolean isPE) ohair@286: throws IOException, SAXException { ohair@286: ohair@286: input = in; ohair@286: this.isPE = isPE; ohair@286: reader = in.getCharacterStream(); ohair@286: ohair@286: if (reader == null) { ohair@286: InputStream bytes = in.getByteStream(); ohair@286: ohair@286: if (bytes == null) ohair@286: reader = XmlReader.createReader(new URL(in.getSystemId()) ohair@286: .openStream()); ohair@286: else if (in.getEncoding() != null) ohair@286: reader = XmlReader.createReader(in.getByteStream(), ohair@286: in.getEncoding()); ohair@286: else ohair@286: reader = XmlReader.createReader(in.getByteStream()); ohair@286: } ohair@286: next = stack; ohair@286: buf = new char[BUFSIZ]; ohair@286: this.name = name; ohair@286: checkRecursion(stack); ohair@286: } ohair@286: ohair@286: // ohair@286: // use this for an internal parsed entity; buffer is readonly ohair@286: // ohair@286: public void init(char b [], String name, InputEntity stack, boolean isPE) ohair@286: throws SAXException { ohair@286: ohair@286: next = stack; ohair@286: buf = b; ohair@286: finish = b.length; ohair@286: this.name = name; ohair@286: this.isPE = isPE; ohair@286: checkRecursion(stack); ohair@286: } ohair@286: ohair@286: private void checkRecursion(InputEntity stack) ohair@286: throws SAXException { ohair@286: ohair@286: if (stack == null) ohair@286: return; ohair@286: for (stack = stack.next; stack != null; stack = stack.next) { ohair@286: if (stack.name != null && stack.name.equals(name)) ohair@286: fatal("P-069", new Object[]{name}); ohair@286: } ohair@286: } ohair@286: ohair@286: public InputEntity pop() throws IOException { ohair@286: ohair@286: // caller has ensured there's nothing left to read ohair@286: close(); ohair@286: return next; ohair@286: } ohair@286: ohair@286: /** ohair@286: * returns true iff there's no more data to consume ... ohair@286: */ ohair@286: public boolean isEOF() throws IOException, SAXException { ohair@286: ohair@286: // called to ensure WF-ness of included entities and to pop ohair@286: // input entities appropriately ... EOF is not always legal. ohair@286: if (start >= finish) { ohair@286: fillbuf(); ohair@286: return start >= finish; ohair@286: } else ohair@286: return false; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Returns the name of the encoding in use, else null; the name ohair@286: * returned is in as standard a form as we can get. ohair@286: */ ohair@286: public String getEncoding() { ohair@286: ohair@286: if (reader == null) ohair@286: return null; ohair@286: if (reader instanceof XmlReader) ohair@286: return ((XmlReader) reader).getEncoding(); ohair@286: ohair@286: // XXX prefer a java2std() call to normalize names... ohair@286: ohair@286: if (reader instanceof InputStreamReader) ohair@286: return ((InputStreamReader) reader).getEncoding(); ohair@286: return null; ohair@286: } ohair@286: ohair@286: ohair@286: /** ohair@286: * returns the next name char, or NUL ... faster than getc(), ohair@286: * and the common "name or nmtoken must be next" case won't ohair@286: * need ungetc(). ohair@286: */ ohair@286: public char getNameChar() throws IOException, SAXException { ohair@286: ohair@286: if (finish <= start) ohair@286: fillbuf(); ohair@286: if (finish > start) { ohair@286: char c = buf[start++]; ohair@286: if (XmlChars.isNameChar(c)) ohair@286: return c; ohair@286: start--; ohair@286: } ohair@286: return 0; ohair@286: } ohair@286: ohair@286: /** ohair@286: * gets the next Java character -- might be part of an XML ohair@286: * text character represented by a surrogate pair, or be ohair@286: * the end of the entity. ohair@286: */ ohair@286: public char getc() throws IOException, SAXException { ohair@286: ohair@286: if (finish <= start) ohair@286: fillbuf(); ohair@286: if (finish > start) { ohair@286: char c = buf[start++]; ohair@286: ohair@286: // [2] Char ::= #x0009 | #x000A | #x000D ohair@286: // | [#x0020-#xD7FF] ohair@286: // | [#xE000-#xFFFD] ohair@286: // plus surrogate _pairs_ representing [#x10000-#x10ffff] ohair@286: if (returnedFirstHalf) { ohair@286: if (c >= 0xdc00 && c <= 0xdfff) { ohair@286: returnedFirstHalf = false; ohair@286: return c; ohair@286: } else ohair@286: fatal("P-070", new Object[]{Integer.toHexString(c)}); ohair@286: } ohair@286: if ((c >= 0x0020 && c <= 0xD7FF) ohair@286: || c == 0x0009 ohair@286: // no surrogates! ohair@286: || (c >= 0xE000 && c <= 0xFFFD)) ohair@286: return c; ohair@286: ohair@286: // ohair@286: // CRLF and CR are both line ends; map both to LF, and ohair@286: // keep line count correct. ohair@286: // ohair@286: else if (c == '\r' && !isInternal()) { ohair@286: maybeInCRLF = true; ohair@286: c = getc(); ohair@286: if (c != '\n') ohair@286: ungetc(); ohair@286: maybeInCRLF = false; ohair@286: ohair@286: lineNumber++; ohair@286: return '\n'; ohair@286: ohair@286: } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF ohair@286: if (!isInternal() && !maybeInCRLF) ohair@286: lineNumber++; ohair@286: return c; ohair@286: } ohair@286: ohair@286: // surrogates... ohair@286: if (c >= 0xd800 && c < 0xdc00) { ohair@286: returnedFirstHalf = true; ohair@286: return c; ohair@286: } ohair@286: ohair@286: fatal("P-071", new Object[]{Integer.toHexString(c)}); ohair@286: } ohair@286: throw new EndOfInputException(); ohair@286: } ohair@286: ohair@286: ohair@286: /** ohair@286: * lookahead one character ohair@286: */ ohair@286: public boolean peekc(char c) throws IOException, SAXException { ohair@286: ohair@286: if (finish <= start) ohair@286: fillbuf(); ohair@286: if (finish > start) { ohair@286: if (buf[start] == c) { ohair@286: start++; ohair@286: return true; ohair@286: } else ohair@286: return false; ohair@286: } ohair@286: return false; ohair@286: } ohair@286: ohair@286: ohair@286: /** ohair@286: * two character pushback is guaranteed ohair@286: */ ohair@286: public void ungetc() { ohair@286: ohair@286: if (start == 0) ohair@286: throw new InternalError("ungetc"); ohair@286: start--; ohair@286: ohair@286: if (buf[start] == '\n' || buf[start] == '\r') { ohair@286: if (!isInternal()) ohair@286: lineNumber--; ohair@286: } else if (returnedFirstHalf) ohair@286: returnedFirstHalf = false; ohair@286: } ohair@286: ohair@286: ohair@286: /** ohair@286: * optional grammatical whitespace (discarded) ohair@286: */ ohair@286: public boolean maybeWhitespace() ohair@286: throws IOException, SAXException { ohair@286: ohair@286: char c; ohair@286: boolean isSpace = false; ohair@286: boolean sawCR = false; ohair@286: ohair@286: // [3] S ::= #20 | #09 | #0D | #0A ohair@286: for (; ;) { ohair@286: if (finish <= start) ohair@286: fillbuf(); ohair@286: if (finish <= start) ohair@286: return isSpace; ohair@286: ohair@286: c = buf[start++]; ohair@286: if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') { ohair@286: isSpace = true; ohair@286: ohair@286: // ohair@286: // CR, LF are line endings ... CLRF is one, not two! ohair@286: // ohair@286: if ((c == '\n' || c == '\r') && !isInternal()) { ohair@286: if (!(c == '\n' && sawCR)) { ohair@286: lineNumber++; ohair@286: sawCR = false; ohair@286: } ohair@286: if (c == '\r') ohair@286: sawCR = true; ohair@286: } ohair@286: } else { ohair@286: start--; ohair@286: return isSpace; ohair@286: } ohair@286: } ohair@286: } ohair@286: ohair@286: ohair@286: /** ohair@286: * normal content; whitespace in markup may be handled ohair@286: * specially if the parser uses the content model. ohair@286: *

ohair@286: *

content terminates with markup delimiter characters, ohair@286: * namely ampersand (&amp;) and left angle bracket (&lt;). ohair@286: *

ohair@286: *

the document handler's characters() method is called ohair@286: * on all the content found ohair@286: */ ohair@286: public boolean parsedContent(DTDEventListener docHandler ohair@286: /*ElementValidator validator*/) ohair@286: throws IOException, SAXException { ohair@286: ohair@286: // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) ohair@286: ohair@286: int first; // first char to return ohair@286: int last; // last char to return ohair@286: boolean sawContent; // sent any chars? ohair@286: char c; ohair@286: ohair@286: // deliver right out of the buffer, until delimiter, EOF, ohair@286: // or error, refilling as we go ohair@286: for (first = last = start, sawContent = false; ; last++) { ohair@286: ohair@286: // buffer empty? ohair@286: if (last >= finish) { ohair@286: if (last > first) { ohair@286: // validator.text (); ohair@286: docHandler.characters(buf, first, last - first); ohair@286: sawContent = true; ohair@286: start = last; ohair@286: } ohair@286: if (isEOF()) // calls fillbuf ohair@286: return sawContent; ohair@286: first = start; ohair@286: last = first - 1; // incremented in loop ohair@286: continue; ohair@286: } ohair@286: ohair@286: c = buf[last]; ohair@286: ohair@286: // ohair@286: // pass most chars through ASAP; this inlines the code of ohair@286: // [2] !XmlChars.isChar(c) leaving only characters needing ohair@286: // special treatment ... line ends, surrogates, and: ohair@286: // 0x0026 == '&' ohair@286: // 0x003C == '<' ohair@286: // 0x005D == ']' ohair@286: // Comparisons ordered for speed on 'typical' text ohair@286: // ohair@286: if ((c > 0x005D && c <= 0xD7FF) // a-z and more ohair@286: || (c < 0x0026 && c >= 0x0020) // space & punct ohair@286: || (c > 0x003C && c < 0x005D) // A-Z & punct ohair@286: || (c > 0x0026 && c < 0x003C) // 0-9 & punct ohair@286: || c == 0x0009 ohair@286: || (c >= 0xE000 && c <= 0xFFFD) ohair@286: ) ohair@286: continue; ohair@286: ohair@286: // terminate on markup delimiters ohair@286: if (c == '<' || c == '&') ohair@286: break; ohair@286: ohair@286: // count lines ohair@286: if (c == '\n') { ohair@286: if (!isInternal()) ohair@286: lineNumber++; ohair@286: continue; ohair@286: } ohair@286: ohair@286: // External entities get CR, CRLF --> LF mapping ohair@286: // Internal ones got it already, and we can't repeat ohair@286: // else we break char ref handling!! ohair@286: if (c == '\r') { ohair@286: if (isInternal()) ohair@286: continue; ohair@286: ohair@286: docHandler.characters(buf, first, last - first); ohair@286: docHandler.characters(newline, 0, 1); ohair@286: sawContent = true; ohair@286: lineNumber++; ohair@286: if (finish > (last + 1)) { ohair@286: if (buf[last + 1] == '\n') ohair@286: last++; ohair@286: } else { // CR at end of buffer ohair@286: // XXX case not yet handled: CRLF here will look like two lines ohair@286: } ohair@286: first = start = last + 1; ohair@286: continue; ohair@286: } ohair@286: ohair@286: // ']]>' is a WF error -- must fail if we see it ohair@286: if (c == ']') { ohair@286: switch (finish - last) { ohair@286: // for suspicious end-of-buffer cases, get more data ohair@286: // into the buffer to rule out this sequence. ohair@286: case 2: ohair@286: if (buf[last + 1] != ']') ohair@286: continue; ohair@286: // FALLTHROUGH ohair@286: ohair@286: case 1: ohair@286: if (reader == null || isClosed) ohair@286: continue; ohair@286: if (last == first) ohair@286: throw new InternalError("fillbuf"); ohair@286: last--; ohair@286: if (last > first) { ohair@286: // validator.text (); ohair@286: docHandler.characters(buf, first, last - first); ohair@286: sawContent = true; ohair@286: start = last; ohair@286: } ohair@286: fillbuf(); ohair@286: first = last = start; ohair@286: continue; ohair@286: ohair@286: // otherwise any "]]>" would be buffered, and we can ohair@286: // see right away if that's what we have ohair@286: default: ohair@286: if (buf[last + 1] == ']' && buf[last + 2] == '>') ohair@286: fatal("P-072", null); ohair@286: continue; ohair@286: } ohair@286: } ohair@286: ohair@286: // correctly paired surrogates are OK ohair@286: if (c >= 0xd800 && c <= 0xdfff) { ohair@286: if ((last + 1) >= finish) { ohair@286: if (last > first) { ohair@286: // validator.text (); ohair@286: docHandler.characters(buf, first, last - first); ohair@286: sawContent = true; ohair@286: start = last + 1; ohair@286: } ohair@286: if (isEOF()) { // calls fillbuf ohair@286: fatal("P-081", ohair@286: new Object[]{Integer.toHexString(c)}); ohair@286: } ohair@286: first = start; ohair@286: last = first; ohair@286: continue; ohair@286: } ohair@286: if (checkSurrogatePair(last)) ohair@286: last++; ohair@286: else { ohair@286: last--; ohair@286: // also terminate on surrogate pair oddities ohair@286: break; ohair@286: } ohair@286: continue; ohair@286: } ohair@286: ohair@286: fatal("P-071", new Object[]{Integer.toHexString(c)}); ohair@286: } ohair@286: if (last == first) ohair@286: return sawContent; ohair@286: // validator.text (); ohair@286: docHandler.characters(buf, first, last - first); ohair@286: start = last; ohair@286: return true; ohair@286: } ohair@286: ohair@286: ohair@286: /** ohair@286: * CDATA -- character data, terminated by "]]>" and optionally ohair@286: * including unescaped markup delimiters (ampersand and left angle ohair@286: * bracket). This should otherwise be exactly like character data, ohair@286: * modulo differences in error report details. ohair@286: *

ohair@286: *

The document handler's characters() or ignorableWhitespace() ohair@286: * methods are invoked on all the character data found ohair@286: * ohair@286: * @param docHandler gets callbacks for character data ohair@286: * @param ignorableWhitespace if true, whitespace characters will ohair@286: * be reported using docHandler.ignorableWhitespace(); implicitly, ohair@286: * non-whitespace characters will cause validation errors ohair@286: * @param whitespaceInvalidMessage if true, ignorable whitespace ohair@286: * causes a validity error report as well as a callback ohair@286: */ ohair@286: public boolean unparsedContent(DTDEventListener docHandler, ohair@286: /*ElementValidator validator,*/ ohair@286: boolean ignorableWhitespace, ohair@286: String whitespaceInvalidMessage) ohair@286: throws IOException, SAXException { ohair@286: ohair@286: // [18] CDSect ::= CDStart CData CDEnd ohair@286: // [19] CDStart ::= '' Char*)) ohair@286: // [21] CDEnd ::= ']]>' ohair@286: ohair@286: // caller peeked the leading '<' ... ohair@286: if (!peek("![CDATA[", null)) ohair@286: return false; ohair@286: docHandler.startCDATA(); ohair@286: ohair@286: // only a literal ']]>' stops this ... ohair@286: int last; ohair@286: ohair@286: for (; ;) { // until ']]>' seen ohair@286: boolean done = false; ohair@286: char c; ohair@286: ohair@286: // don't report ignorable whitespace as "text" for ohair@286: // validation purposes. ohair@286: boolean white = ignorableWhitespace; ohair@286: ohair@286: for (last = start; last < finish; last++) { ohair@286: c = buf[last]; ohair@286: ohair@286: // ohair@286: // Reject illegal characters. ohair@286: // ohair@286: if (!XmlChars.isChar(c)) { ohair@286: white = false; ohair@286: if (c >= 0xd800 && c <= 0xdfff) { ohair@286: if (checkSurrogatePair(last)) { ohair@286: last++; ohair@286: continue; ohair@286: } else { ohair@286: last--; ohair@286: break; ohair@286: } ohair@286: } ohair@286: fatal("P-071", new Object[] ohair@286: {Integer.toHexString(buf[last])}); ohair@286: } ohair@286: if (c == '\n') { ohair@286: if (!isInternal()) ohair@286: lineNumber++; ohair@286: continue; ohair@286: } ohair@286: if (c == '\r') { ohair@286: // As above, we can't repeat CR/CRLF --> LF mapping ohair@286: if (isInternal()) ohair@286: continue; ohair@286: ohair@286: if (white) { ohair@286: if (whitespaceInvalidMessage != null) ohair@286: errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, ohair@286: whitespaceInvalidMessage), null)); ohair@286: docHandler.ignorableWhitespace(buf, start, ohair@286: last - start); ohair@286: docHandler.ignorableWhitespace(newline, 0, 1); ohair@286: } else { ohair@286: // validator.text (); ohair@286: docHandler.characters(buf, start, last - start); ohair@286: docHandler.characters(newline, 0, 1); ohair@286: } ohair@286: lineNumber++; ohair@286: if (finish > (last + 1)) { ohair@286: if (buf[last + 1] == '\n') ohair@286: last++; ohair@286: } else { // CR at end of buffer ohair@286: // XXX case not yet handled ... as above ohair@286: } ohair@286: start = last + 1; ohair@286: continue; ohair@286: } ohair@286: if (c != ']') { ohair@286: if (c != ' ' && c != '\t') ohair@286: white = false; ohair@286: continue; ohair@286: } ohair@286: if ((last + 2) < finish) { ohair@286: if (buf[last + 1] == ']' && buf[last + 2] == '>') { ohair@286: done = true; ohair@286: break; ohair@286: } ohair@286: white = false; ohair@286: continue; ohair@286: } else { ohair@286: //last--; ohair@286: break; ohair@286: } ohair@286: } ohair@286: if (white) { ohair@286: if (whitespaceInvalidMessage != null) ohair@286: errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, ohair@286: whitespaceInvalidMessage), null)); ohair@286: docHandler.ignorableWhitespace(buf, start, last - start); ohair@286: } else { ohair@286: // validator.text (); ohair@286: docHandler.characters(buf, start, last - start); ohair@286: } ohair@286: if (done) { ohair@286: start = last + 3; ohair@286: break; ohair@286: } ohair@286: start = last; ohair@286: if (isEOF()) ohair@286: fatal("P-073", null); ohair@286: } ohair@286: docHandler.endCDATA(); ohair@286: return true; ohair@286: } ohair@286: ohair@286: // return false to backstep at end of buffer) ohair@286: private boolean checkSurrogatePair(int offset) ohair@286: throws SAXException { ohair@286: ohair@286: if ((offset + 1) >= finish) ohair@286: return false; ohair@286: ohair@286: char c1 = buf[offset++]; ohair@286: char c2 = buf[offset]; ohair@286: ohair@286: if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff)) ohair@286: return true; ohair@286: fatal("P-074", new Object[]{ ohair@286: Integer.toHexString(c1 & 0x0ffff), ohair@286: Integer.toHexString(c2 & 0x0ffff) ohair@286: }); ohair@286: return false; ohair@286: } ohair@286: ohair@286: ohair@286: /** ohair@286: * whitespace in markup (flagged to app, discardable) ohair@286: *

ohair@286: *

the document handler's ignorableWhitespace() method ohair@286: * is called on all the whitespace found ohair@286: */ ohair@286: public boolean ignorableWhitespace(DTDEventListener handler) ohair@286: throws IOException, SAXException { ohair@286: ohair@286: char c; ohair@286: boolean isSpace = false; ohair@286: int first; ohair@286: ohair@286: // [3] S ::= #20 | #09 | #0D | #0A ohair@286: for (first = start; ;) { ohair@286: if (finish <= start) { ohair@286: if (isSpace) ohair@286: handler.ignorableWhitespace(buf, first, start - first); ohair@286: fillbuf(); ohair@286: first = start; ohair@286: } ohair@286: if (finish <= start) ohair@286: return isSpace; ohair@286: ohair@286: c = buf[start++]; ohair@286: switch (c) { ohair@286: case '\n': ohair@286: if (!isInternal()) ohair@286: lineNumber++; ohair@286: // XXX handles Macintosh line endings wrong ohair@286: // fallthrough ohair@286: case 0x09: ohair@286: case 0x20: ohair@286: isSpace = true; ohair@286: continue; ohair@286: ohair@286: case '\r': ohair@286: isSpace = true; ohair@286: if (!isInternal()) ohair@286: lineNumber++; ohair@286: handler.ignorableWhitespace(buf, first, ohair@286: (start - 1) - first); ohair@286: handler.ignorableWhitespace(newline, 0, 1); ohair@286: if (start < finish && buf[start] == '\n') ohair@286: ++start; ohair@286: first = start; ohair@286: continue; ohair@286: ohair@286: default: ohair@286: ungetc(); ohair@286: if (isSpace) ohair@286: handler.ignorableWhitespace(buf, first, start - first); ohair@286: return isSpace; ohair@286: } ohair@286: } ohair@286: } ohair@286: ohair@286: /** ohair@286: * returns false iff 'next' string isn't as provided, ohair@286: * else skips that text and returns true. ohair@286: *

ohair@286: *

NOTE: two alternative string representations are ohair@286: * both passed in, since one is faster. ohair@286: */ ohair@286: public boolean peek(String next, char chars []) ohair@286: throws IOException, SAXException { ohair@286: ohair@286: int len; ohair@286: int i; ohair@286: ohair@286: if (chars != null) ohair@286: len = chars.length; ohair@286: else ohair@286: len = next.length(); ohair@286: ohair@286: // buffer should hold the whole thing ... give it a ohair@286: // chance for the end-of-buffer case and cope with EOF ohair@286: // by letting fillbuf compact and fill ohair@286: if (finish <= start || (finish - start) < len) ohair@286: fillbuf(); ohair@286: ohair@286: // can't peek past EOF ohair@286: if (finish <= start) ohair@286: return false; ohair@286: ohair@286: // compare the string; consume iff it matches ohair@286: if (chars != null) { ohair@286: for (i = 0; i < len && (start + i) < finish; i++) { ohair@286: if (buf[start + i] != chars[i]) ohair@286: return false; ohair@286: } ohair@286: } else { ohair@286: for (i = 0; i < len && (start + i) < finish; i++) { ohair@286: if (buf[start + i] != next.charAt(i)) ohair@286: return false; ohair@286: } ohair@286: } ohair@286: ohair@286: // if the first fillbuf didn't get enough data, give ohair@286: // fillbuf another chance to read ohair@286: if (i < len) { ohair@286: if (reader == null || isClosed) ohair@286: return false; ohair@286: ohair@286: // ohair@286: // This diagnostic "knows" that the only way big strings would ohair@286: // fail to be peeked is where it's a symbol ... e.g. for an ohair@286: // construct. That knowledge could also be applied ohair@286: // to get rid of the symbol length constraint, since having ohair@286: // the wrong symbol is a fatal error anyway ... ohair@286: // ohair@286: if (len > buf.length) ohair@286: fatal("P-077", new Object[]{new Integer(buf.length)}); ohair@286: ohair@286: fillbuf(); ohair@286: return peek(next, chars); ohair@286: } ohair@286: ohair@286: start += len; ohair@286: return true; ohair@286: } ohair@286: ohair@286: ohair@286: // ohair@286: // Support for reporting the internal DTD subset, so ohair@286: // declarations can be recreated. This is collected as a single ohair@286: // string; such subsets are normally small, and many applications ohair@286: // don't even care about this. ohair@286: // ohair@286: public void startRemembering() { ohair@286: ohair@286: if (startRemember != 0) ohair@286: throw new InternalError(); ohair@286: startRemember = start; ohair@286: } ohair@286: ohair@286: public String rememberText() { ohair@286: ohair@286: String retval; ohair@286: ohair@286: // If the internal subset crossed a buffer boundary, we ohair@286: // created a temporary buffer. ohair@286: if (rememberedText != null) { ohair@286: rememberedText.append(buf, startRemember, ohair@286: start - startRemember); ohair@286: retval = rememberedText.toString(); ohair@286: } else ohair@286: retval = new String(buf, startRemember, ohair@286: start - startRemember); ohair@286: ohair@286: startRemember = 0; ohair@286: rememberedText = null; ohair@286: return retval; ohair@286: } ohair@286: ohair@286: private InputEntity getTopEntity() { ohair@286: ohair@286: InputEntity current = this; ohair@286: ohair@286: // don't report locations within internal entities! ohair@286: ohair@286: while (current != null && current.input == null) ohair@286: current = current.next; ohair@286: return current == null ? this : current; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Returns the public ID of this input source, if known ohair@286: */ ohair@286: public String getPublicId() { ohair@286: ohair@286: InputEntity where = getTopEntity(); ohair@286: if (where == this) ohair@286: return input.getPublicId(); ohair@286: return where.getPublicId(); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Returns the system ID of this input source, if known ohair@286: */ ohair@286: public String getSystemId() { ohair@286: ohair@286: InputEntity where = getTopEntity(); ohair@286: if (where == this) ohair@286: return input.getSystemId(); ohair@286: return where.getSystemId(); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Returns the current line number in this input source ohair@286: */ ohair@286: public int getLineNumber() { ohair@286: ohair@286: InputEntity where = getTopEntity(); ohair@286: if (where == this) ohair@286: return lineNumber; ohair@286: return where.getLineNumber(); ohair@286: } ohair@286: ohair@286: /** ohair@286: * returns -1; maintaining column numbers hurts performance ohair@286: */ ohair@286: public int getColumnNumber() { ohair@286: ohair@286: return -1; // not maintained (speed) ohair@286: } ohair@286: ohair@286: ohair@286: // ohair@286: // n.b. for non-EOF end-of-buffer cases, reader should return ohair@286: // at least a handful of bytes so various lookaheads behave. ohair@286: // ohair@286: // two character pushback exists except at first; characters ohair@286: // represented by surrogate pairs can't be pushed back (they'd ohair@286: // only be in character data anyway). ohair@286: // ohair@286: // DTD exception thrown on char conversion problems; line number ohair@286: // will be low, as a rule. ohair@286: // ohair@286: private void fillbuf() throws IOException, SAXException { ohair@286: ohair@286: // don't touched fixed buffers, that'll usually ohair@286: // change entity values (and isn't needed anyway) ohair@286: // likewise, ignore closed streams ohair@286: if (reader == null || isClosed) ohair@286: return; ohair@286: ohair@286: // if remembering DTD text, copy! ohair@286: if (startRemember != 0) { ohair@286: if (rememberedText == null) ohair@286: rememberedText = new StringBuffer(buf.length); ohair@286: rememberedText.append(buf, startRemember, ohair@286: start - startRemember); ohair@286: } ohair@286: ohair@286: boolean extra = (finish > 0) && (start > 0); ohair@286: int len; ohair@286: ohair@286: if (extra) // extra pushback ohair@286: start--; ohair@286: len = finish - start; ohair@286: ohair@286: System.arraycopy(buf, start, buf, 0, len); ohair@286: start = 0; ohair@286: finish = len; ohair@286: ohair@286: try { ohair@286: len = buf.length - len; ohair@286: len = reader.read(buf, finish, len); ohair@286: } catch (UnsupportedEncodingException e) { ohair@286: fatal("P-075", new Object[]{e.getMessage()}); ohair@286: } catch (CharConversionException e) { ohair@286: fatal("P-076", new Object[]{e.getMessage()}); ohair@286: } ohair@286: if (len >= 0) ohair@286: finish += len; ohair@286: else ohair@286: close(); ohair@286: if (extra) // extra pushback ohair@286: start++; ohair@286: ohair@286: if (startRemember != 0) ohair@286: // assert extra == true ohair@286: startRemember = 1; ohair@286: } ohair@286: ohair@286: public void close() { ohair@286: ohair@286: try { ohair@286: if (reader != null && !isClosed) ohair@286: reader.close(); ohair@286: isClosed = true; ohair@286: } catch (IOException e) { ohair@286: /* NOTHING */ ohair@286: } ohair@286: } ohair@286: ohair@286: ohair@286: private void fatal(String messageId, Object params []) ohair@286: throws SAXException { ohair@286: ohair@286: SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null); ohair@286: ohair@286: // not continuable ... e.g. WF errors ohair@286: close(); ohair@286: errHandler.fatalError(x); ohair@286: throw x; ohair@286: } ohair@286: }