src/share/jaxws_classes/com/sun/xml/internal/dtdparser/InputEntity.java

Thu, 31 Aug 2017 15:18:52 +0800

author
aoqi
date
Thu, 31 Aug 2017 15:18:52 +0800
changeset 637
9c07ef4934dd
parent 397
b99d7e355d4b
parent 0
373ffda63c9a
permissions
-rw-r--r--

merge

     1 /*
     2  * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.xml.internal.dtdparser;
    28 import org.xml.sax.InputSource;
    29 import org.xml.sax.SAXException;
    30 import org.xml.sax.SAXParseException;
    32 import java.io.CharConversionException;
    33 import java.io.IOException;
    34 import java.io.InputStream;
    35 import java.io.InputStreamReader;
    36 import java.io.Reader;
    37 import java.io.UnsupportedEncodingException;
    38 import java.net.URL;
    39 import java.util.Locale;
    41 /**
    42  * This is how the parser talks to its input entities, of all kinds.
    43  * The entities are in a stack.
    44  * <p/>
    45  * <P> For internal entities, the character arrays are referenced here,
    46  * and read from as needed (they're read-only).  External entities have
    47  * mutable buffers, that are read into as needed.
    48  * <p/>
    49  * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
    50  * whether it's in an external (parsed) entity or not.  The XML 1.0 spec
    51  * is inconsistent in explaining EOL handling; this is the sensible way.
    52  *
    53  * @author David Brownell
    54  * @author Janet Koenig
    55  * @version 1.4 00/08/05
    56  */
    57 public class InputEntity {
    // Unread region of buf: buf[start] is the next character to hand out,
    // and finish is one past the last available character.  When
    // finish <= start the readers call fillbuf() before looking at buf.
    private int start, finish;
    // Character data.  init(char[], ...) stores the caller's array as-is
    // (documented there as read-only); init(InputSource, ...) allocates a
    // private array of BUFSIZ chars.
    private char buf [];
    // Current line, starting at 1; the methods in this class only adjust
    // it when isInternal() is false.
    private int lineNumber = 1;
    // Set by getc() after it returns a high surrogate, so that the next
    // character is checked as the low half of the pair.
    private boolean returnedFirstHalf = false;
    // Set by getc() while it reads the character following a CR, so that
    // an LF completing the CRLF is not counted as another line.
    private boolean maybeInCRLF = false;

    // name of entity (never main document or unnamed DTD PE)
    private String name;

    // Entity beneath this one on the stack; null at the bottom of the
    // stack (see isDocument()).
    private InputEntity next;

    // for system and public IDs in diagnostics
    private InputSource input;

    // this is a buffer; some buffers can be replenished.
    // (null for internal entities -- see isInternal())
    private Reader reader;
    // Checked together with reader before trying to read more data;
    // presumably set once the reader has been closed (not set in the
    // methods shown here).
    private boolean isClosed;

    // Listener that receives validity errors, and the locale used to look
    // up their message text (see unparsedContent()).
    private DTDEventListener errHandler;
    private Locale locale;

    // Support for startRemembering()/rememberText(): text collected so far
    // (if any), and the buf offset at which remembering started.
    private StringBuffer rememberedText;
    private int startRemember;

    // record if this is a PE, so endParsedEntity won't be called
    private boolean isPE;

    // InputStreamReader throws an internal per-read exception, so
    // we minimize reads.  We also add a byte to compensate for the
    // "ungetc" byte we keep, so that our downstream reads are as
    // nicely sized as we can make them.
    final private static int BUFSIZ = 8 * 1024 + 1;

    // Single LF, passed to the handler wherever a CR or CRLF line end is
    // reported as one newline.
    final private static char newline [] = {'\n'};
    93     public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
    94         InputEntity retval = new InputEntity();
    95         retval.errHandler = h;
    96         retval.locale = l;
    97         return retval;
    98     }
    // Private on purpose: instances are created by getInputEntity(), which
    // fills in the error listener and locale right after construction.
    private InputEntity() {
    }
   103     //
   104     // predicate:  return true iff this is an internal entity reader,
   105     // and so may safely be "popped" as needed.  external entities have
   106     // syntax to uphold; internal parameter entities have at most validity
   107     // constraints to monitor.  also, only external entities get decent
   108     // location diagnostics.
   109     //
   110     public boolean isInternal() {
   111         return reader == null;
   112     }
   114     //
   115     // predicate:  return true iff this is the toplevel document
   116     //
   117     public boolean isDocument() {
   118         return next == null;
   119     }
   121     //
   122     // predicate:  return true iff this is a PE expansion (so that
    // LexicalEventListener.endParsedEntity won't be called)
   124     //
   125     public boolean isParameterEntity() {
   126         return isPE;
   127     }
   129     //
   130     // return name of current entity
   131     //
   132     public String getName() {
   133         return name;
   134     }
   136     //
   137     // use this for an external parsed entity
   138     //
   139     public void init(InputSource in, String name, InputEntity stack,
   140                      boolean isPE)
   141             throws IOException, SAXException {
   143         input = in;
   144         this.isPE = isPE;
   145         reader = in.getCharacterStream();
   147         if (reader == null) {
   148             InputStream bytes = in.getByteStream();
   150             if (bytes == null)
   151                 reader = XmlReader.createReader(new URL(in.getSystemId())
   152                         .openStream());
   153             else if (in.getEncoding() != null)
   154                 reader = XmlReader.createReader(in.getByteStream(),
   155                         in.getEncoding());
   156             else
   157                 reader = XmlReader.createReader(in.getByteStream());
   158         }
   159         next = stack;
   160         buf = new char[BUFSIZ];
   161         this.name = name;
   162         checkRecursion(stack);
   163     }
   165     //
   166     // use this for an internal parsed entity; buffer is readonly
   167     //
   168     public void init(char b [], String name, InputEntity stack, boolean isPE)
   169             throws SAXException {
   171         next = stack;
   172         buf = b;
   173         finish = b.length;
   174         this.name = name;
   175         this.isPE = isPE;
   176         checkRecursion(stack);
   177     }
   179     private void checkRecursion(InputEntity stack)
   180             throws SAXException {
   182         if (stack == null)
   183             return;
   184         for (stack = stack.next; stack != null; stack = stack.next) {
   185             if (stack.name != null && stack.name.equals(name))
   186                 fatal("P-069", new Object[]{name});
   187         }
   188     }
   190     public InputEntity pop() throws IOException {
   192         // caller has ensured there's nothing left to read
   193         close();
   194         return next;
   195     }
   197     /**
   198      * returns true iff there's no more data to consume ...
   199      */
   200     public boolean isEOF() throws IOException, SAXException {
   202         // called to ensure WF-ness of included entities and to pop
   203         // input entities appropriately ... EOF is not always legal.
   204         if (start >= finish) {
   205             fillbuf();
   206             return start >= finish;
   207         } else
   208             return false;
   209     }
   211     /**
   212      * Returns the name of the encoding in use, else null; the name
   213      * returned is in as standard a form as we can get.
   214      */
   215     public String getEncoding() {
   217         if (reader == null)
   218             return null;
   219         if (reader instanceof XmlReader)
   220             return ((XmlReader) reader).getEncoding();
   222         // XXX prefer a java2std() call to normalize names...
   224         if (reader instanceof InputStreamReader)
   225             return ((InputStreamReader) reader).getEncoding();
   226         return null;
   227     }
   230     /**
   231      * returns the next name char, or NUL ... faster than getc(),
   232      * and the common "name or nmtoken must be next" case won't
   233      * need ungetc().
   234      */
   235     public char getNameChar() throws IOException, SAXException {
   237         if (finish <= start)
   238             fillbuf();
   239         if (finish > start) {
   240             char c = buf[start++];
   241             if (XmlChars.isNameChar(c))
   242                 return c;
   243             start--;
   244         }
   245         return 0;
   246     }
   248     /**
   249      * gets the next Java character -- might be part of an XML
   250      * text character represented by a surrogate pair, or be
   251      * the end of the entity.
   252      */
    public char getc() throws IOException, SAXException {

        // refill only when no unread characters remain in the buffer
        if (finish <= start)
            fillbuf();
        if (finish > start) {
            char c = buf[start++];

            // [2] Char ::= #x0009 | #x000A | #x000D
            //            | [#x0020-#xD7FF]
            //            | [#xE000-#xFFFD]
            // plus surrogate _pairs_ representing [#x10000-#x10ffff]

            // The previous call returned a high surrogate, so this character
            // has to be a low surrogate (0xDC00-0xDFFF); anything else is
            // reported as P-070 (fatal() presumably does not return).
            if (returnedFirstHalf) {
                if (c >= 0xdc00 && c <= 0xdfff) {
                    returnedFirstHalf = false;
                    return c;
                } else
                    fatal("P-070", new Object[]{Integer.toHexString(c)});
            }
            // common case: an ordinary character, returned unchanged
            if ((c >= 0x0020 && c <= 0xD7FF)
                    || c == 0x0009
                    // no surrogates!
                    || (c >= 0xE000 && c <= 0xFFFD))
                return c;

            //
            // CRLF and CR are both line ends; map both to LF, and
            // keep line count correct.
            //
            else if (c == '\r' && !isInternal()) {
                // Read one character ahead; maybeInCRLF stops the nested
                // call from counting an LF that merely completes this CRLF.
                // Anything other than LF is pushed back for the next call.
                maybeInCRLF = true;
                c = getc();
                if (c != '\n')
                    ungetc();
                maybeInCRLF = false;

                lineNumber++;
                return '\n';

            } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
                // A CR only reaches this branch for internal entities, where
                // it is returned as-is and the line count is left alone.
                if (!isInternal() && !maybeInCRLF)
                    lineNumber++;
                return c;
            }

            // surrogates...
            // a high surrogate is returned now; its partner is validated
            // by the returnedFirstHalf check on the next call
            if (c >= 0xd800 && c < 0xdc00) {
                returnedFirstHalf = true;
                return c;
            }

            // everything else (other control characters, an unpaired low
            // surrogate, 0xFFFE/0xFFFF) is reported as P-071
            fatal("P-071", new Object[]{Integer.toHexString(c)});
        }
        // reached when the buffer is still empty after fillbuf() (or if
        // fatal() returned)
        throw new EndOfInputException();
    }
   309     /**
   310      * lookahead one character
   311      */
   312     public boolean peekc(char c) throws IOException, SAXException {
   314         if (finish <= start)
   315             fillbuf();
   316         if (finish > start) {
   317             if (buf[start] == c) {
   318                 start++;
   319                 return true;
   320             } else
   321                 return false;
   322         }
   323         return false;
   324     }
   327     /**
   328      * two character pushback is guaranteed
   329      */
   330     public void ungetc() {
   332         if (start == 0)
   333             throw new InternalError("ungetc");
   334         start--;
   336         if (buf[start] == '\n' || buf[start] == '\r') {
   337             if (!isInternal())
   338                 lineNumber--;
   339         } else if (returnedFirstHalf)
   340             returnedFirstHalf = false;
   341     }
   344     /**
   345      * optional grammatical whitespace (discarded)
   346      */
   347     public boolean maybeWhitespace()
   348             throws IOException, SAXException {
   350         char c;
   351         boolean isSpace = false;
   352         boolean sawCR = false;
   354         // [3] S ::= #20 | #09 | #0D | #0A
   355         for (; ;) {
   356             if (finish <= start)
   357                 fillbuf();
   358             if (finish <= start)
   359                 return isSpace;
   361             c = buf[start++];
   362             if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') {
   363                 isSpace = true;
   365                 //
   366                 // CR, LF are line endings ... CLRF is one, not two!
   367                 //
   368                 if ((c == '\n' || c == '\r') && !isInternal()) {
   369                     if (!(c == '\n' && sawCR)) {
   370                         lineNumber++;
   371                         sawCR = false;
   372                     }
   373                     if (c == '\r')
   374                         sawCR = true;
   375                 }
   376             } else {
   377                 start--;
   378                 return isSpace;
   379             }
   380         }
   381     }
   384     /**
   385      * normal content; whitespace in markup may be handled
   386      * specially if the parser uses the content model.
   387      * <p/>
   388      * <P> content terminates with markup delimiter characters,
   389      * namely ampersand (&amp;amp;) and left angle bracket (&amp;lt;).
   390      * <p/>
   391      * <P> the document handler's characters() method is called
   392      * on all the content found
   393      */
    public boolean parsedContent(DTDEventListener docHandler
                                 /*ElementValidator validator*/)
            throws IOException, SAXException {

        // Returns true iff at least one characters() callback was made.
        // Text is reported in runs buf[first..last); 'start' is moved up
        // whenever a run has been handed to docHandler.

        // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

        int first;        // first char to return
        int last;        // last char to return
        boolean sawContent;    // sent any chars?
        char c;

        // deliver right out of the buffer, until delimiter, EOF,
        // or error, refilling as we go
        for (first = last = start, sawContent = false; ; last++) {

            // buffer empty?
            if (last >= finish) {
                // flush the pending run before the buffer is refilled
                if (last > first) {
//            validator.text ();
                    docHandler.characters(buf, first, last - first);
                    sawContent = true;
                    start = last;
                }
                if (isEOF())    // calls fillbuf
                    return sawContent;
                first = start;
                last = first - 1;    // incremented in loop
                continue;
            }

            c = buf[last];

            //
            // pass most chars through ASAP; this inlines the code of
            // [2] !XmlChars.isChar(c) leaving only characters needing
            // special treatment ... line ends, surrogates, and:
            //    0x0026 == '&'
            //    0x003C == '<'
            //    0x005D == ']'
            // Comparisons ordered for speed on 'typical' text
            //
            if ((c > 0x005D && c <= 0xD7FF)    // a-z and more
                    || (c < 0x0026 && c >= 0x0020)    // space & punct
                    || (c > 0x003C && c < 0x005D)    // A-Z & punct
                    || (c > 0x0026 && c < 0x003C)    // 0-9 & punct
                    || c == 0x0009
                    || (c >= 0xE000 && c <= 0xFFFD)
            )
                continue;

            // terminate on markup delimiters
            if (c == '<' || c == '&')
                break;

            // count lines
            if (c == '\n') {
                if (!isInternal())
                    lineNumber++;
                continue;
            }

            // External entities get CR, CRLF --> LF mapping
            // Internal ones got it already, and we can't repeat
            // else we break char ref handling!!
            if (c == '\r') {
                if (isInternal())
                    continue;

                // report the text before the CR, then one LF in its place
                docHandler.characters(buf, first, last - first);
                docHandler.characters(newline, 0, 1);
                sawContent = true;
                lineNumber++;
                // swallow the LF of a CRLF when it is already buffered
                if (finish > (last + 1)) {
                    if (buf[last + 1] == '\n')
                        last++;
                } else {    // CR at end of buffer
// XXX case not yet handled:  CRLF here will look like two lines
                }
                // the next run begins after the line end
                first = start = last + 1;
                continue;
            }

            // ']]>' is a WF error -- must fail if we see it
            if (c == ']') {
                // finish - last is the number of buffered characters from
                // this ']' to the end of the buffer, inclusive
                switch (finish - last) {
                // for suspicious end-of-buffer cases, get more data
                // into the buffer to rule out this sequence.
                case 2:
                    if (buf[last + 1] != ']')
                        continue;
                    // FALLTHROUGH

                case 1:
                    // no more data can arrive, so ']]>' cannot be completed
                    if (reader == null || isClosed)
                        continue;
                    if (last == first)
                        throw new InternalError("fillbuf");
                    // flush the run, then refill and rescan from 'start'
                    last--;
                    if (last > first) {
//            validator.text ();
                        docHandler.characters(buf, first, last - first);
                        sawContent = true;
                        start = last;
                    }
                    fillbuf();
                    first = last = start;
                    continue;

                    // otherwise any "]]>" would be buffered, and we can
                    // see right away if that's what we have
                default:
                    if (buf[last + 1] == ']' && buf[last + 2] == '>')
                        fatal("P-072", null);
                    continue;
                }
            }

            // correctly paired surrogates are OK
            if (c >= 0xd800 && c <= 0xdfff) {
                // the partner is not buffered yet
                if ((last + 1) >= finish) {
                    // NOTE(review): when a run is flushed here, 'start' is
                    // set to last + 1, i.e. past the surrogate at 'last',
                    // and only when last > first -- confirm this is the
                    // intended position for the following refill.
                    if (last > first) {
//            validator.text ();
                        docHandler.characters(buf, first, last - first);
                        sawContent = true;
                        start = last + 1;
                    }
                    if (isEOF()) {    // calls fillbuf
                        fatal("P-081",
                                new Object[]{Integer.toHexString(c)});
                    }
                    first = start;
                    last = first;
                    continue;
                }
                if (checkSurrogatePair(last))
                    last++;
                else {
                    last--;
                    // also terminate on surrogate pair oddities
                    break;
                }
                continue;
            }

            // any other character is rejected as P-071
            fatal("P-071", new Object[]{Integer.toHexString(c)});
        }
        // loop left at a delimiter (or surrogate oddity): flush what remains
        if (last == first)
            return sawContent;
//    validator.text ();
        docHandler.characters(buf, first, last - first);
        start = last;
        return true;
    }
   549     /**
   550      * CDATA -- character data, terminated by "]]>" and optionally
   551      * including unescaped markup delimiters (ampersand and left angle
   552      * bracket).  This should otherwise be exactly like character data,
   553      * modulo differences in error report details.
   554      * <p/>
   555      * <P> The document handler's characters() or ignorableWhitespace()
   556      * methods are invoked on all the character data found
   557      *
   558      * @param docHandler               gets callbacks for character data
   559      * @param ignorableWhitespace      if true, whitespace characters will
   560      *                                 be reported using docHandler.ignorableWhitespace(); implicitly,
   561      *                                 non-whitespace characters will cause validation errors
     * @param whitespaceInvalidMessage if non-null, the key of the message
     *                                 reported as a validity error (in addition to the callback) when data is delivered as ignorable whitespace
   564      */
    public boolean unparsedContent(DTDEventListener docHandler,
                                   /*ElementValidator validator,*/
                                   boolean ignorableWhitespace,
                                   String whitespaceInvalidMessage)
            throws IOException, SAXException {

        // Returns false (consuming nothing) when the input does not
        // continue with "![CDATA["; otherwise reports the section between
        // startCDATA() and endCDATA() callbacks and returns true.

        // [18] CDSect ::= CDStart CData CDEnd
        // [19] CDStart ::= '<![CDATA['
        // [20] CData ::= (Char* - (Char* ']]>' Char*))
        // [21] CDEnd ::= ']]>'

        // caller peeked the leading '<' ...
        if (!peek("![CDATA[", null))
            return false;
        docHandler.startCDATA();

        // only a literal ']]>' stops this ...
        int last;

        // each pass scans the buffered data from 'start' and reports it
        for (; ;) {        // until ']]>' seen
            boolean done = false;
            char c;

            // don't report ignorable whitespace as "text" for
            // validation purposes.
            // ('white' stays true only while everything scanned in this
            // pass is whitespace)
            boolean white = ignorableWhitespace;

            for (last = start; last < finish; last++) {
                c = buf[last];

                //
                // Reject illegal characters.
                //
                if (!XmlChars.isChar(c)) {
                    white = false;
                    if (c >= 0xd800 && c <= 0xdfff) {
                        // a valid pair is skipped as a unit; otherwise the
                        // scan backs up one position and stops
                        if (checkSurrogatePair(last)) {
                            last++;
                            continue;
                        } else {
                            last--;
                            break;
                        }
                    }
                    fatal("P-071", new Object[]
                    {Integer.toHexString(buf[last])});
                }
                if (c == '\n') {
                    if (!isInternal())
                        lineNumber++;
                    continue;
                }
                if (c == '\r') {
                    // As above, we can't repeat CR/CRLF --> LF mapping
                    if (isInternal())
                        continue;

                    // report what precedes the CR, then a single LF in
                    // place of the CR (or CRLF)
                    if (white) {
                        if (whitespaceInvalidMessage != null)
                            errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
                                    whitespaceInvalidMessage), null));
                        docHandler.ignorableWhitespace(buf, start,
                                last - start);
                        docHandler.ignorableWhitespace(newline, 0, 1);
                    } else {
//            validator.text ();
                        docHandler.characters(buf, start, last - start);
                        docHandler.characters(newline, 0, 1);
                    }
                    lineNumber++;
                    // swallow the LF of a CRLF when it is already buffered
                    if (finish > (last + 1)) {
                        if (buf[last + 1] == '\n')
                            last++;
                    } else {    // CR at end of buffer
// XXX case not yet handled ... as above
                    }
                    start = last + 1;
                    continue;
                }
                if (c != ']') {
                    if (c != ' ' && c != '\t')
                        white = false;
                    continue;
                }
                // c is ']': the terminator can only be recognized when two
                // more characters are buffered; otherwise stop the scan here
                // so the ']' is looked at again on the next pass
                if ((last + 2) < finish) {
                    if (buf[last + 1] == ']' && buf[last + 2] == '>') {
                        done = true;
                        break;
                    }
                    white = false;
                    continue;
                } else {
                    //last--;
                    break;
                }
            }
            // report everything scanned in this pass: buf[start..last)
            if (white) {
                if (whitespaceInvalidMessage != null)
                    errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
                            whitespaceInvalidMessage), null));
                docHandler.ignorableWhitespace(buf, start, last - start);
            } else {
//        validator.text ();
                docHandler.characters(buf, start, last - start);
            }
            if (done) {
                // skip over the three characters of "]]>"
                start = last + 3;
                break;
            }
            start = last;
            // running out of input before "]]>" is reported as P-073
            if (isEOF())
                fatal("P-073", null);
        }
        docHandler.endCDATA();
        return true;
    }
    // returns false, without reporting an error, when the second half of the
    // pair is not buffered yet (callers then back up one position)
   683     private boolean checkSurrogatePair(int offset)
   684             throws SAXException {
   686         if ((offset + 1) >= finish)
   687             return false;
   689         char c1 = buf[offset++];
   690         char c2 = buf[offset];
   692         if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff))
   693             return true;
   694         fatal("P-074", new Object[]{
   695             Integer.toHexString(c1 & 0x0ffff),
   696             Integer.toHexString(c2 & 0x0ffff)
   697         });
   698         return false;
   699     }
   702     /**
   703      * whitespace in markup (flagged to app, discardable)
   704      * <p/>
   705      * <P> the document handler's ignorableWhitespace() method
   706      * is called on all the whitespace found
   707      */
    public boolean ignorableWhitespace(DTDEventListener handler)
            throws IOException, SAXException {

        // Returns true iff at least one whitespace character was consumed.
        // Whitespace is reported in runs buf[first..start); a CR (with a
        // directly following, already buffered LF) is reported as a single
        // newline character instead.

        char c;
        boolean isSpace = false;
        int first;

        // [3] S ::= #20 | #09 | #0D | #0A
        for (first = start; ;) {
            if (finish <= start) {
                // flush the pending run before the buffer is refilled
                if (isSpace)
                    handler.ignorableWhitespace(buf, first, start - first);
                fillbuf();
                first = start;
            }
            // still nothing to read after the refill
            if (finish <= start)
                return isSpace;

            c = buf[start++];
            switch (c) {
            case '\n':
                if (!isInternal())
                    lineNumber++;
// XXX handles Macintosh line endings wrong
                // fallthrough

            case 0x09:
            case 0x20:
                isSpace = true;
                continue;

            case '\r':
                isSpace = true;
                if (!isInternal())
                    lineNumber++;
                // report the run before the CR (start is already past it),
                // then one LF in place of the line end
                handler.ignorableWhitespace(buf, first,
                        (start - 1) - first);
                handler.ignorableWhitespace(newline, 0, 1);
                // swallow the LF of a CRLF when it is already buffered
                if (start < finish && buf[start] == '\n')
                    ++start;
                first = start;
                continue;

            default:
                // not whitespace: push it back and report the final run
                ungetc();
                if (isSpace)
                    handler.ignorableWhitespace(buf, first, start - first);
                return isSpace;
            }
        }
    }
   759     /**
   760      * returns false iff 'next' string isn't as provided,
   761      * else skips that text and returns true.
   762      * <p/>
   763      * <P> NOTE:  two alternative string representations are
   764      * both passed in, since one is faster.
   765      */
   766     public boolean peek(String next, char chars [])
   767             throws IOException, SAXException {
   769         int len;
   770         int i;
   772         if (chars != null)
   773             len = chars.length;
   774         else
   775             len = next.length();
   777         // buffer should hold the whole thing ... give it a
   778         // chance for the end-of-buffer case and cope with EOF
   779         // by letting fillbuf compact and fill
   780         if (finish <= start || (finish - start) < len)
   781             fillbuf();
   783         // can't peek past EOF
   784         if (finish <= start)
   785             return false;
   787         // compare the string; consume iff it matches
   788         if (chars != null) {
   789             for (i = 0; i < len && (start + i) < finish; i++) {
   790                 if (buf[start + i] != chars[i])
   791                     return false;
   792             }
   793         } else {
   794             for (i = 0; i < len && (start + i) < finish; i++) {
   795                 if (buf[start + i] != next.charAt(i))
   796                     return false;
   797             }
   798         }
   800         // if the first fillbuf didn't get enough data, give
   801         // fillbuf another chance to read
   802         if (i < len) {
   803             if (reader == null || isClosed)
   804                 return false;
   806             //
   807             // This diagnostic "knows" that the only way big strings would
   808             // fail to be peeked is where it's a symbol ... e.g. for an
   809             // </EndTag> construct.  That knowledge could also be applied
   810             // to get rid of the symbol length constraint, since having
   811             // the wrong symbol is a fatal error anyway ...
   812             //
   813             if (len > buf.length)
   814                 fatal("P-077", new Object[]{new Integer(buf.length)});
   816             fillbuf();
   817             return peek(next, chars);
   818         }
   820         start += len;
   821         return true;
   822     }
   825     //
   826     // Support for reporting the internal DTD subset, so <!DOCTYPE...>
   827     // declarations can be recreated.  This is collected as a single
   828     // string; such subsets are normally small, and many applications
   829     // don't even care about this.
   830     //
   831     public void startRemembering() {
   833         if (startRemember != 0)
   834             throw new InternalError();
   835         startRemember = start;
   836     }
   838     public String rememberText() {
   840         String retval;
   842         // If the internal subset crossed a buffer boundary, we
   843         // created a temporary buffer.
   844         if (rememberedText != null) {
   845             rememberedText.append(buf, startRemember,
   846                     start - startRemember);
   847             retval = rememberedText.toString();
   848         } else
   849             retval = new String(buf, startRemember,
   850                     start - startRemember);
   852         startRemember = 0;
   853         rememberedText = null;
   854         return retval;
   855     }
   857     private InputEntity getTopEntity() {
   859         InputEntity current = this;
   861         // don't report locations within internal entities!
   863         while (current != null && current.input == null)
   864             current = current.next;
   865         return current == null ? this : current;
   866     }
   868     /**
   869      * Returns the public ID of this input source, if known
   870      */
   871     public String getPublicId() {
   873         InputEntity where = getTopEntity();
   874         if (where == this)
   875             return input.getPublicId();
   876         return where.getPublicId();
   877     }
   879     /**
   880      * Returns the system ID of this input source, if known
   881      */
   882     public String getSystemId() {
   884         InputEntity where = getTopEntity();
   885         if (where == this)
   886             return input.getSystemId();
   887         return where.getSystemId();
   888     }
   890     /**
   891      * Returns the current line number in this input source
   892      */
   893     public int getLineNumber() {
   895         InputEntity where = getTopEntity();
   896         if (where == this)
   897             return lineNumber;
   898         return where.getLineNumber();
   899     }
   901     /**
   902      * returns -1; maintaining column numbers hurts performance
   903      */
   904     public int getColumnNumber() {
   906         return -1;        // not maintained (speed)
   907     }
   910     //
   911     // n.b. for non-EOF end-of-buffer cases, reader should return
   912     // at least a handful of bytes so various lookaheads behave.
   913     //
   914     // two character pushback exists except at first; characters
   915     // represented by surrogate pairs can't be pushed back (they'd
   916     // only be in character data anyway).
   917     //
   918     // DTD exception thrown on char conversion problems; line number
   919     // will be low, as a rule.
   920     //
   921     private void fillbuf() throws IOException, SAXException {
   923         // don't touched fixed buffers, that'll usually
   924         // change entity values (and isn't needed anyway)
   925         // likewise, ignore closed streams
   926         if (reader == null || isClosed)
   927             return;
   929         // if remembering DTD text, copy!
   930         if (startRemember != 0) {
   931             if (rememberedText == null)
   932                 rememberedText = new StringBuffer(buf.length);
   933             rememberedText.append(buf, startRemember,
   934                     start - startRemember);
   935         }
   937         boolean extra = (finish > 0) && (start > 0);
   938         int len;
   940         if (extra)        // extra pushback
   941             start--;
   942         len = finish - start;
   944         System.arraycopy(buf, start, buf, 0, len);
   945         start = 0;
   946         finish = len;
   948         try {
   949             len = buf.length - len;
   950             len = reader.read(buf, finish, len);
   951         } catch (UnsupportedEncodingException e) {
   952             fatal("P-075", new Object[]{e.getMessage()});
   953         } catch (CharConversionException e) {
   954             fatal("P-076", new Object[]{e.getMessage()});
   955         }
   956         if (len >= 0)
   957             finish += len;
   958         else
   959             close();
   960         if (extra)        // extra pushback
   961             start++;
   963         if (startRemember != 0)
   964         // assert extra == true
   965             startRemember = 1;
   966     }
   968     public void close() {
   970         try {
   971             if (reader != null && !isClosed)
   972                 reader.close();
   973             isClosed = true;
   974         } catch (IOException e) {
   975             /* NOTHING */
   976         }
   977     }
   980     private void fatal(String messageId, Object params [])
   981             throws SAXException {
   983         SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null);
   985         // not continuable ... e.g. WF errors
   986         close();
   987         errHandler.fatalError(x);
   988         throw x;
   989     }
   990 }

mercurial