jdk8-mips64-public/jaxws: src/share/jaxws_classes/com/sun/xml/internal/dtdparser/XmlReader.java@373ffda63c9a

Initial load
http://hg.openjdk.java.net/jdk8u/jdk8u/jaxws/
changeset: 657:d47a47f961ee
tag: jdk8u25-b17

     1 /*

     2  * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package com.sun.xml.internal.dtdparser;

    28 import java.io.ByteArrayInputStream;

    29 import java.io.CharConversionException;

    30 import java.io.IOException;

    31 import java.io.InputStream;

    32 import java.io.InputStreamReader;

    33 import java.io.PushbackInputStream;

    34 import java.io.Reader;

    35 import java.util.Hashtable;

    38 // NOTE:  Add I18N support to this class when JDK gets the ability to

    39 // defer selection of locale for exception messages ... use the same

    40 // technique for both.

    43 /**

    44  * This handles several XML-related tasks that normal java.io Readers

    45  * don't support, including use of IETF standard encoding names and

    46  * automatic detection of most XML encodings.  The former is needed

    47  * for interoperability; the latter is needed to conform with the XML

    48  * spec.  This class also optimizes reading some common encodings by

    49  * providing low-overhead unsynchronized Reader support.

    50  * <p/>

    51  * <P> Note that the autodetection facility should be used only on

    52  * data streams which have an unknown character encoding.  For example,

    53  * it should never be used on MIME text/xml entities.

    54  * <p/>

    55  * <P> Note that XML processors are only required to support UTF-8 and

    56  * UTF-16 character encodings.  Autodetection permits the underlying Java

    57  * implementation to provide support for many other encodings, such as

    58  * US-ASCII, ISO-8859-5, Shift_JIS, EUC-JP, and ISO-2022-JP.

    59  *

    60  * @author David Brownell

    61  * @author Janet Koenig

    62  * @version 1.3 00/02/24

    63  */

    64 // package private

    65 final class XmlReader extends Reader {

    66     private static final int MAXPUSHBACK = 512;

    68     private Reader in;

    69     private String assignedEncoding;

    70     private boolean closed;

    72     //

    73     // This class always delegates I/O to a reader, which gets

    74     // its data from the very beginning of the XML text.  It needs

    75     // to use a pushback stream since (a) autodetection can read

    76     // partial UTF-8 characters which need to be fully processed,

    77     // (b) the "Unicode" readers swallow characters that they think

    78     // are byte order marks, so tests fail if they don't see the

    79     // real byte order mark.

    80     //

    81     // It's got do this efficiently:  character I/O is solidly on the

    82     // critical path.  (So keep buffer length over 2 Kbytes to avoid

    83     // excess buffering. Many URL handlers stuff a BufferedInputStream

    84     // between here and the real data source, and larger buffers keep

    85     // that from slowing you down.)

    86     //

    88     /**

    89      * Constructs the reader from an input stream, autodetecting

    90      * the encoding to use according to the heuristic specified

    91      * in the XML 1.0 recommendation.

    92      *

    93      * @param in the input stream from which the reader is constructed

    94      * @throws IOException on error, such as unrecognized encoding

    95      */

    96     public static Reader createReader(InputStream in) throws IOException {

    97         return new XmlReader(in);

    98     }

   100     /**

   101      * Creates a reader supporting the given encoding, mapping

   102      * from standard encoding names to ones that understood by

   103      * Java where necessary.

   104      *

   105      * @param in       the input stream from which the reader is constructed

   106      * @param encoding the IETF standard name of the encoding to use;

   107      *                 if null, autodetection is used.

   108      * @throws IOException on error, including unrecognized encoding

   109      */

   110     public static Reader createReader(InputStream in, String encoding)

   111             throws IOException {

   112         if (encoding == null)

   113             return new XmlReader(in);

   114         if ("UTF-8".equalsIgnoreCase(encoding)

   115                 || "UTF8".equalsIgnoreCase(encoding))

   116             return new Utf8Reader(in);

   117         if ("US-ASCII".equalsIgnoreCase(encoding)

   118                 || "ASCII".equalsIgnoreCase(encoding))

   119             return new AsciiReader(in);

   120         if ("ISO-8859-1".equalsIgnoreCase(encoding)

   121         // plus numerous aliases ...

   122         )

   123             return new Iso8859_1Reader(in);

   125         //

   126         // What we really want is an administerable resource mapping

   127         // encoding names/aliases to classnames.  For example a property

   128         // file resource, "readers/mapping.props", holding and a set

   129         // of readers in that (sub)package... defaulting to this call

   130         // only if no better choice is available.

   131         //

   132         return new InputStreamReader(in, std2java(encoding));

   133     }

   135     //

   136     // JDK doesn't know all of the standard encoding names, and

   137     // in particular none of the EBCDIC ones IANA defines (and

   138     // which IBM encourages).

   139     //

   140     static private final Hashtable charsets = new Hashtable(31);

   142     static {

   143         charsets.put("UTF-16", "Unicode");

   144         charsets.put("ISO-10646-UCS-2", "Unicode");

   146         // NOTE: no support for ISO-10646-UCS-4 yet.

   148         charsets.put("EBCDIC-CP-US", "cp037");

   149         charsets.put("EBCDIC-CP-CA", "cp037");

   150         charsets.put("EBCDIC-CP-NL", "cp037");

   151         charsets.put("EBCDIC-CP-WT", "cp037");

   153         charsets.put("EBCDIC-CP-DK", "cp277");

   154         charsets.put("EBCDIC-CP-NO", "cp277");

   155         charsets.put("EBCDIC-CP-FI", "cp278");

   156         charsets.put("EBCDIC-CP-SE", "cp278");

   158         charsets.put("EBCDIC-CP-IT", "cp280");

   159         charsets.put("EBCDIC-CP-ES", "cp284");

   160         charsets.put("EBCDIC-CP-GB", "cp285");

   161         charsets.put("EBCDIC-CP-FR", "cp297");

   163         charsets.put("EBCDIC-CP-AR1", "cp420");

   164         charsets.put("EBCDIC-CP-HE", "cp424");

   165         charsets.put("EBCDIC-CP-BE", "cp500");

   166         charsets.put("EBCDIC-CP-CH", "cp500");

   168         charsets.put("EBCDIC-CP-ROECE", "cp870");

   169         charsets.put("EBCDIC-CP-YU", "cp870");

   170         charsets.put("EBCDIC-CP-IS", "cp871");

   171         charsets.put("EBCDIC-CP-AR2", "cp918");

   173         // IANA also defines two that JDK 1.2 doesn't handle:

   174         //    EBCDIC-CP-GR        --> CP423

   175         //    EBCDIC-CP-TR        --> CP905

   176     }

   178     // returns an encoding name supported by JDK >= 1.1.6

   179     // for some cases required by the XML spec

   180     private static String std2java(String encoding) {

   181         String temp = encoding.toUpperCase();

   182         temp = (String) charsets.get(temp);

   183         return temp != null ? temp : encoding;

   184     }

   186     /**

   187      * Returns the standard name of the encoding in use

   188      */

   189     public String getEncoding() {

   190         return assignedEncoding;

   191     }

   193     private XmlReader(InputStream stream) throws IOException {

   194         super(stream);

   196         PushbackInputStream pb;

   197         byte buf [];

   198         int len;

   200         if (stream instanceof PushbackInputStream)

   201             pb = (PushbackInputStream) stream;

   202         else

   203             pb = new PushbackInputStream(stream, MAXPUSHBACK);

   205         //

   206         // See if we can figure out the character encoding used

   207         // in this file by peeking at the first few bytes.

   208         //

   209         buf = new byte[4];

   210         len = pb.read(buf);

   211         if (len > 0)

   212             pb.unread(buf, 0, len);

   214         if (len == 4)

   215             switch (buf[0] & 0x0ff) {

   216             case 0:

   217                 // 00 3c 00 3f == illegal UTF-16 big-endian

   218                 if (buf[1] == 0x3c && buf[2] == 0x00 && buf[3] == 0x3f) {

   219                     setEncoding(pb, "UnicodeBig");

   220                     return;

   221                 }

   222                 // else it's probably UCS-4

   223                 break;

   225             case '<':      // 0x3c: the most common cases!

   226                 switch (buf[1] & 0x0ff) {

   227                 // First character is '<'; could be XML without

   228                 // an XML directive such as "<hello>", "<!-- ...",

   229                 // and so on.

   230                 default:

   231                     break;

   233                     // 3c 00 3f 00 == illegal UTF-16 little endian

   234                 case 0x00:

   235                     if (buf[2] == 0x3f && buf[3] == 0x00) {

   236                         setEncoding(pb, "UnicodeLittle");

   237                         return;

   238                     }

   239                     // else probably UCS-4

   240                     break;

   242                     // 3c 3f 78 6d == ASCII and supersets '<?xm'

   243                 case '?':

   244                     if (buf[2] != 'x' || buf[3] != 'm')

   245                         break;

   246                     //

   247                     // One of several encodings could be used:

   248                     // Shift-JIS, ASCII, UTF-8, ISO-8859-*, etc

   249                     //

   250                     useEncodingDecl(pb, "UTF8");

   251                     return;

   252                 }

   253                 break;

   255                 // 4c 6f a7 94 ... some EBCDIC code page

   256             case 0x4c:

   257                 if (buf[1] == 0x6f

   258                         && (0x0ff & buf[2]) == 0x0a7

   259                         && (0x0ff & buf[3]) == 0x094) {

   260                     useEncodingDecl(pb, "CP037");

   261                     return;

   262                 }

   263                 // whoops, treat as UTF-8

   264                 break;

   266                 // UTF-16 big-endian

   267             case 0xfe:

   268                 if ((buf[1] & 0x0ff) != 0xff)

   269                     break;

   270                 setEncoding(pb, "UTF-16");

   271                 return;

   273                 // UTF-16 little-endian

   274             case 0xff:

   275                 if ((buf[1] & 0x0ff) != 0xfe)

   276                     break;

   277                 setEncoding(pb, "UTF-16");

   278                 return;

   280                 // default ... no XML declaration

   281             default:

   282                 break;

   283             }

   285         //

   286         // If all else fails, assume XML without a declaration, and

   287         // using UTF-8 encoding.

   288         //

   289         setEncoding(pb, "UTF-8");

   290     }

   292     /*

   293      * Read the encoding decl on the stream, knowing that it should

   294      * be readable using the specified encoding (basically, ASCII or

   295      * EBCDIC).  The body of the document may use a wider range of

   296      * characters than the XML/Text decl itself, so we switch to use

   297      * the specified encoding as soon as we can.  (ASCII is a subset

   298      * of UTF-8, ISO-8859-*, ISO-2022-JP, EUC-JP, and more; EBCDIC

   299      * has a variety of "code pages" that have these characters as

   300      * a common subset.)

   301      */

   302     private void useEncodingDecl(PushbackInputStream pb, String encoding)

   303             throws IOException {

   304         byte buffer [] = new byte[MAXPUSHBACK];

   305         int len;

   306         Reader r;

   307         int c;

   309         //

   310         // Buffer up a bunch of input, and set up to read it in

   311         // the specified encoding ... we can skip the first four

   312         // bytes since we know that "<?xm" was read to determine

   313         // what encoding to use!

   314         //

   315         len = pb.read(buffer, 0, buffer.length);

   316         pb.unread(buffer, 0, len);

   317         r = new InputStreamReader(new ByteArrayInputStream(buffer, 4, len),

   318                 encoding);

   320         //

   321         // Next must be "l" (and whitespace) else we conclude

   322         // error and choose UTF-8.

   323         //

   324         if ((c = r.read()) != 'l') {

   325             setEncoding(pb, "UTF-8");

   326             return;

   327         }

   329         //

   330         // Then, we'll skip any

   331         //     S version="..."     [or single quotes]

   332         // bit and get any subsequent

   333         //     S encoding="..."     [or single quotes]

   334         //

   335         // We put an arbitrary size limit on how far we read; lots

   336         // of space will break this algorithm.

   337         //

   338         StringBuffer buf = new StringBuffer();

   339         StringBuffer keyBuf = null;

   340         String key = null;

   341         boolean sawEq = false;

   342         char quoteChar = 0;

   343         boolean sawQuestion = false;

   345         XmlDecl:

   346         for (int i = 0; i < MAXPUSHBACK - 5; ++i) {

   347             if ((c = r.read()) == -1)

   348                 break;

   350             // ignore whitespace before/between "key = 'value'"

   351             if (c == ' ' || c == '\t' || c == '\n' || c == '\r')

   352                 continue;

   354             // ... but require at least a little!

   355             if (i == 0)

   356                 break;

   358             // terminate the loop ASAP

   359             if (c == '?')

   360                 sawQuestion = true;

   361             else if (sawQuestion) {

   362                 if (c == '>')

   363                     break;

   364                 sawQuestion = false;

   365             }

   367             // did we get the "key =" bit yet?

   368             if (key == null || !sawEq) {

   369                 if (keyBuf == null) {

   370                     if (Character.isWhitespace((char) c))

   371                         continue;

   372                     keyBuf = buf;

   373                     buf.setLength(0);

   374                     buf.append((char) c);

   375                     sawEq = false;

   376                 } else if (Character.isWhitespace((char) c)) {

   377                     key = keyBuf.toString();

   378                 } else if (c == '=') {

   379                     if (key == null)

   380                         key = keyBuf.toString();

   381                     sawEq = true;

   382                     keyBuf = null;

   383                     quoteChar = 0;

   384                 } else

   385                     keyBuf.append((char) c);

   386                 continue;

   387             }

   389             // space before quoted value

   390             if (Character.isWhitespace((char) c))

   391                 continue;

   392             if (c == '"' || c == '\'') {

   393                 if (quoteChar == 0) {

   394                     quoteChar = (char) c;

   395                     buf.setLength(0);

   396                     continue;

   397                 } else if (c == quoteChar) {

   398                     if ("encoding".equals(key)) {

   399                         assignedEncoding = buf.toString();

   401                         // [81] Encname ::= [A-Za-z] ([A-Za-z0-9._]|'-')*

   402                         for (i = 0; i < assignedEncoding.length(); i++) {

   403                             c = assignedEncoding.charAt(i);

   404                             if ((c >= 'A' && c <= 'Z')

   405                                     || (c >= 'a' && c <= 'z'))

   406                                 continue;

   407                             if (i == 0)

   408                                 break XmlDecl;

   409                             if (i > 0 && (c == '-'

   410                                     || (c >= '0' && c <= '9')

   411                                     || c == '.' || c == '_'))

   412                                 continue;

   413                             // map illegal names to UTF-8 default

   414                             break XmlDecl;

   415                         }

   417                         setEncoding(pb, assignedEncoding);

   418                         return;

   420                     } else {

   421                         key = null;

   422                         continue;

   423                     }

   424                 }

   425             }

   426             buf.append((char) c);

   427         }

   429         setEncoding(pb, "UTF-8");

   430     }

   432     private void setEncoding(InputStream stream, String encoding)

   433             throws IOException {

   434         assignedEncoding = encoding;

   435         in = createReader(stream, encoding);

   436     }

   438     /**

   439      * Reads the number of characters read into the buffer, or -1 on EOF.

   440      */

   441     public int read(char buf [], int off, int len) throws IOException {

   442         int val;

   444         if (closed)

   445             return -1;        // throw new IOException ("closed");

   446         val = in.read(buf, off, len);

   447         if (val == -1)

   448             close();

   449         return val;

   450     }

   452     /**

   453      * Reads a single character.

   454      */

   455     public int read() throws IOException {

   456         int val;

   458         if (closed)

   459             throw new IOException("closed");

   460         val = in.read();

   461         if (val == -1)

   462             close();

   463         return val;

   464     }

   466     /**

   467      * Returns true iff the reader supports mark/reset.

   468      */

   469     public boolean markSupported() {

   470         return in == null ? false : in.markSupported();

   471     }

   473     /**

   474      * Sets a mark allowing a limited number of characters to

   475      * be "peeked", by reading and then resetting.

   476      *

   477      * @param value how many characters may be "peeked".

   478      */

   479     public void mark(int value) throws IOException {

   480         if (in != null) in.mark(value);

   481     }

   483     /**

   484      * Resets the current position to the last marked position.

   485      */

   486     public void reset() throws IOException {

   487         if (in != null) in.reset();

   488     }

   490     /**

   491      * Skips a specified number of characters.

   492      */

   493     public long skip(long value) throws IOException {

   494         return in == null ? 0 : in.skip(value);

   495     }

   497     /**

   498      * Returns true iff input characters are known to be ready.

   499      */

   500     public boolean ready() throws IOException {

   501         return in == null ? false : in.ready();

   502     }

   504     /**

   505      * Closes the reader.

   506      */

   507     public void close() throws IOException {

   508         if (closed)

   509             return;

   510         in.close();

   511         in = null;

   512         closed = true;

   513     }

   515     //

   516     // Delegating to a converter module will always be slower than

   517     // direct conversion.  Use a similar approach for any other

   518     // readers that need to be particularly fast; only block I/O

   519     // speed matters to this package.  For UTF-16, separate readers

   520     // for big and little endian streams make a difference, too;

   521     // fewer conditionals in the critical path!

   522     //

   523     static abstract class BaseReader extends Reader {

   524         protected InputStream instream;

   525         protected byte buffer [];

   526         protected int start, finish;

   528         BaseReader(InputStream stream) {

   529             super(stream);

   531             instream = stream;

   532             buffer = new byte[8192];

   533         }

   535         public boolean ready() throws IOException {

   536             return instream == null

   537                     || (finish - start) > 0

   538                     || instream.available() != 0;

   539         }

   541         // caller shouldn't read again

   542         public void close() throws IOException {

   543             if (instream != null) {

   544                 instream.close();

   545                 start = finish = 0;

   546                 buffer = null;

   547                 instream = null;

   548             }

   549         }

   550     }

   552     //

   553     // We want this reader, to make the default encoding be as fast

   554     // as we can make it.  JDK's "UTF8" (not "UTF-8" till JDK 1.2)

   555     // InputStreamReader works, but 20+% slower speed isn't OK for

   556     // the default/primary encoding.

   557     //

   558     static final class Utf8Reader extends BaseReader {

   559         // 2nd half of UTF-8 surrogate pair

   560         private char nextChar;

   562         Utf8Reader(InputStream stream) {

   563             super(stream);

   564         }

   566         public int read(char buf [], int offset, int len) throws IOException {

   567             int i = 0, c = 0;

   569             if (len <= 0)

   570                 return 0;

   572             // Consume remaining half of any surrogate pair immediately

   573             if (nextChar != 0) {

   574                 buf[offset + i++] = nextChar;

   575                 nextChar = 0;

   576             }

   578             while (i < len) {

   579                 // stop or read data if needed

   580                 if (finish <= start) {

   581                     if (instream == null) {

   582                         c = -1;

   583                         break;

   584                     }

   585                     start = 0;

   586                     finish = instream.read(buffer, 0, buffer.length);

   587                     if (finish <= 0) {

   588                         this.close();

   589                         c = -1;

   590                         break;

   591                     }

   592                 }

   594                 //

   595                 // RFC 2279 describes UTF-8; there are six encodings.

   596                 // Each encoding takes a fixed number of characters

   597                 // (1-6 bytes) and is flagged by a bit pattern in the

   598                 // first byte.  The five and six byte-per-character

   599                 // encodings address characters which are disallowed

   600                 // in XML documents, as do some four byte ones.

   601                 //

   603                 //

   604                 // Single byte == ASCII.  Common; optimize.

   605                 //

   606                 c = buffer[start] & 0x0ff;

   607                 if ((c & 0x80) == 0x00) {

   608                     // 0x0000 <= c <= 0x007f

   609                     start++;

   610                     buf[offset + i++] = (char) c;

   611                     continue;

   612                 }

   614                 //

   615                 // Multibyte chars -- check offsets optimistically,

   616                 // ditto the "10xx xxxx" format for subsequent bytes

   617                 //

   618                 int off = start;

   620                 try {

   621                     // 2 bytes

   622                     if ((buffer[off] & 0x0E0) == 0x0C0) {

   623                         c = (buffer[off++] & 0x1f) << 6;

   624                         c += buffer[off++] & 0x3f;

   626                         // 0x0080 <= c <= 0x07ff

   628                         // 3 bytes

   629                     } else if ((buffer[off] & 0x0F0) == 0x0E0) {

   630                         c = (buffer[off++] & 0x0f) << 12;

   631                         c += (buffer[off++] & 0x3f) << 6;

   632                         c += buffer[off++] & 0x3f;

   634                         // 0x0800 <= c <= 0xffff

   636                         // 4 bytes

   637                     } else if ((buffer[off] & 0x0f8) == 0x0F0) {

   638                         c = (buffer[off++] & 0x07) << 18;

   639                         c += (buffer[off++] & 0x3f) << 12;

   640                         c += (buffer[off++] & 0x3f) << 6;

   641                         c += buffer[off++] & 0x3f;

   643                         // 0x0001 0000  <= c  <= 0x001f ffff

   645                         // Unicode supports c <= 0x0010 ffff ...

   646                         if (c > 0x0010ffff)

   647                             throw new CharConversionException("UTF-8 encoding of character 0x00"

   648                                     + Integer.toHexString(c)

   649                                     + " can't be converted to Unicode.");

   651                         // Convert UCS-4 char to surrogate pair (UTF-16)

   652                         c -= 0x10000;

   653                         nextChar = (char) (0xDC00 + (c & 0x03ff));

   654                         c = 0xD800 + (c >> 10);

   656                         // 5 and 6 byte versions are XML WF errors, but

   657                         // typically come from mislabeled encodings

   658                     } else

   659                         throw new CharConversionException("Unconvertible UTF-8 character"

   660                                 + " beginning with 0x"

   661                                 + Integer.toHexString(buffer[start] & 0xff));

   663                 } catch (ArrayIndexOutOfBoundsException e) {

   664                     // off > length && length >= buffer.length

   665                     c = 0;

   666                 }

   668                 //

   669                 // if the buffer held only a partial character,

   670                 // compact it and try to read the rest of the

   671                 // character.  worst case involves three

   672                 // single-byte reads -- quite rare.

   673                 //

   674                 if (off > finish) {

   675                     System.arraycopy(buffer, start,

   676                             buffer, 0, finish - start);

   677                     finish -= start;

   678                     start = 0;

   679                     off = instream.read(buffer, finish,

   680                             buffer.length - finish);

   681                     if (off < 0) {

   682                         this.close();

   683                         throw new CharConversionException("Partial UTF-8 char");

   684                     }

   685                     finish += off;

   686                     continue;

   687                 }

   689                 //

   690                 // check the format of the non-initial bytes

   691                 //

   692                 for (start++; start < off; start++) {

   693                     if ((buffer[start] & 0xC0) != 0x80) {

   694                         this.close();

   695                         throw new CharConversionException("Malformed UTF-8 char -- "

   696                                 + "is an XML encoding declaration missing?");

   697                     }

   698                 }

   700                 //

   701                 // If this needed a surrogate pair, consume ASAP

   702                 //

   703                 buf[offset + i++] = (char) c;

   704                 if (nextChar != 0 && i < len) {

   705                     buf[offset + i++] = nextChar;

   706                     nextChar = 0;

   707                 }

   708             }

   709             if (i > 0)

   710                 return i;

   711             return (c == -1) ? -1 : 0;

   712         }

   713     }

   715     //

   716     // We want ASCII and ISO-8859 Readers since they're the most common

   717     // encodings in the US and Europe, and we don't want performance

   718     // regressions for them.  They're also easy to implement efficiently,

   719     // since they're bitmask subsets of UNICODE.

   720     //

   721     // XXX haven't benchmarked these readers vs what we get out of JDK.

   722     //

   723     static final class AsciiReader extends BaseReader {

   724         AsciiReader(InputStream in) {

   725             super(in);

   726         }

   728         public int read(char buf [], int offset, int len) throws IOException {

   729             int i, c;

   731             if (instream == null)

   732                 return -1;

   734             for (i = 0; i < len; i++) {

   735                 if (start >= finish) {

   736                     start = 0;

   737                     finish = instream.read(buffer, 0, buffer.length);

   738                     if (finish <= 0) {

   739                         if (finish <= 0)

   740                             this.close();

   741                         break;

   742                     }

   743                 }

   744                 c = buffer[start++];

   745                 if ((c & 0x80) != 0)

   746                     throw new CharConversionException("Illegal ASCII character, 0x"

   747                             + Integer.toHexString(c & 0xff));

   748                 buf[offset + i] = (char) c;

   749             }

   750             if (i == 0 && finish <= 0)

   751                 return -1;

   752             return i;

   753         }

   754     }

   756     static final class Iso8859_1Reader extends BaseReader {

   757         Iso8859_1Reader(InputStream in) {

   758             super(in);

   759         }

   761         public int read(char buf [], int offset, int len) throws IOException {

   762             int i;

   764             if (instream == null)

   765                 return -1;

   767             for (i = 0; i < len; i++) {

   768                 if (start >= finish) {

   769                     start = 0;

   770                     finish = instream.read(buffer, 0, buffer.length);

   771                     if (finish <= 0) {

   772                         if (finish <= 0)

   773                             this.close();

   774                         break;

   775                     }

   776                 }

   777                 buf[offset + i] = (char) (0x0ff & buffer[start++]);

   778             }

   779             if (i == 0 && finish <= 0)

   780                 return -1;

   781             return i;

   782         }

   783     }

   784 }

Mercurial > jdk8-mips64-public > jaxws / file revision

src/share/jaxws_classes/com/sun/xml/internal/dtdparser/XmlReader.java@373ffda63c9a

src/share/jaxws_classes/com/sun/xml/internal/dtdparser/XmlReader.java