src/share/jaxws_classes/com/sun/xml/internal/dtdparser/XmlReader.java

Wed, 27 Apr 2016 01:27:09 +0800

author
aoqi
date
Wed, 27 Apr 2016 01:27:09 +0800
changeset 0
373ffda63c9a
child 637
9c07ef4934dd
permissions
-rw-r--r--

Initial load
http://hg.openjdk.java.net/jdk8u/jdk8u/jaxws/
changeset: 657:d47a47f961ee
tag: jdk8u25-b17

     1 /*
     2  * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.xml.internal.dtdparser;
    28 import java.io.ByteArrayInputStream;
    29 import java.io.CharConversionException;
    30 import java.io.IOException;
    31 import java.io.InputStream;
    32 import java.io.InputStreamReader;
    33 import java.io.PushbackInputStream;
    34 import java.io.Reader;
    35 import java.util.Hashtable;
    38 // NOTE:  Add I18N support to this class when JDK gets the ability to
    39 // defer selection of locale for exception messages ... use the same
    40 // technique for both.
    43 /**
    44  * This handles several XML-related tasks that normal java.io Readers
    45  * don't support, inluding use of IETF standard encoding names and
    46  * automatic detection of most XML encodings.  The former is needed
    47  * for interoperability; the latter is needed to conform with the XML
    48  * spec.  This class also optimizes reading some common encodings by
    49  * providing low-overhead unsynchronized Reader support.
    50  * <p/>
    51  * <P> Note that the autodetection facility should be used only on
    52  * data streams which have an unknown character encoding.  For example,
    53  * it should never be used on MIME text/xml entities.
    54  * <p/>
    55  * <P> Note that XML processors are only required to support UTF-8 and
    56  * UTF-16 character encodings.  Autodetection permits the underlying Java
    57  * implementation to provide support for many other encodings, such as
    58  * US-ASCII, ISO-8859-5, Shift_JIS, EUC-JP, and ISO-2022-JP.
    59  *
    60  * @author David Brownell
    61  * @author Janet Koenig
    62  * @version 1.3 00/02/24
    63  */
    64 // package private
    65 final class XmlReader extends Reader {
    66     private static final int MAXPUSHBACK = 512;
    68     private Reader in;
    69     private String assignedEncoding;
    70     private boolean closed;
    72     //
    73     // This class always delegates I/O to a reader, which gets
    74     // its data from the very beginning of the XML text.  It needs
    75     // to use a pushback stream since (a) autodetection can read
    76     // partial UTF-8 characters which need to be fully processed,
    77     // (b) the "Unicode" readers swallow characters that they think
    78     // are byte order marks, so tests fail if they don't see the
    79     // real byte order mark.
    80     //
    81     // It's got do this efficiently:  character I/O is solidly on the
    82     // critical path.  (So keep buffer length over 2 Kbytes to avoid
    83     // excess buffering. Many URL handlers stuff a BufferedInputStream
    84     // between here and the real data source, and larger buffers keep
    85     // that from slowing you down.)
    86     //
    88     /**
    89      * Constructs the reader from an input stream, autodetecting
    90      * the encoding to use according to the heuristic specified
    91      * in the XML 1.0 recommendation.
    92      *
    93      * @param in the input stream from which the reader is constructed
    94      * @throws IOException on error, such as unrecognized encoding
    95      */
    96     public static Reader createReader(InputStream in) throws IOException {
    97         return new XmlReader(in);
    98     }
   100     /**
   101      * Creates a reader supporting the given encoding, mapping
   102      * from standard encoding names to ones that understood by
   103      * Java where necessary.
   104      *
   105      * @param in       the input stream from which the reader is constructed
   106      * @param encoding the IETF standard name of the encoding to use;
   107      *                 if null, autodetection is used.
   108      * @throws IOException on error, including unrecognized encoding
   109      */
   110     public static Reader createReader(InputStream in, String encoding)
   111             throws IOException {
   112         if (encoding == null)
   113             return new XmlReader(in);
   114         if ("UTF-8".equalsIgnoreCase(encoding)
   115                 || "UTF8".equalsIgnoreCase(encoding))
   116             return new Utf8Reader(in);
   117         if ("US-ASCII".equalsIgnoreCase(encoding)
   118                 || "ASCII".equalsIgnoreCase(encoding))
   119             return new AsciiReader(in);
   120         if ("ISO-8859-1".equalsIgnoreCase(encoding)
   121         // plus numerous aliases ...
   122         )
   123             return new Iso8859_1Reader(in);
   125         //
   126         // What we really want is an administerable resource mapping
   127         // encoding names/aliases to classnames.  For example a property
   128         // file resource, "readers/mapping.props", holding and a set
   129         // of readers in that (sub)package... defaulting to this call
   130         // only if no better choice is available.
   131         //
   132         return new InputStreamReader(in, std2java(encoding));
   133     }
   135     //
   136     // JDK doesn't know all of the standard encoding names, and
   137     // in particular none of the EBCDIC ones IANA defines (and
   138     // which IBM encourages).
   139     //
   140     static private final Hashtable charsets = new Hashtable(31);
   142     static {
   143         charsets.put("UTF-16", "Unicode");
   144         charsets.put("ISO-10646-UCS-2", "Unicode");
   146         // NOTE: no support for ISO-10646-UCS-4 yet.
   148         charsets.put("EBCDIC-CP-US", "cp037");
   149         charsets.put("EBCDIC-CP-CA", "cp037");
   150         charsets.put("EBCDIC-CP-NL", "cp037");
   151         charsets.put("EBCDIC-CP-WT", "cp037");
   153         charsets.put("EBCDIC-CP-DK", "cp277");
   154         charsets.put("EBCDIC-CP-NO", "cp277");
   155         charsets.put("EBCDIC-CP-FI", "cp278");
   156         charsets.put("EBCDIC-CP-SE", "cp278");
   158         charsets.put("EBCDIC-CP-IT", "cp280");
   159         charsets.put("EBCDIC-CP-ES", "cp284");
   160         charsets.put("EBCDIC-CP-GB", "cp285");
   161         charsets.put("EBCDIC-CP-FR", "cp297");
   163         charsets.put("EBCDIC-CP-AR1", "cp420");
   164         charsets.put("EBCDIC-CP-HE", "cp424");
   165         charsets.put("EBCDIC-CP-BE", "cp500");
   166         charsets.put("EBCDIC-CP-CH", "cp500");
   168         charsets.put("EBCDIC-CP-ROECE", "cp870");
   169         charsets.put("EBCDIC-CP-YU", "cp870");
   170         charsets.put("EBCDIC-CP-IS", "cp871");
   171         charsets.put("EBCDIC-CP-AR2", "cp918");
   173         // IANA also defines two that JDK 1.2 doesn't handle:
   174         //    EBCDIC-CP-GR        --> CP423
   175         //    EBCDIC-CP-TR        --> CP905
   176     }
   178     // returns an encoding name supported by JDK >= 1.1.6
   179     // for some cases required by the XML spec
   180     private static String std2java(String encoding) {
   181         String temp = encoding.toUpperCase();
   182         temp = (String) charsets.get(temp);
   183         return temp != null ? temp : encoding;
   184     }
   186     /**
   187      * Returns the standard name of the encoding in use
   188      */
   189     public String getEncoding() {
   190         return assignedEncoding;
   191     }
   193     private XmlReader(InputStream stream) throws IOException {
   194         super(stream);
   196         PushbackInputStream pb;
   197         byte buf [];
   198         int len;
   200         if (stream instanceof PushbackInputStream)
   201             pb = (PushbackInputStream) stream;
   202         else
   203             pb = new PushbackInputStream(stream, MAXPUSHBACK);
   205         //
   206         // See if we can figure out the character encoding used
   207         // in this file by peeking at the first few bytes.
   208         //
   209         buf = new byte[4];
   210         len = pb.read(buf);
   211         if (len > 0)
   212             pb.unread(buf, 0, len);
   214         if (len == 4)
   215             switch (buf[0] & 0x0ff) {
   216             case 0:
   217                 // 00 3c 00 3f == illegal UTF-16 big-endian
   218                 if (buf[1] == 0x3c && buf[2] == 0x00 && buf[3] == 0x3f) {
   219                     setEncoding(pb, "UnicodeBig");
   220                     return;
   221                 }
   222                 // else it's probably UCS-4
   223                 break;
   225             case '<':      // 0x3c: the most common cases!
   226                 switch (buf[1] & 0x0ff) {
   227                 // First character is '<'; could be XML without
   228                 // an XML directive such as "<hello>", "<!-- ...",
   229                 // and so on.
   230                 default:
   231                     break;
   233                     // 3c 00 3f 00 == illegal UTF-16 little endian
   234                 case 0x00:
   235                     if (buf[2] == 0x3f && buf[3] == 0x00) {
   236                         setEncoding(pb, "UnicodeLittle");
   237                         return;
   238                     }
   239                     // else probably UCS-4
   240                     break;
   242                     // 3c 3f 78 6d == ASCII and supersets '<?xm'
   243                 case '?':
   244                     if (buf[2] != 'x' || buf[3] != 'm')
   245                         break;
   246                     //
   247                     // One of several encodings could be used:
   248                     // Shift-JIS, ASCII, UTF-8, ISO-8859-*, etc
   249                     //
   250                     useEncodingDecl(pb, "UTF8");
   251                     return;
   252                 }
   253                 break;
   255                 // 4c 6f a7 94 ... some EBCDIC code page
   256             case 0x4c:
   257                 if (buf[1] == 0x6f
   258                         && (0x0ff & buf[2]) == 0x0a7
   259                         && (0x0ff & buf[3]) == 0x094) {
   260                     useEncodingDecl(pb, "CP037");
   261                     return;
   262                 }
   263                 // whoops, treat as UTF-8
   264                 break;
   266                 // UTF-16 big-endian
   267             case 0xfe:
   268                 if ((buf[1] & 0x0ff) != 0xff)
   269                     break;
   270                 setEncoding(pb, "UTF-16");
   271                 return;
   273                 // UTF-16 little-endian
   274             case 0xff:
   275                 if ((buf[1] & 0x0ff) != 0xfe)
   276                     break;
   277                 setEncoding(pb, "UTF-16");
   278                 return;
   280                 // default ... no XML declaration
   281             default:
   282                 break;
   283             }
   285         //
   286         // If all else fails, assume XML without a declaration, and
   287         // using UTF-8 encoding.
   288         //
   289         setEncoding(pb, "UTF-8");
   290     }
   292     /*
   293      * Read the encoding decl on the stream, knowing that it should
   294      * be readable using the specified encoding (basically, ASCII or
   295      * EBCDIC).  The body of the document may use a wider range of
   296      * characters than the XML/Text decl itself, so we switch to use
   297      * the specified encoding as soon as we can.  (ASCII is a subset
   298      * of UTF-8, ISO-8859-*, ISO-2022-JP, EUC-JP, and more; EBCDIC
   299      * has a variety of "code pages" that have these characters as
   300      * a common subset.)
   301      */
   302     private void useEncodingDecl(PushbackInputStream pb, String encoding)
   303             throws IOException {
   304         byte buffer [] = new byte[MAXPUSHBACK];
   305         int len;
   306         Reader r;
   307         int c;
   309         //
   310         // Buffer up a bunch of input, and set up to read it in
   311         // the specified encoding ... we can skip the first four
   312         // bytes since we know that "<?xm" was read to determine
   313         // what encoding to use!
   314         //
   315         len = pb.read(buffer, 0, buffer.length);
   316         pb.unread(buffer, 0, len);
   317         r = new InputStreamReader(new ByteArrayInputStream(buffer, 4, len),
   318                 encoding);
   320         //
   321         // Next must be "l" (and whitespace) else we conclude
   322         // error and choose UTF-8.
   323         //
   324         if ((c = r.read()) != 'l') {
   325             setEncoding(pb, "UTF-8");
   326             return;
   327         }
   329         //
   330         // Then, we'll skip any
   331         //     S version="..."     [or single quotes]
   332         // bit and get any subsequent
   333         //     S encoding="..."     [or single quotes]
   334         //
   335         // We put an arbitrary size limit on how far we read; lots
   336         // of space will break this algorithm.
   337         //
   338         StringBuffer buf = new StringBuffer();
   339         StringBuffer keyBuf = null;
   340         String key = null;
   341         boolean sawEq = false;
   342         char quoteChar = 0;
   343         boolean sawQuestion = false;
   345         XmlDecl:
   346         for (int i = 0; i < MAXPUSHBACK - 5; ++i) {
   347             if ((c = r.read()) == -1)
   348                 break;
   350             // ignore whitespace before/between "key = 'value'"
   351             if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
   352                 continue;
   354             // ... but require at least a little!
   355             if (i == 0)
   356                 break;
   358             // terminate the loop ASAP
   359             if (c == '?')
   360                 sawQuestion = true;
   361             else if (sawQuestion) {
   362                 if (c == '>')
   363                     break;
   364                 sawQuestion = false;
   365             }
   367             // did we get the "key =" bit yet?
   368             if (key == null || !sawEq) {
   369                 if (keyBuf == null) {
   370                     if (Character.isWhitespace((char) c))
   371                         continue;
   372                     keyBuf = buf;
   373                     buf.setLength(0);
   374                     buf.append((char) c);
   375                     sawEq = false;
   376                 } else if (Character.isWhitespace((char) c)) {
   377                     key = keyBuf.toString();
   378                 } else if (c == '=') {
   379                     if (key == null)
   380                         key = keyBuf.toString();
   381                     sawEq = true;
   382                     keyBuf = null;
   383                     quoteChar = 0;
   384                 } else
   385                     keyBuf.append((char) c);
   386                 continue;
   387             }
   389             // space before quoted value
   390             if (Character.isWhitespace((char) c))
   391                 continue;
   392             if (c == '"' || c == '\'') {
   393                 if (quoteChar == 0) {
   394                     quoteChar = (char) c;
   395                     buf.setLength(0);
   396                     continue;
   397                 } else if (c == quoteChar) {
   398                     if ("encoding".equals(key)) {
   399                         assignedEncoding = buf.toString();
   401                         // [81] Encname ::= [A-Za-z] ([A-Za-z0-9._]|'-')*
   402                         for (i = 0; i < assignedEncoding.length(); i++) {
   403                             c = assignedEncoding.charAt(i);
   404                             if ((c >= 'A' && c <= 'Z')
   405                                     || (c >= 'a' && c <= 'z'))
   406                                 continue;
   407                             if (i == 0)
   408                                 break XmlDecl;
   409                             if (i > 0 && (c == '-'
   410                                     || (c >= '0' && c <= '9')
   411                                     || c == '.' || c == '_'))
   412                                 continue;
   413                             // map illegal names to UTF-8 default
   414                             break XmlDecl;
   415                         }
   417                         setEncoding(pb, assignedEncoding);
   418                         return;
   420                     } else {
   421                         key = null;
   422                         continue;
   423                     }
   424                 }
   425             }
   426             buf.append((char) c);
   427         }
   429         setEncoding(pb, "UTF-8");
   430     }
   432     private void setEncoding(InputStream stream, String encoding)
   433             throws IOException {
   434         assignedEncoding = encoding;
   435         in = createReader(stream, encoding);
   436     }
   438     /**
   439      * Reads the number of characters read into the buffer, or -1 on EOF.
   440      */
   441     public int read(char buf [], int off, int len) throws IOException {
   442         int val;
   444         if (closed)
   445             return -1;        // throw new IOException ("closed");
   446         val = in.read(buf, off, len);
   447         if (val == -1)
   448             close();
   449         return val;
   450     }
   452     /**
   453      * Reads a single character.
   454      */
   455     public int read() throws IOException {
   456         int val;
   458         if (closed)
   459             throw new IOException("closed");
   460         val = in.read();
   461         if (val == -1)
   462             close();
   463         return val;
   464     }
   466     /**
   467      * Returns true iff the reader supports mark/reset.
   468      */
   469     public boolean markSupported() {
   470         return in == null ? false : in.markSupported();
   471     }
   473     /**
   474      * Sets a mark allowing a limited number of characters to
   475      * be "peeked", by reading and then resetting.
   476      *
   477      * @param value how many characters may be "peeked".
   478      */
   479     public void mark(int value) throws IOException {
   480         if (in != null) in.mark(value);
   481     }
   483     /**
   484      * Resets the current position to the last marked position.
   485      */
   486     public void reset() throws IOException {
   487         if (in != null) in.reset();
   488     }
   490     /**
   491      * Skips a specified number of characters.
   492      */
   493     public long skip(long value) throws IOException {
   494         return in == null ? 0 : in.skip(value);
   495     }
   497     /**
   498      * Returns true iff input characters are known to be ready.
   499      */
   500     public boolean ready() throws IOException {
   501         return in == null ? false : in.ready();
   502     }
   504     /**
   505      * Closes the reader.
   506      */
   507     public void close() throws IOException {
   508         if (closed)
   509             return;
   510         in.close();
   511         in = null;
   512         closed = true;
   513     }
   515     //
   516     // Delegating to a converter module will always be slower than
   517     // direct conversion.  Use a similar approach for any other
   518     // readers that need to be particularly fast; only block I/O
   519     // speed matters to this package.  For UTF-16, separate readers
   520     // for big and little endian streams make a difference, too;
   521     // fewer conditionals in the critical path!
   522     //
   523     static abstract class BaseReader extends Reader {
   524         protected InputStream instream;
   525         protected byte buffer [];
   526         protected int start, finish;
   528         BaseReader(InputStream stream) {
   529             super(stream);
   531             instream = stream;
   532             buffer = new byte[8192];
   533         }
   535         public boolean ready() throws IOException {
   536             return instream == null
   537                     || (finish - start) > 0
   538                     || instream.available() != 0;
   539         }
   541         // caller shouldn't read again
   542         public void close() throws IOException {
   543             if (instream != null) {
   544                 instream.close();
   545                 start = finish = 0;
   546                 buffer = null;
   547                 instream = null;
   548             }
   549         }
   550     }
   552     //
   553     // We want this reader, to make the default encoding be as fast
   554     // as we can make it.  JDK's "UTF8" (not "UTF-8" till JDK 1.2)
   555     // InputStreamReader works, but 20+% slower speed isn't OK for
   556     // the default/primary encoding.
   557     //
   558     static final class Utf8Reader extends BaseReader {
   559         // 2nd half of UTF-8 surrogate pair
   560         private char nextChar;
   562         Utf8Reader(InputStream stream) {
   563             super(stream);
   564         }
   566         public int read(char buf [], int offset, int len) throws IOException {
   567             int i = 0, c = 0;
   569             if (len <= 0)
   570                 return 0;
   572             // Consume remaining half of any surrogate pair immediately
   573             if (nextChar != 0) {
   574                 buf[offset + i++] = nextChar;
   575                 nextChar = 0;
   576             }
   578             while (i < len) {
   579                 // stop or read data if needed
   580                 if (finish <= start) {
   581                     if (instream == null) {
   582                         c = -1;
   583                         break;
   584                     }
   585                     start = 0;
   586                     finish = instream.read(buffer, 0, buffer.length);
   587                     if (finish <= 0) {
   588                         this.close();
   589                         c = -1;
   590                         break;
   591                     }
   592                 }
   594                 //
   595                 // RFC 2279 describes UTF-8; there are six encodings.
   596                 // Each encoding takes a fixed number of characters
   597                 // (1-6 bytes) and is flagged by a bit pattern in the
   598                 // first byte.  The five and six byte-per-character
   599                 // encodings address characters which are disallowed
   600                 // in XML documents, as do some four byte ones.
   601                 //
   603                 //
   604                 // Single byte == ASCII.  Common; optimize.
   605                 //
   606                 c = buffer[start] & 0x0ff;
   607                 if ((c & 0x80) == 0x00) {
   608                     // 0x0000 <= c <= 0x007f
   609                     start++;
   610                     buf[offset + i++] = (char) c;
   611                     continue;
   612                 }
   614                 //
   615                 // Multibyte chars -- check offsets optimistically,
   616                 // ditto the "10xx xxxx" format for subsequent bytes
   617                 //
   618                 int off = start;
   620                 try {
   621                     // 2 bytes
   622                     if ((buffer[off] & 0x0E0) == 0x0C0) {
   623                         c = (buffer[off++] & 0x1f) << 6;
   624                         c += buffer[off++] & 0x3f;
   626                         // 0x0080 <= c <= 0x07ff
   628                         // 3 bytes
   629                     } else if ((buffer[off] & 0x0F0) == 0x0E0) {
   630                         c = (buffer[off++] & 0x0f) << 12;
   631                         c += (buffer[off++] & 0x3f) << 6;
   632                         c += buffer[off++] & 0x3f;
   634                         // 0x0800 <= c <= 0xffff
   636                         // 4 bytes
   637                     } else if ((buffer[off] & 0x0f8) == 0x0F0) {
   638                         c = (buffer[off++] & 0x07) << 18;
   639                         c += (buffer[off++] & 0x3f) << 12;
   640                         c += (buffer[off++] & 0x3f) << 6;
   641                         c += buffer[off++] & 0x3f;
   643                         // 0x0001 0000  <= c  <= 0x001f ffff
   645                         // Unicode supports c <= 0x0010 ffff ...
   646                         if (c > 0x0010ffff)
   647                             throw new CharConversionException("UTF-8 encoding of character 0x00"
   648                                     + Integer.toHexString(c)
   649                                     + " can't be converted to Unicode.");
   651                         // Convert UCS-4 char to surrogate pair (UTF-16)
   652                         c -= 0x10000;
   653                         nextChar = (char) (0xDC00 + (c & 0x03ff));
   654                         c = 0xD800 + (c >> 10);
   656                         // 5 and 6 byte versions are XML WF errors, but
   657                         // typically come from mislabeled encodings
   658                     } else
   659                         throw new CharConversionException("Unconvertible UTF-8 character"
   660                                 + " beginning with 0x"
   661                                 + Integer.toHexString(buffer[start] & 0xff));
   663                 } catch (ArrayIndexOutOfBoundsException e) {
   664                     // off > length && length >= buffer.length
   665                     c = 0;
   666                 }
   668                 //
   669                 // if the buffer held only a partial character,
   670                 // compact it and try to read the rest of the
   671                 // character.  worst case involves three
   672                 // single-byte reads -- quite rare.
   673                 //
   674                 if (off > finish) {
   675                     System.arraycopy(buffer, start,
   676                             buffer, 0, finish - start);
   677                     finish -= start;
   678                     start = 0;
   679                     off = instream.read(buffer, finish,
   680                             buffer.length - finish);
   681                     if (off < 0) {
   682                         this.close();
   683                         throw new CharConversionException("Partial UTF-8 char");
   684                     }
   685                     finish += off;
   686                     continue;
   687                 }
   689                 //
   690                 // check the format of the non-initial bytes
   691                 //
   692                 for (start++; start < off; start++) {
   693                     if ((buffer[start] & 0xC0) != 0x80) {
   694                         this.close();
   695                         throw new CharConversionException("Malformed UTF-8 char -- "
   696                                 + "is an XML encoding declaration missing?");
   697                     }
   698                 }
   700                 //
   701                 // If this needed a surrogate pair, consume ASAP
   702                 //
   703                 buf[offset + i++] = (char) c;
   704                 if (nextChar != 0 && i < len) {
   705                     buf[offset + i++] = nextChar;
   706                     nextChar = 0;
   707                 }
   708             }
   709             if (i > 0)
   710                 return i;
   711             return (c == -1) ? -1 : 0;
   712         }
   713     }
   715     //
   716     // We want ASCII and ISO-8859 Readers since they're the most common
   717     // encodings in the US and Europe, and we don't want performance
   718     // regressions for them.  They're also easy to implement efficiently,
   719     // since they're bitmask subsets of UNICODE.
   720     //
   721     // XXX haven't benchmarked these readers vs what we get out of JDK.
   722     //
   723     static final class AsciiReader extends BaseReader {
   724         AsciiReader(InputStream in) {
   725             super(in);
   726         }
   728         public int read(char buf [], int offset, int len) throws IOException {
   729             int i, c;
   731             if (instream == null)
   732                 return -1;
   734             for (i = 0; i < len; i++) {
   735                 if (start >= finish) {
   736                     start = 0;
   737                     finish = instream.read(buffer, 0, buffer.length);
   738                     if (finish <= 0) {
   739                         if (finish <= 0)
   740                             this.close();
   741                         break;
   742                     }
   743                 }
   744                 c = buffer[start++];
   745                 if ((c & 0x80) != 0)
   746                     throw new CharConversionException("Illegal ASCII character, 0x"
   747                             + Integer.toHexString(c & 0xff));
   748                 buf[offset + i] = (char) c;
   749             }
   750             if (i == 0 && finish <= 0)
   751                 return -1;
   752             return i;
   753         }
   754     }
   756     static final class Iso8859_1Reader extends BaseReader {
   757         Iso8859_1Reader(InputStream in) {
   758             super(in);
   759         }
   761         public int read(char buf [], int offset, int len) throws IOException {
   762             int i;
   764             if (instream == null)
   765                 return -1;
   767             for (i = 0; i < len; i++) {
   768                 if (start >= finish) {
   769                     start = 0;
   770                     finish = instream.read(buffer, 0, buffer.length);
   771                     if (finish <= 0) {
   772                         if (finish <= 0)
   773                             this.close();
   774                         break;
   775                     }
   776                 }
   777                 buf[offset + i] = (char) (0x0ff & buffer[start++]);
   778             }
   779             if (i == 0 && finish <= 0)
   780                 return -1;
   781             return i;
   782         }
   783     }
   784 }

mercurial