jdk8-mips64-public/langtools: src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java@5c0b3faeb0b0

     1 /*

     2  * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package com.sun.tools.javac.parser;

    28 import com.sun.tools.javac.file.JavacFileManager;

    29 import com.sun.tools.javac.util.Log;

    30 import com.sun.tools.javac.util.Name;

    31 import com.sun.tools.javac.util.Names;

    33 import java.nio.CharBuffer;

    35 import static com.sun.tools.javac.util.LayoutCharacters.*;

    37 /** The char reader used by the javac lexer/tokenizer. Returns the sequence of

    38  * characters contained in the input stream, handling Unicode escapes accordingly.

    39  * Additionally, it provides features for saving chars into a buffer and retrieving

    40  * them at a later stage.

    41  *

    42  *  <p><b>This is NOT part of any supported API.

    43  *  If you write code that depends on this, you do so at your own risk.

    44  *  This code and its internal interfaces are subject to change or

    45  *  deletion without notice.</b>

    46  */

    47 public class UnicodeReader {

    49     /** The input buffer, index of next character to be read,

    50      *  index of one past last character in buffer.

    51      */

    52     protected char[] buf;

    53     protected int bp;

    54     protected final int buflen;

    56     /** The current character.

    57      */

    58     protected char ch;

    60     /** The buffer index of the last converted unicode character

    61      */

    62     protected int unicodeConversionBp = -1;

    64     protected Log log;

    65     protected Names names;

    67     /** A character buffer for saved chars.

    68      */

    69     protected char[] sbuf = new char[128];

    70     protected int sp;

    72     /**

    73      * Create a scanner from the input array.  This method might

    74      * modify the array.  To avoid copying the input array, ensure

    75      * that {@code inputLength < input.length} or

    76      * {@code input[input.length -1]} is a white space character.

    77      *

    78      * @param fac the factory which created this Scanner

    79      * @param input the input, might be modified

    80      * @param inputLength the size of the input.

    81      * Must be positive and less than or equal to input.length.

    82      */

    83     protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {

    84         this(sf, JavacFileManager.toArray(buffer), buffer.limit());

    85     }

    87     protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {

    88         log = sf.log;

    89         names = sf.names;

    90         if (inputLength == input.length) {

    91             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {

    92                 inputLength--;

    93             } else {

    94                 char[] newInput = new char[inputLength + 1];

    95                 System.arraycopy(input, 0, newInput, 0, input.length);

    96                 input = newInput;

    97             }

    98         }

    99         buf = input;

   100         buflen = inputLength;

   101         buf[buflen] = EOI;

   102         bp = -1;

   103         scanChar();

   104     }

   106     /** Read next character.

   107      */

   108     protected void scanChar() {

   109         if (bp < buflen) {

   110             ch = buf[++bp];

   111             if (ch == '\\') {

   112                 convertUnicode();

   113             }

   114         }

   115     }

   117     /** Read next character in comment, skipping over double '\' characters.

   118      */

   119     protected void scanCommentChar() {

   120         scanChar();

   121         if (ch == '\\') {

   122             if (peekChar() == '\\' && !isUnicode()) {

   123                 skipChar();

   124             } else {

   125                 convertUnicode();

   126             }

   127         }

   128     }

   130     /** Append a character to sbuf.

   131      */

   132     protected void putChar(char ch, boolean scan) {

   133         if (sp == sbuf.length) {

   134             char[] newsbuf = new char[sbuf.length * 2];

   135             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);

   136             sbuf = newsbuf;

   137         }

   138         sbuf[sp++] = ch;

   139         if (scan)

   140             scanChar();

   141     }

   143     protected void putChar(char ch) {

   144         putChar(ch, false);

   145     }

   147     protected void putChar(boolean scan) {

   148         putChar(ch, scan);

   149     }

   151     Name name() {

   152         return names.fromChars(sbuf, 0, sp);

   153     }

   155     String chars() {

   156         return new String(sbuf, 0, sp);

   157     }

   159     /** Convert unicode escape; bp points to initial '\' character

   160      *  (Spec 3.3).

   161      */

   162     protected void convertUnicode() {

   163         if (ch == '\\' && unicodeConversionBp != bp) {

   164             bp++; ch = buf[bp];

   165             if (ch == 'u') {

   166                 do {

   167                     bp++; ch = buf[bp];

   168                 } while (ch == 'u');

   169                 int limit = bp + 3;

   170                 if (limit < buflen) {

   171                     int d = digit(bp, 16);

   172                     int code = d;

   173                     while (bp < limit && d >= 0) {

   174                         bp++; ch = buf[bp];

   175                         d = digit(bp, 16);

   176                         code = (code << 4) + d;

   177                     }

   178                     if (d >= 0) {

   179                         ch = (char)code;

   180                         unicodeConversionBp = bp;

   181                         return;

   182                     }

   183                 }

   184                 log.error(bp, "illegal.unicode.esc");

   185             } else {

   186                 bp--;

   187                 ch = '\\';

   188             }

   189         }

   190     }

   192     /** Are surrogates supported?

   193      */

   194     final static boolean surrogatesSupported = surrogatesSupported();

   195     private static boolean surrogatesSupported() {

   196         try {

   197             Character.isHighSurrogate('a');

   198             return true;

   199         } catch (NoSuchMethodError ex) {

   200             return false;

   201         }

   202     }

   204     /** Scan surrogate pairs.  If 'ch' is a high surrogate and

   205      *  the next character is a low surrogate, then put the low

   206      *  surrogate in 'ch', and return the high surrogate.

   207      *  otherwise, just return 0.

   208      */

   209     protected char scanSurrogates() {

   210         if (surrogatesSupported && Character.isHighSurrogate(ch)) {

   211             char high = ch;

   213             scanChar();

   215             if (Character.isLowSurrogate(ch)) {

   216                 return high;

   217             }

   219             ch = high;

   220         }

   222         return 0;

   223     }

   225     /** Convert an ASCII digit from its base (8, 10, or 16)

   226      *  to its value.

   227      */

   228     protected int digit(int pos, int base) {

   229         char c = ch;

   230         int result = Character.digit(c, base);

   231         if (result >= 0 && c > 0x7f) {

   232             log.error(pos + 1, "illegal.nonascii.digit");

   233             ch = "0123456789abcdef".charAt(result);

   234         }

   235         return result;

   236     }

   238     protected boolean isUnicode() {

   239         return unicodeConversionBp == bp;

   240     }

   242     protected void skipChar() {

   243         bp++;

   244     }

   246     protected char peekChar() {

   247         return buf[bp + 1];

   248     }

   250     /**

   251      * Returns a copy of the input buffer, up to its inputLength.

   252      * Unicode escape sequences are not translated.

   253      */

   254     public char[] getRawCharacters() {

   255         char[] chars = new char[buflen];

   256         System.arraycopy(buf, 0, chars, 0, buflen);

   257         return chars;

   258     }

   260     /**

   261      * Returns a copy of a character array subset of the input buffer.

   262      * The returned array begins at the <code>beginIndex</code> and

   263      * extends to the character at index <code>endIndex - 1</code>.

   264      * Thus the length of the substring is <code>endIndex-beginIndex</code>.

   265      * This behavior is like

   266      * <code>String.substring(beginIndex, endIndex)</code>.

   267      * Unicode escape sequences are not translated.

   268      *

   269      * @param beginIndex the beginning index, inclusive.

   270      * @param endIndex the ending index, exclusive.

   271      * @throws IndexOutOfBounds if either offset is outside of the

   272      *         array bounds

   273      */

   274     public char[] getRawCharacters(int beginIndex, int endIndex) {

   275         int length = endIndex - beginIndex;

   276         char[] chars = new char[length];

   277         System.arraycopy(buf, beginIndex, chars, 0, length);

   278         return chars;

   279     }

   280 }

Mercurial > jdk8-mips64-public > langtools / file revision

src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java@5c0b3faeb0b0

src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java