jdk8-mips64-public/langtools: src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java@af8417e590f4

     1 /*

     2  * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package com.sun.tools.javac.parser;

    28 import java.nio.CharBuffer;

    29 import java.util.Arrays;

    31 import com.sun.tools.javac.file.JavacFileManager;

    32 import com.sun.tools.javac.util.ArrayUtils;

    33 import com.sun.tools.javac.util.Log;

    34 import com.sun.tools.javac.util.Name;

    35 import com.sun.tools.javac.util.Names;

    37 import static com.sun.tools.javac.util.LayoutCharacters.*;

    39 /** The char reader used by the javac lexer/tokenizer. Returns the sequence of

    40  * characters contained in the input stream, handling unicode escape accordingly.

    41  * Additionally, it provides features for saving chars into a buffer and to retrieve

    42  * them at a later stage.

    43  *

    44  *  <p><b>This is NOT part of any supported API.

    45  *  If you write code that depends on this, you do so at your own risk.

    46  *  This code and its internal interfaces are subject to change or

    47  *  deletion without notice.</b>

    48  */

    49 public class UnicodeReader {

    51     /** The input buffer, index of next character to be read,

    52      *  index of one past last character in buffer.

    53      */

    54     protected char[] buf;

    55     protected int bp;

    56     protected final int buflen;

    58     /** The current character.

    59      */

    60     protected char ch;

    62     /** The buffer index of the last converted unicode character

    63      */

    64     protected int unicodeConversionBp = -1;

    66     protected Log log;

    67     protected Names names;

    69     /** A character buffer for saved chars.

    70      */

    71     protected char[] sbuf = new char[128];

    72     protected int sp;

    74     /**

    75      * Create a scanner from the input array.  This method might

    76      * modify the array.  To avoid copying the input array, ensure

    77      * that {@code inputLength < input.length} or

    78      * {@code input[input.length -1]} is a white space character.

    79      *

    80      * @param sf the factory which created this Scanner

    81      * @param buffer the input, might be modified

    82      * Must be positive and less than or equal to input.length.

    83      */

    84     protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {

    85         this(sf, JavacFileManager.toArray(buffer), buffer.limit());

    86     }

    88     protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {

    89         log = sf.log;

    90         names = sf.names;

    91         if (inputLength == input.length) {

    92             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {

    93                 inputLength--;

    94             } else {

    95                 input = Arrays.copyOf(input, inputLength + 1);

    96             }

    97         }

    98         buf = input;

    99         buflen = inputLength;

   100         buf[buflen] = EOI;

   101         bp = -1;

   102         scanChar();

   103     }

   105     /** Read next character.

   106      */

   107     protected void scanChar() {

   108         if (bp < buflen) {

   109             ch = buf[++bp];

   110             if (ch == '\\') {

   111                 convertUnicode();

   112             }

   113         }

   114     }

   116     /** Read next character in comment, skipping over double '\' characters.

   117      */

   118     protected void scanCommentChar() {

   119         scanChar();

   120         if (ch == '\\') {

   121             if (peekChar() == '\\' && !isUnicode()) {

   122                 skipChar();

   123             } else {

   124                 convertUnicode();

   125             }

   126         }

   127     }

   129     /** Append a character to sbuf.

   130      */

   131     protected void putChar(char ch, boolean scan) {

   132         sbuf = ArrayUtils.ensureCapacity(sbuf, sp);

   133         sbuf[sp++] = ch;

   134         if (scan)

   135             scanChar();

   136     }

   138     protected void putChar(char ch) {

   139         putChar(ch, false);

   140     }

   142     protected void putChar(boolean scan) {

   143         putChar(ch, scan);

   144     }

   146     Name name() {

   147         return names.fromChars(sbuf, 0, sp);

   148     }

   150     String chars() {

   151         return new String(sbuf, 0, sp);

   152     }

   154     /** Convert unicode escape; bp points to initial '\' character

   155      *  (Spec 3.3).

   156      */

   157     protected void convertUnicode() {

   158         if (ch == '\\' && unicodeConversionBp != bp) {

   159             bp++; ch = buf[bp];

   160             if (ch == 'u') {

   161                 do {

   162                     bp++; ch = buf[bp];

   163                 } while (ch == 'u');

   164                 int limit = bp + 3;

   165                 if (limit < buflen) {

   166                     int d = digit(bp, 16);

   167                     int code = d;

   168                     while (bp < limit && d >= 0) {

   169                         bp++; ch = buf[bp];

   170                         d = digit(bp, 16);

   171                         code = (code << 4) + d;

   172                     }

   173                     if (d >= 0) {

   174                         ch = (char)code;

   175                         unicodeConversionBp = bp;

   176                         return;

   177                     }

   178                 }

   179                 log.error(bp, "illegal.unicode.esc");

   180             } else {

   181                 bp--;

   182                 ch = '\\';

   183             }

   184         }

   185     }

   187     /** Are surrogates supported?

   188      */

   189     final static boolean surrogatesSupported = surrogatesSupported();

   190     private static boolean surrogatesSupported() {

   191         try {

   192             Character.isHighSurrogate('a');

   193             return true;

   194         } catch (NoSuchMethodError ex) {

   195             return false;

   196         }

   197     }

   199     /** Scan surrogate pairs.  If 'ch' is a high surrogate and

   200      *  the next character is a low surrogate, then put the low

   201      *  surrogate in 'ch', and return the high surrogate.

   202      *  otherwise, just return 0.

   203      */

   204     protected char scanSurrogates() {

   205         if (surrogatesSupported && Character.isHighSurrogate(ch)) {

   206             char high = ch;

   208             scanChar();

   210             if (Character.isLowSurrogate(ch)) {

   211                 return high;

   212             }

   214             ch = high;

   215         }

   217         return 0;

   218     }

   220     /** Convert an ASCII digit from its base (8, 10, or 16)

   221      *  to its value.

   222      */

   223     protected int digit(int pos, int base) {

   224         char c = ch;

   225         int result = Character.digit(c, base);

   226         if (result >= 0 && c > 0x7f) {

   227             log.error(pos + 1, "illegal.nonascii.digit");

   228             ch = "0123456789abcdef".charAt(result);

   229         }

   230         return result;

   231     }

   233     protected boolean isUnicode() {

   234         return unicodeConversionBp == bp;

   235     }

   237     protected void skipChar() {

   238         bp++;

   239     }

   241     protected char peekChar() {

   242         return buf[bp + 1];

   243     }

   245     /**

   246      * Returns a copy of the input buffer, up to its inputLength.

   247      * Unicode escape sequences are not translated.

   248      */

   249     public char[] getRawCharacters() {

   250         char[] chars = new char[buflen];

   251         System.arraycopy(buf, 0, chars, 0, buflen);

   252         return chars;

   253     }

   255     /**

   256      * Returns a copy of a character array subset of the input buffer.

   257      * The returned array begins at the {@code beginIndex} and

   258      * extends to the character at index {@code endIndex - 1}.

   259      * Thus the length of the substring is {@code endIndex-beginIndex}.

   260      * This behavior is like

   261      * {@code String.substring(beginIndex, endIndex)}.

   262      * Unicode escape sequences are not translated.

   263      *

   264      * @param beginIndex the beginning index, inclusive.

   265      * @param endIndex the ending index, exclusive.

   266      * @throws ArrayIndexOutOfBoundsException if either offset is outside of the

   267      *         array bounds

   268      */

   269     public char[] getRawCharacters(int beginIndex, int endIndex) {

   270         int length = endIndex - beginIndex;

   271         char[] chars = new char[length];

   272         System.arraycopy(buf, beginIndex, chars, 0, length);

   273         return chars;

   274     }

   275 }

Mercurial > jdk8-mips64-public > langtools / file revision

src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java@af8417e590f4

src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java