src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java

Sun, 17 Feb 2013 16:44:55 -0500

author
dholmes
date
Sun, 17 Feb 2013 16:44:55 -0500
changeset 1571
af8417e590f4
parent 1521
71f35e4b93a5
child 2525
2eb010b6cb22
permissions
-rw-r--r--

Merge

     1 /*
     2  * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import java.nio.CharBuffer;
    29 import java.util.Arrays;
    31 import com.sun.tools.javac.file.JavacFileManager;
    32 import com.sun.tools.javac.util.ArrayUtils;
    33 import com.sun.tools.javac.util.Log;
    34 import com.sun.tools.javac.util.Name;
    35 import com.sun.tools.javac.util.Names;
    37 import static com.sun.tools.javac.util.LayoutCharacters.*;
    39 /** The char reader used by the javac lexer/tokenizer. Returns the sequence of
    40  * characters contained in the input stream, handling unicode escape accordingly.
    41  * Additionally, it provides features for saving chars into a buffer and to retrieve
    42  * them at a later stage.
    43  *
    44  *  <p><b>This is NOT part of any supported API.
    45  *  If you write code that depends on this, you do so at your own risk.
    46  *  This code and its internal interfaces are subject to change or
    47  *  deletion without notice.</b>
    48  */
    49 public class UnicodeReader {
    51     /** The input buffer, index of next character to be read,
    52      *  index of one past last character in buffer.
    53      */
    54     protected char[] buf;
    55     protected int bp;
    56     protected final int buflen;
    58     /** The current character.
    59      */
    60     protected char ch;
    62     /** The buffer index of the last converted unicode character
    63      */
    64     protected int unicodeConversionBp = -1;
    66     protected Log log;
    67     protected Names names;
    69     /** A character buffer for saved chars.
    70      */
    71     protected char[] sbuf = new char[128];
    72     protected int sp;
    74     /**
    75      * Create a scanner from the input array.  This method might
    76      * modify the array.  To avoid copying the input array, ensure
    77      * that {@code inputLength < input.length} or
    78      * {@code input[input.length -1]} is a white space character.
    79      *
    80      * @param sf the factory which created this Scanner
    81      * @param buffer the input, might be modified
    82      * Must be positive and less than or equal to input.length.
    83      */
    84     protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
    85         this(sf, JavacFileManager.toArray(buffer), buffer.limit());
    86     }
    88     protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
    89         log = sf.log;
    90         names = sf.names;
    91         if (inputLength == input.length) {
    92             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
    93                 inputLength--;
    94             } else {
    95                 input = Arrays.copyOf(input, inputLength + 1);
    96             }
    97         }
    98         buf = input;
    99         buflen = inputLength;
   100         buf[buflen] = EOI;
   101         bp = -1;
   102         scanChar();
   103     }
   105     /** Read next character.
   106      */
   107     protected void scanChar() {
   108         if (bp < buflen) {
   109             ch = buf[++bp];
   110             if (ch == '\\') {
   111                 convertUnicode();
   112             }
   113         }
   114     }
   116     /** Read next character in comment, skipping over double '\' characters.
   117      */
   118     protected void scanCommentChar() {
   119         scanChar();
   120         if (ch == '\\') {
   121             if (peekChar() == '\\' && !isUnicode()) {
   122                 skipChar();
   123             } else {
   124                 convertUnicode();
   125             }
   126         }
   127     }
   129     /** Append a character to sbuf.
   130      */
   131     protected void putChar(char ch, boolean scan) {
   132         sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
   133         sbuf[sp++] = ch;
   134         if (scan)
   135             scanChar();
   136     }
   138     protected void putChar(char ch) {
   139         putChar(ch, false);
   140     }
   142     protected void putChar(boolean scan) {
   143         putChar(ch, scan);
   144     }
   146     Name name() {
   147         return names.fromChars(sbuf, 0, sp);
   148     }
   150     String chars() {
   151         return new String(sbuf, 0, sp);
   152     }
   154     /** Convert unicode escape; bp points to initial '\' character
   155      *  (Spec 3.3).
   156      */
   157     protected void convertUnicode() {
   158         if (ch == '\\' && unicodeConversionBp != bp) {
   159             bp++; ch = buf[bp];
   160             if (ch == 'u') {
   161                 do {
   162                     bp++; ch = buf[bp];
   163                 } while (ch == 'u');
   164                 int limit = bp + 3;
   165                 if (limit < buflen) {
   166                     int d = digit(bp, 16);
   167                     int code = d;
   168                     while (bp < limit && d >= 0) {
   169                         bp++; ch = buf[bp];
   170                         d = digit(bp, 16);
   171                         code = (code << 4) + d;
   172                     }
   173                     if (d >= 0) {
   174                         ch = (char)code;
   175                         unicodeConversionBp = bp;
   176                         return;
   177                     }
   178                 }
   179                 log.error(bp, "illegal.unicode.esc");
   180             } else {
   181                 bp--;
   182                 ch = '\\';
   183             }
   184         }
   185     }
   187     /** Are surrogates supported?
   188      */
   189     final static boolean surrogatesSupported = surrogatesSupported();
   190     private static boolean surrogatesSupported() {
   191         try {
   192             Character.isHighSurrogate('a');
   193             return true;
   194         } catch (NoSuchMethodError ex) {
   195             return false;
   196         }
   197     }
   199     /** Scan surrogate pairs.  If 'ch' is a high surrogate and
   200      *  the next character is a low surrogate, then put the low
   201      *  surrogate in 'ch', and return the high surrogate.
   202      *  otherwise, just return 0.
   203      */
   204     protected char scanSurrogates() {
   205         if (surrogatesSupported && Character.isHighSurrogate(ch)) {
   206             char high = ch;
   208             scanChar();
   210             if (Character.isLowSurrogate(ch)) {
   211                 return high;
   212             }
   214             ch = high;
   215         }
   217         return 0;
   218     }
   220     /** Convert an ASCII digit from its base (8, 10, or 16)
   221      *  to its value.
   222      */
   223     protected int digit(int pos, int base) {
   224         char c = ch;
   225         int result = Character.digit(c, base);
   226         if (result >= 0 && c > 0x7f) {
   227             log.error(pos + 1, "illegal.nonascii.digit");
   228             ch = "0123456789abcdef".charAt(result);
   229         }
   230         return result;
   231     }
   233     protected boolean isUnicode() {
   234         return unicodeConversionBp == bp;
   235     }
   237     protected void skipChar() {
   238         bp++;
   239     }
   241     protected char peekChar() {
   242         return buf[bp + 1];
   243     }
   245     /**
   246      * Returns a copy of the input buffer, up to its inputLength.
   247      * Unicode escape sequences are not translated.
   248      */
   249     public char[] getRawCharacters() {
   250         char[] chars = new char[buflen];
   251         System.arraycopy(buf, 0, chars, 0, buflen);
   252         return chars;
   253     }
   255     /**
   256      * Returns a copy of a character array subset of the input buffer.
   257      * The returned array begins at the {@code beginIndex} and
   258      * extends to the character at index {@code endIndex - 1}.
   259      * Thus the length of the substring is {@code endIndex-beginIndex}.
   260      * This behavior is like
   261      * {@code String.substring(beginIndex, endIndex)}.
   262      * Unicode escape sequences are not translated.
   263      *
   264      * @param beginIndex the beginning index, inclusive.
   265      * @param endIndex the ending index, exclusive.
   266      * @throws ArrayIndexOutOfBoundsException if either offset is outside of the
   267      *         array bounds
   268      */
   269     public char[] getRawCharacters(int beginIndex, int endIndex) {
   270         int length = endIndex - beginIndex;
   271         char[] chars = new char[length];
   272         System.arraycopy(buf, beginIndex, chars, 0, length);
   273         return chars;
   274     }
   275 }

mercurial