src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java

Fri, 04 Nov 2011 12:36:40 +0000

author
mcimadamore
date
Fri, 04 Nov 2011 12:36:40 +0000
changeset 1125
56830d5cb5bb
parent 1113
d346ab55031b
child 1339
0e5899f09dab
permissions
-rw-r--r--

7104201: Refactor DocCommentScanner
Summary: Add new Comment helper class to parse contents of comments in source code
Reviewed-by: jjg

     1 /*
     2  * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import com.sun.tools.javac.file.JavacFileManager;
    29 import com.sun.tools.javac.util.Log;
    30 import com.sun.tools.javac.util.Name;
    31 import com.sun.tools.javac.util.Names;
    33 import java.nio.CharBuffer;
    35 import static com.sun.tools.javac.util.LayoutCharacters.*;
    37 /** The char reader used by the javac lexer/tokenizer. Returns the sequence of
    38  * characters contained in the input stream, handling unicode escape accordingly.
    39  * Additionally, it provide features for saving chars into a buffer and to retrieve
    40  * them at a later stage.
    41  *
    42  *  <p><b>This is NOT part of any supported API.
    43  *  If you write code that depends on this, you do so at your own risk.
    44  *  This code and its internal interfaces are subject to change or
    45  *  deletion without notice.</b>
    46  */
    47 public class UnicodeReader {
    49     /** The input buffer, index of next character to be read,
    50      *  index of one past last character in buffer.
    51      */
    52     protected char[] buf;
    53     protected int bp;
    54     protected final int buflen;
    56     /** The current character.
    57      */
    58     protected char ch;
    60     /** The buffer index of the last converted unicode character
    61      */
    62     protected int unicodeConversionBp = -1;
    64     protected Log log;
    65     protected Names names;
    67     /** A character buffer for saved chars.
    68      */
    69     protected char[] sbuf = new char[128];
    70     protected int sp;
    72     /**
    73      * Create a scanner from the input array.  This method might
    74      * modify the array.  To avoid copying the input array, ensure
    75      * that {@code inputLength < input.length} or
    76      * {@code input[input.length -1]} is a white space character.
    77      *
    78      * @param fac the factory which created this Scanner
    79      * @param input the input, might be modified
    80      * @param inputLength the size of the input.
    81      * Must be positive and less than or equal to input.length.
    82      */
    83     protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
    84         this(sf, JavacFileManager.toArray(buffer), buffer.limit());
    85     }
    87     protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
    88         log = sf.log;
    89         names = sf.names;
    90         if (inputLength == input.length) {
    91             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
    92                 inputLength--;
    93             } else {
    94                 char[] newInput = new char[inputLength + 1];
    95                 System.arraycopy(input, 0, newInput, 0, input.length);
    96                 input = newInput;
    97             }
    98         }
    99         buf = input;
   100         buflen = inputLength;
   101         buf[buflen] = EOI;
   102         bp = -1;
   103         scanChar();
   104     }
   106     /** Read next character.
   107      */
   108     protected void scanChar() {
   109         if (bp < buflen) {
   110             ch = buf[++bp];
   111             if (ch == '\\') {
   112                 convertUnicode();
   113             }
   114         }
   115     }
   117     /** Read next character in comment, skipping over double '\' characters.
   118      */
   119     protected void scanCommentChar() {
   120         scanChar();
   121         if (ch == '\\') {
   122             if (peekChar() == '\\' && !isUnicode()) {
   123                 skipChar();
   124             } else {
   125                 convertUnicode();
   126             }
   127         }
   128     }
   130     /** Append a character to sbuf.
   131      */
   132     protected void putChar(char ch, boolean scan) {
   133         if (sp == sbuf.length) {
   134             char[] newsbuf = new char[sbuf.length * 2];
   135             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
   136             sbuf = newsbuf;
   137         }
   138         sbuf[sp++] = ch;
   139         if (scan)
   140             scanChar();
   141     }
   143     protected void putChar(char ch) {
   144         putChar(ch, false);
   145     }
   147     protected void putChar(boolean scan) {
   148         putChar(ch, scan);
   149     }
   151     Name name() {
   152         return names.fromChars(sbuf, 0, sp);
   153     }
   155     String chars() {
   156         return new String(sbuf, 0, sp);
   157     }
   159     /** Convert unicode escape; bp points to initial '\' character
   160      *  (Spec 3.3).
   161      */
   162     protected void convertUnicode() {
   163         if (ch == '\\' && unicodeConversionBp != bp) {
   164             bp++; ch = buf[bp];
   165             if (ch == 'u') {
   166                 do {
   167                     bp++; ch = buf[bp];
   168                 } while (ch == 'u');
   169                 int limit = bp + 3;
   170                 if (limit < buflen) {
   171                     int d = digit(bp, 16);
   172                     int code = d;
   173                     while (bp < limit && d >= 0) {
   174                         bp++; ch = buf[bp];
   175                         d = digit(bp, 16);
   176                         code = (code << 4) + d;
   177                     }
   178                     if (d >= 0) {
   179                         ch = (char)code;
   180                         unicodeConversionBp = bp;
   181                         return;
   182                     }
   183                 }
   184                 log.error(bp, "illegal.unicode.esc");
   185             } else {
   186                 bp--;
   187                 ch = '\\';
   188             }
   189         }
   190     }
   192     /** Are surrogates supported?
   193      */
   194     final static boolean surrogatesSupported = surrogatesSupported();
   195     private static boolean surrogatesSupported() {
   196         try {
   197             Character.isHighSurrogate('a');
   198             return true;
   199         } catch (NoSuchMethodError ex) {
   200             return false;
   201         }
   202     }
   204     /** Scan surrogate pairs.  If 'ch' is a high surrogate and
   205      *  the next character is a low surrogate, then put the low
   206      *  surrogate in 'ch', and return the high surrogate.
   207      *  otherwise, just return 0.
   208      */
   209     protected char scanSurrogates() {
   210         if (surrogatesSupported && Character.isHighSurrogate(ch)) {
   211             char high = ch;
   213             scanChar();
   215             if (Character.isLowSurrogate(ch)) {
   216                 return high;
   217             }
   219             ch = high;
   220         }
   222         return 0;
   223     }
   225     /** Convert an ASCII digit from its base (8, 10, or 16)
   226      *  to its value.
   227      */
   228     protected int digit(int pos, int base) {
   229         char c = ch;
   230         int result = Character.digit(c, base);
   231         if (result >= 0 && c > 0x7f) {
   232             log.error(pos + 1, "illegal.nonascii.digit");
   233             ch = "0123456789abcdef".charAt(result);
   234         }
   235         return result;
   236     }
   238     protected boolean isUnicode() {
   239         return unicodeConversionBp == bp;
   240     }
   242     protected void skipChar() {
   243         bp++;
   244     }
   246     protected char peekChar() {
   247         return buf[bp + 1];
   248     }
   250     /**
   251      * Returns a copy of the input buffer, up to its inputLength.
   252      * Unicode escape sequences are not translated.
   253      */
   254     public char[] getRawCharacters() {
   255         char[] chars = new char[buflen];
   256         System.arraycopy(buf, 0, chars, 0, buflen);
   257         return chars;
   258     }
   260     /**
   261      * Returns a copy of a character array subset of the input buffer.
   262      * The returned array begins at the <code>beginIndex</code> and
   263      * extends to the character at index <code>endIndex - 1</code>.
   264      * Thus the length of the substring is <code>endIndex-beginIndex</code>.
   265      * This behavior is like
   266      * <code>String.substring(beginIndex, endIndex)</code>.
   267      * Unicode escape sequences are not translated.
   268      *
   269      * @param beginIndex the beginning index, inclusive.
   270      * @param endIndex the ending index, exclusive.
   271      * @throws IndexOutOfBounds if either offset is outside of the
   272      *         array bounds
   273      */
   274     public char[] getRawCharacters(int beginIndex, int endIndex) {
   275         int length = endIndex - beginIndex;
   276         char[] chars = new char[length];
   277         System.arraycopy(buf, beginIndex, chars, 0, length);
   278         return chars;
   279     }
   280 }

mercurial