diff -r b4021c520e40 -r d346ab55031b src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java Mon Oct 24 13:00:20 2011 +0100 @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package com.sun.tools.javac.parser; + +import com.sun.tools.javac.file.JavacFileManager; +import java.nio.CharBuffer; +import com.sun.tools.javac.util.Log; +import static com.sun.tools.javac.util.LayoutCharacters.*; + +/** The char reader used by the javac lexer/tokenizer. Returns the sequence of + * characters contained in the input stream, handling unicode escape accordingly. + * Additionally, it provide features for saving chars into a buffer and to retrieve + * them at a later stage. + * + *
+ * <p><b>This is NOT part of any supported API.
+ * If you write code that depends on this, you do so at your own risk.
+ * This code and its internal interfaces are subject to change or
+ * deletion without notice.</b>
+ */
+public class UnicodeReader {
+
+ /** The input buffer. The constructor stores an {@code EOI} sentinel at
+ * {@code buf[buflen]}.
+ */
+ protected char[] buf;
+ /** Index into {@code buf} of the character that {@code scanChar()} last
+ * read into {@code ch}. It starts at -1 and is incremented before each
+ * read; after a unicode escape has been converted it is the index of the
+ * escape's last hex digit.
+ */
+ protected int bp;
+ /** Index of one past the last readable character in {@code buf}.
+ */
+ protected final int buflen;
+
+ /** The current character.
+ */
+ protected char ch;
+
+ /** The buffer index of the last converted unicode character,
+ * or -1 if no unicode escape has been converted yet.
+ */
+ protected int unicodeConversionBp = -1;
+
+ /** The log that errors found while reading are reported to; taken from
+ * the {@code ScannerFactory} passed to the constructor.
+ */
+ protected Log log;
+
+ /**
+ * Create a reader from a character buffer. The buffer is turned into a
+ * char array with {@code JavacFileManager.toArray(buffer)} and
+ * {@code buffer.limit()} is used as the input length; both are handed to
+ * {@link #UnicodeReader(ScannerFactory, char[], int)}, which does the
+ * actual set-up.
+ *
+ * That constructor might modify the array it is given. It avoids copying
+ * the array when the input length is less than the array length or the
+ * last array element is a white space character.
+ *
+ * @param sf the factory which created this reader
+ * @param buffer the characters to read
+ */
+ protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
+ this(sf, JavacFileManager.toArray(buffer), buffer.limit());
+ }
+
+ /**
+  * Create a reader over the first {@code inputLength} characters of
+  * {@code input}. An {@code EOI} sentinel is stored directly after the
+  * readable characters, so the array handed in might be modified:
+  * <ul>
+  * <li>if {@code inputLength < input.length}, the sentinel is written to
+  *     {@code input[inputLength]};
+  * <li>if {@code inputLength == input.length} and the last character is
+  *     white space, that character is dropped from the readable range and
+  *     overwritten by the sentinel;
+  * <li>otherwise the characters are copied into a new array that is one
+  *     element longer, and {@code input} itself is left untouched.
+  * </ul>
+  * The first character is then read via {@code scanChar()}.
+  *
+  * @param sf the factory which created this reader; its {@code log} is
+  *     kept for error reporting
+  * @param input the input, might be modified
+  * @param inputLength the number of characters of {@code input} to read.
+  *     Must be non-negative and less than or equal to input.length.
+  */
+ protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
+     log = sf.log;
+     char[] chars = input;
+     int readable = inputLength;
+     if (readable == chars.length) {
+         boolean endsWithWhitespace =
+                 chars.length > 0 && Character.isWhitespace(chars[chars.length - 1]);
+         if (endsWithWhitespace) {
+             // The trailing white space gives up its slot to the sentinel.
+             readable--;
+         } else {
+             // No spare slot: grow by one element so the sentinel fits.
+             char[] grown = new char[readable + 1];
+             System.arraycopy(chars, 0, grown, 0, chars.length);
+             chars = grown;
+         }
+     }
+     buf = chars;
+     buflen = readable;
+     buf[buflen] = EOI;
+     bp = -1;
+     scanChar();
+ }
+
+ /** Advance to the next character and store it in {@code ch}.
+  * Does nothing once {@code bp} has reached {@code buflen}. If the new
+  * character is a backslash, {@code convertUnicode()} is invoked so that
+  * a unicode escape starting there gets translated.
+  */
+ protected void scanChar() {
+     if (bp >= buflen) {
+         return;
+     }
+     bp++;
+     ch = buf[bp];
+     if (ch == '\\') {
+         convertUnicode();
+     }
+ }
+
+ /** Convert unicode escape; bp points to initial '\' character
+ * (Spec 3.3).
+ *
+ * On success {@code ch} holds the decoded character, {@code bp} is the
+ * index of the escape's last hex digit and that index is recorded in
+ * {@code unicodeConversionBp}. If the backslash is not followed by 'u',
+ * {@code bp} and {@code ch} are put back on the backslash. A malformed
+ * escape is reported as "illegal.unicode.esc"; in that case {@code bp}
+ * and {@code ch} stay wherever scanning stopped.
+ */
+ protected void convertUnicode() {
+ // Skip the work when the backslash in ch is itself the result of an
+ // escape already converted at this position.
+ if (ch == '\\' && unicodeConversionBp != bp) {
+ bp++; ch = buf[bp];
+ if (ch == 'u') {
+ // Any number of consecutive 'u' characters may follow the backslash.
+ do {
+ bp++; ch = buf[bp];
+ } while (ch == 'u');
+ // bp is now at the first hex digit; limit is the index of the fourth.
+ int limit = bp + 3;
+ // All four digits must lie inside the readable part of the buffer.
+ if (limit < buflen) {
+ // digit() reads ch (not buf[bp]); it may report a non-ASCII digit
+ // and replace ch as a side effect.
+ int d = digit(bp, 16);
+ int code = d;
+ // Accumulate the remaining digits, stopping early at the first
+ // character that is not a hex digit (d < 0).
+ while (bp < limit && d >= 0) {
+ bp++; ch = buf[bp];
+ d = digit(bp, 16);
+ code = (code << 4) + d;
+ }
+ if (d >= 0) {
+ ch = (char)code;
+ // Remember where the conversion ended so isUnicode() and the
+ // guard at the top of this method can recognise it.
+ unicodeConversionBp = bp;
+ return;
+ }
+ }
+ // Reached when the escape is truncated or contains a non-hex character.
+ log.error(bp, "illegal.unicode.esc");
+ } else {
+ // Not a unicode escape: step back onto the backslash.
+ bp--;
+ ch = '\\';
+ }
+ }
+ }
+
+ /** Whether {@code Character.isHighSurrogate(char)} can be called on the
+  * running platform; determined once when this class is initialized.
+  */
+ static final boolean surrogatesSupported = surrogatesSupported();
+
+ /** Probe for surrogate support by calling
+  * {@code Character.isHighSurrogate}; a {@code NoSuchMethodError} is taken
+  * to mean that surrogates are not supported.
+  */
+ private static boolean surrogatesSupported() {
+     boolean supported;
+     try {
+         Character.isHighSurrogate('a');
+         supported = true;
+     } catch (NoSuchMethodError ex) {
+         supported = false;
+     }
+     return supported;
+ }
+
+ /** Scan surrogate pairs. If 'ch' is a high surrogate and
+ * the next character is a low surrogate, then put the low
+ * surrogate in 'ch', and return the high surrogate.
+ * otherwise, just return 0.
+ */
+ protected char scanSurrogates() {
+ if (surrogatesSupported && Character.isHighSurrogate(ch)) {
+ char high = ch;
+
+ scanChar();
+
+ if (Character.isLowSurrogate(ch)) {
+ return high;
+ }
+
+ ch = high;
+ }
+
+ return 0;
+ }
+
+ /** Convert an ASCII digit from its base (8, 10, or 16)
+ * to its value.
+ */
+ protected int digit(int pos, int base) {
+ char c = ch;
+ int result = Character.digit(c, base);
+ if (result >= 0 && c > 0x7f) {
+ log.error(pos + 1, "illegal.nonascii.digit");
+ ch = "0123456789abcdef".charAt(result);
+ }
+ return result;
+ }
+
+ /** Tell whether the current position is the one recorded by the last
+  * successful unicode escape conversion in {@code convertUnicode()}.
+  */
+ protected boolean isUnicode() {
+     return bp == unicodeConversionBp;
+ }
+
+ /** Move {@code bp} forward by one position without reading a character;
+  * {@code ch} is left unchanged.
+  */
+ protected void skipChar() {
+     ++bp;
+ }
+
+ protected char peekChar() {
+ return buf[bp + 1];
+ }
+
+ /**
+  * Returns a newly allocated array holding the first {@code buflen}
+  * characters of the input buffer.
+  * Unicode escape sequences are not translated.
+  */
+ public char[] getRawCharacters() {
+     final char[] copy = new char[buflen];
+     System.arraycopy(buf, 0, copy, 0, copy.length);
+     return copy;
+ }
+
+ /**
+  * Returns a copy of a character array subset of the input buffer.
+  * The returned array begins at the {@code beginIndex} and
+  * extends to the character at index {@code endIndex - 1}.
+  * Thus the length of the substring is {@code endIndex-beginIndex}.
+  * This behavior is like
+  * {@code String.substring(beginIndex, endIndex)}.
+  * Unicode escape sequences are not translated.
+  *
+  * @param beginIndex the beginning index, inclusive.
+  * @param endIndex the ending index, exclusive.
+  * @throws IndexOutOfBoundsException if either offset is outside of the
+  *     array bounds
+  * @throws NegativeArraySizeException if {@code endIndex} is less than
+  *     {@code beginIndex}
+  */
+ public char[] getRawCharacters(int beginIndex, int endIndex) {
+     final char[] slice = new char[endIndex - beginIndex];
+     System.arraycopy(buf, beginIndex, slice, 0, slice.length);
+     return slice;
+ }
+}