mcimadamore@1113: /* jjg@1339: * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. mcimadamore@1113: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. mcimadamore@1113: * mcimadamore@1113: * This code is free software; you can redistribute it and/or modify it mcimadamore@1113: * under the terms of the GNU General Public License version 2 only, as mcimadamore@1113: * published by the Free Software Foundation. Oracle designates this mcimadamore@1113: * particular file as subject to the "Classpath" exception as provided mcimadamore@1113: * by Oracle in the LICENSE file that accompanied this code. mcimadamore@1113: * mcimadamore@1113: * This code is distributed in the hope that it will be useful, but WITHOUT mcimadamore@1113: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or mcimadamore@1113: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License mcimadamore@1113: * version 2 for more details (a copy is included in the LICENSE file that mcimadamore@1113: * accompanied this code). mcimadamore@1113: * mcimadamore@1113: * You should have received a copy of the GNU General Public License version mcimadamore@1113: * 2 along with this work; if not, write to the Free Software Foundation, mcimadamore@1113: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. mcimadamore@1113: * mcimadamore@1113: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA mcimadamore@1113: * or visit www.oracle.com if you need additional information or have any mcimadamore@1113: * questions. mcimadamore@1113: */ mcimadamore@1113: mcimadamore@1113: package com.sun.tools.javac.parser; mcimadamore@1113: jjg@1339: import java.nio.CharBuffer; jjg@1339: import java.util.Arrays; jjg@1339: mcimadamore@1113: import com.sun.tools.javac.file.JavacFileManager; jjg@1339: import com.sun.tools.javac.util.ArrayUtils; mcimadamore@1125: import com.sun.tools.javac.util.Log; mcimadamore@1125: import com.sun.tools.javac.util.Name; mcimadamore@1125: import com.sun.tools.javac.util.Names; mcimadamore@1125: mcimadamore@1113: import static com.sun.tools.javac.util.LayoutCharacters.*; mcimadamore@1113: mcimadamore@1113: /** The char reader used by the javac lexer/tokenizer. Returns the sequence of mcimadamore@1113: * characters contained in the input stream, handling unicode escape accordingly. mcimadamore@1113: * Additionally, it provide features for saving chars into a buffer and to retrieve mcimadamore@1113: * them at a later stage. mcimadamore@1113: * mcimadamore@1113: *
This is NOT part of any supported API.
mcimadamore@1113: * If you write code that depends on this, you do so at your own risk.
mcimadamore@1113: * This code and its internal interfaces are subject to change or
mcimadamore@1113: * deletion without notice.
mcimadamore@1113: */
mcimadamore@1113: public class UnicodeReader {
mcimadamore@1113:
mcimadamore@1113: /** The input buffer, index of next character to be read,
mcimadamore@1113: * index of one past last character in buffer.
mcimadamore@1113: */
mcimadamore@1113: protected char[] buf;
mcimadamore@1113: protected int bp;
mcimadamore@1113: protected final int buflen;
mcimadamore@1113:
mcimadamore@1113: /** The current character.
mcimadamore@1113: */
mcimadamore@1113: protected char ch;
mcimadamore@1113:
mcimadamore@1113: /** The buffer index of the last converted unicode character
mcimadamore@1113: */
mcimadamore@1113: protected int unicodeConversionBp = -1;
mcimadamore@1113:
mcimadamore@1113: protected Log log;
mcimadamore@1125: protected Names names;
mcimadamore@1125:
mcimadamore@1125: /** A character buffer for saved chars.
mcimadamore@1125: */
mcimadamore@1125: protected char[] sbuf = new char[128];
mcimadamore@1125: protected int sp;
mcimadamore@1113:
mcimadamore@1113: /**
mcimadamore@1113: * Create a scanner from the input array. This method might
mcimadamore@1113: * modify the array. To avoid copying the input array, ensure
mcimadamore@1113: * that {@code inputLength < input.length} or
mcimadamore@1113: * {@code input[input.length -1]} is a white space character.
mcimadamore@1113: *
mcimadamore@1113: * @param fac the factory which created this Scanner
mcimadamore@1113: * @param input the input, might be modified
mcimadamore@1113: * @param inputLength the size of the input.
mcimadamore@1113: * Must be positive and less than or equal to input.length.
mcimadamore@1113: */
mcimadamore@1113: protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
mcimadamore@1113: this(sf, JavacFileManager.toArray(buffer), buffer.limit());
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
mcimadamore@1113: log = sf.log;
mcimadamore@1125: names = sf.names;
mcimadamore@1113: if (inputLength == input.length) {
mcimadamore@1113: if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
mcimadamore@1113: inputLength--;
mcimadamore@1113: } else {
jjg@1339: input = Arrays.copyOf(input, inputLength + 1);
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113: buf = input;
mcimadamore@1113: buflen = inputLength;
mcimadamore@1113: buf[buflen] = EOI;
mcimadamore@1113: bp = -1;
mcimadamore@1113: scanChar();
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Read next character.
mcimadamore@1113: */
mcimadamore@1113: protected void scanChar() {
mcimadamore@1113: if (bp < buflen) {
mcimadamore@1113: ch = buf[++bp];
mcimadamore@1113: if (ch == '\\') {
mcimadamore@1113: convertUnicode();
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1125: /** Read next character in comment, skipping over double '\' characters.
mcimadamore@1125: */
mcimadamore@1125: protected void scanCommentChar() {
mcimadamore@1125: scanChar();
mcimadamore@1125: if (ch == '\\') {
mcimadamore@1125: if (peekChar() == '\\' && !isUnicode()) {
mcimadamore@1125: skipChar();
mcimadamore@1125: } else {
mcimadamore@1125: convertUnicode();
mcimadamore@1125: }
mcimadamore@1125: }
mcimadamore@1125: }
mcimadamore@1125:
mcimadamore@1125: /** Append a character to sbuf.
mcimadamore@1125: */
mcimadamore@1125: protected void putChar(char ch, boolean scan) {
jjg@1339: sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
mcimadamore@1125: sbuf[sp++] = ch;
mcimadamore@1125: if (scan)
mcimadamore@1125: scanChar();
mcimadamore@1125: }
mcimadamore@1125:
mcimadamore@1125: protected void putChar(char ch) {
mcimadamore@1125: putChar(ch, false);
mcimadamore@1125: }
mcimadamore@1125:
mcimadamore@1125: protected void putChar(boolean scan) {
mcimadamore@1125: putChar(ch, scan);
mcimadamore@1125: }
mcimadamore@1125:
mcimadamore@1125: Name name() {
mcimadamore@1125: return names.fromChars(sbuf, 0, sp);
mcimadamore@1125: }
mcimadamore@1125:
mcimadamore@1125: String chars() {
mcimadamore@1125: return new String(sbuf, 0, sp);
mcimadamore@1125: }
mcimadamore@1125:
mcimadamore@1113: /** Convert unicode escape; bp points to initial '\' character
mcimadamore@1113: * (Spec 3.3).
mcimadamore@1113: */
mcimadamore@1113: protected void convertUnicode() {
mcimadamore@1113: if (ch == '\\' && unicodeConversionBp != bp) {
mcimadamore@1113: bp++; ch = buf[bp];
mcimadamore@1113: if (ch == 'u') {
mcimadamore@1113: do {
mcimadamore@1113: bp++; ch = buf[bp];
mcimadamore@1113: } while (ch == 'u');
mcimadamore@1113: int limit = bp + 3;
mcimadamore@1113: if (limit < buflen) {
mcimadamore@1113: int d = digit(bp, 16);
mcimadamore@1113: int code = d;
mcimadamore@1113: while (bp < limit && d >= 0) {
mcimadamore@1113: bp++; ch = buf[bp];
mcimadamore@1113: d = digit(bp, 16);
mcimadamore@1113: code = (code << 4) + d;
mcimadamore@1113: }
mcimadamore@1113: if (d >= 0) {
mcimadamore@1113: ch = (char)code;
mcimadamore@1113: unicodeConversionBp = bp;
mcimadamore@1113: return;
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113: log.error(bp, "illegal.unicode.esc");
mcimadamore@1113: } else {
mcimadamore@1113: bp--;
mcimadamore@1113: ch = '\\';
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Are surrogates supported?
mcimadamore@1113: */
mcimadamore@1113: final static boolean surrogatesSupported = surrogatesSupported();
mcimadamore@1113: private static boolean surrogatesSupported() {
mcimadamore@1113: try {
mcimadamore@1113: Character.isHighSurrogate('a');
mcimadamore@1113: return true;
mcimadamore@1113: } catch (NoSuchMethodError ex) {
mcimadamore@1113: return false;
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Scan surrogate pairs. If 'ch' is a high surrogate and
mcimadamore@1113: * the next character is a low surrogate, then put the low
mcimadamore@1113: * surrogate in 'ch', and return the high surrogate.
mcimadamore@1113: * otherwise, just return 0.
mcimadamore@1113: */
mcimadamore@1113: protected char scanSurrogates() {
mcimadamore@1113: if (surrogatesSupported && Character.isHighSurrogate(ch)) {
mcimadamore@1113: char high = ch;
mcimadamore@1113:
mcimadamore@1113: scanChar();
mcimadamore@1113:
mcimadamore@1113: if (Character.isLowSurrogate(ch)) {
mcimadamore@1113: return high;
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: ch = high;
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: return 0;
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Convert an ASCII digit from its base (8, 10, or 16)
mcimadamore@1113: * to its value.
mcimadamore@1113: */
mcimadamore@1113: protected int digit(int pos, int base) {
mcimadamore@1113: char c = ch;
mcimadamore@1113: int result = Character.digit(c, base);
mcimadamore@1113: if (result >= 0 && c > 0x7f) {
mcimadamore@1113: log.error(pos + 1, "illegal.nonascii.digit");
mcimadamore@1113: ch = "0123456789abcdef".charAt(result);
mcimadamore@1113: }
mcimadamore@1113: return result;
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: protected boolean isUnicode() {
mcimadamore@1113: return unicodeConversionBp == bp;
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: protected void skipChar() {
mcimadamore@1113: bp++;
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: protected char peekChar() {
mcimadamore@1113: return buf[bp + 1];
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /**
mcimadamore@1113: * Returns a copy of the input buffer, up to its inputLength.
mcimadamore@1113: * Unicode escape sequences are not translated.
mcimadamore@1113: */
mcimadamore@1113: public char[] getRawCharacters() {
mcimadamore@1113: char[] chars = new char[buflen];
mcimadamore@1113: System.arraycopy(buf, 0, chars, 0, buflen);
mcimadamore@1113: return chars;
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /**
mcimadamore@1113: * Returns a copy of a character array subset of the input buffer.
mcimadamore@1113: * The returned array begins at the beginIndex
and
mcimadamore@1113: * extends to the character at index endIndex - 1
.
mcimadamore@1113: * Thus the length of the substring is endIndex-beginIndex
.
mcimadamore@1113: * This behavior is like
mcimadamore@1113: * String.substring(beginIndex, endIndex)
.
mcimadamore@1113: * Unicode escape sequences are not translated.
mcimadamore@1113: *
mcimadamore@1113: * @param beginIndex the beginning index, inclusive.
mcimadamore@1113: * @param endIndex the ending index, exclusive.
mcimadamore@1113: * @throws IndexOutOfBounds if either offset is outside of the
mcimadamore@1113: * array bounds
mcimadamore@1113: */
mcimadamore@1113: public char[] getRawCharacters(int beginIndex, int endIndex) {
mcimadamore@1113: int length = endIndex - beginIndex;
mcimadamore@1113: char[] chars = new char[length];
mcimadamore@1113: System.arraycopy(buf, beginIndex, chars, 0, length);
mcimadamore@1113: return chars;
mcimadamore@1113: }
mcimadamore@1113: }