mcimadamore@1113: /* jjg@1339: * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. mcimadamore@1113: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. mcimadamore@1113: * mcimadamore@1113: * This code is free software; you can redistribute it and/or modify it mcimadamore@1113: * under the terms of the GNU General Public License version 2 only, as mcimadamore@1113: * published by the Free Software Foundation. Oracle designates this mcimadamore@1113: * particular file as subject to the "Classpath" exception as provided mcimadamore@1113: * by Oracle in the LICENSE file that accompanied this code. mcimadamore@1113: * mcimadamore@1113: * This code is distributed in the hope that it will be useful, but WITHOUT mcimadamore@1113: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or mcimadamore@1113: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License mcimadamore@1113: * version 2 for more details (a copy is included in the LICENSE file that mcimadamore@1113: * accompanied this code). mcimadamore@1113: * mcimadamore@1113: * You should have received a copy of the GNU General Public License version mcimadamore@1113: * 2 along with this work; if not, write to the Free Software Foundation, mcimadamore@1113: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. mcimadamore@1113: * mcimadamore@1113: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA mcimadamore@1113: * or visit www.oracle.com if you need additional information or have any mcimadamore@1113: * questions. mcimadamore@1113: */ mcimadamore@1113: mcimadamore@1113: package com.sun.tools.javac.parser; mcimadamore@1113: jjg@1339: import java.nio.CharBuffer; jjg@1339: import java.util.Arrays; jjg@1339: mcimadamore@1113: import com.sun.tools.javac.file.JavacFileManager; jjg@1339: import com.sun.tools.javac.util.ArrayUtils; mcimadamore@1125: import com.sun.tools.javac.util.Log; mcimadamore@1125: import com.sun.tools.javac.util.Name; mcimadamore@1125: import com.sun.tools.javac.util.Names; mcimadamore@1125: mcimadamore@1113: import static com.sun.tools.javac.util.LayoutCharacters.*; mcimadamore@1113: mcimadamore@1113: /** The char reader used by the javac lexer/tokenizer. Returns the sequence of mcimadamore@1113: * characters contained in the input stream, handling unicode escape accordingly. mcimadamore@1113: * Additionally, it provide features for saving chars into a buffer and to retrieve mcimadamore@1113: * them at a later stage. mcimadamore@1113: * mcimadamore@1113: *

This is NOT part of any supported API. mcimadamore@1113: * If you write code that depends on this, you do so at your own risk. mcimadamore@1113: * This code and its internal interfaces are subject to change or mcimadamore@1113: * deletion without notice. mcimadamore@1113: */ mcimadamore@1113: public class UnicodeReader { mcimadamore@1113: mcimadamore@1113: /** The input buffer, index of next character to be read, mcimadamore@1113: * index of one past last character in buffer. mcimadamore@1113: */ mcimadamore@1113: protected char[] buf; mcimadamore@1113: protected int bp; mcimadamore@1113: protected final int buflen; mcimadamore@1113: mcimadamore@1113: /** The current character. mcimadamore@1113: */ mcimadamore@1113: protected char ch; mcimadamore@1113: mcimadamore@1113: /** The buffer index of the last converted unicode character mcimadamore@1113: */ mcimadamore@1113: protected int unicodeConversionBp = -1; mcimadamore@1113: mcimadamore@1113: protected Log log; mcimadamore@1125: protected Names names; mcimadamore@1125: mcimadamore@1125: /** A character buffer for saved chars. mcimadamore@1125: */ mcimadamore@1125: protected char[] sbuf = new char[128]; mcimadamore@1125: protected int sp; mcimadamore@1113: mcimadamore@1113: /** mcimadamore@1113: * Create a scanner from the input array. This method might mcimadamore@1113: * modify the array. To avoid copying the input array, ensure mcimadamore@1113: * that {@code inputLength < input.length} or mcimadamore@1113: * {@code input[input.length -1]} is a white space character. mcimadamore@1113: * mcimadamore@1113: * @param fac the factory which created this Scanner mcimadamore@1113: * @param input the input, might be modified mcimadamore@1113: * @param inputLength the size of the input. mcimadamore@1113: * Must be positive and less than or equal to input.length. mcimadamore@1113: */ mcimadamore@1113: protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) { mcimadamore@1113: this(sf, JavacFileManager.toArray(buffer), buffer.limit()); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) { mcimadamore@1113: log = sf.log; mcimadamore@1125: names = sf.names; mcimadamore@1113: if (inputLength == input.length) { mcimadamore@1113: if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) { mcimadamore@1113: inputLength--; mcimadamore@1113: } else { jjg@1339: input = Arrays.copyOf(input, inputLength + 1); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: buf = input; mcimadamore@1113: buflen = inputLength; mcimadamore@1113: buf[buflen] = EOI; mcimadamore@1113: bp = -1; mcimadamore@1113: scanChar(); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Read next character. mcimadamore@1113: */ mcimadamore@1113: protected void scanChar() { mcimadamore@1113: if (bp < buflen) { mcimadamore@1113: ch = buf[++bp]; mcimadamore@1113: if (ch == '\\') { mcimadamore@1113: convertUnicode(); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1125: /** Read next character in comment, skipping over double '\' characters. mcimadamore@1125: */ mcimadamore@1125: protected void scanCommentChar() { mcimadamore@1125: scanChar(); mcimadamore@1125: if (ch == '\\') { mcimadamore@1125: if (peekChar() == '\\' && !isUnicode()) { mcimadamore@1125: skipChar(); mcimadamore@1125: } else { mcimadamore@1125: convertUnicode(); mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: /** Append a character to sbuf. mcimadamore@1125: */ mcimadamore@1125: protected void putChar(char ch, boolean scan) { jjg@1339: sbuf = ArrayUtils.ensureCapacity(sbuf, sp); mcimadamore@1125: sbuf[sp++] = ch; mcimadamore@1125: if (scan) mcimadamore@1125: scanChar(); mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: protected void putChar(char ch) { mcimadamore@1125: putChar(ch, false); mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: protected void putChar(boolean scan) { mcimadamore@1125: putChar(ch, scan); mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: Name name() { mcimadamore@1125: return names.fromChars(sbuf, 0, sp); mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: String chars() { mcimadamore@1125: return new String(sbuf, 0, sp); mcimadamore@1125: } mcimadamore@1125: mcimadamore@1113: /** Convert unicode escape; bp points to initial '\' character mcimadamore@1113: * (Spec 3.3). mcimadamore@1113: */ mcimadamore@1113: protected void convertUnicode() { mcimadamore@1113: if (ch == '\\' && unicodeConversionBp != bp) { mcimadamore@1113: bp++; ch = buf[bp]; mcimadamore@1113: if (ch == 'u') { mcimadamore@1113: do { mcimadamore@1113: bp++; ch = buf[bp]; mcimadamore@1113: } while (ch == 'u'); mcimadamore@1113: int limit = bp + 3; mcimadamore@1113: if (limit < buflen) { mcimadamore@1113: int d = digit(bp, 16); mcimadamore@1113: int code = d; mcimadamore@1113: while (bp < limit && d >= 0) { mcimadamore@1113: bp++; ch = buf[bp]; mcimadamore@1113: d = digit(bp, 16); mcimadamore@1113: code = (code << 4) + d; mcimadamore@1113: } mcimadamore@1113: if (d >= 0) { mcimadamore@1113: ch = (char)code; mcimadamore@1113: unicodeConversionBp = bp; mcimadamore@1113: return; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: log.error(bp, "illegal.unicode.esc"); mcimadamore@1113: } else { mcimadamore@1113: bp--; mcimadamore@1113: ch = '\\'; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Are surrogates supported? mcimadamore@1113: */ mcimadamore@1113: final static boolean surrogatesSupported = surrogatesSupported(); mcimadamore@1113: private static boolean surrogatesSupported() { mcimadamore@1113: try { mcimadamore@1113: Character.isHighSurrogate('a'); mcimadamore@1113: return true; mcimadamore@1113: } catch (NoSuchMethodError ex) { mcimadamore@1113: return false; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Scan surrogate pairs. If 'ch' is a high surrogate and mcimadamore@1113: * the next character is a low surrogate, then put the low mcimadamore@1113: * surrogate in 'ch', and return the high surrogate. mcimadamore@1113: * otherwise, just return 0. mcimadamore@1113: */ mcimadamore@1113: protected char scanSurrogates() { mcimadamore@1113: if (surrogatesSupported && Character.isHighSurrogate(ch)) { mcimadamore@1113: char high = ch; mcimadamore@1113: mcimadamore@1113: scanChar(); mcimadamore@1113: mcimadamore@1113: if (Character.isLowSurrogate(ch)) { mcimadamore@1113: return high; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: ch = high; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: return 0; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Convert an ASCII digit from its base (8, 10, or 16) mcimadamore@1113: * to its value. mcimadamore@1113: */ mcimadamore@1113: protected int digit(int pos, int base) { mcimadamore@1113: char c = ch; mcimadamore@1113: int result = Character.digit(c, base); mcimadamore@1113: if (result >= 0 && c > 0x7f) { mcimadamore@1113: log.error(pos + 1, "illegal.nonascii.digit"); mcimadamore@1113: ch = "0123456789abcdef".charAt(result); mcimadamore@1113: } mcimadamore@1113: return result; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: protected boolean isUnicode() { mcimadamore@1113: return unicodeConversionBp == bp; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: protected void skipChar() { mcimadamore@1113: bp++; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: protected char peekChar() { mcimadamore@1113: return buf[bp + 1]; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** mcimadamore@1113: * Returns a copy of the input buffer, up to its inputLength. mcimadamore@1113: * Unicode escape sequences are not translated. mcimadamore@1113: */ mcimadamore@1113: public char[] getRawCharacters() { mcimadamore@1113: char[] chars = new char[buflen]; mcimadamore@1113: System.arraycopy(buf, 0, chars, 0, buflen); mcimadamore@1113: return chars; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** mcimadamore@1113: * Returns a copy of a character array subset of the input buffer. mcimadamore@1113: * The returned array begins at the beginIndex and mcimadamore@1113: * extends to the character at index endIndex - 1. mcimadamore@1113: * Thus the length of the substring is endIndex-beginIndex. mcimadamore@1113: * This behavior is like mcimadamore@1113: * String.substring(beginIndex, endIndex). mcimadamore@1113: * Unicode escape sequences are not translated. mcimadamore@1113: * mcimadamore@1113: * @param beginIndex the beginning index, inclusive. mcimadamore@1113: * @param endIndex the ending index, exclusive. mcimadamore@1113: * @throws IndexOutOfBounds if either offset is outside of the mcimadamore@1113: * array bounds mcimadamore@1113: */ mcimadamore@1113: public char[] getRawCharacters(int beginIndex, int endIndex) { mcimadamore@1113: int length = endIndex - beginIndex; mcimadamore@1113: char[] chars = new char[length]; mcimadamore@1113: System.arraycopy(buf, beginIndex, chars, 0, length); mcimadamore@1113: return chars; mcimadamore@1113: } mcimadamore@1113: }