mcimadamore@1113: /* jjg@1280: * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. mcimadamore@1113: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. mcimadamore@1113: * mcimadamore@1113: * This code is free software; you can redistribute it and/or modify it mcimadamore@1113: * under the terms of the GNU General Public License version 2 only, as mcimadamore@1113: * published by the Free Software Foundation. Oracle designates this mcimadamore@1113: * particular file as subject to the "Classpath" exception as provided mcimadamore@1113: * by Oracle in the LICENSE file that accompanied this code. mcimadamore@1113: * mcimadamore@1113: * This code is distributed in the hope that it will be useful, but WITHOUT mcimadamore@1113: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or mcimadamore@1113: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License mcimadamore@1113: * version 2 for more details (a copy is included in the LICENSE file that mcimadamore@1113: * accompanied this code). mcimadamore@1113: * mcimadamore@1113: * You should have received a copy of the GNU General Public License version mcimadamore@1113: * 2 along with this work; if not, write to the Free Software Foundation, mcimadamore@1113: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. mcimadamore@1113: * mcimadamore@1113: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA mcimadamore@1113: * or visit www.oracle.com if you need additional information or have any mcimadamore@1113: * questions. 
mcimadamore@1113: */ mcimadamore@1113: mcimadamore@1113: package com.sun.tools.javac.parser; mcimadamore@1113: mcimadamore@1113: import com.sun.tools.javac.code.Source; mcimadamore@1125: import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; mcimadamore@1113: import com.sun.tools.javac.util.*; mcimadamore@1113: mcimadamore@1125: import java.nio.CharBuffer; mcimadamore@1113: mcimadamore@1113: import static com.sun.tools.javac.parser.Tokens.*; mcimadamore@1113: import static com.sun.tools.javac.util.LayoutCharacters.*; mcimadamore@1113: mcimadamore@1113: /** The lexical analyzer maps an input stream consisting of mcimadamore@1113: * ASCII characters and Unicode escapes into a token sequence. mcimadamore@1113: * mcimadamore@1113: *
This is NOT part of any supported API.
mcimadamore@1113: * If you write code that depends on this, you do so at your own risk.
mcimadamore@1113: * This code and its internal interfaces are subject to change or
mcimadamore@1113: * deletion without notice.
mcimadamore@1113: */
mcimadamore@1113: public class JavaTokenizer {
mcimadamore@1113:
vromero@1442: private static final boolean scannerDebug = false;
mcimadamore@1113:
mcimadamore@1113: /** Allow hex floating-point literals.
mcimadamore@1113: */
mcimadamore@1113: private boolean allowHexFloats;
mcimadamore@1113:
mcimadamore@1113: /** Allow binary literals.
mcimadamore@1113: */
mcimadamore@1113: private boolean allowBinaryLiterals;
mcimadamore@1113:
mcimadamore@1113: /** Allow underscores in literals.
mcimadamore@1113: */
mcimadamore@1113: private boolean allowUnderscoresInLiterals;
mcimadamore@1113:
mcimadamore@1113: /** The source language setting.
mcimadamore@1113: */
mcimadamore@1113: private Source source;
mcimadamore@1113:
mcimadamore@1113: /** The log to be used for error reporting.
mcimadamore@1113: */
mcimadamore@1113: private final Log log;
mcimadamore@1113:
mcimadamore@1113: /** The token factory. */
mcimadamore@1113: private final Tokens tokens;
mcimadamore@1113:
mcimadamore@1113: /** The token kind, set by nextToken().
mcimadamore@1113: */
mcimadamore@1113: protected TokenKind tk;
mcimadamore@1113:
mcimadamore@1113: /** The token's radix, set by nextToken().
mcimadamore@1113: */
mcimadamore@1113: protected int radix;
mcimadamore@1113:
mcimadamore@1113: /** The token's name, set by nextToken().
mcimadamore@1113: */
mcimadamore@1113: protected Name name;
mcimadamore@1113:
mcimadamore@1113: /** The position where a lexical error occurred;
mcimadamore@1113: */
mcimadamore@1113: protected int errPos = Position.NOPOS;
mcimadamore@1113:
mcimadamore@1125: /** The Unicode reader (low-level stream reader).
mcimadamore@1113: */
mcimadamore@1125: protected UnicodeReader reader;
mcimadamore@1113:
mcimadamore@1125: protected ScannerFactory fac;
mcimadamore@1113:
mcimadamore@1113: private static final boolean hexFloatsWork = hexFloatsWork();
mcimadamore@1113: private static boolean hexFloatsWork() {
mcimadamore@1113: try {
mcimadamore@1113: Float.valueOf("0x1.0p1");
mcimadamore@1113: return true;
mcimadamore@1113: } catch (NumberFormatException ex) {
mcimadamore@1113: return false;
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
/**
 * Create a tokenizer reading from a character buffer. The buffer is
 * wrapped in a new {@link UnicodeReader}.
 *
 * @param fac the factory which created this tokenizer
 * @param buf the input, might be modified
 */
protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
    this(fac, new UnicodeReader(fac, buf));
}

/**
 * Create a tokenizer reading from a character array. The array is
 * wrapped in a new {@link UnicodeReader}, and this constructor might
 * modify the array. To avoid copying the input array, ensure that
 * {@code inputLength < buf.length} or that {@code buf[buf.length - 1]}
 * is a white space character.
 *
 * @param fac         the factory which created this tokenizer
 * @param buf         the input, might be modified
 * @param inputLength the number of characters of {@code buf} to scan;
 *                    must be positive and less than or equal to
 *                    {@code buf.length}
 */
protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
    this(fac, new UnicodeReader(fac, buf, inputLength));
}
mcimadamore@1113:
mcimadamore@1113: protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
mcimadamore@1125: this.fac = fac;
mcimadamore@1125: this.log = fac.log;
mcimadamore@1125: this.tokens = fac.tokens;
mcimadamore@1125: this.source = fac.source;
mcimadamore@1113: this.reader = reader;
mcimadamore@1125: this.allowBinaryLiterals = source.allowBinaryLiterals();
mcimadamore@1125: this.allowHexFloats = source.allowHexFloats();
mcimadamore@1125: this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Report an error at the given position using the provided arguments.
mcimadamore@1113: */
mcimadamore@1113: protected void lexError(int pos, String key, Object... args) {
mcimadamore@1113: log.error(pos, key, args);
mcimadamore@1113: tk = TokenKind.ERROR;
mcimadamore@1113: errPos = pos;
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Read next character in character or string literal and copy into sbuf.
mcimadamore@1113: */
mcimadamore@1113: private void scanLitChar(int pos) {
mcimadamore@1113: if (reader.ch == '\\') {
mcimadamore@1113: if (reader.peekChar() == '\\' && !reader.isUnicode()) {
mcimadamore@1113: reader.skipChar();
mcimadamore@1125: reader.putChar('\\', true);
mcimadamore@1113: } else {
mcimadamore@1113: reader.scanChar();
mcimadamore@1113: switch (reader.ch) {
mcimadamore@1113: case '0': case '1': case '2': case '3':
mcimadamore@1113: case '4': case '5': case '6': case '7':
mcimadamore@1113: char leadch = reader.ch;
mcimadamore@1113: int oct = reader.digit(pos, 8);
mcimadamore@1113: reader.scanChar();
mcimadamore@1113: if ('0' <= reader.ch && reader.ch <= '7') {
mcimadamore@1113: oct = oct * 8 + reader.digit(pos, 8);
mcimadamore@1113: reader.scanChar();
mcimadamore@1113: if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
mcimadamore@1113: oct = oct * 8 + reader.digit(pos, 8);
mcimadamore@1113: reader.scanChar();
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1125: reader.putChar((char)oct);
mcimadamore@1113: break;
mcimadamore@1113: case 'b':
mcimadamore@1125: reader.putChar('\b', true); break;
mcimadamore@1113: case 't':
mcimadamore@1125: reader.putChar('\t', true); break;
mcimadamore@1113: case 'n':
mcimadamore@1125: reader.putChar('\n', true); break;
mcimadamore@1113: case 'f':
mcimadamore@1125: reader.putChar('\f', true); break;
mcimadamore@1113: case 'r':
mcimadamore@1125: reader.putChar('\r', true); break;
mcimadamore@1113: case '\'':
mcimadamore@1125: reader.putChar('\'', true); break;
mcimadamore@1113: case '\"':
mcimadamore@1125: reader.putChar('\"', true); break;
mcimadamore@1113: case '\\':
mcimadamore@1125: reader.putChar('\\', true); break;
mcimadamore@1113: default:
mcimadamore@1113: lexError(reader.bp, "illegal.esc.char");
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113: } else if (reader.bp != reader.buflen) {
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: private void scanDigits(int pos, int digitRadix) {
mcimadamore@1113: char saveCh;
mcimadamore@1113: int savePos;
mcimadamore@1113: do {
mcimadamore@1113: if (reader.ch != '_') {
mcimadamore@1125: reader.putChar(false);
mcimadamore@1113: } else {
mcimadamore@1113: if (!allowUnderscoresInLiterals) {
mcimadamore@1113: lexError(pos, "unsupported.underscore.lit", source.name);
mcimadamore@1113: allowUnderscoresInLiterals = true;
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113: saveCh = reader.ch;
mcimadamore@1113: savePos = reader.bp;
mcimadamore@1113: reader.scanChar();
mcimadamore@1113: } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
mcimadamore@1113: if (saveCh == '_')
mcimadamore@1113: lexError(savePos, "illegal.underscore");
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Read fractional part of hexadecimal floating point number.
mcimadamore@1113: */
mcimadamore@1113: private void scanHexExponentAndSuffix(int pos) {
mcimadamore@1113: if (reader.ch == 'p' || reader.ch == 'P') {
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: skipIllegalUnderscores();
mcimadamore@1113: if (reader.ch == '+' || reader.ch == '-') {
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: }
mcimadamore@1113: skipIllegalUnderscores();
mcimadamore@1113: if ('0' <= reader.ch && reader.ch <= '9') {
mcimadamore@1113: scanDigits(pos, 10);
mcimadamore@1113: if (!allowHexFloats) {
mcimadamore@1113: lexError(pos, "unsupported.fp.lit", source.name);
mcimadamore@1113: allowHexFloats = true;
mcimadamore@1113: }
mcimadamore@1113: else if (!hexFloatsWork)
mcimadamore@1113: lexError(pos, "unsupported.cross.fp.lit");
mcimadamore@1113: } else
mcimadamore@1113: lexError(pos, "malformed.fp.lit");
mcimadamore@1113: } else {
mcimadamore@1113: lexError(pos, "malformed.fp.lit");
mcimadamore@1113: }
mcimadamore@1113: if (reader.ch == 'f' || reader.ch == 'F') {
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: tk = TokenKind.FLOATLITERAL;
mcimadamore@1113: radix = 16;
mcimadamore@1113: } else {
mcimadamore@1113: if (reader.ch == 'd' || reader.ch == 'D') {
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: }
mcimadamore@1113: tk = TokenKind.DOUBLELITERAL;
mcimadamore@1113: radix = 16;
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Read fractional part of floating point number.
mcimadamore@1113: */
mcimadamore@1113: private void scanFraction(int pos) {
mcimadamore@1113: skipIllegalUnderscores();
mcimadamore@1113: if ('0' <= reader.ch && reader.ch <= '9') {
mcimadamore@1113: scanDigits(pos, 10);
mcimadamore@1113: }
mcimadamore@1125: int sp1 = reader.sp;
mcimadamore@1113: if (reader.ch == 'e' || reader.ch == 'E') {
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: skipIllegalUnderscores();
mcimadamore@1113: if (reader.ch == '+' || reader.ch == '-') {
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: }
mcimadamore@1113: skipIllegalUnderscores();
mcimadamore@1113: if ('0' <= reader.ch && reader.ch <= '9') {
mcimadamore@1113: scanDigits(pos, 10);
mcimadamore@1113: return;
mcimadamore@1113: }
mcimadamore@1113: lexError(pos, "malformed.fp.lit");
mcimadamore@1125: reader.sp = sp1;
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Read fractional part and 'd' or 'f' suffix of floating point number.
mcimadamore@1113: */
mcimadamore@1113: private void scanFractionAndSuffix(int pos) {
mcimadamore@1113: radix = 10;
mcimadamore@1113: scanFraction(pos);
mcimadamore@1113: if (reader.ch == 'f' || reader.ch == 'F') {
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: tk = TokenKind.FLOATLITERAL;
mcimadamore@1113: } else {
mcimadamore@1113: if (reader.ch == 'd' || reader.ch == 'D') {
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: }
mcimadamore@1113: tk = TokenKind.DOUBLELITERAL;
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Read fractional part and 'd' or 'f' suffix of floating point number.
mcimadamore@1113: */
mcimadamore@1113: private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
mcimadamore@1113: radix = 16;
mcimadamore@1113: Assert.check(reader.ch == '.');
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: skipIllegalUnderscores();
mcimadamore@1113: if (reader.digit(pos, 16) >= 0) {
mcimadamore@1113: seendigit = true;
mcimadamore@1113: scanDigits(pos, 16);
mcimadamore@1113: }
mcimadamore@1113: if (!seendigit)
mcimadamore@1113: lexError(pos, "invalid.hex.number");
mcimadamore@1113: else
mcimadamore@1113: scanHexExponentAndSuffix(pos);
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: private void skipIllegalUnderscores() {
mcimadamore@1113: if (reader.ch == '_') {
mcimadamore@1113: lexError(reader.bp, "illegal.underscore");
mcimadamore@1113: while (reader.ch == '_')
mcimadamore@1113: reader.scanChar();
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Read a number.
mcimadamore@1113: * @param radix The radix of the number; one of 2, j8, 10, 16.
mcimadamore@1113: */
mcimadamore@1113: private void scanNumber(int pos, int radix) {
mcimadamore@1113: // for octal, allow base-10 digit in case it's a float literal
mcimadamore@1113: this.radix = radix;
mcimadamore@1113: int digitRadix = (radix == 8 ? 10 : radix);
mcimadamore@1113: boolean seendigit = false;
mcimadamore@1113: if (reader.digit(pos, digitRadix) >= 0) {
mcimadamore@1113: seendigit = true;
mcimadamore@1113: scanDigits(pos, digitRadix);
mcimadamore@1113: }
mcimadamore@1113: if (radix == 16 && reader.ch == '.') {
mcimadamore@1113: scanHexFractionAndSuffix(pos, seendigit);
mcimadamore@1113: } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
mcimadamore@1113: scanHexExponentAndSuffix(pos);
mcimadamore@1113: } else if (digitRadix == 10 && reader.ch == '.') {
mcimadamore@1125: reader.putChar(true);
mcimadamore@1113: scanFractionAndSuffix(pos);
mcimadamore@1113: } else if (digitRadix == 10 &&
mcimadamore@1113: (reader.ch == 'e' || reader.ch == 'E' ||
mcimadamore@1113: reader.ch == 'f' || reader.ch == 'F' ||
mcimadamore@1113: reader.ch == 'd' || reader.ch == 'D')) {
mcimadamore@1113: scanFractionAndSuffix(pos);
mcimadamore@1113: } else {
mcimadamore@1113: if (reader.ch == 'l' || reader.ch == 'L') {
mcimadamore@1113: reader.scanChar();
mcimadamore@1113: tk = TokenKind.LONGLITERAL;
mcimadamore@1113: } else {
mcimadamore@1113: tk = TokenKind.INTLITERAL;
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Read an identifier.
mcimadamore@1113: */
mcimadamore@1113: private void scanIdent() {
mcimadamore@1113: boolean isJavaIdentifierPart;
mcimadamore@1113: char high;
vromero@1431: reader.putChar(true);
mcimadamore@1113: do {
mcimadamore@1113: switch (reader.ch) {
mcimadamore@1113: case 'A': case 'B': case 'C': case 'D': case 'E':
mcimadamore@1113: case 'F': case 'G': case 'H': case 'I': case 'J':
mcimadamore@1113: case 'K': case 'L': case 'M': case 'N': case 'O':
mcimadamore@1113: case 'P': case 'Q': case 'R': case 'S': case 'T':
mcimadamore@1113: case 'U': case 'V': case 'W': case 'X': case 'Y':
mcimadamore@1113: case 'Z':
mcimadamore@1113: case 'a': case 'b': case 'c': case 'd': case 'e':
mcimadamore@1113: case 'f': case 'g': case 'h': case 'i': case 'j':
mcimadamore@1113: case 'k': case 'l': case 'm': case 'n': case 'o':
mcimadamore@1113: case 'p': case 'q': case 'r': case 's': case 't':
mcimadamore@1113: case 'u': case 'v': case 'w': case 'x': case 'y':
mcimadamore@1113: case 'z':
mcimadamore@1113: case '$': case '_':
mcimadamore@1113: case '0': case '1': case '2': case '3': case '4':
mcimadamore@1113: case '5': case '6': case '7': case '8': case '9':
vromero@1431: break;
mcimadamore@1113: case '\u0000': case '\u0001': case '\u0002': case '\u0003':
mcimadamore@1113: case '\u0004': case '\u0005': case '\u0006': case '\u0007':
mcimadamore@1113: case '\u0008': case '\u000E': case '\u000F': case '\u0010':
mcimadamore@1113: case '\u0011': case '\u0012': case '\u0013': case '\u0014':
mcimadamore@1113: case '\u0015': case '\u0016': case '\u0017':
mcimadamore@1113: case '\u0018': case '\u0019': case '\u001B':
mcimadamore@1113: case '\u007F':
vromero@1431: reader.scanChar();
vromero@1431: continue;
mcimadamore@1113: case '\u001A': // EOI is also a legal identifier part
mcimadamore@1113: if (reader.bp >= reader.buflen) {
mcimadamore@1125: name = reader.name();
mcimadamore@1113: tk = tokens.lookupKind(name);
mcimadamore@1113: return;
mcimadamore@1113: }
vromero@1431: reader.scanChar();
vromero@1431: continue;
mcimadamore@1113: default:
mcimadamore@1113: if (reader.ch < '\u0080') {
mcimadamore@1113: // all ASCII range chars already handled, above
mcimadamore@1113: isJavaIdentifierPart = false;
mcimadamore@1113: } else {
vromero@1431: if (Character.isIdentifierIgnorable(reader.ch)) {
vromero@1431: reader.scanChar();
vromero@1431: continue;
mcimadamore@1113: } else {
vromero@1431: high = reader.scanSurrogates();
vromero@1431: if (high != 0) {
vromero@1431: reader.putChar(high);
vromero@1431: isJavaIdentifierPart = Character.isJavaIdentifierPart(
vromero@1431: Character.toCodePoint(high, reader.ch));
vromero@1431: } else {
vromero@1431: isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
vromero@1431: }
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113: if (!isJavaIdentifierPart) {
mcimadamore@1125: name = reader.name();
mcimadamore@1113: tk = tokens.lookupKind(name);
mcimadamore@1113: return;
mcimadamore@1113: }
mcimadamore@1113: }
vromero@1431: reader.putChar(true);
mcimadamore@1113: } while (true);
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Return true if reader.ch can be part of an operator.
mcimadamore@1113: */
mcimadamore@1113: private boolean isSpecial(char ch) {
mcimadamore@1113: switch (ch) {
mcimadamore@1113: case '!': case '%': case '&': case '*': case '?':
mcimadamore@1113: case '+': case '-': case ':': case '<': case '=':
mcimadamore@1113: case '>': case '^': case '|': case '~':
mcimadamore@1113: case '@':
mcimadamore@1113: return true;
mcimadamore@1113: default:
mcimadamore@1113: return false;
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Read longest possible sequence of special characters and convert
mcimadamore@1113: * to token.
mcimadamore@1113: */
mcimadamore@1113: private void scanOperator() {
mcimadamore@1113: while (true) {
mcimadamore@1125: reader.putChar(false);
mcimadamore@1125: Name newname = reader.name();
mcimadamore@1113: TokenKind tk1 = tokens.lookupKind(newname);
mcimadamore@1113: if (tk1 == TokenKind.IDENTIFIER) {
mcimadamore@1125: reader.sp--;
mcimadamore@1113: break;
mcimadamore@1113: }
mcimadamore@1113: tk = tk1;
mcimadamore@1113: reader.scanChar();
mcimadamore@1113: if (!isSpecial(reader.ch)) break;
mcimadamore@1113: }
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Read token.
mcimadamore@1113: */
mcimadamore@1113: public Token readToken() {
mcimadamore@1113:
mcimadamore@1125: reader.sp = 0;
mcimadamore@1113: name = null;
mcimadamore@1113: radix = 0;
mcimadamore@1125:
mcimadamore@1113: int pos = 0;
mcimadamore@1113: int endPos = 0;
mcimadamore@1125: List