mcimadamore@1113: /* jjg@1280: * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. mcimadamore@1113: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. mcimadamore@1113: * mcimadamore@1113: * This code is free software; you can redistribute it and/or modify it mcimadamore@1113: * under the terms of the GNU General Public License version 2 only, as mcimadamore@1113: * published by the Free Software Foundation. Oracle designates this mcimadamore@1113: * particular file as subject to the "Classpath" exception as provided mcimadamore@1113: * by Oracle in the LICENSE file that accompanied this code. mcimadamore@1113: * mcimadamore@1113: * This code is distributed in the hope that it will be useful, but WITHOUT mcimadamore@1113: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or mcimadamore@1113: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License mcimadamore@1113: * version 2 for more details (a copy is included in the LICENSE file that mcimadamore@1113: * accompanied this code). mcimadamore@1113: * mcimadamore@1113: * You should have received a copy of the GNU General Public License version mcimadamore@1113: * 2 along with this work; if not, write to the Free Software Foundation, mcimadamore@1113: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. mcimadamore@1113: * mcimadamore@1113: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA mcimadamore@1113: * or visit www.oracle.com if you need additional information or have any mcimadamore@1113: * questions. mcimadamore@1113: */ mcimadamore@1113: mcimadamore@1113: package com.sun.tools.javac.parser; mcimadamore@1113: mcimadamore@1113: import com.sun.tools.javac.code.Source; mcimadamore@1125: import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; mcimadamore@1113: import com.sun.tools.javac.util.*; mcimadamore@1113: mcimadamore@1125: import java.nio.CharBuffer; mcimadamore@1113: mcimadamore@1113: import static com.sun.tools.javac.parser.Tokens.*; mcimadamore@1113: import static com.sun.tools.javac.util.LayoutCharacters.*; mcimadamore@1113: mcimadamore@1113: /** The lexical analyzer maps an input stream consisting of mcimadamore@1113: * ASCII characters and Unicode escapes into a token sequence. mcimadamore@1113: * mcimadamore@1113: *

This is NOT part of any supported API. mcimadamore@1113: * If you write code that depends on this, you do so at your own risk. mcimadamore@1113: * This code and its internal interfaces are subject to change or mcimadamore@1113: * deletion without notice. mcimadamore@1113: */ mcimadamore@1113: public class JavaTokenizer { mcimadamore@1113: vromero@1442: private static final boolean scannerDebug = false; mcimadamore@1113: mcimadamore@1113: /** Allow hex floating-point literals. mcimadamore@1113: */ mcimadamore@1113: private boolean allowHexFloats; mcimadamore@1113: mcimadamore@1113: /** Allow binary literals. mcimadamore@1113: */ mcimadamore@1113: private boolean allowBinaryLiterals; mcimadamore@1113: mcimadamore@1113: /** Allow underscores in literals. mcimadamore@1113: */ mcimadamore@1113: private boolean allowUnderscoresInLiterals; mcimadamore@1113: mcimadamore@1113: /** The source language setting. mcimadamore@1113: */ mcimadamore@1113: private Source source; mcimadamore@1113: mcimadamore@1113: /** The log to be used for error reporting. mcimadamore@1113: */ mcimadamore@1113: private final Log log; mcimadamore@1113: mcimadamore@1113: /** The token factory. */ mcimadamore@1113: private final Tokens tokens; mcimadamore@1113: mcimadamore@1113: /** The token kind, set by nextToken(). mcimadamore@1113: */ mcimadamore@1113: protected TokenKind tk; mcimadamore@1113: mcimadamore@1113: /** The token's radix, set by nextToken(). mcimadamore@1113: */ mcimadamore@1113: protected int radix; mcimadamore@1113: mcimadamore@1113: /** The token's name, set by nextToken(). mcimadamore@1113: */ mcimadamore@1113: protected Name name; mcimadamore@1113: mcimadamore@1113: /** The position where a lexical error occurred; mcimadamore@1113: */ mcimadamore@1113: protected int errPos = Position.NOPOS; mcimadamore@1113: mcimadamore@1125: /** The Unicode reader (low-level stream reader). mcimadamore@1113: */ mcimadamore@1125: protected UnicodeReader reader; mcimadamore@1113: mcimadamore@1125: protected ScannerFactory fac; mcimadamore@1113: mcimadamore@1113: private static final boolean hexFloatsWork = hexFloatsWork(); mcimadamore@1113: private static boolean hexFloatsWork() { mcimadamore@1113: try { mcimadamore@1113: Float.valueOf("0x1.0p1"); mcimadamore@1113: return true; mcimadamore@1113: } catch (NumberFormatException ex) { mcimadamore@1113: return false; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** mcimadamore@1113: * Create a scanner from the input array. This method might mcimadamore@1113: * modify the array. To avoid copying the input array, ensure mcimadamore@1113: * that {@code inputLength < input.length} or mcimadamore@1113: * {@code input[input.length -1]} is a white space character. mcimadamore@1113: * mcimadamore@1113: * @param fac the factory which created this Scanner jjg@1358: * @param buf the input, might be modified mcimadamore@1113: * Must be positive and less than or equal to input.length. mcimadamore@1113: */ mcimadamore@1113: protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) { mcimadamore@1113: this(fac, new UnicodeReader(fac, buf)); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) { mcimadamore@1113: this(fac, new UnicodeReader(fac, buf, inputLength)); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) { mcimadamore@1125: this.fac = fac; mcimadamore@1125: this.log = fac.log; mcimadamore@1125: this.tokens = fac.tokens; mcimadamore@1125: this.source = fac.source; mcimadamore@1113: this.reader = reader; mcimadamore@1125: this.allowBinaryLiterals = source.allowBinaryLiterals(); mcimadamore@1125: this.allowHexFloats = source.allowHexFloats(); mcimadamore@1125: this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals(); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Report an error at the given position using the provided arguments. mcimadamore@1113: */ mcimadamore@1113: protected void lexError(int pos, String key, Object... args) { mcimadamore@1113: log.error(pos, key, args); mcimadamore@1113: tk = TokenKind.ERROR; mcimadamore@1113: errPos = pos; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Read next character in character or string literal and copy into sbuf. mcimadamore@1113: */ mcimadamore@1113: private void scanLitChar(int pos) { mcimadamore@1113: if (reader.ch == '\\') { mcimadamore@1113: if (reader.peekChar() == '\\' && !reader.isUnicode()) { mcimadamore@1113: reader.skipChar(); mcimadamore@1125: reader.putChar('\\', true); mcimadamore@1113: } else { mcimadamore@1113: reader.scanChar(); mcimadamore@1113: switch (reader.ch) { mcimadamore@1113: case '0': case '1': case '2': case '3': mcimadamore@1113: case '4': case '5': case '6': case '7': mcimadamore@1113: char leadch = reader.ch; mcimadamore@1113: int oct = reader.digit(pos, 8); mcimadamore@1113: reader.scanChar(); mcimadamore@1113: if ('0' <= reader.ch && reader.ch <= '7') { mcimadamore@1113: oct = oct * 8 + reader.digit(pos, 8); mcimadamore@1113: reader.scanChar(); mcimadamore@1113: if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { mcimadamore@1113: oct = oct * 8 + reader.digit(pos, 8); mcimadamore@1113: reader.scanChar(); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1125: reader.putChar((char)oct); mcimadamore@1113: break; mcimadamore@1113: case 'b': mcimadamore@1125: reader.putChar('\b', true); break; mcimadamore@1113: case 't': mcimadamore@1125: reader.putChar('\t', true); break; mcimadamore@1113: case 'n': mcimadamore@1125: reader.putChar('\n', true); break; mcimadamore@1113: case 'f': mcimadamore@1125: reader.putChar('\f', true); break; mcimadamore@1113: case 'r': mcimadamore@1125: reader.putChar('\r', true); break; mcimadamore@1113: case '\'': mcimadamore@1125: reader.putChar('\'', true); break; mcimadamore@1113: case '\"': mcimadamore@1125: reader.putChar('\"', true); break; mcimadamore@1113: case '\\': mcimadamore@1125: reader.putChar('\\', true); break; mcimadamore@1113: default: mcimadamore@1113: lexError(reader.bp, "illegal.esc.char"); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: } else if (reader.bp != reader.buflen) { mcimadamore@1125: reader.putChar(true); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: private void scanDigits(int pos, int digitRadix) { mcimadamore@1113: char saveCh; mcimadamore@1113: int savePos; mcimadamore@1113: do { mcimadamore@1113: if (reader.ch != '_') { mcimadamore@1125: reader.putChar(false); mcimadamore@1113: } else { mcimadamore@1113: if (!allowUnderscoresInLiterals) { mcimadamore@1113: lexError(pos, "unsupported.underscore.lit", source.name); mcimadamore@1113: allowUnderscoresInLiterals = true; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: saveCh = reader.ch; mcimadamore@1113: savePos = reader.bp; mcimadamore@1113: reader.scanChar(); mcimadamore@1113: } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_'); mcimadamore@1113: if (saveCh == '_') mcimadamore@1113: lexError(savePos, "illegal.underscore"); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Read fractional part of hexadecimal floating point number. mcimadamore@1113: */ mcimadamore@1113: private void scanHexExponentAndSuffix(int pos) { mcimadamore@1113: if (reader.ch == 'p' || reader.ch == 'P') { mcimadamore@1125: reader.putChar(true); mcimadamore@1113: skipIllegalUnderscores(); mcimadamore@1113: if (reader.ch == '+' || reader.ch == '-') { mcimadamore@1125: reader.putChar(true); mcimadamore@1113: } mcimadamore@1113: skipIllegalUnderscores(); mcimadamore@1113: if ('0' <= reader.ch && reader.ch <= '9') { mcimadamore@1113: scanDigits(pos, 10); mcimadamore@1113: if (!allowHexFloats) { mcimadamore@1113: lexError(pos, "unsupported.fp.lit", source.name); mcimadamore@1113: allowHexFloats = true; mcimadamore@1113: } mcimadamore@1113: else if (!hexFloatsWork) mcimadamore@1113: lexError(pos, "unsupported.cross.fp.lit"); mcimadamore@1113: } else mcimadamore@1113: lexError(pos, "malformed.fp.lit"); mcimadamore@1113: } else { mcimadamore@1113: lexError(pos, "malformed.fp.lit"); mcimadamore@1113: } mcimadamore@1113: if (reader.ch == 'f' || reader.ch == 'F') { mcimadamore@1125: reader.putChar(true); mcimadamore@1113: tk = TokenKind.FLOATLITERAL; mcimadamore@1113: radix = 16; mcimadamore@1113: } else { mcimadamore@1113: if (reader.ch == 'd' || reader.ch == 'D') { mcimadamore@1125: reader.putChar(true); mcimadamore@1113: } mcimadamore@1113: tk = TokenKind.DOUBLELITERAL; mcimadamore@1113: radix = 16; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Read fractional part of floating point number. mcimadamore@1113: */ mcimadamore@1113: private void scanFraction(int pos) { mcimadamore@1113: skipIllegalUnderscores(); mcimadamore@1113: if ('0' <= reader.ch && reader.ch <= '9') { mcimadamore@1113: scanDigits(pos, 10); mcimadamore@1113: } mcimadamore@1125: int sp1 = reader.sp; mcimadamore@1113: if (reader.ch == 'e' || reader.ch == 'E') { mcimadamore@1125: reader.putChar(true); mcimadamore@1113: skipIllegalUnderscores(); mcimadamore@1113: if (reader.ch == '+' || reader.ch == '-') { mcimadamore@1125: reader.putChar(true); mcimadamore@1113: } mcimadamore@1113: skipIllegalUnderscores(); mcimadamore@1113: if ('0' <= reader.ch && reader.ch <= '9') { mcimadamore@1113: scanDigits(pos, 10); mcimadamore@1113: return; mcimadamore@1113: } mcimadamore@1113: lexError(pos, "malformed.fp.lit"); mcimadamore@1125: reader.sp = sp1; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Read fractional part and 'd' or 'f' suffix of floating point number. mcimadamore@1113: */ mcimadamore@1113: private void scanFractionAndSuffix(int pos) { mcimadamore@1113: radix = 10; mcimadamore@1113: scanFraction(pos); mcimadamore@1113: if (reader.ch == 'f' || reader.ch == 'F') { mcimadamore@1125: reader.putChar(true); mcimadamore@1113: tk = TokenKind.FLOATLITERAL; mcimadamore@1113: } else { mcimadamore@1113: if (reader.ch == 'd' || reader.ch == 'D') { mcimadamore@1125: reader.putChar(true); mcimadamore@1113: } mcimadamore@1113: tk = TokenKind.DOUBLELITERAL; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Read fractional part and 'd' or 'f' suffix of floating point number. mcimadamore@1113: */ mcimadamore@1113: private void scanHexFractionAndSuffix(int pos, boolean seendigit) { mcimadamore@1113: radix = 16; mcimadamore@1113: Assert.check(reader.ch == '.'); mcimadamore@1125: reader.putChar(true); mcimadamore@1113: skipIllegalUnderscores(); mcimadamore@1113: if (reader.digit(pos, 16) >= 0) { mcimadamore@1113: seendigit = true; mcimadamore@1113: scanDigits(pos, 16); mcimadamore@1113: } mcimadamore@1113: if (!seendigit) mcimadamore@1113: lexError(pos, "invalid.hex.number"); mcimadamore@1113: else mcimadamore@1113: scanHexExponentAndSuffix(pos); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: private void skipIllegalUnderscores() { mcimadamore@1113: if (reader.ch == '_') { mcimadamore@1113: lexError(reader.bp, "illegal.underscore"); mcimadamore@1113: while (reader.ch == '_') mcimadamore@1113: reader.scanChar(); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Read a number. mcimadamore@1113: * @param radix The radix of the number; one of 2, j8, 10, 16. mcimadamore@1113: */ mcimadamore@1113: private void scanNumber(int pos, int radix) { mcimadamore@1113: // for octal, allow base-10 digit in case it's a float literal mcimadamore@1113: this.radix = radix; mcimadamore@1113: int digitRadix = (radix == 8 ? 10 : radix); mcimadamore@1113: boolean seendigit = false; mcimadamore@1113: if (reader.digit(pos, digitRadix) >= 0) { mcimadamore@1113: seendigit = true; mcimadamore@1113: scanDigits(pos, digitRadix); mcimadamore@1113: } mcimadamore@1113: if (radix == 16 && reader.ch == '.') { mcimadamore@1113: scanHexFractionAndSuffix(pos, seendigit); mcimadamore@1113: } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) { mcimadamore@1113: scanHexExponentAndSuffix(pos); mcimadamore@1113: } else if (digitRadix == 10 && reader.ch == '.') { mcimadamore@1125: reader.putChar(true); mcimadamore@1113: scanFractionAndSuffix(pos); mcimadamore@1113: } else if (digitRadix == 10 && mcimadamore@1113: (reader.ch == 'e' || reader.ch == 'E' || mcimadamore@1113: reader.ch == 'f' || reader.ch == 'F' || mcimadamore@1113: reader.ch == 'd' || reader.ch == 'D')) { mcimadamore@1113: scanFractionAndSuffix(pos); mcimadamore@1113: } else { mcimadamore@1113: if (reader.ch == 'l' || reader.ch == 'L') { mcimadamore@1113: reader.scanChar(); mcimadamore@1113: tk = TokenKind.LONGLITERAL; mcimadamore@1113: } else { mcimadamore@1113: tk = TokenKind.INTLITERAL; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Read an identifier. mcimadamore@1113: */ mcimadamore@1113: private void scanIdent() { mcimadamore@1113: boolean isJavaIdentifierPart; mcimadamore@1113: char high; vromero@1431: reader.putChar(true); mcimadamore@1113: do { mcimadamore@1113: switch (reader.ch) { mcimadamore@1113: case 'A': case 'B': case 'C': case 'D': case 'E': mcimadamore@1113: case 'F': case 'G': case 'H': case 'I': case 'J': mcimadamore@1113: case 'K': case 'L': case 'M': case 'N': case 'O': mcimadamore@1113: case 'P': case 'Q': case 'R': case 'S': case 'T': mcimadamore@1113: case 'U': case 'V': case 'W': case 'X': case 'Y': mcimadamore@1113: case 'Z': mcimadamore@1113: case 'a': case 'b': case 'c': case 'd': case 'e': mcimadamore@1113: case 'f': case 'g': case 'h': case 'i': case 'j': mcimadamore@1113: case 'k': case 'l': case 'm': case 'n': case 'o': mcimadamore@1113: case 'p': case 'q': case 'r': case 's': case 't': mcimadamore@1113: case 'u': case 'v': case 'w': case 'x': case 'y': mcimadamore@1113: case 'z': mcimadamore@1113: case '$': case '_': mcimadamore@1113: case '0': case '1': case '2': case '3': case '4': mcimadamore@1113: case '5': case '6': case '7': case '8': case '9': vromero@1431: break; mcimadamore@1113: case '\u0000': case '\u0001': case '\u0002': case '\u0003': mcimadamore@1113: case '\u0004': case '\u0005': case '\u0006': case '\u0007': mcimadamore@1113: case '\u0008': case '\u000E': case '\u000F': case '\u0010': mcimadamore@1113: case '\u0011': case '\u0012': case '\u0013': case '\u0014': mcimadamore@1113: case '\u0015': case '\u0016': case '\u0017': mcimadamore@1113: case '\u0018': case '\u0019': case '\u001B': mcimadamore@1113: case '\u007F': vromero@1431: reader.scanChar(); vromero@1431: continue; mcimadamore@1113: case '\u001A': // EOI is also a legal identifier part mcimadamore@1113: if (reader.bp >= reader.buflen) { mcimadamore@1125: name = reader.name(); mcimadamore@1113: tk = tokens.lookupKind(name); mcimadamore@1113: return; mcimadamore@1113: } vromero@1431: reader.scanChar(); vromero@1431: continue; mcimadamore@1113: default: mcimadamore@1113: if (reader.ch < '\u0080') { mcimadamore@1113: // all ASCII range chars already handled, above mcimadamore@1113: isJavaIdentifierPart = false; mcimadamore@1113: } else { vromero@1431: if (Character.isIdentifierIgnorable(reader.ch)) { vromero@1431: reader.scanChar(); vromero@1431: continue; mcimadamore@1113: } else { vromero@1431: high = reader.scanSurrogates(); vromero@1431: if (high != 0) { vromero@1431: reader.putChar(high); vromero@1431: isJavaIdentifierPart = Character.isJavaIdentifierPart( vromero@1431: Character.toCodePoint(high, reader.ch)); vromero@1431: } else { vromero@1431: isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch); vromero@1431: } mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: if (!isJavaIdentifierPart) { mcimadamore@1125: name = reader.name(); mcimadamore@1113: tk = tokens.lookupKind(name); mcimadamore@1113: return; mcimadamore@1113: } mcimadamore@1113: } vromero@1431: reader.putChar(true); mcimadamore@1113: } while (true); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Return true if reader.ch can be part of an operator. mcimadamore@1113: */ mcimadamore@1113: private boolean isSpecial(char ch) { mcimadamore@1113: switch (ch) { mcimadamore@1113: case '!': case '%': case '&': case '*': case '?': mcimadamore@1113: case '+': case '-': case ':': case '<': case '=': mcimadamore@1113: case '>': case '^': case '|': case '~': mcimadamore@1113: case '@': mcimadamore@1113: return true; mcimadamore@1113: default: mcimadamore@1113: return false; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Read longest possible sequence of special characters and convert mcimadamore@1113: * to token. mcimadamore@1113: */ mcimadamore@1113: private void scanOperator() { mcimadamore@1113: while (true) { mcimadamore@1125: reader.putChar(false); mcimadamore@1125: Name newname = reader.name(); mcimadamore@1113: TokenKind tk1 = tokens.lookupKind(newname); mcimadamore@1113: if (tk1 == TokenKind.IDENTIFIER) { mcimadamore@1125: reader.sp--; mcimadamore@1113: break; mcimadamore@1113: } mcimadamore@1113: tk = tk1; mcimadamore@1113: reader.scanChar(); mcimadamore@1113: if (!isSpecial(reader.ch)) break; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Read token. mcimadamore@1113: */ mcimadamore@1113: public Token readToken() { mcimadamore@1113: mcimadamore@1125: reader.sp = 0; mcimadamore@1113: name = null; mcimadamore@1113: radix = 0; mcimadamore@1125: mcimadamore@1113: int pos = 0; mcimadamore@1113: int endPos = 0; mcimadamore@1125: List comments = null; mcimadamore@1113: mcimadamore@1113: try { mcimadamore@1113: loop: while (true) { mcimadamore@1113: pos = reader.bp; mcimadamore@1113: switch (reader.ch) { mcimadamore@1113: case ' ': // (Spec 3.6) mcimadamore@1113: case '\t': // (Spec 3.6) mcimadamore@1113: case FF: // (Spec 3.6) mcimadamore@1113: do { mcimadamore@1113: reader.scanChar(); mcimadamore@1113: } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF); mcimadamore@1113: processWhiteSpace(pos, reader.bp); mcimadamore@1113: break; mcimadamore@1113: case LF: // (Spec 3.4) mcimadamore@1113: reader.scanChar(); mcimadamore@1113: processLineTerminator(pos, reader.bp); mcimadamore@1113: break; mcimadamore@1113: case CR: // (Spec 3.4) mcimadamore@1113: reader.scanChar(); mcimadamore@1113: if (reader.ch == LF) { mcimadamore@1113: reader.scanChar(); mcimadamore@1113: } mcimadamore@1113: processLineTerminator(pos, reader.bp); mcimadamore@1113: break; mcimadamore@1113: case 'A': case 'B': case 'C': case 'D': case 'E': mcimadamore@1113: case 'F': case 'G': case 'H': case 'I': case 'J': mcimadamore@1113: case 'K': case 'L': case 'M': case 'N': case 'O': mcimadamore@1113: case 'P': case 'Q': case 'R': case 'S': case 'T': mcimadamore@1113: case 'U': case 'V': case 'W': case 'X': case 'Y': mcimadamore@1113: case 'Z': mcimadamore@1113: case 'a': case 'b': case 'c': case 'd': case 'e': mcimadamore@1113: case 'f': case 'g': case 'h': case 'i': case 'j': mcimadamore@1113: case 'k': case 'l': case 'm': case 'n': case 'o': mcimadamore@1113: case 'p': case 'q': case 'r': case 's': case 't': mcimadamore@1113: case 'u': case 'v': case 'w': case 'x': case 'y': mcimadamore@1113: case 'z': mcimadamore@1113: case '$': case '_': mcimadamore@1113: scanIdent(); mcimadamore@1113: break loop; mcimadamore@1113: case '0': mcimadamore@1113: reader.scanChar(); mcimadamore@1113: if (reader.ch == 'x' || reader.ch == 'X') { mcimadamore@1113: reader.scanChar(); mcimadamore@1113: skipIllegalUnderscores(); mcimadamore@1113: if (reader.ch == '.') { mcimadamore@1113: scanHexFractionAndSuffix(pos, false); mcimadamore@1113: } else if (reader.digit(pos, 16) < 0) { mcimadamore@1113: lexError(pos, "invalid.hex.number"); mcimadamore@1113: } else { mcimadamore@1113: scanNumber(pos, 16); mcimadamore@1113: } mcimadamore@1113: } else if (reader.ch == 'b' || reader.ch == 'B') { mcimadamore@1113: if (!allowBinaryLiterals) { mcimadamore@1113: lexError(pos, "unsupported.binary.lit", source.name); mcimadamore@1113: allowBinaryLiterals = true; mcimadamore@1113: } mcimadamore@1113: reader.scanChar(); mcimadamore@1113: skipIllegalUnderscores(); mcimadamore@1113: if (reader.digit(pos, 2) < 0) { mcimadamore@1113: lexError(pos, "invalid.binary.number"); mcimadamore@1113: } else { mcimadamore@1113: scanNumber(pos, 2); mcimadamore@1113: } mcimadamore@1113: } else { mcimadamore@1125: reader.putChar('0'); mcimadamore@1113: if (reader.ch == '_') { mcimadamore@1113: int savePos = reader.bp; mcimadamore@1113: do { mcimadamore@1113: reader.scanChar(); mcimadamore@1113: } while (reader.ch == '_'); mcimadamore@1113: if (reader.digit(pos, 10) < 0) { mcimadamore@1113: lexError(savePos, "illegal.underscore"); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: scanNumber(pos, 8); mcimadamore@1113: } mcimadamore@1113: break loop; mcimadamore@1113: case '1': case '2': case '3': case '4': mcimadamore@1113: case '5': case '6': case '7': case '8': case '9': mcimadamore@1113: scanNumber(pos, 10); mcimadamore@1113: break loop; mcimadamore@1113: case '.': mcimadamore@1113: reader.scanChar(); mcimadamore@1113: if ('0' <= reader.ch && reader.ch <= '9') { mcimadamore@1125: reader.putChar('.'); mcimadamore@1113: scanFractionAndSuffix(pos); mcimadamore@1113: } else if (reader.ch == '.') { jjg@1171: int savePos = reader.bp; mcimadamore@1125: reader.putChar('.'); reader.putChar('.', true); mcimadamore@1113: if (reader.ch == '.') { mcimadamore@1113: reader.scanChar(); mcimadamore@1125: reader.putChar('.'); mcimadamore@1113: tk = TokenKind.ELLIPSIS; mcimadamore@1113: } else { jjg@1171: lexError(savePos, "illegal.dot"); mcimadamore@1113: } mcimadamore@1113: } else { mcimadamore@1113: tk = TokenKind.DOT; mcimadamore@1113: } mcimadamore@1113: break loop; mcimadamore@1113: case ',': mcimadamore@1113: reader.scanChar(); tk = TokenKind.COMMA; break loop; mcimadamore@1113: case ';': mcimadamore@1113: reader.scanChar(); tk = TokenKind.SEMI; break loop; mcimadamore@1113: case '(': mcimadamore@1113: reader.scanChar(); tk = TokenKind.LPAREN; break loop; mcimadamore@1113: case ')': mcimadamore@1113: reader.scanChar(); tk = TokenKind.RPAREN; break loop; mcimadamore@1113: case '[': mcimadamore@1113: reader.scanChar(); tk = TokenKind.LBRACKET; break loop; mcimadamore@1113: case ']': mcimadamore@1113: reader.scanChar(); tk = TokenKind.RBRACKET; break loop; mcimadamore@1113: case '{': mcimadamore@1113: reader.scanChar(); tk = TokenKind.LBRACE; break loop; mcimadamore@1113: case '}': mcimadamore@1113: reader.scanChar(); tk = TokenKind.RBRACE; break loop; mcimadamore@1113: case '/': mcimadamore@1113: reader.scanChar(); mcimadamore@1113: if (reader.ch == '/') { mcimadamore@1113: do { mcimadamore@1125: reader.scanCommentChar(); mcimadamore@1113: } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen); mcimadamore@1113: if (reader.bp < reader.buflen) { jjg@1280: comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE)); mcimadamore@1113: } mcimadamore@1113: break; mcimadamore@1113: } else if (reader.ch == '*') { mcimadamore@1125: boolean isEmpty = false; mcimadamore@1113: reader.scanChar(); mcimadamore@1113: CommentStyle style; mcimadamore@1113: if (reader.ch == '*') { mcimadamore@1113: style = CommentStyle.JAVADOC; mcimadamore@1125: reader.scanCommentChar(); mcimadamore@1125: if (reader.ch == '/') { mcimadamore@1125: isEmpty = true; mcimadamore@1125: } mcimadamore@1113: } else { mcimadamore@1113: style = CommentStyle.BLOCK; mcimadamore@1125: } mcimadamore@1125: while (!isEmpty && reader.bp < reader.buflen) { mcimadamore@1125: if (reader.ch == '*') { mcimadamore@1125: reader.scanChar(); mcimadamore@1125: if (reader.ch == '/') break; mcimadamore@1125: } else { mcimadamore@1125: reader.scanCommentChar(); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: if (reader.ch == '/') { mcimadamore@1113: reader.scanChar(); jjg@1280: comments = addComment(comments, processComment(pos, reader.bp, style)); mcimadamore@1113: break; mcimadamore@1113: } else { mcimadamore@1113: lexError(pos, "unclosed.comment"); mcimadamore@1113: break loop; mcimadamore@1113: } mcimadamore@1113: } else if (reader.ch == '=') { mcimadamore@1113: tk = TokenKind.SLASHEQ; mcimadamore@1113: reader.scanChar(); mcimadamore@1113: } else { mcimadamore@1113: tk = TokenKind.SLASH; mcimadamore@1113: } mcimadamore@1113: break loop; mcimadamore@1113: case '\'': mcimadamore@1113: reader.scanChar(); mcimadamore@1113: if (reader.ch == '\'') { mcimadamore@1113: lexError(pos, "empty.char.lit"); mcimadamore@1113: } else { mcimadamore@1113: if (reader.ch == CR || reader.ch == LF) mcimadamore@1113: lexError(pos, "illegal.line.end.in.char.lit"); mcimadamore@1113: scanLitChar(pos); mcimadamore@1113: char ch2 = reader.ch; mcimadamore@1113: if (reader.ch == '\'') { mcimadamore@1113: reader.scanChar(); mcimadamore@1113: tk = TokenKind.CHARLITERAL; mcimadamore@1113: } else { mcimadamore@1113: lexError(pos, "unclosed.char.lit"); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: break loop; mcimadamore@1113: case '\"': mcimadamore@1113: reader.scanChar(); mcimadamore@1113: while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen) mcimadamore@1113: scanLitChar(pos); mcimadamore@1113: if (reader.ch == '\"') { mcimadamore@1113: tk = TokenKind.STRINGLITERAL; mcimadamore@1113: reader.scanChar(); mcimadamore@1113: } else { mcimadamore@1113: lexError(pos, "unclosed.str.lit"); mcimadamore@1113: } mcimadamore@1113: break loop; mcimadamore@1113: default: mcimadamore@1113: if (isSpecial(reader.ch)) { mcimadamore@1113: scanOperator(); mcimadamore@1113: } else { mcimadamore@1113: boolean isJavaIdentifierStart; mcimadamore@1113: if (reader.ch < '\u0080') { mcimadamore@1113: // all ASCII range chars already handled, above mcimadamore@1113: isJavaIdentifierStart = false; mcimadamore@1113: } else { mcimadamore@1113: char high = reader.scanSurrogates(); mcimadamore@1113: if (high != 0) { mcimadamore@1125: reader.putChar(high); mcimadamore@1113: mcimadamore@1113: isJavaIdentifierStart = Character.isJavaIdentifierStart( mcimadamore@1113: Character.toCodePoint(high, reader.ch)); mcimadamore@1113: } else { mcimadamore@1113: isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: if (isJavaIdentifierStart) { mcimadamore@1113: scanIdent(); mcimadamore@1113: } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5 mcimadamore@1113: tk = TokenKind.EOF; mcimadamore@1113: pos = reader.buflen; mcimadamore@1113: } else { vromero@1385: String arg = (32 < reader.ch && reader.ch < 127) ? vromero@1385: String.format("%s", reader.ch) : vromero@1385: String.format("\\u%04x", (int)reader.ch); vromero@1385: lexError(pos, "illegal.char", arg); mcimadamore@1113: reader.scanChar(); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: break loop; mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: endPos = reader.bp; mcimadamore@1113: switch (tk.tag) { mcimadamore@1125: case DEFAULT: return new Token(tk, pos, endPos, comments); mcimadamore@1125: case NAMED: return new NamedToken(tk, pos, endPos, name, comments); mcimadamore@1125: case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments); mcimadamore@1125: case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments); mcimadamore@1113: default: throw new AssertionError(); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: finally { mcimadamore@1113: if (scannerDebug) { mcimadamore@1113: System.out.println("nextToken(" + pos mcimadamore@1113: + "," + endPos + ")=|" + mcimadamore@1113: new String(reader.getRawCharacters(pos, endPos)) mcimadamore@1113: + "|"); mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1125: //where jjg@1280: List addComment(List comments, Comment comment) { jjg@1280: return comments == null ? jjg@1280: List.of(comment) : jjg@1280: comments.prepend(comment); mcimadamore@1125: } mcimadamore@1113: mcimadamore@1113: /** Return the position where a lexical error occurred; mcimadamore@1113: */ mcimadamore@1113: public int errPos() { mcimadamore@1113: return errPos; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Set the position where a lexical error occurred; mcimadamore@1113: */ mcimadamore@1113: public void errPos(int pos) { mcimadamore@1113: errPos = pos; mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** mcimadamore@1113: * Called when a complete comment has been scanned. pos and endPos mcimadamore@1113: * will mark the comment boundary. mcimadamore@1113: */ mcimadamore@1125: protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) { mcimadamore@1113: if (scannerDebug) mcimadamore@1113: System.out.println("processComment(" + pos mcimadamore@1113: + "," + endPos + "," + style + ")=|" mcimadamore@1113: + new String(reader.getRawCharacters(pos, endPos)) mcimadamore@1113: + "|"); mcimadamore@1125: char[] buf = reader.getRawCharacters(pos, endPos); mcimadamore@1125: return new BasicComment(new UnicodeReader(fac, buf, buf.length), style); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** mcimadamore@1113: * Called when a complete whitespace run has been scanned. pos and endPos mcimadamore@1113: * will mark the whitespace boundary. mcimadamore@1113: */ mcimadamore@1113: protected void processWhiteSpace(int pos, int endPos) { mcimadamore@1113: if (scannerDebug) mcimadamore@1113: System.out.println("processWhitespace(" + pos mcimadamore@1113: + "," + endPos + ")=|" + mcimadamore@1113: new String(reader.getRawCharacters(pos, endPos)) mcimadamore@1113: + "|"); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** mcimadamore@1113: * Called when a line terminator has been processed. mcimadamore@1113: */ mcimadamore@1113: protected void processLineTerminator(int pos, int endPos) { mcimadamore@1113: if (scannerDebug) mcimadamore@1113: System.out.println("processTerminator(" + pos mcimadamore@1113: + "," + endPos + ")=|" + mcimadamore@1113: new String(reader.getRawCharacters(pos, endPos)) mcimadamore@1113: + "|"); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Build a map for translating between line numbers and mcimadamore@1113: * positions in the input. mcimadamore@1113: * mcimadamore@1113: * @return a LineMap */ mcimadamore@1113: public Position.LineMap getLineMap() { mcimadamore@1113: return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false); mcimadamore@1113: } mcimadamore@1125: mcimadamore@1125: mcimadamore@1125: /** mcimadamore@1125: * Scan a documentation comment; determine if a deprecated tag is present. mcimadamore@1125: * Called once the initial /, * have been skipped, positioned at the second * mcimadamore@1125: * (which is treated as the beginning of the first line). mcimadamore@1125: * Stops positioned at the closing '/'. mcimadamore@1125: */ mcimadamore@1125: protected class BasicComment implements Comment { mcimadamore@1125: mcimadamore@1125: CommentStyle cs; mcimadamore@1125: U comment_reader; mcimadamore@1125: mcimadamore@1125: protected boolean deprecatedFlag = false; mcimadamore@1125: protected boolean scanned = false; mcimadamore@1125: mcimadamore@1125: protected BasicComment(U comment_reader, CommentStyle cs) { mcimadamore@1125: this.comment_reader = comment_reader; mcimadamore@1125: this.cs = cs; mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: public String getText() { mcimadamore@1125: return null; mcimadamore@1125: } mcimadamore@1125: jjg@1281: public int getSourcePos(int pos) { jjg@1281: return -1; jjg@1281: } jjg@1281: mcimadamore@1125: public CommentStyle getStyle() { mcimadamore@1125: return cs; mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: public boolean isDeprecated() { mcimadamore@1125: if (!scanned && cs == CommentStyle.JAVADOC) { mcimadamore@1125: scanDocComment(); mcimadamore@1125: } mcimadamore@1125: return deprecatedFlag; mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: @SuppressWarnings("fallthrough") mcimadamore@1125: protected void scanDocComment() { mcimadamore@1125: try { mcimadamore@1125: boolean deprecatedPrefix = false; mcimadamore@1125: mcimadamore@1125: comment_reader.bp += 3; // '/**' mcimadamore@1125: comment_reader.ch = comment_reader.buf[comment_reader.bp]; mcimadamore@1125: mcimadamore@1125: forEachLine: mcimadamore@1125: while (comment_reader.bp < comment_reader.buflen) { mcimadamore@1125: mcimadamore@1125: // Skip optional WhiteSpace at beginning of line mcimadamore@1125: while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) { mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: // Skip optional consecutive Stars mcimadamore@1125: while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') { mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: if (comment_reader.ch == '/') { mcimadamore@1125: return; mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: // Skip optional WhiteSpace after Stars mcimadamore@1125: while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) { mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: deprecatedPrefix = false; mcimadamore@1125: // At beginning of line in the JavaDoc sense. mcimadamore@1125: if (!deprecatedFlag) { mcimadamore@1125: String deprecated = "@deprecated"; mcimadamore@1125: int i = 0; mcimadamore@1125: while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) { mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: i++; mcimadamore@1125: if (i == deprecated.length()) { mcimadamore@1125: deprecatedPrefix = true; mcimadamore@1125: break; mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) { mcimadamore@1125: if (Character.isWhitespace(comment_reader.ch)) { mcimadamore@1125: deprecatedFlag = true; mcimadamore@1125: } else if (comment_reader.ch == '*') { mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: if (comment_reader.ch == '/') { mcimadamore@1125: deprecatedFlag = true; mcimadamore@1125: return; mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: // Skip rest of line mcimadamore@1125: while (comment_reader.bp < comment_reader.buflen) { mcimadamore@1125: switch (comment_reader.ch) { mcimadamore@1125: case '*': mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: if (comment_reader.ch == '/') { mcimadamore@1125: return; mcimadamore@1125: } mcimadamore@1125: break; mcimadamore@1125: case CR: // (Spec 3.4) mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: if (comment_reader.ch != LF) { mcimadamore@1125: continue forEachLine; mcimadamore@1125: } mcimadamore@1125: /* fall through to LF case */ mcimadamore@1125: case LF: // (Spec 3.4) mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: continue forEachLine; mcimadamore@1125: default: mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: } mcimadamore@1125: } // rest of line mcimadamore@1125: } // forEachLine mcimadamore@1125: return; mcimadamore@1125: } finally { mcimadamore@1125: scanned = true; mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1113: }