duke@1: /*
xdono@54: * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved.
duke@1: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@1: *
duke@1: * This code is free software; you can redistribute it and/or modify it
duke@1: * under the terms of the GNU General Public License version 2 only, as
duke@1: * published by the Free Software Foundation. Sun designates this
duke@1: * particular file as subject to the "Classpath" exception as provided
duke@1: * by Sun in the LICENSE file that accompanied this code.
duke@1: *
duke@1: * This code is distributed in the hope that it will be useful, but WITHOUT
duke@1: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@1: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@1: * version 2 for more details (a copy is included in the LICENSE file that
duke@1: * accompanied this code).
duke@1: *
duke@1: * You should have received a copy of the GNU General Public License version
duke@1: * 2 along with this work; if not, write to the Free Software Foundation,
duke@1: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@1: *
duke@1: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
duke@1: * CA 95054 USA or visit www.sun.com if you need additional information or
duke@1: * have any questions.
duke@1: */
duke@1:
duke@1: package com.sun.tools.javac.parser;
duke@1:
duke@1: import java.nio.*;
duke@1:
jjg@50: import com.sun.tools.javac.code.Source;
jjg@50: import com.sun.tools.javac.file.JavacFileManager;
duke@1: import com.sun.tools.javac.util.*;
duke@1:
duke@1:
duke@1: import static com.sun.tools.javac.parser.Token.*;
duke@1: import static com.sun.tools.javac.util.LayoutCharacters.*;
duke@1:
/** The lexical analyzer maps an input stream consisting of
 *  ASCII characters and Unicode escapes into a token sequence.
 *
 *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
 *  you write code that depends on this, you do so at your own risk.
 *  This code and its internal interfaces are subject to change or
 *  deletion without notice.</b>
 */
duke@1: public class Scanner implements Lexer {
duke@1:
duke@1: private static boolean scannerDebug = false;
duke@1:
duke@1: /** A factory for creating scanners. */
duke@1: public static class Factory {
duke@1: /** The context key for the scanner factory. */
duke@1: public static final Context.Key scannerFactoryKey =
duke@1: new Context.Key();
duke@1:
duke@1: /** Get the Factory instance for this context. */
duke@1: public static Factory instance(Context context) {
duke@1: Factory instance = context.get(scannerFactoryKey);
duke@1: if (instance == null)
duke@1: instance = new Factory(context);
duke@1: return instance;
duke@1: }
duke@1:
duke@1: final Log log;
jjg@113: final Names names;
duke@1: final Source source;
duke@1: final Keywords keywords;
duke@1:
duke@1: /** Create a new scanner factory. */
duke@1: protected Factory(Context context) {
duke@1: context.put(scannerFactoryKey, this);
duke@1: this.log = Log.instance(context);
jjg@113: this.names = Names.instance(context);
duke@1: this.source = Source.instance(context);
duke@1: this.keywords = Keywords.instance(context);
duke@1: }
duke@1:
duke@1: public Scanner newScanner(CharSequence input) {
duke@1: if (input instanceof CharBuffer) {
duke@1: return new Scanner(this, (CharBuffer)input);
duke@1: } else {
duke@1: char[] array = input.toString().toCharArray();
duke@1: return newScanner(array, array.length);
duke@1: }
duke@1: }
duke@1:
duke@1: public Scanner newScanner(char[] input, int inputLength) {
duke@1: return new Scanner(this, input, inputLength);
duke@1: }
duke@1: }
duke@1:
duke@1: /* Output variables; set by nextToken():
duke@1: */
duke@1:
duke@1: /** The token, set by nextToken().
duke@1: */
duke@1: private Token token;
duke@1:
duke@1: /** Allow hex floating-point literals.
duke@1: */
duke@1: private boolean allowHexFloats;
duke@1:
duke@1: /** The token's position, 0-based offset from beginning of text.
duke@1: */
duke@1: private int pos;
duke@1:
duke@1: /** Character position just after the last character of the token.
duke@1: */
duke@1: private int endPos;
duke@1:
duke@1: /** The last character position of the previous token.
duke@1: */
duke@1: private int prevEndPos;
duke@1:
duke@1: /** The position where a lexical error occurred;
duke@1: */
duke@1: private int errPos = Position.NOPOS;
duke@1:
duke@1: /** The name of an identifier or token:
duke@1: */
duke@1: private Name name;
duke@1:
duke@1: /** The radix of a numeric literal token.
duke@1: */
duke@1: private int radix;
duke@1:
duke@1: /** Has a @deprecated been encountered in last doc comment?
duke@1: * this needs to be reset by client.
duke@1: */
duke@1: protected boolean deprecatedFlag = false;
duke@1:
duke@1: /** A character buffer for literals.
duke@1: */
duke@1: private char[] sbuf = new char[128];
duke@1: private int sp;
duke@1:
/** The input buffer, index of next character to be read,
 *  index of one past last character in buffer.
 */
duke@1: private char[] buf;
duke@1: private int bp;
duke@1: private int buflen;
duke@1: private int eofPos;
duke@1:
duke@1: /** The current character.
duke@1: */
duke@1: private char ch;
duke@1:
duke@1: /** The buffer index of the last converted unicode character
duke@1: */
duke@1: private int unicodeConversionBp = -1;
duke@1:
duke@1: /** The log to be used for error reporting.
duke@1: */
duke@1: private final Log log;
duke@1:
duke@1: /** The name table. */
jjg@113: private final Names names;
duke@1:
duke@1: /** The keyword table. */
duke@1: private final Keywords keywords;
duke@1:
/**
 * Shared constructor logic: copies the log, name table and keyword
 * table from the factory and reads the hex-float setting from the
 * factory's source level.
 */
private Scanner(Factory fac) {
    this.allowHexFloats = fac.source.allowHexFloats();
    this.keywords = fac.keywords;
    this.names = fac.names;
    this.log = fac.log;
}
duke@1:
duke@1: private static final boolean hexFloatsWork = hexFloatsWork();
duke@1: private static boolean hexFloatsWork() {
duke@1: try {
duke@1: Float.valueOf("0x1.0p1");
duke@1: return true;
duke@1: } catch (NumberFormatException ex) {
duke@1: return false;
duke@1: }
duke@1: }
duke@1:
/**
 * Create a scanner from a character buffer. The buffer is converted
 * with {@code JavacFileManager.toArray(buffer)} and the scanner then
 * reads {@code buffer.limit()} characters of the resulting array
 * (the limit is queried after the conversion).
 *
 * @param fac    the factory which created this Scanner
 * @param buffer the input characters
 */
protected Scanner(Factory fac, CharBuffer buffer) {
    this(fac, JavacFileManager.toArray(buffer), buffer.limit());
}
duke@1:
/**
 * Create a scanner from the input array. The scanner needs one slot
 * after the last input character for its end-of-input sentinel, so:
 * <ul>
 * <li>if {@code inputLength < input.length} the array is used as is;
 * <li>if the array is full and its last character is whitespace, that
 *     character is overwritten by the sentinel;
 * <li>otherwise the input is copied into an array one element larger.
 * </ul>
 * The array passed in may therefore be modified.
 *
 * @param fac         the factory which created this Scanner
 * @param input       the input, might be modified
 * @param inputLength the size of the input; must not exceed
 *                    {@code input.length}
 */
protected Scanner(Factory fac, char[] input, int inputLength) {
    this(fac);
    eofPos = inputLength;
    if (inputLength == input.length) {
        boolean endsInWhitespace =
            input.length > 0 && Character.isWhitespace(input[input.length - 1]);
        if (endsInWhitespace) {
            // Sacrifice the trailing whitespace to make room for the sentinel.
            inputLength--;
        } else {
            char[] grown = new char[inputLength + 1];
            System.arraycopy(input, 0, grown, 0, input.length);
            input = grown;
        }
    }
    buf = input;
    buflen = inputLength;
    buf[buflen] = EOI;
    // Start one before the first character so scanChar() loads buf[0].
    bp = -1;
    scanChar();
}
duke@1:
duke@1: /** Report an error at the given position using the provided arguments.
duke@1: */
duke@1: private void lexError(int pos, String key, Object... args) {
duke@1: log.error(pos, key, args);
duke@1: token = ERROR;
duke@1: errPos = pos;
duke@1: }
duke@1:
duke@1: /** Report an error at the current token position using the provided
duke@1: * arguments.
duke@1: */
duke@1: private void lexError(String key, Object... args) {
duke@1: lexError(pos, key, args);
duke@1: }
duke@1:
duke@1: /** Convert an ASCII digit from its base (8, 10, or 16)
duke@1: * to its value.
duke@1: */
duke@1: private int digit(int base) {
duke@1: char c = ch;
duke@1: int result = Character.digit(c, base);
duke@1: if (result >= 0 && c > 0x7f) {
duke@1: lexError(pos+1, "illegal.nonascii.digit");
duke@1: ch = "0123456789abcdef".charAt(result);
duke@1: }
duke@1: return result;
duke@1: }
duke@1:
duke@1: /** Convert unicode escape; bp points to initial '\' character
duke@1: * (Spec 3.3).
duke@1: */
duke@1: private void convertUnicode() {
duke@1: if (ch == '\\' && unicodeConversionBp != bp) {
duke@1: bp++; ch = buf[bp];
duke@1: if (ch == 'u') {
duke@1: do {
duke@1: bp++; ch = buf[bp];
duke@1: } while (ch == 'u');
duke@1: int limit = bp + 3;
duke@1: if (limit < buflen) {
duke@1: int d = digit(16);
duke@1: int code = d;
duke@1: while (bp < limit && d >= 0) {
duke@1: bp++; ch = buf[bp];
duke@1: d = digit(16);
duke@1: code = (code << 4) + d;
duke@1: }
duke@1: if (d >= 0) {
duke@1: ch = (char)code;
duke@1: unicodeConversionBp = bp;
duke@1: return;
duke@1: }
duke@1: }
duke@1: lexError(bp, "illegal.unicode.esc");
duke@1: } else {
duke@1: bp--;
duke@1: ch = '\\';
duke@1: }
duke@1: }
duke@1: }
duke@1:
duke@1: /** Read next character.
duke@1: */
duke@1: private void scanChar() {
duke@1: ch = buf[++bp];
duke@1: if (ch == '\\') {
duke@1: convertUnicode();
duke@1: }
duke@1: }
duke@1:
duke@1: /** Read next character in comment, skipping over double '\' characters.
duke@1: */
duke@1: private void scanCommentChar() {
duke@1: scanChar();
duke@1: if (ch == '\\') {
duke@1: if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1: bp++;
duke@1: } else {
duke@1: convertUnicode();
duke@1: }
duke@1: }
duke@1: }
duke@1:
duke@1: /** Append a character to sbuf.
duke@1: */
duke@1: private void putChar(char ch) {
duke@1: if (sp == sbuf.length) {
duke@1: char[] newsbuf = new char[sbuf.length * 2];
duke@1: System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
duke@1: sbuf = newsbuf;
duke@1: }
duke@1: sbuf[sp++] = ch;
duke@1: }
duke@1:
duke@1: /** For debugging purposes: print character.
duke@1: */
duke@1: private void dch() {
duke@1: System.err.print(ch); System.out.flush();
duke@1: }
duke@1:
duke@1: /** Read next character in character or string literal and copy into sbuf.
duke@1: */
duke@1: private void scanLitChar() {
duke@1: if (ch == '\\') {
duke@1: if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1: bp++;
duke@1: putChar('\\');
duke@1: scanChar();
duke@1: } else {
duke@1: scanChar();
duke@1: switch (ch) {
duke@1: case '0': case '1': case '2': case '3':
duke@1: case '4': case '5': case '6': case '7':
duke@1: char leadch = ch;
duke@1: int oct = digit(8);
duke@1: scanChar();
duke@1: if ('0' <= ch && ch <= '7') {
duke@1: oct = oct * 8 + digit(8);
duke@1: scanChar();
duke@1: if (leadch <= '3' && '0' <= ch && ch <= '7') {
duke@1: oct = oct * 8 + digit(8);
duke@1: scanChar();
duke@1: }
duke@1: }
duke@1: putChar((char)oct);
duke@1: break;
duke@1: case 'b':
duke@1: putChar('\b'); scanChar(); break;
duke@1: case 't':
duke@1: putChar('\t'); scanChar(); break;
duke@1: case 'n':
duke@1: putChar('\n'); scanChar(); break;
duke@1: case 'f':
duke@1: putChar('\f'); scanChar(); break;
duke@1: case 'r':
duke@1: putChar('\r'); scanChar(); break;
duke@1: case '\'':
duke@1: putChar('\''); scanChar(); break;
duke@1: case '\"':
duke@1: putChar('\"'); scanChar(); break;
duke@1: case '\\':
duke@1: putChar('\\'); scanChar(); break;
duke@1: default:
duke@1: lexError(bp, "illegal.esc.char");
duke@1: }
duke@1: }
duke@1: } else if (bp != buflen) {
duke@1: putChar(ch); scanChar();
duke@1: }
duke@1: }
duke@1:
duke@1: /** Read fractional part of hexadecimal floating point number.
duke@1: */
duke@1: private void scanHexExponentAndSuffix() {
duke@1: if (ch == 'p' || ch == 'P') {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: if (ch == '+' || ch == '-') {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: }
duke@1: if ('0' <= ch && ch <= '9') {
duke@1: do {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: } while ('0' <= ch && ch <= '9');
duke@1: if (!allowHexFloats) {
duke@1: lexError("unsupported.fp.lit");
duke@1: allowHexFloats = true;
duke@1: }
duke@1: else if (!hexFloatsWork)
duke@1: lexError("unsupported.cross.fp.lit");
duke@1: } else
duke@1: lexError("malformed.fp.lit");
duke@1: } else {
duke@1: lexError("malformed.fp.lit");
duke@1: }
duke@1: if (ch == 'f' || ch == 'F') {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: token = FLOATLITERAL;
duke@1: } else {
duke@1: if (ch == 'd' || ch == 'D') {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: }
duke@1: token = DOUBLELITERAL;
duke@1: }
duke@1: }
duke@1:
duke@1: /** Read fractional part of floating point number.
duke@1: */
duke@1: private void scanFraction() {
duke@1: while (digit(10) >= 0) {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: }
duke@1: int sp1 = sp;
duke@1: if (ch == 'e' || ch == 'E') {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: if (ch == '+' || ch == '-') {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: }
duke@1: if ('0' <= ch && ch <= '9') {
duke@1: do {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: } while ('0' <= ch && ch <= '9');
duke@1: return;
duke@1: }
duke@1: lexError("malformed.fp.lit");
duke@1: sp = sp1;
duke@1: }
duke@1: }
duke@1:
duke@1: /** Read fractional part and 'd' or 'f' suffix of floating point number.
duke@1: */
duke@1: private void scanFractionAndSuffix() {
duke@1: this.radix = 10;
duke@1: scanFraction();
duke@1: if (ch == 'f' || ch == 'F') {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: token = FLOATLITERAL;
duke@1: } else {
duke@1: if (ch == 'd' || ch == 'D') {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: }
duke@1: token = DOUBLELITERAL;
duke@1: }
duke@1: }
duke@1:
duke@1: /** Read fractional part and 'd' or 'f' suffix of floating point number.
duke@1: */
duke@1: private void scanHexFractionAndSuffix(boolean seendigit) {
duke@1: this.radix = 16;
duke@1: assert ch == '.';
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: while (digit(16) >= 0) {
duke@1: seendigit = true;
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: }
duke@1: if (!seendigit)
duke@1: lexError("invalid.hex.number");
duke@1: else
duke@1: scanHexExponentAndSuffix();
duke@1: }
duke@1:
duke@1: /** Read a number.
duke@1: * @param radix The radix of the number; one of 8, 10, 16.
duke@1: */
duke@1: private void scanNumber(int radix) {
duke@1: this.radix = radix;
duke@1: // for octal, allow base-10 digit in case it's a float literal
duke@1: int digitRadix = (radix <= 10) ? 10 : 16;
duke@1: boolean seendigit = false;
duke@1: while (digit(digitRadix) >= 0) {
duke@1: seendigit = true;
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: }
duke@1: if (radix == 16 && ch == '.') {
duke@1: scanHexFractionAndSuffix(seendigit);
duke@1: } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
duke@1: scanHexExponentAndSuffix();
duke@1: } else if (radix <= 10 && ch == '.') {
duke@1: putChar(ch);
duke@1: scanChar();
duke@1: scanFractionAndSuffix();
duke@1: } else if (radix <= 10 &&
duke@1: (ch == 'e' || ch == 'E' ||
duke@1: ch == 'f' || ch == 'F' ||
duke@1: ch == 'd' || ch == 'D')) {
duke@1: scanFractionAndSuffix();
duke@1: } else {
duke@1: if (ch == 'l' || ch == 'L') {
duke@1: scanChar();
duke@1: token = LONGLITERAL;
duke@1: } else {
duke@1: token = INTLITERAL;
duke@1: }
duke@1: }
duke@1: }
duke@1:
duke@1: /** Read an identifier.
duke@1: */
duke@1: private void scanIdent() {
duke@1: boolean isJavaIdentifierPart;
duke@1: char high;
duke@1: do {
duke@1: if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
duke@1: // optimization, was: putChar(ch);
duke@1:
duke@1: scanChar();
duke@1: switch (ch) {
duke@1: case 'A': case 'B': case 'C': case 'D': case 'E':
duke@1: case 'F': case 'G': case 'H': case 'I': case 'J':
duke@1: case 'K': case 'L': case 'M': case 'N': case 'O':
duke@1: case 'P': case 'Q': case 'R': case 'S': case 'T':
duke@1: case 'U': case 'V': case 'W': case 'X': case 'Y':
duke@1: case 'Z':
duke@1: case 'a': case 'b': case 'c': case 'd': case 'e':
duke@1: case 'f': case 'g': case 'h': case 'i': case 'j':
duke@1: case 'k': case 'l': case 'm': case 'n': case 'o':
duke@1: case 'p': case 'q': case 'r': case 's': case 't':
duke@1: case 'u': case 'v': case 'w': case 'x': case 'y':
duke@1: case 'z':
duke@1: case '$': case '_':
duke@1: case '0': case '1': case '2': case '3': case '4':
duke@1: case '5': case '6': case '7': case '8': case '9':
duke@1: case '\u0000': case '\u0001': case '\u0002': case '\u0003':
duke@1: case '\u0004': case '\u0005': case '\u0006': case '\u0007':
duke@1: case '\u0008': case '\u000E': case '\u000F': case '\u0010':
duke@1: case '\u0011': case '\u0012': case '\u0013': case '\u0014':
duke@1: case '\u0015': case '\u0016': case '\u0017':
duke@1: case '\u0018': case '\u0019': case '\u001B':
duke@1: case '\u007F':
duke@1: break;
duke@1: case '\u001A': // EOI is also a legal identifier part
duke@1: if (bp >= buflen) {
duke@1: name = names.fromChars(sbuf, 0, sp);
duke@1: token = keywords.key(name);
duke@1: return;
duke@1: }
duke@1: break;
duke@1: default:
duke@1: if (ch < '\u0080') {
duke@1: // all ASCII range chars already handled, above
duke@1: isJavaIdentifierPart = false;
duke@1: } else {
duke@1: high = scanSurrogates();
duke@1: if (high != 0) {
duke@1: if (sp == sbuf.length) {
duke@1: putChar(high);
duke@1: } else {
duke@1: sbuf[sp++] = high;
duke@1: }
duke@1: isJavaIdentifierPart = Character.isJavaIdentifierPart(
duke@1: Character.toCodePoint(high, ch));
duke@1: } else {
duke@1: isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
duke@1: }
duke@1: }
duke@1: if (!isJavaIdentifierPart) {
duke@1: name = names.fromChars(sbuf, 0, sp);
duke@1: token = keywords.key(name);
duke@1: return;
duke@1: }
duke@1: }
duke@1: } while (true);
duke@1: }
duke@1:
duke@1: /** Are surrogates supported?
duke@1: */
duke@1: final static boolean surrogatesSupported = surrogatesSupported();
duke@1: private static boolean surrogatesSupported() {
duke@1: try {
duke@1: Character.isHighSurrogate('a');
duke@1: return true;
duke@1: } catch (NoSuchMethodError ex) {
duke@1: return false;
duke@1: }
duke@1: }
duke@1:
duke@1: /** Scan surrogate pairs. If 'ch' is a high surrogate and
duke@1: * the next character is a low surrogate, then put the low
duke@1: * surrogate in 'ch', and return the high surrogate.
duke@1: * otherwise, just return 0.
duke@1: */
duke@1: private char scanSurrogates() {
duke@1: if (surrogatesSupported && Character.isHighSurrogate(ch)) {
duke@1: char high = ch;
duke@1:
duke@1: scanChar();
duke@1:
duke@1: if (Character.isLowSurrogate(ch)) {
duke@1: return high;
duke@1: }
duke@1:
duke@1: ch = high;
duke@1: }
duke@1:
duke@1: return 0;
duke@1: }
duke@1:
duke@1: /** Return true if ch can be part of an operator.
duke@1: */
duke@1: private boolean isSpecial(char ch) {
duke@1: switch (ch) {
duke@1: case '!': case '%': case '&': case '*': case '?':
duke@1: case '+': case '-': case ':': case '<': case '=':
duke@1: case '>': case '^': case '|': case '~':
duke@1: case '@':
duke@1: return true;
duke@1: default:
duke@1: return false;
duke@1: }
duke@1: }
duke@1:
duke@1: /** Read longest possible sequence of special characters and convert
duke@1: * to token.
duke@1: */
duke@1: private void scanOperator() {
duke@1: while (true) {
duke@1: putChar(ch);
duke@1: Name newname = names.fromChars(sbuf, 0, sp);
duke@1: if (keywords.key(newname) == IDENTIFIER) {
duke@1: sp--;
duke@1: break;
duke@1: }
duke@1: name = newname;
duke@1: token = keywords.key(newname);
duke@1: scanChar();
duke@1: if (!isSpecial(ch)) break;
duke@1: }
duke@1: }
duke@1:
duke@1: /**
duke@1: * Scan a documention comment; determine if a deprecated tag is present.
duke@1: * Called once the initial /, * have been skipped, positioned at the second *
duke@1: * (which is treated as the beginning of the first line).
duke@1: * Stops positioned at the closing '/'.
duke@1: */
duke@1: @SuppressWarnings("fallthrough")
duke@1: private void scanDocComment() {
duke@1: boolean deprecatedPrefix = false;
duke@1:
duke@1: forEachLine:
duke@1: while (bp < buflen) {
duke@1:
duke@1: // Skip optional WhiteSpace at beginning of line
duke@1: while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
duke@1: scanCommentChar();
duke@1: }
duke@1:
duke@1: // Skip optional consecutive Stars
duke@1: while (bp < buflen && ch == '*') {
duke@1: scanCommentChar();
duke@1: if (ch == '/') {
duke@1: return;
duke@1: }
duke@1: }
duke@1:
duke@1: // Skip optional WhiteSpace after Stars
duke@1: while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
duke@1: scanCommentChar();
duke@1: }
duke@1:
duke@1: deprecatedPrefix = false;
duke@1: // At beginning of line in the JavaDoc sense.
duke@1: if (bp < buflen && ch == '@' && !deprecatedFlag) {
duke@1: scanCommentChar();
duke@1: if (bp < buflen && ch == 'd') {
duke@1: scanCommentChar();
duke@1: if (bp < buflen && ch == 'e') {
duke@1: scanCommentChar();
duke@1: if (bp < buflen && ch == 'p') {
duke@1: scanCommentChar();
duke@1: if (bp < buflen && ch == 'r') {
duke@1: scanCommentChar();
duke@1: if (bp < buflen && ch == 'e') {
duke@1: scanCommentChar();
duke@1: if (bp < buflen && ch == 'c') {
duke@1: scanCommentChar();
duke@1: if (bp < buflen && ch == 'a') {
duke@1: scanCommentChar();
duke@1: if (bp < buflen && ch == 't') {
duke@1: scanCommentChar();
duke@1: if (bp < buflen && ch == 'e') {
duke@1: scanCommentChar();
duke@1: if (bp < buflen && ch == 'd') {
duke@1: deprecatedPrefix = true;
duke@1: scanCommentChar();
duke@1: }}}}}}}}}}}
duke@1: if (deprecatedPrefix && bp < buflen) {
duke@1: if (Character.isWhitespace(ch)) {
duke@1: deprecatedFlag = true;
duke@1: } else if (ch == '*') {
duke@1: scanCommentChar();
duke@1: if (ch == '/') {
duke@1: deprecatedFlag = true;
duke@1: return;
duke@1: }
duke@1: }
duke@1: }
duke@1:
duke@1: // Skip rest of line
duke@1: while (bp < buflen) {
duke@1: switch (ch) {
duke@1: case '*':
duke@1: scanCommentChar();
duke@1: if (ch == '/') {
duke@1: return;
duke@1: }
duke@1: break;
duke@1: case CR: // (Spec 3.4)
duke@1: scanCommentChar();
duke@1: if (ch != LF) {
duke@1: continue forEachLine;
duke@1: }
duke@1: /* fall through to LF case */
duke@1: case LF: // (Spec 3.4)
duke@1: scanCommentChar();
duke@1: continue forEachLine;
duke@1: default:
duke@1: scanCommentChar();
duke@1: }
duke@1: } // rest of line
duke@1: } // forEachLine
duke@1: return;
duke@1: }
duke@1:
duke@1: /** The value of a literal token, recorded as a string.
duke@1: * For integers, leading 0x and 'l' suffixes are suppressed.
duke@1: */
duke@1: public String stringVal() {
duke@1: return new String(sbuf, 0, sp);
duke@1: }
duke@1:
duke@1: /** Read token.
duke@1: */
duke@1: public void nextToken() {
duke@1:
duke@1: try {
duke@1: prevEndPos = endPos;
duke@1: sp = 0;
duke@1:
duke@1: while (true) {
duke@1: pos = bp;
duke@1: switch (ch) {
duke@1: case ' ': // (Spec 3.6)
duke@1: case '\t': // (Spec 3.6)
duke@1: case FF: // (Spec 3.6)
duke@1: do {
duke@1: scanChar();
duke@1: } while (ch == ' ' || ch == '\t' || ch == FF);
duke@1: endPos = bp;
duke@1: processWhiteSpace();
duke@1: break;
duke@1: case LF: // (Spec 3.4)
duke@1: scanChar();
duke@1: endPos = bp;
duke@1: processLineTerminator();
duke@1: break;
duke@1: case CR: // (Spec 3.4)
duke@1: scanChar();
duke@1: if (ch == LF) {
duke@1: scanChar();
duke@1: }
duke@1: endPos = bp;
duke@1: processLineTerminator();
duke@1: break;
duke@1: case 'A': case 'B': case 'C': case 'D': case 'E':
duke@1: case 'F': case 'G': case 'H': case 'I': case 'J':
duke@1: case 'K': case 'L': case 'M': case 'N': case 'O':
duke@1: case 'P': case 'Q': case 'R': case 'S': case 'T':
duke@1: case 'U': case 'V': case 'W': case 'X': case 'Y':
duke@1: case 'Z':
duke@1: case 'a': case 'b': case 'c': case 'd': case 'e':
duke@1: case 'f': case 'g': case 'h': case 'i': case 'j':
duke@1: case 'k': case 'l': case 'm': case 'n': case 'o':
duke@1: case 'p': case 'q': case 'r': case 's': case 't':
duke@1: case 'u': case 'v': case 'w': case 'x': case 'y':
duke@1: case 'z':
duke@1: case '$': case '_':
duke@1: scanIdent();
duke@1: return;
duke@1: case '0':
duke@1: scanChar();
duke@1: if (ch == 'x' || ch == 'X') {
duke@1: scanChar();
duke@1: if (ch == '.') {
duke@1: scanHexFractionAndSuffix(false);
duke@1: } else if (digit(16) < 0) {
duke@1: lexError("invalid.hex.number");
duke@1: } else {
duke@1: scanNumber(16);
duke@1: }
duke@1: } else {
duke@1: putChar('0');
duke@1: scanNumber(8);
duke@1: }
duke@1: return;
duke@1: case '1': case '2': case '3': case '4':
duke@1: case '5': case '6': case '7': case '8': case '9':
duke@1: scanNumber(10);
duke@1: return;
duke@1: case '.':
duke@1: scanChar();
duke@1: if ('0' <= ch && ch <= '9') {
duke@1: putChar('.');
duke@1: scanFractionAndSuffix();
duke@1: } else if (ch == '.') {
duke@1: putChar('.'); putChar('.');
duke@1: scanChar();
duke@1: if (ch == '.') {
duke@1: scanChar();
duke@1: putChar('.');
duke@1: token = ELLIPSIS;
duke@1: } else {
duke@1: lexError("malformed.fp.lit");
duke@1: }
duke@1: } else {
duke@1: token = DOT;
duke@1: }
duke@1: return;
duke@1: case ',':
duke@1: scanChar(); token = COMMA; return;
duke@1: case ';':
duke@1: scanChar(); token = SEMI; return;
duke@1: case '(':
duke@1: scanChar(); token = LPAREN; return;
duke@1: case ')':
duke@1: scanChar(); token = RPAREN; return;
duke@1: case '[':
duke@1: scanChar(); token = LBRACKET; return;
duke@1: case ']':
duke@1: scanChar(); token = RBRACKET; return;
duke@1: case '{':
duke@1: scanChar(); token = LBRACE; return;
duke@1: case '}':
duke@1: scanChar(); token = RBRACE; return;
duke@1: case '/':
duke@1: scanChar();
duke@1: if (ch == '/') {
duke@1: do {
duke@1: scanCommentChar();
duke@1: } while (ch != CR && ch != LF && bp < buflen);
duke@1: if (bp < buflen) {
duke@1: endPos = bp;
duke@1: processComment(CommentStyle.LINE);
duke@1: }
duke@1: break;
duke@1: } else if (ch == '*') {
duke@1: scanChar();
duke@1: CommentStyle style;
duke@1: if (ch == '*') {
duke@1: style = CommentStyle.JAVADOC;
duke@1: scanDocComment();
duke@1: } else {
duke@1: style = CommentStyle.BLOCK;
duke@1: while (bp < buflen) {
duke@1: if (ch == '*') {
duke@1: scanChar();
duke@1: if (ch == '/') break;
duke@1: } else {
duke@1: scanCommentChar();
duke@1: }
duke@1: }
duke@1: }
duke@1: if (ch == '/') {
duke@1: scanChar();
duke@1: endPos = bp;
duke@1: processComment(style);
duke@1: break;
duke@1: } else {
duke@1: lexError("unclosed.comment");
duke@1: return;
duke@1: }
duke@1: } else if (ch == '=') {
duke@1: name = names.slashequals;
duke@1: token = SLASHEQ;
duke@1: scanChar();
duke@1: } else {
duke@1: name = names.slash;
duke@1: token = SLASH;
duke@1: }
duke@1: return;
duke@1: case '\'':
duke@1: scanChar();
duke@1: if (ch == '\'') {
duke@1: lexError("empty.char.lit");
duke@1: } else {
duke@1: if (ch == CR || ch == LF)
duke@1: lexError(pos, "illegal.line.end.in.char.lit");
duke@1: scanLitChar();
duke@1: if (ch == '\'') {
duke@1: scanChar();
duke@1: token = CHARLITERAL;
duke@1: } else {
duke@1: lexError(pos, "unclosed.char.lit");
duke@1: }
duke@1: }
duke@1: return;
duke@1: case '\"':
duke@1: scanChar();
duke@1: while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
duke@1: scanLitChar();
duke@1: if (ch == '\"') {
duke@1: token = STRINGLITERAL;
duke@1: scanChar();
duke@1: } else {
duke@1: lexError(pos, "unclosed.str.lit");
duke@1: }
duke@1: return;
duke@1: default:
duke@1: if (isSpecial(ch)) {
duke@1: scanOperator();
duke@1: } else {
duke@1: boolean isJavaIdentifierStart;
duke@1: if (ch < '\u0080') {
duke@1: // all ASCII range chars already handled, above
duke@1: isJavaIdentifierStart = false;
duke@1: } else {
duke@1: char high = scanSurrogates();
duke@1: if (high != 0) {
duke@1: if (sp == sbuf.length) {
duke@1: putChar(high);
duke@1: } else {
duke@1: sbuf[sp++] = high;
duke@1: }
duke@1:
duke@1: isJavaIdentifierStart = Character.isJavaIdentifierStart(
duke@1: Character.toCodePoint(high, ch));
duke@1: } else {
duke@1: isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
duke@1: }
duke@1: }
duke@1: if (isJavaIdentifierStart) {
duke@1: scanIdent();
duke@1: } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
duke@1: token = EOF;
duke@1: pos = bp = eofPos;
duke@1: } else {
duke@1: lexError("illegal.char", String.valueOf((int)ch));
duke@1: scanChar();
duke@1: }
duke@1: }
duke@1: return;
duke@1: }
duke@1: }
duke@1: } finally {
duke@1: endPos = bp;
duke@1: if (scannerDebug)
duke@1: System.out.println("nextToken(" + pos
duke@1: + "," + endPos + ")=|" +
duke@1: new String(getRawCharacters(pos, endPos))
duke@1: + "|");
duke@1: }
duke@1: }
duke@1:
duke@1: /** Return the current token, set by nextToken().
duke@1: */
duke@1: public Token token() {
duke@1: return token;
duke@1: }
duke@1:
duke@1: /** Sets the current token.
duke@1: */
duke@1: public void token(Token token) {
duke@1: this.token = token;
duke@1: }
duke@1:
duke@1: /** Return the current token's position: a 0-based
duke@1: * offset from beginning of the raw input stream
duke@1: * (before unicode translation)
duke@1: */
duke@1: public int pos() {
duke@1: return pos;
duke@1: }
duke@1:
duke@1: /** Return the last character position of the current token.
duke@1: */
duke@1: public int endPos() {
duke@1: return endPos;
duke@1: }
duke@1:
duke@1: /** Return the last character position of the previous token.
duke@1: */
duke@1: public int prevEndPos() {
duke@1: return prevEndPos;
duke@1: }
duke@1:
duke@1: /** Return the position where a lexical error occurred;
duke@1: */
duke@1: public int errPos() {
duke@1: return errPos;
duke@1: }
duke@1:
duke@1: /** Set the position where a lexical error occurred;
duke@1: */
duke@1: public void errPos(int pos) {
duke@1: errPos = pos;
duke@1: }
duke@1:
duke@1: /** Return the name of an identifier or token for the current token.
duke@1: */
duke@1: public Name name() {
duke@1: return name;
duke@1: }
duke@1:
duke@1: /** Return the radix of a numeric literal token.
duke@1: */
duke@1: public int radix() {
duke@1: return radix;
duke@1: }
duke@1:
duke@1: /** Has a @deprecated been encountered in last doc comment?
duke@1: * This needs to be reset by client with resetDeprecatedFlag.
duke@1: */
duke@1: public boolean deprecatedFlag() {
duke@1: return deprecatedFlag;
duke@1: }
duke@1:
duke@1: public void resetDeprecatedFlag() {
duke@1: deprecatedFlag = false;
duke@1: }
duke@1:
duke@1: /**
duke@1: * Returns the documentation string of the current token.
duke@1: */
duke@1: public String docComment() {
duke@1: return null;
duke@1: }
duke@1:
duke@1: /**
duke@1: * Returns a copy of the input buffer, up to its inputLength.
duke@1: * Unicode escape sequences are not translated.
duke@1: */
duke@1: public char[] getRawCharacters() {
duke@1: char[] chars = new char[buflen];
duke@1: System.arraycopy(buf, 0, chars, 0, buflen);
duke@1: return chars;
duke@1: }
duke@1:
duke@1: /**
duke@1: * Returns a copy of a character array subset of the input buffer.
duke@1: * The returned array begins at the beginIndex
and
duke@1: * extends to the character at index endIndex - 1
.
duke@1: * Thus the length of the substring is endIndex-beginIndex
.
duke@1: * This behavior is like
duke@1: * String.substring(beginIndex, endIndex)
.
duke@1: * Unicode escape sequences are not translated.
duke@1: *
duke@1: * @param beginIndex the beginning index, inclusive.
duke@1: * @param endIndex the ending index, exclusive.
duke@1: * @throws IndexOutOfBounds if either offset is outside of the
duke@1: * array bounds
duke@1: */
duke@1: public char[] getRawCharacters(int beginIndex, int endIndex) {
duke@1: int length = endIndex - beginIndex;
duke@1: char[] chars = new char[length];
duke@1: System.arraycopy(buf, beginIndex, chars, 0, length);
duke@1: return chars;
duke@1: }
duke@1:
/** The kinds of comment reported to {@code processComment}. */
public enum CommentStyle {
    /** An end-of-line comment, introduced by two slashes. */
    LINE,
    /** A block comment, introduced by a slash and a star. */
    BLOCK,
    /** A documentation comment, introduced by a slash and two stars. */
    JAVADOC,
}
duke@1:
duke@1: /**
duke@1: * Called when a complete comment has been scanned. pos and endPos
duke@1: * will mark the comment boundary.
duke@1: */
duke@1: protected void processComment(CommentStyle style) {
duke@1: if (scannerDebug)
duke@1: System.out.println("processComment(" + pos
duke@1: + "," + endPos + "," + style + ")=|"
duke@1: + new String(getRawCharacters(pos, endPos))
duke@1: + "|");
duke@1: }
duke@1:
duke@1: /**
duke@1: * Called when a complete whitespace run has been scanned. pos and endPos
duke@1: * will mark the whitespace boundary.
duke@1: */
duke@1: protected void processWhiteSpace() {
duke@1: if (scannerDebug)
duke@1: System.out.println("processWhitespace(" + pos
duke@1: + "," + endPos + ")=|" +
duke@1: new String(getRawCharacters(pos, endPos))
duke@1: + "|");
duke@1: }
duke@1:
duke@1: /**
duke@1: * Called when a line terminator has been processed.
duke@1: */
duke@1: protected void processLineTerminator() {
duke@1: if (scannerDebug)
duke@1: System.out.println("processTerminator(" + pos
duke@1: + "," + endPos + ")=|" +
duke@1: new String(getRawCharacters(pos, endPos))
duke@1: + "|");
duke@1: }
duke@1:
duke@1: /** Build a map for translating between line numbers and
duke@1: * positions in the input.
duke@1: *
duke@1: * @return a LineMap */
duke@1: public Position.LineMap getLineMap() {
duke@1: return Position.makeLineMap(buf, buflen, false);
duke@1: }
duke@1:
duke@1: }