src/share/classes/com/sun/tools/javac/parser/Scanner.java

changeset 1113
d346ab55031b
parent 1074
04f983e3e825
child 1144
9448fe783fd2
     1.1 --- a/src/share/classes/com/sun/tools/javac/parser/Scanner.java	Fri Oct 21 14:14:29 2011 -0700
     1.2 +++ b/src/share/classes/com/sun/tools/javac/parser/Scanner.java	Mon Oct 24 13:00:20 2011 +0100
     1.3 @@ -27,13 +27,11 @@
     1.4  
     1.5  import java.nio.*;
     1.6  
     1.7 -import com.sun.tools.javac.code.Source;
     1.8 -import com.sun.tools.javac.file.JavacFileManager;
     1.9  import com.sun.tools.javac.util.*;
    1.10 +import com.sun.tools.javac.util.Position.LineMap;
    1.11 +import com.sun.tools.javac.parser.JavaTokenizer.*;
    1.12  
    1.13 -
    1.14 -import static com.sun.tools.javac.parser.Token.*;
    1.15 -import static com.sun.tools.javac.util.LayoutCharacters.*;
    1.16 +import static com.sun.tools.javac.parser.Tokens.*;
    1.17  
    1.18  /** The lexical analyzer maps an input stream consisting of
    1.19   *  ASCII characters and Unicode escapes into a token sequence.
    1.20 @@ -45,119 +43,17 @@
    1.21   */
    1.22  public class Scanner implements Lexer {
    1.23  
    1.24 -    private static boolean scannerDebug = false;
    1.25 -
    1.26 -    /* Output variables; set by nextToken():
    1.27 -     */
    1.28 +    private Tokens tokens;
    1.29  
    1.30      /** The token, set by nextToken().
    1.31       */
    1.32      private Token token;
    1.33  
    1.34 -    /** Allow hex floating-point literals.
    1.35 +    /** The previous token, set by nextToken().
    1.36       */
    1.37 -    private boolean allowHexFloats;
    1.38 +    private Token prevToken;
    1.39  
    1.40 -    /** Allow binary literals.
    1.41 -     */
    1.42 -    private boolean allowBinaryLiterals;
    1.43 -
    1.44 -    /** Allow underscores in literals.
    1.45 -     */
    1.46 -    private boolean allowUnderscoresInLiterals;
    1.47 -
    1.48 -    /** The source language setting.
    1.49 -     */
    1.50 -    private Source source;
    1.51 -
    1.52 -    /** The token's position, 0-based offset from beginning of text.
    1.53 -     */
    1.54 -    private int pos;
    1.55 -
    1.56 -    /** Character position just after the last character of the token.
    1.57 -     */
    1.58 -    private int endPos;
    1.59 -
    1.60 -    /** The last character position of the previous token.
    1.61 -     */
    1.62 -    private int prevEndPos;
    1.63 -
    1.64 -    /** The position where a lexical error occurred;
    1.65 -     */
    1.66 -    private int errPos = Position.NOPOS;
    1.67 -
    1.68 -    /** The name of an identifier or token:
    1.69 -     */
    1.70 -    private Name name;
    1.71 -
    1.72 -    /** The radix of a numeric literal token.
    1.73 -     */
    1.74 -    private int radix;
    1.75 -
    1.76 -    /** Has a @deprecated been encountered in last doc comment?
    1.77 -     *  this needs to be reset by client.
    1.78 -     */
    1.79 -    protected boolean deprecatedFlag = false;
    1.80 -
    1.81 -    /** A character buffer for literals.
    1.82 -     */
    1.83 -    private char[] sbuf = new char[128];
    1.84 -    private int sp;
    1.85 -
    1.86 -    /** The input buffer, index of next character to be read,
    1.87 -     *  index of one past last character in buffer.
    1.88 -     */
    1.89 -    private char[] buf;
    1.90 -    private int bp;
    1.91 -    private int buflen;
    1.92 -    private int eofPos;
    1.93 -
    1.94 -    /** The current character.
    1.95 -     */
    1.96 -    private char ch;
    1.97 -
    1.98 -    /** The buffer index of the last converted unicode character
    1.99 -     */
   1.100 -    private int unicodeConversionBp = -1;
   1.101 -
   1.102 -    /** The log to be used for error reporting.
   1.103 -     */
   1.104 -    private final Log log;
   1.105 -
   1.106 -    /** The name table. */
   1.107 -    private final Names names;
   1.108 -
   1.109 -    /** The keyword table. */
   1.110 -    private final Keywords keywords;
   1.111 -
   1.112 -    /** Common code for constructors. */
   1.113 -    private Scanner(ScannerFactory fac) {
   1.114 -        log = fac.log;
   1.115 -        names = fac.names;
   1.116 -        keywords = fac.keywords;
   1.117 -        source = fac.source;
   1.118 -        allowBinaryLiterals = source.allowBinaryLiterals();
   1.119 -        allowHexFloats = source.allowHexFloats();
   1.120 -        allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
   1.121 -    }
   1.122 -
   1.123 -    private static final boolean hexFloatsWork = hexFloatsWork();
   1.124 -    private static boolean hexFloatsWork() {
   1.125 -        try {
   1.126 -            Float.valueOf("0x1.0p1");
   1.127 -            return true;
   1.128 -        } catch (NumberFormatException ex) {
   1.129 -            return false;
   1.130 -        }
   1.131 -    }
   1.132 -
   1.133 -    /** Create a scanner from the input buffer.  buffer must implement
   1.134 -     *  array() and compact(), and remaining() must be less than limit().
   1.135 -     */
   1.136 -    protected Scanner(ScannerFactory fac, CharBuffer buffer) {
   1.137 -        this(fac, JavacFileManager.toArray(buffer), buffer.limit());
   1.138 -    }
   1.139 -
   1.140 +    private JavaTokenizer tokenizer;
   1.141      /**
   1.142       * Create a scanner from the input array.  This method might
   1.143       * modify the array.  To avoid copying the input array, ensure
   1.144 @@ -169,972 +65,49 @@
   1.145       * @param inputLength the size of the input.
   1.146       * Must be positive and less than or equal to input.length.
   1.147       */
   1.148 -    protected Scanner(ScannerFactory fac, char[] input, int inputLength) {
   1.149 -        this(fac);
   1.150 -        eofPos = inputLength;
   1.151 -        if (inputLength == input.length) {
   1.152 -            if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
   1.153 -                inputLength--;
   1.154 -            } else {
   1.155 -                char[] newInput = new char[inputLength + 1];
   1.156 -                System.arraycopy(input, 0, newInput, 0, input.length);
   1.157 -                input = newInput;
   1.158 -            }
   1.159 -        }
   1.160 -        buf = input;
   1.161 -        buflen = inputLength;
   1.162 -        buf[buflen] = EOI;
   1.163 -        bp = -1;
   1.164 -        scanChar();
   1.165 +    protected Scanner(ScannerFactory fac, CharBuffer buf) {
   1.166 +        this(fac, new JavaTokenizer(fac, buf));
   1.167      }
   1.168  
   1.169 -    /** Report an error at the given position using the provided arguments.
   1.170 -     */
   1.171 -    private void lexError(int pos, String key, Object... args) {
   1.172 -        log.error(pos, key, args);
   1.173 -        token = ERROR;
   1.174 -        errPos = pos;
   1.175 +    protected Scanner(ScannerFactory fac, char[] buf, int inputLength) {
   1.176 +        this(fac, new JavaTokenizer(fac, buf, inputLength));
   1.177      }
   1.178  
   1.179 -    /** Report an error at the current token position using the provided
   1.180 -     *  arguments.
   1.181 -     */
   1.182 -    private void lexError(String key, Object... args) {
   1.183 -        lexError(pos, key, args);
   1.184 +    protected Scanner(ScannerFactory fac, JavaTokenizer tokenizer) {
   1.185 +        this.tokenizer = tokenizer;
   1.186 +        tokens = fac.tokens;
   1.187 +        token = prevToken = DUMMY;
   1.188      }
   1.189  
   1.190 -    /** Convert an ASCII digit from its base (2, 8, 10, or 16)
   1.191 -     *  to its value.
   1.192 -     */
   1.193 -    private int digit(int base) {
   1.194 -        char c = ch;
   1.195 -        int result = Character.digit(c, base);
   1.196 -        if (result >= 0 && c > 0x7f) {
   1.197 -            lexError(pos+1, "illegal.nonascii.digit");
   1.198 -            ch = "0123456789abcdef".charAt(result);
   1.199 -        }
   1.200 -        return result;
   1.201 -    }
   1.202 -
   1.203 -    /** Convert unicode escape; bp points to initial '\' character
   1.204 -     *  (Spec 3.3).
   1.205 -     */
   1.206 -    private void convertUnicode() {
   1.207 -        if (ch == '\\' && unicodeConversionBp != bp) {
   1.208 -            bp++; ch = buf[bp];
   1.209 -            if (ch == 'u') {
   1.210 -                do {
   1.211 -                    bp++; ch = buf[bp];
   1.212 -                } while (ch == 'u');
   1.213 -                int limit = bp + 3;
   1.214 -                if (limit < buflen) {
   1.215 -                    int d = digit(16);
   1.216 -                    int code = d;
   1.217 -                    while (bp < limit && d >= 0) {
   1.218 -                        bp++; ch = buf[bp];
   1.219 -                        d = digit(16);
   1.220 -                        code = (code << 4) + d;
   1.221 -                    }
   1.222 -                    if (d >= 0) {
   1.223 -                        ch = (char)code;
   1.224 -                        unicodeConversionBp = bp;
   1.225 -                        return;
   1.226 -                    }
   1.227 -                }
   1.228 -                lexError(bp, "illegal.unicode.esc");
   1.229 -            } else {
   1.230 -                bp--;
   1.231 -                ch = '\\';
   1.232 -            }
   1.233 -        }
   1.234 -    }
   1.235 -
   1.236 -    /** Read next character.
   1.237 -     */
   1.238 -    private void scanChar() {
   1.239 -        ch = buf[++bp];
   1.240 -        if (ch == '\\') {
   1.241 -            convertUnicode();
   1.242 -        }
   1.243 -    }
   1.244 -
   1.245 -    /** Read next character in comment, skipping over double '\' characters.
   1.246 -     */
   1.247 -    private void scanCommentChar() {
   1.248 -        scanChar();
   1.249 -        if (ch == '\\') {
   1.250 -            if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   1.251 -                bp++;
   1.252 -            } else {
   1.253 -                convertUnicode();
   1.254 -            }
   1.255 -        }
   1.256 -    }
   1.257 -
   1.258 -    /** Append a character to sbuf.
   1.259 -     */
   1.260 -    private void putChar(char ch) {
   1.261 -        if (sp == sbuf.length) {
   1.262 -            char[] newsbuf = new char[sbuf.length * 2];
   1.263 -            System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
   1.264 -            sbuf = newsbuf;
   1.265 -        }
   1.266 -        sbuf[sp++] = ch;
   1.267 -    }
   1.268 -
   1.269 -    /** Read next character in character or string literal and copy into sbuf.
   1.270 -     */
   1.271 -    private void scanLitChar() {
   1.272 -        if (ch == '\\') {
   1.273 -            if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   1.274 -                bp++;
   1.275 -                putChar('\\');
   1.276 -                scanChar();
   1.277 -            } else {
   1.278 -                scanChar();
   1.279 -                switch (ch) {
   1.280 -                case '0': case '1': case '2': case '3':
   1.281 -                case '4': case '5': case '6': case '7':
   1.282 -                    char leadch = ch;
   1.283 -                    int oct = digit(8);
   1.284 -                    scanChar();
   1.285 -                    if ('0' <= ch && ch <= '7') {
   1.286 -                        oct = oct * 8 + digit(8);
   1.287 -                        scanChar();
   1.288 -                        if (leadch <= '3' && '0' <= ch && ch <= '7') {
   1.289 -                            oct = oct * 8 + digit(8);
   1.290 -                            scanChar();
   1.291 -                        }
   1.292 -                    }
   1.293 -                    putChar((char)oct);
   1.294 -                    break;
   1.295 -                case 'b':
   1.296 -                    putChar('\b'); scanChar(); break;
   1.297 -                case 't':
   1.298 -                    putChar('\t'); scanChar(); break;
   1.299 -                case 'n':
   1.300 -                    putChar('\n'); scanChar(); break;
   1.301 -                case 'f':
   1.302 -                    putChar('\f'); scanChar(); break;
   1.303 -                case 'r':
   1.304 -                    putChar('\r'); scanChar(); break;
   1.305 -                case '\'':
   1.306 -                    putChar('\''); scanChar(); break;
   1.307 -                case '\"':
   1.308 -                    putChar('\"'); scanChar(); break;
   1.309 -                case '\\':
   1.310 -                    putChar('\\'); scanChar(); break;
   1.311 -                default:
   1.312 -                    lexError(bp, "illegal.esc.char");
   1.313 -                }
   1.314 -            }
   1.315 -        } else if (bp != buflen) {
   1.316 -            putChar(ch); scanChar();
   1.317 -        }
   1.318 -    }
   1.319 -
   1.320 -    private void scanDigits(int digitRadix) {
   1.321 -        char saveCh;
   1.322 -        int savePos;
   1.323 -        do {
   1.324 -            if (ch != '_') {
   1.325 -                putChar(ch);
   1.326 -            } else {
   1.327 -                if (!allowUnderscoresInLiterals) {
   1.328 -                    lexError("unsupported.underscore.lit", source.name);
   1.329 -                    allowUnderscoresInLiterals = true;
   1.330 -                }
   1.331 -            }
   1.332 -            saveCh = ch;
   1.333 -            savePos = bp;
   1.334 -            scanChar();
   1.335 -        } while (digit(digitRadix) >= 0 || ch == '_');
   1.336 -        if (saveCh == '_')
   1.337 -            lexError(savePos, "illegal.underscore");
   1.338 -    }
   1.339 -
   1.340 -    /** Read exponent and 'd' or 'f' suffix of hexadecimal floating point number.
   1.341 -     */
   1.342 -    private void scanHexExponentAndSuffix() {
   1.343 -        if (ch == 'p' || ch == 'P') {
   1.344 -            putChar(ch);
   1.345 -            scanChar();
   1.346 -            skipIllegalUnderscores();
   1.347 -            if (ch == '+' || ch == '-') {
   1.348 -                putChar(ch);
   1.349 -                scanChar();
   1.350 -            }
   1.351 -            skipIllegalUnderscores();
   1.352 -            if ('0' <= ch && ch <= '9') {
   1.353 -                scanDigits(10);
   1.354 -                if (!allowHexFloats) {
   1.355 -                    lexError("unsupported.fp.lit", source.name);
   1.356 -                    allowHexFloats = true;
   1.357 -                }
   1.358 -                else if (!hexFloatsWork)
   1.359 -                    lexError("unsupported.cross.fp.lit");
   1.360 -            } else
   1.361 -                lexError("malformed.fp.lit");
   1.362 -        } else {
   1.363 -            lexError("malformed.fp.lit");
   1.364 -        }
   1.365 -        if (ch == 'f' || ch == 'F') {
   1.366 -            putChar(ch);
   1.367 -            scanChar();
   1.368 -            token = FLOATLITERAL;
   1.369 -        } else {
   1.370 -            if (ch == 'd' || ch == 'D') {
   1.371 -                putChar(ch);
   1.372 -                scanChar();
   1.373 -            }
   1.374 -            token = DOUBLELITERAL;
   1.375 -        }
   1.376 -    }
   1.377 -
   1.378 -    /** Read fractional part of floating point number.
   1.379 -     */
   1.380 -    private void scanFraction() {
   1.381 -        skipIllegalUnderscores();
   1.382 -        if ('0' <= ch && ch <= '9') {
   1.383 -            scanDigits(10);
   1.384 -        }
   1.385 -        int sp1 = sp;
   1.386 -        if (ch == 'e' || ch == 'E') {
   1.387 -            putChar(ch);
   1.388 -            scanChar();
   1.389 -            skipIllegalUnderscores();
   1.390 -            if (ch == '+' || ch == '-') {
   1.391 -                putChar(ch);
   1.392 -                scanChar();
   1.393 -            }
   1.394 -            skipIllegalUnderscores();
   1.395 -            if ('0' <= ch && ch <= '9') {
   1.396 -                scanDigits(10);
   1.397 -                return;
   1.398 -            }
   1.399 -            lexError("malformed.fp.lit");
   1.400 -            sp = sp1;
   1.401 -        }
   1.402 -    }
   1.403 -
   1.404 -    /** Read fractional part and 'd' or 'f' suffix of floating point number.
   1.405 -     */
   1.406 -    private void scanFractionAndSuffix() {
   1.407 -        this.radix = 10;
   1.408 -        scanFraction();
   1.409 -        if (ch == 'f' || ch == 'F') {
   1.410 -            putChar(ch);
   1.411 -            scanChar();
   1.412 -            token = FLOATLITERAL;
   1.413 -        } else {
   1.414 -            if (ch == 'd' || ch == 'D') {
   1.415 -                putChar(ch);
   1.416 -                scanChar();
   1.417 -            }
   1.418 -            token = DOUBLELITERAL;
   1.419 -        }
   1.420 -    }
   1.421 -
   1.422 -    /** Read fractional part, exponent and 'd' or 'f' suffix of hexadecimal floating point number.
   1.423 -     */
   1.424 -    private void scanHexFractionAndSuffix(boolean seendigit) {
   1.425 -        this.radix = 16;
   1.426 -        Assert.check(ch == '.');
   1.427 -        putChar(ch);
   1.428 -        scanChar();
   1.429 -        skipIllegalUnderscores();
   1.430 -        if (digit(16) >= 0) {
   1.431 -            seendigit = true;
   1.432 -            scanDigits(16);
   1.433 -        }
   1.434 -        if (!seendigit)
   1.435 -            lexError("invalid.hex.number");
   1.436 -        else
   1.437 -            scanHexExponentAndSuffix();
   1.438 -    }
   1.439 -
   1.440 -    private void skipIllegalUnderscores() {
   1.441 -        if (ch == '_') {
   1.442 -            lexError(bp, "illegal.underscore");
   1.443 -            while (ch == '_')
   1.444 -                scanChar();
   1.445 -        }
   1.446 -    }
   1.447 -
   1.448 -    /** Read a number.
   1.449 -     *  @param radix  The radix of the number; one of 2, 8, 10, 16.
   1.450 -     */
   1.451 -    private void scanNumber(int radix) {
   1.452 -        this.radix = radix;
   1.453 -        // for octal, allow base-10 digit in case it's a float literal
   1.454 -        int digitRadix = (radix == 8 ? 10 : radix);
   1.455 -        boolean seendigit = false;
   1.456 -        if (digit(digitRadix) >= 0) {
   1.457 -            seendigit = true;
   1.458 -            scanDigits(digitRadix);
   1.459 -        }
   1.460 -        if (radix == 16 && ch == '.') {
   1.461 -            scanHexFractionAndSuffix(seendigit);
   1.462 -        } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
   1.463 -            scanHexExponentAndSuffix();
   1.464 -        } else if (digitRadix == 10 && ch == '.') {
   1.465 -            putChar(ch);
   1.466 -            scanChar();
   1.467 -            scanFractionAndSuffix();
   1.468 -        } else if (digitRadix == 10 &&
   1.469 -                   (ch == 'e' || ch == 'E' ||
   1.470 -                    ch == 'f' || ch == 'F' ||
   1.471 -                    ch == 'd' || ch == 'D')) {
   1.472 -            scanFractionAndSuffix();
   1.473 -        } else {
   1.474 -            if (ch == 'l' || ch == 'L') {
   1.475 -                scanChar();
   1.476 -                token = LONGLITERAL;
   1.477 -            } else {
   1.478 -                token = INTLITERAL;
   1.479 -            }
   1.480 -        }
   1.481 -    }
   1.482 -
   1.483 -    /** Read an identifier.
   1.484 -     */
   1.485 -    private void scanIdent() {
   1.486 -        boolean isJavaIdentifierPart;
   1.487 -        char high;
   1.488 -        do {
   1.489 -            if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
   1.490 -            // optimization, was: putChar(ch);
   1.491 -
   1.492 -            scanChar();
   1.493 -            switch (ch) {
   1.494 -            case 'A': case 'B': case 'C': case 'D': case 'E':
   1.495 -            case 'F': case 'G': case 'H': case 'I': case 'J':
   1.496 -            case 'K': case 'L': case 'M': case 'N': case 'O':
   1.497 -            case 'P': case 'Q': case 'R': case 'S': case 'T':
   1.498 -            case 'U': case 'V': case 'W': case 'X': case 'Y':
   1.499 -            case 'Z':
   1.500 -            case 'a': case 'b': case 'c': case 'd': case 'e':
   1.501 -            case 'f': case 'g': case 'h': case 'i': case 'j':
   1.502 -            case 'k': case 'l': case 'm': case 'n': case 'o':
   1.503 -            case 'p': case 'q': case 'r': case 's': case 't':
   1.504 -            case 'u': case 'v': case 'w': case 'x': case 'y':
   1.505 -            case 'z':
   1.506 -            case '$': case '_':
   1.507 -            case '0': case '1': case '2': case '3': case '4':
   1.508 -            case '5': case '6': case '7': case '8': case '9':
   1.509 -            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
   1.510 -            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
   1.511 -            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
   1.512 -            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
   1.513 -            case '\u0015': case '\u0016': case '\u0017':
   1.514 -            case '\u0018': case '\u0019': case '\u001B':
   1.515 -            case '\u007F':
   1.516 -                break;
   1.517 -            case '\u001A': // EOI is also a legal identifier part
   1.518 -                if (bp >= buflen) {
   1.519 -                    name = names.fromChars(sbuf, 0, sp);
   1.520 -                    token = keywords.key(name);
   1.521 -                    return;
   1.522 -                }
   1.523 -                break;
   1.524 -            default:
   1.525 -                if (ch < '\u0080') {
   1.526 -                    // all ASCII range chars already handled, above
   1.527 -                    isJavaIdentifierPart = false;
   1.528 -                } else {
   1.529 -                    high = scanSurrogates();
   1.530 -                    if (high != 0) {
   1.531 -                        if (sp == sbuf.length) {
   1.532 -                            putChar(high);
   1.533 -                        } else {
   1.534 -                            sbuf[sp++] = high;
   1.535 -                        }
   1.536 -                        isJavaIdentifierPart = Character.isJavaIdentifierPart(
   1.537 -                            Character.toCodePoint(high, ch));
   1.538 -                    } else {
   1.539 -                        isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
   1.540 -                    }
   1.541 -                }
   1.542 -                if (!isJavaIdentifierPart) {
   1.543 -                    name = names.fromChars(sbuf, 0, sp);
   1.544 -                    token = keywords.key(name);
   1.545 -                    return;
   1.546 -                }
   1.547 -            }
   1.548 -        } while (true);
   1.549 -    }
   1.550 -
   1.551 -    /** Are surrogates supported?
   1.552 -     */
   1.553 -    final static boolean surrogatesSupported = surrogatesSupported();
   1.554 -    private static boolean surrogatesSupported() {
   1.555 -        try {
   1.556 -            Character.isHighSurrogate('a');
   1.557 -            return true;
   1.558 -        } catch (NoSuchMethodError ex) {
   1.559 -            return false;
   1.560 -        }
   1.561 -    }
   1.562 -
   1.563 -    /** Scan surrogate pairs.  If 'ch' is a high surrogate and
   1.564 -     *  the next character is a low surrogate, then put the low
   1.565 -     *  surrogate in 'ch', and return the high surrogate.
   1.566 -     *  otherwise, just return 0.
   1.567 -     */
   1.568 -    private char scanSurrogates() {
   1.569 -        if (surrogatesSupported && Character.isHighSurrogate(ch)) {
   1.570 -            char high = ch;
   1.571 -
   1.572 -            scanChar();
   1.573 -
   1.574 -            if (Character.isLowSurrogate(ch)) {
   1.575 -                return high;
   1.576 -            }
   1.577 -
   1.578 -            ch = high;
   1.579 -        }
   1.580 -
   1.581 -        return 0;
   1.582 -    }
   1.583 -
   1.584 -    /** Return true if ch can be part of an operator.
   1.585 -     */
   1.586 -    private boolean isSpecial(char ch) {
   1.587 -        switch (ch) {
   1.588 -        case '!': case '%': case '&': case '*': case '?':
   1.589 -        case '+': case '-': case ':': case '<': case '=':
   1.590 -        case '>': case '^': case '|': case '~':
   1.591 -        case '@':
   1.592 -            return true;
   1.593 -        default:
   1.594 -            return false;
   1.595 -        }
   1.596 -    }
   1.597 -
   1.598 -    /** Read longest possible sequence of special characters and convert
   1.599 -     *  to token.
   1.600 -     */
   1.601 -    private void scanOperator() {
   1.602 -        while (true) {
   1.603 -            putChar(ch);
   1.604 -            Name newname = names.fromChars(sbuf, 0, sp);
   1.605 -            if (keywords.key(newname) == IDENTIFIER) {
   1.606 -                sp--;
   1.607 -                break;
   1.608 -            }
   1.609 -            name = newname;
   1.610 -            token = keywords.key(newname);
   1.611 -            scanChar();
   1.612 -            if (!isSpecial(ch)) break;
   1.613 -        }
   1.614 -    }
   1.615 -
   1.616 -    /**
   1.617 -     * Scan a documentation comment; determine if a deprecated tag is present.
   1.618 -     * Called once the initial /, * have been skipped, positioned at the second *
   1.619 -     * (which is treated as the beginning of the first line).
   1.620 -     * Stops positioned at the closing '/'.
   1.621 -     */
   1.622 -    @SuppressWarnings("fallthrough")
   1.623 -    private void scanDocComment() {
   1.624 -        boolean deprecatedPrefix = false;
   1.625 -
   1.626 -        forEachLine:
   1.627 -        while (bp < buflen) {
   1.628 -
   1.629 -            // Skip optional WhiteSpace at beginning of line
   1.630 -            while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   1.631 -                scanCommentChar();
   1.632 -            }
   1.633 -
   1.634 -            // Skip optional consecutive Stars
   1.635 -            while (bp < buflen && ch == '*') {
   1.636 -                scanCommentChar();
   1.637 -                if (ch == '/') {
   1.638 -                    return;
   1.639 -                }
   1.640 -            }
   1.641 -
   1.642 -            // Skip optional WhiteSpace after Stars
   1.643 -            while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   1.644 -                scanCommentChar();
   1.645 -            }
   1.646 -
   1.647 -            deprecatedPrefix = false;
   1.648 -            // At beginning of line in the JavaDoc sense.
   1.649 -            if (bp < buflen && ch == '@' && !deprecatedFlag) {
   1.650 -                scanCommentChar();
   1.651 -                if (bp < buflen && ch == 'd') {
   1.652 -                    scanCommentChar();
   1.653 -                    if (bp < buflen && ch == 'e') {
   1.654 -                        scanCommentChar();
   1.655 -                        if (bp < buflen && ch == 'p') {
   1.656 -                            scanCommentChar();
   1.657 -                            if (bp < buflen && ch == 'r') {
   1.658 -                                scanCommentChar();
   1.659 -                                if (bp < buflen && ch == 'e') {
   1.660 -                                    scanCommentChar();
   1.661 -                                    if (bp < buflen && ch == 'c') {
   1.662 -                                        scanCommentChar();
   1.663 -                                        if (bp < buflen && ch == 'a') {
   1.664 -                                            scanCommentChar();
   1.665 -                                            if (bp < buflen && ch == 't') {
   1.666 -                                                scanCommentChar();
   1.667 -                                                if (bp < buflen && ch == 'e') {
   1.668 -                                                    scanCommentChar();
   1.669 -                                                    if (bp < buflen && ch == 'd') {
   1.670 -                                                        deprecatedPrefix = true;
   1.671 -                                                        scanCommentChar();
   1.672 -                                                    }}}}}}}}}}}
   1.673 -            if (deprecatedPrefix && bp < buflen) {
   1.674 -                if (Character.isWhitespace(ch)) {
   1.675 -                    deprecatedFlag = true;
   1.676 -                } else if (ch == '*') {
   1.677 -                    scanCommentChar();
   1.678 -                    if (ch == '/') {
   1.679 -                        deprecatedFlag = true;
   1.680 -                        return;
   1.681 -                    }
   1.682 -                }
   1.683 -            }
   1.684 -
   1.685 -            // Skip rest of line
   1.686 -            while (bp < buflen) {
   1.687 -                switch (ch) {
   1.688 -                case '*':
   1.689 -                    scanCommentChar();
   1.690 -                    if (ch == '/') {
   1.691 -                        return;
   1.692 -                    }
   1.693 -                    break;
   1.694 -                case CR: // (Spec 3.4)
   1.695 -                    scanCommentChar();
   1.696 -                    if (ch != LF) {
   1.697 -                        continue forEachLine;
   1.698 -                    }
   1.699 -                    /* fall through to LF case */
   1.700 -                case LF: // (Spec 3.4)
   1.701 -                    scanCommentChar();
   1.702 -                    continue forEachLine;
   1.703 -                default:
   1.704 -                    scanCommentChar();
   1.705 -                }
   1.706 -            } // rest of line
   1.707 -        } // forEachLine
   1.708 -        return;
   1.709 -    }
   1.710 -
   1.711 -    /** The value of a literal token, recorded as a string.
   1.712 -     *  For integers, leading 0x and 'l' suffixes are suppressed.
   1.713 -     */
   1.714 -    public String stringVal() {
   1.715 -        return new String(sbuf, 0, sp);
   1.716 -    }
   1.717 -
   1.718 -    /** Read token.
   1.719 -     */
   1.720 -    public void nextToken() {
   1.721 -
   1.722 -        try {
   1.723 -            prevEndPos = endPos;
   1.724 -            sp = 0;
   1.725 -
   1.726 -            while (true) {
   1.727 -                pos = bp;
   1.728 -                switch (ch) {
   1.729 -                case ' ': // (Spec 3.6)
   1.730 -                case '\t': // (Spec 3.6)
   1.731 -                case FF: // (Spec 3.6)
   1.732 -                    do {
   1.733 -                        scanChar();
   1.734 -                    } while (ch == ' ' || ch == '\t' || ch == FF);
   1.735 -                    endPos = bp;
   1.736 -                    processWhiteSpace();
   1.737 -                    break;
   1.738 -                case LF: // (Spec 3.4)
   1.739 -                    scanChar();
   1.740 -                    endPos = bp;
   1.741 -                    processLineTerminator();
   1.742 -                    break;
   1.743 -                case CR: // (Spec 3.4)
   1.744 -                    scanChar();
   1.745 -                    if (ch == LF) {
   1.746 -                        scanChar();
   1.747 -                    }
   1.748 -                    endPos = bp;
   1.749 -                    processLineTerminator();
   1.750 -                    break;
   1.751 -                case 'A': case 'B': case 'C': case 'D': case 'E':
   1.752 -                case 'F': case 'G': case 'H': case 'I': case 'J':
   1.753 -                case 'K': case 'L': case 'M': case 'N': case 'O':
   1.754 -                case 'P': case 'Q': case 'R': case 'S': case 'T':
   1.755 -                case 'U': case 'V': case 'W': case 'X': case 'Y':
   1.756 -                case 'Z':
   1.757 -                case 'a': case 'b': case 'c': case 'd': case 'e':
   1.758 -                case 'f': case 'g': case 'h': case 'i': case 'j':
   1.759 -                case 'k': case 'l': case 'm': case 'n': case 'o':
   1.760 -                case 'p': case 'q': case 'r': case 's': case 't':
   1.761 -                case 'u': case 'v': case 'w': case 'x': case 'y':
   1.762 -                case 'z':
   1.763 -                case '$': case '_':
   1.764 -                    scanIdent();
   1.765 -                    return;
   1.766 -                case '0':
   1.767 -                    scanChar();
   1.768 -                    if (ch == 'x' || ch == 'X') {
   1.769 -                        scanChar();
   1.770 -                        skipIllegalUnderscores();
   1.771 -                        if (ch == '.') {
   1.772 -                            scanHexFractionAndSuffix(false);
   1.773 -                        } else if (digit(16) < 0) {
   1.774 -                            lexError("invalid.hex.number");
   1.775 -                        } else {
   1.776 -                            scanNumber(16);
   1.777 -                        }
   1.778 -                    } else if (ch == 'b' || ch == 'B') {
   1.779 -                        if (!allowBinaryLiterals) {
   1.780 -                            lexError("unsupported.binary.lit", source.name);
   1.781 -                            allowBinaryLiterals = true;
   1.782 -                        }
   1.783 -                        scanChar();
   1.784 -                        skipIllegalUnderscores();
   1.785 -                        if (digit(2) < 0) {
   1.786 -                            lexError("invalid.binary.number");
   1.787 -                        } else {
   1.788 -                            scanNumber(2);
   1.789 -                        }
   1.790 -                    } else {
   1.791 -                        putChar('0');
   1.792 -                        if (ch == '_') {
   1.793 -                            int savePos = bp;
   1.794 -                            do {
   1.795 -                                scanChar();
   1.796 -                            } while (ch == '_');
   1.797 -                            if (digit(10) < 0) {
   1.798 -                                lexError(savePos, "illegal.underscore");
   1.799 -                            }
   1.800 -                        }
   1.801 -                        scanNumber(8);
   1.802 -                    }
   1.803 -                    return;
   1.804 -                case '1': case '2': case '3': case '4':
   1.805 -                case '5': case '6': case '7': case '8': case '9':
   1.806 -                    scanNumber(10);
   1.807 -                    return;
   1.808 -                case '.':
   1.809 -                    scanChar();
   1.810 -                    if ('0' <= ch && ch <= '9') {
   1.811 -                        putChar('.');
   1.812 -                        scanFractionAndSuffix();
   1.813 -                    } else if (ch == '.') {
   1.814 -                        putChar('.'); putChar('.');
   1.815 -                        scanChar();
   1.816 -                        if (ch == '.') {
   1.817 -                            scanChar();
   1.818 -                            putChar('.');
   1.819 -                            token = ELLIPSIS;
   1.820 -                        } else {
   1.821 -                            lexError("malformed.fp.lit");
   1.822 -                        }
   1.823 -                    } else {
   1.824 -                        token = DOT;
   1.825 -                    }
   1.826 -                    return;
   1.827 -                case ',':
   1.828 -                    scanChar(); token = COMMA; return;
   1.829 -                case ';':
   1.830 -                    scanChar(); token = SEMI; return;
   1.831 -                case '(':
   1.832 -                    scanChar(); token = LPAREN; return;
   1.833 -                case ')':
   1.834 -                    scanChar(); token = RPAREN; return;
   1.835 -                case '[':
   1.836 -                    scanChar(); token = LBRACKET; return;
   1.837 -                case ']':
   1.838 -                    scanChar(); token = RBRACKET; return;
   1.839 -                case '{':
   1.840 -                    scanChar(); token = LBRACE; return;
   1.841 -                case '}':
   1.842 -                    scanChar(); token = RBRACE; return;
   1.843 -                case '/':
   1.844 -                    scanChar();
   1.845 -                    if (ch == '/') {
   1.846 -                        do {
   1.847 -                            scanCommentChar();
   1.848 -                        } while (ch != CR && ch != LF && bp < buflen);
   1.849 -                        if (bp < buflen) {
   1.850 -                            endPos = bp;
   1.851 -                            processComment(CommentStyle.LINE);
   1.852 -                        }
   1.853 -                        break;
   1.854 -                    } else if (ch == '*') {
   1.855 -                        scanChar();
   1.856 -                        CommentStyle style;
   1.857 -                        if (ch == '*') {
   1.858 -                            style = CommentStyle.JAVADOC;
   1.859 -                            scanDocComment();
   1.860 -                        } else {
   1.861 -                            style = CommentStyle.BLOCK;
   1.862 -                            while (bp < buflen) {
   1.863 -                                if (ch == '*') {
   1.864 -                                    scanChar();
   1.865 -                                    if (ch == '/') break;
   1.866 -                                } else {
   1.867 -                                    scanCommentChar();
   1.868 -                                }
   1.869 -                            }
   1.870 -                        }
   1.871 -                        if (ch == '/') {
   1.872 -                            scanChar();
   1.873 -                            endPos = bp;
   1.874 -                            processComment(style);
   1.875 -                            break;
   1.876 -                        } else {
   1.877 -                            lexError("unclosed.comment");
   1.878 -                            return;
   1.879 -                        }
   1.880 -                    } else if (ch == '=') {
   1.881 -                        name = names.slashequals;
   1.882 -                        token = SLASHEQ;
   1.883 -                        scanChar();
   1.884 -                    } else {
   1.885 -                        name = names.slash;
   1.886 -                        token = SLASH;
   1.887 -                    }
   1.888 -                    return;
   1.889 -                case '\'':
   1.890 -                    scanChar();
   1.891 -                    if (ch == '\'') {
   1.892 -                        lexError("empty.char.lit");
   1.893 -                    } else {
   1.894 -                        if (ch == CR || ch == LF)
   1.895 -                            lexError(pos, "illegal.line.end.in.char.lit");
   1.896 -                        scanLitChar();
   1.897 -                        if (ch == '\'') {
   1.898 -                            scanChar();
   1.899 -                            token = CHARLITERAL;
   1.900 -                        } else {
   1.901 -                            lexError(pos, "unclosed.char.lit");
   1.902 -                        }
   1.903 -                    }
   1.904 -                    return;
   1.905 -                case '\"':
   1.906 -                    scanChar();
   1.907 -                    while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
   1.908 -                        scanLitChar();
   1.909 -                    if (ch == '\"') {
   1.910 -                        token = STRINGLITERAL;
   1.911 -                        scanChar();
   1.912 -                    } else {
   1.913 -                        lexError(pos, "unclosed.str.lit");
   1.914 -                    }
   1.915 -                    return;
   1.916 -                default:
   1.917 -                    if (isSpecial(ch)) {
   1.918 -                        scanOperator();
   1.919 -                    } else {
   1.920 -                        boolean isJavaIdentifierStart;
   1.921 -                        if (ch < '\u0080') {
   1.922 -                            // all ASCII range chars already handled, above
   1.923 -                            isJavaIdentifierStart = false;
   1.924 -                        } else {
   1.925 -                            char high = scanSurrogates();
   1.926 -                            if (high != 0) {
   1.927 -                                if (sp == sbuf.length) {
   1.928 -                                    putChar(high);
   1.929 -                                } else {
   1.930 -                                    sbuf[sp++] = high;
   1.931 -                                }
   1.932 -
   1.933 -                                isJavaIdentifierStart = Character.isJavaIdentifierStart(
   1.934 -                                    Character.toCodePoint(high, ch));
   1.935 -                            } else {
   1.936 -                                isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
   1.937 -                            }
   1.938 -                        }
   1.939 -                        if (isJavaIdentifierStart) {
   1.940 -                            scanIdent();
   1.941 -                        } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
   1.942 -                            token = EOF;
   1.943 -                            pos = bp = eofPos;
   1.944 -                        } else {
   1.945 -                            lexError("illegal.char", String.valueOf((int)ch));
   1.946 -                            scanChar();
   1.947 -                        }
   1.948 -                    }
   1.949 -                    return;
   1.950 -                }
   1.951 -            }
   1.952 -        } finally {
   1.953 -            endPos = bp;
   1.954 -            if (scannerDebug)
   1.955 -                System.out.println("nextToken(" + pos
   1.956 -                                   + "," + endPos + ")=|" +
   1.957 -                                   new String(getRawCharacters(pos, endPos))
   1.958 -                                   + "|");
   1.959 -        }
   1.960 -    }
   1.961 -
   1.962 -    /** Return the current token, set by nextToken().
   1.963 -     */
   1.964      public Token token() {
   1.965          return token;
   1.966      }
   1.967  
   1.968 -    /** Sets the current token.
   1.969 -     * This method is primarily used to update the token stream when the
   1.970 -     * parser is handling the end of nested type arguments such as
   1.971 -     * {@code List<List<String>>} and needs to disambiguate between
   1.972 -     * repeated use of ">" and relation operators such as ">>" and ">>>". Noting
   1.973 -     * that this does not handle arbitrary tokens containing Unicode escape
   1.974 -     * sequences.
   1.975 -     */
   1.976 -    public void token(Token token) {
   1.977 -        pos += this.token.name.length() - token.name.length();
   1.978 -        prevEndPos = pos;
   1.979 -        this.token = token;
   1.980 +    public Token prevToken() {
   1.981 +        return prevToken;
   1.982      }
   1.983  
   1.984 -    /** Return the current token's position: a 0-based
   1.985 -     *  offset from beginning of the raw input stream
   1.986 -     *  (before unicode translation)
   1.987 -     */
   1.988 -    public int pos() {
   1.989 -        return pos;
   1.990 +    public void nextToken() {
   1.991 +        prevToken = token;
   1.992 +        token = tokenizer.readToken();
   1.993      }
   1.994  
   1.995 -    /** Return the last character position of the current token.
   1.996 -     */
   1.997 -    public int endPos() {
   1.998 -        return endPos;
   1.999 +    public Token split() {
  1.1000 +        Token[] splitTokens = token.split(tokens);
  1.1001 +        prevToken = splitTokens[0];
  1.1002 +        token = splitTokens[1];
  1.1003 +        return token;
  1.1004      }
  1.1005  
  1.1006 -    /** Return the last character position of the previous token.
  1.1007 -     */
  1.1008 -    public int prevEndPos() {
  1.1009 -        return prevEndPos;
  1.1010 +    public LineMap getLineMap() {
  1.1011 +        return tokenizer.getLineMap();
  1.1012      }
  1.1013  
  1.1014 -    /** Return the position where a lexical error occurred;
  1.1015 -     */
  1.1016      public int errPos() {
  1.1017 -        return errPos;
  1.1018 +        return tokenizer.errPos();
  1.1019      }
  1.1020  
  1.1021 -    /** Set the position where a lexical error occurred;
  1.1022 -     */
  1.1023      public void errPos(int pos) {
  1.1024 -        errPos = pos;
  1.1025 +        tokenizer.errPos(pos);
  1.1026      }
  1.1027 -
  1.1028 -    /** Return the name of an identifier or token for the current token.
  1.1029 -     */
  1.1030 -    public Name name() {
  1.1031 -        return name;
  1.1032 -    }
  1.1033 -
  1.1034 -    /** Return the radix of a numeric literal token.
  1.1035 -     */
  1.1036 -    public int radix() {
  1.1037 -        return radix;
  1.1038 -    }
  1.1039 -
  1.1040 -    /** Has a @deprecated been encountered in last doc comment?
  1.1041 -     *  This needs to be reset by client with resetDeprecatedFlag.
  1.1042 -     */
  1.1043 -    public boolean deprecatedFlag() {
  1.1044 -        return deprecatedFlag;
  1.1045 -    }
  1.1046 -
  1.1047 -    public void resetDeprecatedFlag() {
  1.1048 -        deprecatedFlag = false;
  1.1049 -    }
  1.1050 -
  1.1051 -    /**
  1.1052 -     * Returns the documentation string of the current token.
  1.1053 -     */
  1.1054 -    public String docComment() {
  1.1055 -        return null;
  1.1056 -    }
  1.1057 -
  1.1058 -    /**
  1.1059 -     * Returns a copy of the input buffer, up to its inputLength.
  1.1060 -     * Unicode escape sequences are not translated.
  1.1061 -     */
  1.1062 -    public char[] getRawCharacters() {
  1.1063 -        char[] chars = new char[buflen];
  1.1064 -        System.arraycopy(buf, 0, chars, 0, buflen);
  1.1065 -        return chars;
  1.1066 -    }
  1.1067 -
  1.1068 -    /**
  1.1069 -     * Returns a copy of a character array subset of the input buffer.
  1.1070 -     * The returned array begins at the <code>beginIndex</code> and
  1.1071 -     * extends to the character at index <code>endIndex - 1</code>.
  1.1072 -     * Thus the length of the substring is <code>endIndex-beginIndex</code>.
  1.1073 -     * This behavior is like
  1.1074 -     * <code>String.substring(beginIndex, endIndex)</code>.
  1.1075 -     * Unicode escape sequences are not translated.
  1.1076 -     *
  1.1077 -     * @param beginIndex the beginning index, inclusive.
  1.1078 -     * @param endIndex the ending index, exclusive.
  1.1079 -     * @throws IndexOutOfBounds if either offset is outside of the
  1.1080 -     *         array bounds
  1.1081 -     */
  1.1082 -    public char[] getRawCharacters(int beginIndex, int endIndex) {
  1.1083 -        int length = endIndex - beginIndex;
  1.1084 -        char[] chars = new char[length];
  1.1085 -        System.arraycopy(buf, beginIndex, chars, 0, length);
  1.1086 -        return chars;
  1.1087 -    }
  1.1088 -
  1.1089 -    public enum CommentStyle {
  1.1090 -        LINE,
  1.1091 -        BLOCK,
  1.1092 -        JAVADOC,
  1.1093 -    }
  1.1094 -
  1.1095 -    /**
  1.1096 -     * Called when a complete comment has been scanned. pos and endPos
  1.1097 -     * will mark the comment boundary.
  1.1098 -     */
  1.1099 -    protected void processComment(CommentStyle style) {
  1.1100 -        if (scannerDebug)
  1.1101 -            System.out.println("processComment(" + pos
  1.1102 -                               + "," + endPos + "," + style + ")=|"
  1.1103 -                               + new String(getRawCharacters(pos, endPos))
  1.1104 -                               + "|");
  1.1105 -    }
  1.1106 -
  1.1107 -    /**
  1.1108 -     * Called when a complete whitespace run has been scanned. pos and endPos
  1.1109 -     * will mark the whitespace boundary.
  1.1110 -     */
  1.1111 -    protected void processWhiteSpace() {
  1.1112 -        if (scannerDebug)
  1.1113 -            System.out.println("processWhitespace(" + pos
  1.1114 -                               + "," + endPos + ")=|" +
  1.1115 -                               new String(getRawCharacters(pos, endPos))
  1.1116 -                               + "|");
  1.1117 -    }
  1.1118 -
  1.1119 -    /**
  1.1120 -     * Called when a line terminator has been processed.
  1.1121 -     */
  1.1122 -    protected void processLineTerminator() {
  1.1123 -        if (scannerDebug)
  1.1124 -            System.out.println("processTerminator(" + pos
  1.1125 -                               + "," + endPos + ")=|" +
  1.1126 -                               new String(getRawCharacters(pos, endPos))
  1.1127 -                               + "|");
  1.1128 -    }
  1.1129 -
  1.1130 -    /** Build a map for translating between line numbers and
  1.1131 -     * positions in the input.
  1.1132 -     *
  1.1133 -     * @return a LineMap */
  1.1134 -    public Position.LineMap getLineMap() {
  1.1135 -        return Position.makeLineMap(buf, buflen, false);
  1.1136 -    }
  1.1137 -
  1.1138  }

mercurial