src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java

changeset 0
959103a6100f
child 2525
2eb010b6cb22
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java	Wed Apr 27 01:34:52 2016 +0800
     1.3 @@ -0,0 +1,890 @@
     1.4 +/*
     1.5 + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +package com.sun.tools.javac.parser;
    1.30 +
    1.31 +import com.sun.tools.javac.code.Source;
    1.32 +import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
    1.33 +import com.sun.tools.javac.util.*;
    1.34 +
    1.35 +import java.nio.CharBuffer;
    1.36 +
    1.37 +import static com.sun.tools.javac.parser.Tokens.*;
    1.38 +import static com.sun.tools.javac.util.LayoutCharacters.*;
    1.39 +
    1.40 +/** The lexical analyzer maps an input stream consisting of
    1.41 + *  ASCII characters and Unicode escapes into a token sequence.
    1.42 + *
    1.43 + *  <p><b>This is NOT part of any supported API.
    1.44 + *  If you write code that depends on this, you do so at your own risk.
    1.45 + *  This code and its internal interfaces are subject to change or
    1.46 + *  deletion without notice.</b>
    1.47 + */
    1.48 +public class JavaTokenizer {
    1.49 +
    1.50 +    private static final boolean scannerDebug = false;
    1.51 +
    1.52 +    /** Allow hex floating-point literals; forced to true after the first "unsupported.fp.lit" error.
    1.53 +     */
    1.54 +    private boolean allowHexFloats;
    1.55 +
    1.56 +    /** Allow binary literals; forced to true after the first "unsupported.binary.lit" error.
    1.57 +     */
    1.58 +    private boolean allowBinaryLiterals;
    1.59 +
    1.60 +    /** Allow underscores in literals; forced to true after the first "unsupported.underscore.lit" error.
    1.61 +     */
    1.62 +    private boolean allowUnderscoresInLiterals;
    1.63 +
    1.64 +    /** The source language setting.
    1.65 +     */
    1.66 +    private Source source;
    1.67 +
    1.68 +    /** The log to be used for error reporting.
    1.69 +     */
    1.70 +    private final Log log;
    1.71 +
    1.72 +    /** The token factory. */
    1.73 +    private final Tokens tokens;
    1.74 +
    1.75 +    /** The token kind, set by readToken() and its scan helpers (ERROR after lexError()).
    1.76 +     */
    1.77 +    protected TokenKind tk;
    1.78 +
    1.79 +    /** The token's radix, reset to 0 by readToken() and set when a number is scanned.
    1.80 +     */
    1.81 +    protected int radix;
    1.82 +
    1.83 +    /** The token's name, reset to null by readToken() and set when an identifier is scanned.
    1.84 +     */
    1.85 +    protected Name name;
    1.86 +
    1.87 +    /** The position where a lexical error occurred; initially Position.NOPOS, recorded by lexError().
    1.88 +     */
    1.89 +    protected int errPos = Position.NOPOS;
    1.90 +
    1.91 +    /** The Unicode reader (low-level stream reader).
    1.92 +     */
    1.93 +    protected UnicodeReader reader;
    1.94 +
    1.95 +    protected ScannerFactory fac; // the factory handed to the constructor; also the origin of log, tokens and source
    1.96 +
    1.97 +    private static final boolean hexFloatsWork = hexFloatsWork(); // probed once, when the class is initialized
    1.98 +    private static boolean hexFloatsWork() { // true if Float.valueOf accepts a hex floating-point string on this runtime
    1.99 +        try {
   1.100 +            Float.valueOf("0x1.0p1");
   1.101 +            return true;
   1.102 +        } catch (NumberFormatException ex) {
   1.103 +            return false; // parsing rejected: hex float literals are reported as "unsupported.cross.fp.lit"
   1.104 +        }
   1.105 +    }
   1.106 +
   1.107 +    /**
   1.108 +     * Create a scanner that reads {@code buf} through a new
   1.109 +     * {@code UnicodeReader}.  NOTE(review): the earlier advice on avoiding
   1.110 +     * a copy ({@code inputLength < input.length}, or
   1.111 +     * {@code input[input.length -1]} being white space) names no parameter of this overload -- confirm where it applies.
   1.112 +     *
   1.113 +     * @param fac the factory which created this Scanner
   1.114 +     * @param buf the input, might be modified
   1.115 +     *            (the orphaned "positive and less than or equal to input.length" remark presumably described inputLength -- TODO confirm)
   1.116 +     */
   1.117 +    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
   1.118 +        this(fac, new UnicodeReader(fac, buf));
   1.119 +    }
   1.120 +
   1.121 +    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) { // char[] variant: builds the UnicodeReader from buf and inputLength
   1.122 +        this(fac, new UnicodeReader(fac, buf, inputLength)); // all field setup happens in the reader-based constructor
   1.123 +    }
   1.124 +
   1.125 +    protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) { // primary constructor; the other overloads delegate here
   1.126 +        this.fac = fac;
   1.127 +        this.log = fac.log;
   1.128 +        this.tokens = fac.tokens;
   1.129 +        this.source = fac.source;
   1.130 +        this.reader = reader;
   1.131 +        this.allowBinaryLiterals = source.allowBinaryLiterals(); // literal-feature switches are taken from the configured Source
   1.132 +        this.allowHexFloats = source.allowHexFloats();
   1.133 +        this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
   1.134 +    }
   1.135 +
   1.136 +    /** Report an error at the given position using the provided arguments.
   1.137 +     *  Also marks the current token as TokenKind.ERROR and records pos in errPos. */
   1.138 +    protected void lexError(int pos, String key, Object... args) {
   1.139 +        log.error(pos, key, args);
   1.140 +        tk = TokenKind.ERROR;
   1.141 +        errPos = pos;
   1.142 +    }
   1.143 +
   1.144 +    /** Read next character in character or string literal and copy into sbuf,
   1.145 +     *  decoding octal and single-character backslash escapes ("illegal.esc.char" for any other escape). */
   1.146 +    private void scanLitChar(int pos) {
   1.147 +        if (reader.ch == '\\') {
   1.148 +            if (reader.peekChar() == '\\' && !reader.isUnicode()) { // two backslashes in a row: emit a single backslash
   1.149 +                reader.skipChar();
   1.150 +                reader.putChar('\\', true);
   1.151 +            } else {
   1.152 +                reader.scanChar();
   1.153 +                switch (reader.ch) {
   1.154 +                case '0': case '1': case '2': case '3':
   1.155 +                case '4': case '5': case '6': case '7':
   1.156 +                    char leadch = reader.ch; // first octal digit; decides below whether a third digit may follow
   1.157 +                    int oct = reader.digit(pos, 8);
   1.158 +                    reader.scanChar();
   1.159 +                    if ('0' <= reader.ch && reader.ch <= '7') {
   1.160 +                        oct = oct * 8 + reader.digit(pos, 8);
   1.161 +                        reader.scanChar();
   1.162 +                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { // three-digit form only when the lead digit is 0-3
   1.163 +                            oct = oct * 8 + reader.digit(pos, 8);
   1.164 +                            reader.scanChar();
   1.165 +                        }
   1.166 +                    }
   1.167 +                    reader.putChar((char)oct);
   1.168 +                    break;
   1.169 +                case 'b':
   1.170 +                    reader.putChar('\b', true); break;
   1.171 +                case 't':
   1.172 +                    reader.putChar('\t', true); break;
   1.173 +                case 'n':
   1.174 +                    reader.putChar('\n', true); break;
   1.175 +                case 'f':
   1.176 +                    reader.putChar('\f', true); break;
   1.177 +                case 'r':
   1.178 +                    reader.putChar('\r', true); break;
   1.179 +                case '\'':
   1.180 +                    reader.putChar('\'', true); break;
   1.181 +                case '\"':
   1.182 +                    reader.putChar('\"', true); break;
   1.183 +                case '\\':
   1.184 +                    reader.putChar('\\', true); break;
   1.185 +                default:
   1.186 +                    lexError(reader.bp, "illegal.esc.char"); // reported at the offending character, not at the literal start
   1.187 +                }
   1.188 +            }
   1.189 +        } else if (reader.bp != reader.buflen) { // ordinary character; nothing is copied once bp equals buflen
   1.190 +            reader.putChar(true);
   1.191 +        }
   1.192 +    }
   1.193 +
   1.194 +    private void scanDigits(int pos, int digitRadix) { // scan a run of digits in digitRadix, tolerating '_' between them; errors are reported via lexError
   1.195 +        char saveCh;
   1.196 +        int savePos;
   1.197 +        do {
   1.198 +            if (reader.ch != '_') {
   1.199 +                reader.putChar(false);
   1.200 +            } else { // underscores are not passed to putChar
   1.201 +                if (!allowUnderscoresInLiterals) {
   1.202 +                    lexError(pos, "unsupported.underscore.lit", source.name);
   1.203 +                    allowUnderscoresInLiterals = true; // so the unsupported-feature error is reported only once
   1.204 +                }
   1.205 +            }
   1.206 +            saveCh = reader.ch; // remember the last character consumed and where it was
   1.207 +            savePos = reader.bp;
   1.208 +            reader.scanChar();
   1.209 +        } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
   1.210 +        if (saveCh == '_') // the digit run ended on an underscore
   1.211 +            lexError(savePos, "illegal.underscore");
   1.212 +    }
   1.213 +
   1.214 +    /** Read binary exponent ('p' or 'P') and optional 'f' or 'd' suffix of hexadecimal floating point number.
   1.215 +     *  A missing exponent or missing exponent digits is reported as "malformed.fp.lit". */
   1.216 +    private void scanHexExponentAndSuffix(int pos) {
   1.217 +        if (reader.ch == 'p' || reader.ch == 'P') {
   1.218 +            reader.putChar(true);
   1.219 +            skipIllegalUnderscores();
   1.220 +            if (reader.ch == '+' || reader.ch == '-') { // optional exponent sign
   1.221 +                reader.putChar(true);
   1.222 +            }
   1.223 +            skipIllegalUnderscores();
   1.224 +            if ('0' <= reader.ch && reader.ch <= '9') { // exponent digits are decimal even though the mantissa is hex
   1.225 +                scanDigits(pos, 10);
   1.226 +                if (!allowHexFloats) {
   1.227 +                    lexError(pos, "unsupported.fp.lit", source.name);
   1.228 +                    allowHexFloats = true; // so the unsupported-feature error is reported only once
   1.229 +                }
   1.230 +                else if (!hexFloatsWork) // Float.valueOf on this runtime rejected the hex float probe string
   1.231 +                    lexError(pos, "unsupported.cross.fp.lit");
   1.232 +            } else
   1.233 +                lexError(pos, "malformed.fp.lit");
   1.234 +        } else {
   1.235 +            lexError(pos, "malformed.fp.lit");
   1.236 +        }
   1.237 +        if (reader.ch == 'f' || reader.ch == 'F') {
   1.238 +            reader.putChar(true);
   1.239 +            tk = TokenKind.FLOATLITERAL;
   1.240 +            radix = 16;
   1.241 +        } else {
   1.242 +            if (reader.ch == 'd' || reader.ch == 'D') {
   1.243 +                reader.putChar(true);
   1.244 +            }
   1.245 +            tk = TokenKind.DOUBLELITERAL; // double whenever there is no 'f'/'F' suffix; note this also replaces an ERROR kind set above
   1.246 +            radix = 16;
   1.247 +        }
   1.248 +    }
   1.249 +
   1.250 +    /** Read fractional part and optional decimal exponent ('e' or 'E') of floating point number.
   1.251 +     *  An exponent without digits is reported as "malformed.fp.lit" and reader.sp is rolled back. */
   1.252 +    private void scanFraction(int pos) {
   1.253 +        skipIllegalUnderscores();
   1.254 +        if ('0' <= reader.ch && reader.ch <= '9') {
   1.255 +            scanDigits(pos, 10);
   1.256 +        }
   1.257 +        int sp1 = reader.sp; // value of reader.sp before any exponent characters are stored
   1.258 +        if (reader.ch == 'e' || reader.ch == 'E') {
   1.259 +            reader.putChar(true);
   1.260 +            skipIllegalUnderscores();
   1.261 +            if (reader.ch == '+' || reader.ch == '-') { // optional exponent sign
   1.262 +                reader.putChar(true);
   1.263 +            }
   1.264 +            skipIllegalUnderscores();
   1.265 +            if ('0' <= reader.ch && reader.ch <= '9') {
   1.266 +                scanDigits(pos, 10);
   1.267 +                return; // well-formed exponent
   1.268 +            }
   1.269 +            lexError(pos, "malformed.fp.lit");
   1.270 +            reader.sp = sp1; // restore sp to its pre-exponent value, discarding what was stored for the bad exponent
   1.271 +        }
   1.272 +    }
   1.273 +
   1.274 +    /** Read fractional part and 'd' or 'f' suffix of decimal floating point number.
   1.275 +     *  Sets radix to 10 and tk to FLOATLITERAL or DOUBLELITERAL. */
   1.276 +    private void scanFractionAndSuffix(int pos) {
   1.277 +        radix = 10;
   1.278 +        scanFraction(pos);
   1.279 +        if (reader.ch == 'f' || reader.ch == 'F') {
   1.280 +            reader.putChar(true);
   1.281 +            tk = TokenKind.FLOATLITERAL;
   1.282 +        } else {
   1.283 +            if (reader.ch == 'd' || reader.ch == 'D') {
   1.284 +                reader.putChar(true);
   1.285 +            }
   1.286 +            tk = TokenKind.DOUBLELITERAL; // double whenever there is no 'f'/'F' suffix
   1.287 +        }
   1.288 +    }
   1.289 +
   1.290 +    /** Read '.', fractional part, exponent and 'd' or 'f' suffix of hexadecimal floating point number.
   1.291 +     *  seendigit tells whether a hex digit was already scanned before the '.'; with no digit on either side the number is invalid. */
   1.292 +    private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
   1.293 +        radix = 16;
   1.294 +        Assert.check(reader.ch == '.'); // callers only enter with the reader positioned on the '.'
   1.295 +        reader.putChar(true);
   1.296 +        skipIllegalUnderscores();
   1.297 +        if (reader.digit(pos, 16) >= 0) {
   1.298 +            seendigit = true;
   1.299 +            scanDigits(pos, 16);
   1.300 +        }
   1.301 +        if (!seendigit)
   1.302 +            lexError(pos, "invalid.hex.number");
   1.303 +        else
   1.304 +            scanHexExponentAndSuffix(pos);
   1.305 +    }
   1.306 +
   1.307 +    private void skipIllegalUnderscores() { // if positioned on '_', report "illegal.underscore" once and skip the whole run
   1.308 +        if (reader.ch == '_') {
   1.309 +            lexError(reader.bp, "illegal.underscore"); // reported at the first underscore of the run
   1.310 +            while (reader.ch == '_')
   1.311 +                reader.scanChar();
   1.312 +        }
   1.313 +    }
   1.314 +
   1.315 +    /** Read a number.
   1.316 +     *  @param radix  The radix of the number; one of 2, 8, 10, 16.
   1.317 +     */
   1.318 +    private void scanNumber(int pos, int radix) {
   1.319 +        // for octal, allow base-10 digit in case it's a float literal
   1.320 +        this.radix = radix;
   1.321 +        int digitRadix = (radix == 8 ? 10 : radix);
   1.322 +        boolean seendigit = false;
   1.323 +        if (reader.digit(pos, digitRadix) >= 0) {
   1.324 +            seendigit = true;
   1.325 +            scanDigits(pos, digitRadix);
   1.326 +        }
   1.327 +        if (radix == 16 && reader.ch == '.') { // hex digits followed by a fraction
   1.328 +            scanHexFractionAndSuffix(pos, seendigit);
   1.329 +        } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) { // hex digits directly followed by an exponent
   1.330 +            scanHexExponentAndSuffix(pos);
   1.331 +        } else if (digitRadix == 10 && reader.ch == '.') { // decimal (or leading-zero) digits followed by a fraction
   1.332 +            reader.putChar(true);
   1.333 +            scanFractionAndSuffix(pos);
   1.334 +        } else if (digitRadix == 10 &&
   1.335 +                   (reader.ch == 'e' || reader.ch == 'E' ||
   1.336 +                    reader.ch == 'f' || reader.ch == 'F' ||
   1.337 +                    reader.ch == 'd' || reader.ch == 'D')) { // no '.', but an exponent or float/double suffix follows
   1.338 +            scanFractionAndSuffix(pos);
   1.339 +        } else {
   1.340 +            if (reader.ch == 'l' || reader.ch == 'L') {
   1.341 +                reader.scanChar(); // the long suffix is consumed with scanChar, not putChar
   1.342 +                tk = TokenKind.LONGLITERAL;
   1.343 +            } else {
   1.344 +                tk = TokenKind.INTLITERAL;
   1.345 +            }
   1.346 +        }
   1.347 +    }
   1.348 +
   1.349 +    /** Read an identifier.
   1.350 +     *  On return, name holds the scanned text and tk the kind that tokens.lookupKind() gives for it. */
   1.351 +    private void scanIdent() {
   1.352 +        boolean isJavaIdentifierPart;
   1.353 +        char high;
   1.354 +        reader.putChar(true);
   1.355 +        do {
   1.356 +            switch (reader.ch) {
   1.357 +            case 'A': case 'B': case 'C': case 'D': case 'E':
   1.358 +            case 'F': case 'G': case 'H': case 'I': case 'J':
   1.359 +            case 'K': case 'L': case 'M': case 'N': case 'O':
   1.360 +            case 'P': case 'Q': case 'R': case 'S': case 'T':
   1.361 +            case 'U': case 'V': case 'W': case 'X': case 'Y':
   1.362 +            case 'Z':
   1.363 +            case 'a': case 'b': case 'c': case 'd': case 'e':
   1.364 +            case 'f': case 'g': case 'h': case 'i': case 'j':
   1.365 +            case 'k': case 'l': case 'm': case 'n': case 'o':
   1.366 +            case 'p': case 'q': case 'r': case 's': case 't':
   1.367 +            case 'u': case 'v': case 'w': case 'x': case 'y':
   1.368 +            case 'z':
   1.369 +            case '$': case '_':
   1.370 +            case '0': case '1': case '2': case '3': case '4':
   1.371 +            case '5': case '6': case '7': case '8': case '9':
   1.372 +                break; // ASCII identifier part: falls through to the putChar at the bottom of the loop
   1.373 +            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
   1.374 +            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
   1.375 +            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
   1.376 +            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
   1.377 +            case '\u0015': case '\u0016': case '\u0017':
   1.378 +            case '\u0018': case '\u0019': case '\u001B':
   1.379 +            case '\u007F':
   1.380 +                reader.scanChar(); // these control characters are skipped: the continue bypasses the putChar below
   1.381 +                continue;
   1.382 +            case '\u001A': // EOI is also a legal identifier part
   1.383 +                if (reader.bp >= reader.buflen) { // at or past buflen the identifier ends here
   1.384 +                    name = reader.name();
   1.385 +                    tk = tokens.lookupKind(name);
   1.386 +                    return;
   1.387 +                }
   1.388 +                reader.scanChar(); // inside the buffer it is skipped like the control characters above
   1.389 +                continue;
   1.390 +            default:
   1.391 +                if (reader.ch < '\u0080') {
   1.392 +                    // all ASCII range chars already handled, above
   1.393 +                    isJavaIdentifierPart = false;
   1.394 +                } else {
   1.395 +                    if (Character.isIdentifierIgnorable(reader.ch)) {
   1.396 +                        reader.scanChar(); // ignorable non-ASCII character: skipped without being stored
   1.397 +                        continue;
   1.398 +                    } else {
   1.399 +                        high = reader.scanSurrogates(); // non-zero presumably means a surrogate pair was read, with reader.ch now the low half -- TODO confirm
   1.400 +                        if (high != 0) {
   1.401 +                            reader.putChar(high);
   1.402 +                            isJavaIdentifierPart = Character.isJavaIdentifierPart(
   1.403 +                                Character.toCodePoint(high, reader.ch));
   1.404 +                        } else {
   1.405 +                            isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
   1.406 +                        }
   1.407 +                    }
   1.408 +                }
   1.409 +                if (!isJavaIdentifierPart) { // first character that cannot continue the identifier: finish the token
   1.410 +                    name = reader.name();
   1.411 +                    tk = tokens.lookupKind(name);
   1.412 +                    return;
   1.413 +                }
   1.414 +            }
   1.415 +            reader.putChar(true);
   1.416 +        } while (true);
   1.417 +    }
   1.418 +
   1.419 +    /** Return true if the given character ch can be part of an operator.
   1.420 +     */
   1.421 +    private boolean isSpecial(char ch) {
   1.422 +        switch (ch) {
   1.423 +        case '!': case '%': case '&': case '*': case '?':
   1.424 +        case '+': case '-': case ':': case '<': case '=':
   1.425 +        case '>': case '^': case '|': case '~':
   1.426 +        case '@':
   1.427 +            return true;
   1.428 +        default:
   1.429 +            return false;
   1.430 +        }
   1.431 +    }
   1.432 +
   1.433 +    /** Read longest possible sequence of special characters and convert
   1.434 +     *  to token.  Stops as soon as the text scanned so far no longer maps to a non-IDENTIFIER kind.
   1.435 +     */
   1.436 +    private void scanOperator() {
   1.437 +        while (true) {
   1.438 +            reader.putChar(false);
   1.439 +            Name newname = reader.name();
   1.440 +            TokenKind tk1 = tokens.lookupKind(newname);
   1.441 +            if (tk1 == TokenKind.IDENTIFIER) { // the extended text is not a known token kind
   1.442 +                reader.sp--; // undo the putChar above; tk keeps the kind of the shorter match
   1.443 +                break;
   1.444 +            }
   1.445 +            tk = tk1;
   1.446 +            reader.scanChar();
   1.447 +            if (!isSpecial(reader.ch)) break; // next character cannot belong to an operator
   1.448 +        }
   1.449 +    }
   1.450 +
   1.451 +    /** Read token.
   1.452 +     */
   1.453 +    public Token readToken() {
   1.454 +
   1.455 +        reader.sp = 0;
   1.456 +        name = null;
   1.457 +        radix = 0;
   1.458 +
   1.459 +        int pos = 0;
   1.460 +        int endPos = 0;
   1.461 +        List<Comment> comments = null;
   1.462 +
   1.463 +        try {
   1.464 +            loop: while (true) {
   1.465 +                pos = reader.bp;
   1.466 +                switch (reader.ch) {
   1.467 +                case ' ': // (Spec 3.6)
   1.468 +                case '\t': // (Spec 3.6)
   1.469 +                case FF: // (Spec 3.6)
   1.470 +                    do {
   1.471 +                        reader.scanChar();
   1.472 +                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
   1.473 +                    processWhiteSpace(pos, reader.bp);
   1.474 +                    break;
   1.475 +                case LF: // (Spec 3.4)
   1.476 +                    reader.scanChar();
   1.477 +                    processLineTerminator(pos, reader.bp);
   1.478 +                    break;
   1.479 +                case CR: // (Spec 3.4)
   1.480 +                    reader.scanChar();
   1.481 +                    if (reader.ch == LF) {
   1.482 +                        reader.scanChar();
   1.483 +                    }
   1.484 +                    processLineTerminator(pos, reader.bp);
   1.485 +                    break;
   1.486 +                case 'A': case 'B': case 'C': case 'D': case 'E':
   1.487 +                case 'F': case 'G': case 'H': case 'I': case 'J':
   1.488 +                case 'K': case 'L': case 'M': case 'N': case 'O':
   1.489 +                case 'P': case 'Q': case 'R': case 'S': case 'T':
   1.490 +                case 'U': case 'V': case 'W': case 'X': case 'Y':
   1.491 +                case 'Z':
   1.492 +                case 'a': case 'b': case 'c': case 'd': case 'e':
   1.493 +                case 'f': case 'g': case 'h': case 'i': case 'j':
   1.494 +                case 'k': case 'l': case 'm': case 'n': case 'o':
   1.495 +                case 'p': case 'q': case 'r': case 's': case 't':
   1.496 +                case 'u': case 'v': case 'w': case 'x': case 'y':
   1.497 +                case 'z':
   1.498 +                case '$': case '_':
   1.499 +                    scanIdent();
   1.500 +                    break loop;
   1.501 +                case '0':
   1.502 +                    reader.scanChar();
   1.503 +                    if (reader.ch == 'x' || reader.ch == 'X') {
   1.504 +                        reader.scanChar();
   1.505 +                        skipIllegalUnderscores();
   1.506 +                        if (reader.ch == '.') {
   1.507 +                            scanHexFractionAndSuffix(pos, false);
   1.508 +                        } else if (reader.digit(pos, 16) < 0) {
   1.509 +                            lexError(pos, "invalid.hex.number");
   1.510 +                        } else {
   1.511 +                            scanNumber(pos, 16);
   1.512 +                        }
   1.513 +                    } else if (reader.ch == 'b' || reader.ch == 'B') {
   1.514 +                        if (!allowBinaryLiterals) {
   1.515 +                            lexError(pos, "unsupported.binary.lit", source.name);
   1.516 +                            allowBinaryLiterals = true;
   1.517 +                        }
   1.518 +                        reader.scanChar();
   1.519 +                        skipIllegalUnderscores();
   1.520 +                        if (reader.digit(pos, 2) < 0) {
   1.521 +                            lexError(pos, "invalid.binary.number");
   1.522 +                        } else {
   1.523 +                            scanNumber(pos, 2);
   1.524 +                        }
   1.525 +                    } else {
   1.526 +                        reader.putChar('0');
   1.527 +                        if (reader.ch == '_') {
   1.528 +                            int savePos = reader.bp;
   1.529 +                            do {
   1.530 +                                reader.scanChar();
   1.531 +                            } while (reader.ch == '_');
   1.532 +                            if (reader.digit(pos, 10) < 0) {
   1.533 +                                lexError(savePos, "illegal.underscore");
   1.534 +                            }
   1.535 +                        }
   1.536 +                        scanNumber(pos, 8);
   1.537 +                    }
   1.538 +                    break loop;
   1.539 +                case '1': case '2': case '3': case '4':
   1.540 +                case '5': case '6': case '7': case '8': case '9':
   1.541 +                    scanNumber(pos, 10);
   1.542 +                    break loop;
   1.543 +                case '.':
   1.544 +                    reader.scanChar();
   1.545 +                    if ('0' <= reader.ch && reader.ch <= '9') {
   1.546 +                        reader.putChar('.');
   1.547 +                        scanFractionAndSuffix(pos);
   1.548 +                    } else if (reader.ch == '.') {
   1.549 +                        int savePos = reader.bp;
   1.550 +                        reader.putChar('.'); reader.putChar('.', true);
   1.551 +                        if (reader.ch == '.') {
   1.552 +                            reader.scanChar();
   1.553 +                            reader.putChar('.');
   1.554 +                            tk = TokenKind.ELLIPSIS;
   1.555 +                        } else {
   1.556 +                            lexError(savePos, "illegal.dot");
   1.557 +                        }
   1.558 +                    } else {
   1.559 +                        tk = TokenKind.DOT;
   1.560 +                    }
   1.561 +                    break loop;
   1.562 +                case ',':
   1.563 +                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
   1.564 +                case ';':
   1.565 +                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
   1.566 +                case '(':
   1.567 +                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
   1.568 +                case ')':
   1.569 +                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
   1.570 +                case '[':
   1.571 +                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
   1.572 +                case ']':
   1.573 +                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
   1.574 +                case '{':
   1.575 +                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
   1.576 +                case '}':
   1.577 +                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
   1.578 +                case '/':
   1.579 +                    reader.scanChar();
   1.580 +                    if (reader.ch == '/') {
   1.581 +                        do {
   1.582 +                            reader.scanCommentChar();
   1.583 +                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
   1.584 +                        if (reader.bp < reader.buflen) {
   1.585 +                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
   1.586 +                        }
   1.587 +                        break;
   1.588 +                    } else if (reader.ch == '*') {
   1.589 +                        boolean isEmpty = false;
   1.590 +                        reader.scanChar();
   1.591 +                        CommentStyle style;
   1.592 +                        if (reader.ch == '*') {
   1.593 +                            style = CommentStyle.JAVADOC;
   1.594 +                            reader.scanCommentChar();
   1.595 +                            if (reader.ch == '/') {
   1.596 +                                isEmpty = true;
   1.597 +                            }
   1.598 +                        } else {
   1.599 +                            style = CommentStyle.BLOCK;
   1.600 +                        }
   1.601 +                        while (!isEmpty && reader.bp < reader.buflen) {
   1.602 +                            if (reader.ch == '*') {
   1.603 +                                reader.scanChar();
   1.604 +                                if (reader.ch == '/') break;
   1.605 +                            } else {
   1.606 +                                reader.scanCommentChar();
   1.607 +                            }
   1.608 +                        }
   1.609 +                        if (reader.ch == '/') {
   1.610 +                            reader.scanChar();
   1.611 +                            comments = addComment(comments, processComment(pos, reader.bp, style));
   1.612 +                            break;
   1.613 +                        } else {
   1.614 +                            lexError(pos, "unclosed.comment");
   1.615 +                            break loop;
   1.616 +                        }
   1.617 +                    } else if (reader.ch == '=') {
   1.618 +                        tk = TokenKind.SLASHEQ;
   1.619 +                        reader.scanChar();
   1.620 +                    } else {
   1.621 +                        tk = TokenKind.SLASH;
   1.622 +                    }
   1.623 +                    break loop;
   1.624 +                case '\'':
   1.625 +                    reader.scanChar();
   1.626 +                    if (reader.ch == '\'') {
   1.627 +                        lexError(pos, "empty.char.lit");
   1.628 +                    } else {
   1.629 +                        if (reader.ch == CR || reader.ch == LF)
   1.630 +                            lexError(pos, "illegal.line.end.in.char.lit");
   1.631 +                        scanLitChar(pos);
   1.632 +                        char ch2 = reader.ch;
   1.633 +                        if (reader.ch == '\'') {
   1.634 +                            reader.scanChar();
   1.635 +                            tk = TokenKind.CHARLITERAL;
   1.636 +                        } else {
   1.637 +                            lexError(pos, "unclosed.char.lit");
   1.638 +                        }
   1.639 +                    }
   1.640 +                    break loop;
   1.641 +                case '\"':
   1.642 +                    reader.scanChar();
   1.643 +                    while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
   1.644 +                        scanLitChar(pos);
   1.645 +                    if (reader.ch == '\"') {
   1.646 +                        tk = TokenKind.STRINGLITERAL;
   1.647 +                        reader.scanChar();
   1.648 +                    } else {
   1.649 +                        lexError(pos, "unclosed.str.lit");
   1.650 +                    }
   1.651 +                    break loop;
   1.652 +                default:
   1.653 +                    if (isSpecial(reader.ch)) {
   1.654 +                        scanOperator();
   1.655 +                    } else {
   1.656 +                        boolean isJavaIdentifierStart;
   1.657 +                        if (reader.ch < '\u0080') {
   1.658 +                            // all ASCII range chars already handled, above
   1.659 +                            isJavaIdentifierStart = false;
   1.660 +                        } else {
   1.661 +                            char high = reader.scanSurrogates();
   1.662 +                            if (high != 0) {
   1.663 +                                reader.putChar(high);
   1.664 +
   1.665 +                                isJavaIdentifierStart = Character.isJavaIdentifierStart(
   1.666 +                                    Character.toCodePoint(high, reader.ch));
   1.667 +                            } else {
   1.668 +                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
   1.669 +                            }
   1.670 +                        }
   1.671 +                        if (isJavaIdentifierStart) {
   1.672 +                            scanIdent();
   1.673 +                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
   1.674 +                            tk = TokenKind.EOF;
   1.675 +                            pos = reader.buflen;
   1.676 +                        } else {
   1.677 +                            String arg = (32 < reader.ch && reader.ch < 127) ?
   1.678 +                                            String.format("%s", reader.ch) :
   1.679 +                                            String.format("\\u%04x", (int)reader.ch);
   1.680 +                            lexError(pos, "illegal.char", arg);
   1.681 +                            reader.scanChar();
   1.682 +                        }
   1.683 +                    }
   1.684 +                    break loop;
   1.685 +                }
   1.686 +            }
   1.687 +            endPos = reader.bp;
   1.688 +            switch (tk.tag) {
   1.689 +                case DEFAULT: return new Token(tk, pos, endPos, comments);
   1.690 +                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
   1.691 +                case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
   1.692 +                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
   1.693 +                default: throw new AssertionError();
   1.694 +            }
   1.695 +        }
   1.696 +        finally {
   1.697 +            if (scannerDebug) {
   1.698 +                    System.out.println("nextToken(" + pos
   1.699 +                                       + "," + endPos + ")=|" +
   1.700 +                                       new String(reader.getRawCharacters(pos, endPos))
   1.701 +                                       + "|");
   1.702 +            }
   1.703 +        }
   1.704 +    }
   1.705 +    //where
   1.706 +        List<Comment> addComment(List<Comment> comments, Comment comment) {
   1.707 +            return comments == null ?
   1.708 +                    List.of(comment) :
   1.709 +                    comments.prepend(comment);
   1.710 +        }
   1.711 +
   1.712 +    /** Return the position where a lexical error occurred;
   1.713 +     */
   1.714 +    public int errPos() {
   1.715 +        return errPos;
   1.716 +    }
   1.717 +
   1.718 +    /** Set the position where a lexical error occurred;
   1.719 +     */
   1.720 +    public void errPos(int pos) {
   1.721 +        errPos = pos;
   1.722 +    }
   1.723 +
   1.724 +    /**
   1.725 +     * Called when a complete comment has been scanned. pos and endPos
   1.726 +     * will mark the comment boundary.
   1.727 +     */
   1.728 +    protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
   1.729 +        if (scannerDebug)
   1.730 +            System.out.println("processComment(" + pos
   1.731 +                               + "," + endPos + "," + style + ")=|"
   1.732 +                               + new String(reader.getRawCharacters(pos, endPos))
   1.733 +                               + "|");
   1.734 +        char[] buf = reader.getRawCharacters(pos, endPos);
   1.735 +        return new BasicComment<UnicodeReader>(new UnicodeReader(fac, buf, buf.length), style);
   1.736 +    }
   1.737 +
   1.738 +    /**
   1.739 +     * Called when a complete whitespace run has been scanned. pos and endPos
   1.740 +     * will mark the whitespace boundary.
   1.741 +     */
   1.742 +    protected void processWhiteSpace(int pos, int endPos) {
   1.743 +        if (scannerDebug)
   1.744 +            System.out.println("processWhitespace(" + pos
   1.745 +                               + "," + endPos + ")=|" +
   1.746 +                               new String(reader.getRawCharacters(pos, endPos))
   1.747 +                               + "|");
   1.748 +    }
   1.749 +
   1.750 +    /**
   1.751 +     * Called when a line terminator has been processed.
   1.752 +     */
   1.753 +    protected void processLineTerminator(int pos, int endPos) {
   1.754 +        if (scannerDebug)
   1.755 +            System.out.println("processTerminator(" + pos
   1.756 +                               + "," + endPos + ")=|" +
   1.757 +                               new String(reader.getRawCharacters(pos, endPos))
   1.758 +                               + "|");
   1.759 +    }
   1.760 +
   1.761 +    /** Build a map for translating between line numbers and
   1.762 +     * positions in the input.
   1.763 +     *
   1.764 +     * @return a LineMap */
   1.765 +    public Position.LineMap getLineMap() {
   1.766 +        return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
   1.767 +    }
   1.768 +
   1.769 +
   1.770 +    /**
   1.771 +    * Scan a documentation comment; determine if a deprecated tag is present.
   1.772 +    * Called once the initial /, * have been skipped, positioned at the second *
   1.773 +    * (which is treated as the beginning of the first line).
   1.774 +    * Stops positioned at the closing '/'.
   1.775 +    */
   1.776 +    protected static class BasicComment<U extends UnicodeReader> implements Comment {
   1.777 +
   1.778 +        CommentStyle cs;
   1.779 +        U comment_reader;
   1.780 +
   1.781 +        protected boolean deprecatedFlag = false;
   1.782 +        protected boolean scanned = false;
   1.783 +
   1.784 +        protected BasicComment(U comment_reader, CommentStyle cs) {
   1.785 +            this.comment_reader = comment_reader;
   1.786 +            this.cs = cs;
   1.787 +        }
   1.788 +
   1.789 +        public String getText() {
   1.790 +            return null;
   1.791 +        }
   1.792 +
   1.793 +        public int getSourcePos(int pos) {
   1.794 +            return -1;
   1.795 +        }
   1.796 +
   1.797 +        public CommentStyle getStyle() {
   1.798 +            return cs;
   1.799 +        }
   1.800 +
   1.801 +        public boolean isDeprecated() {
   1.802 +            if (!scanned && cs == CommentStyle.JAVADOC) {
   1.803 +                scanDocComment();
   1.804 +            }
   1.805 +            return deprecatedFlag;
   1.806 +        }
   1.807 +
   1.808 +        @SuppressWarnings("fallthrough")
   1.809 +        protected void scanDocComment() {
   1.810 +            try {
   1.811 +                boolean deprecatedPrefix = false;
   1.812 +
   1.813 +                comment_reader.bp += 3; // '/**'
   1.814 +                comment_reader.ch = comment_reader.buf[comment_reader.bp];
   1.815 +
   1.816 +                forEachLine:
   1.817 +                while (comment_reader.bp < comment_reader.buflen) {
   1.818 +
   1.819 +                    // Skip optional WhiteSpace at beginning of line
   1.820 +                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
   1.821 +                        comment_reader.scanCommentChar();
   1.822 +                    }
   1.823 +
   1.824 +                    // Skip optional consecutive Stars
   1.825 +                    while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
   1.826 +                        comment_reader.scanCommentChar();
   1.827 +                        if (comment_reader.ch == '/') {
   1.828 +                            return;
   1.829 +                        }
   1.830 +                    }
   1.831 +
   1.832 +                    // Skip optional WhiteSpace after Stars
   1.833 +                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
   1.834 +                        comment_reader.scanCommentChar();
   1.835 +                    }
   1.836 +
   1.837 +                    deprecatedPrefix = false;
   1.838 +                    // At beginning of line in the JavaDoc sense.
   1.839 +                    if (!deprecatedFlag) {
   1.840 +                        String deprecated = "@deprecated";
   1.841 +                        int i = 0;
   1.842 +                        while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
   1.843 +                            comment_reader.scanCommentChar();
   1.844 +                            i++;
   1.845 +                            if (i == deprecated.length()) {
   1.846 +                                deprecatedPrefix = true;
   1.847 +                                break;
   1.848 +                            }
   1.849 +                        }
   1.850 +                    }
   1.851 +
   1.852 +                    if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
   1.853 +                        if (Character.isWhitespace(comment_reader.ch)) {
   1.854 +                            deprecatedFlag = true;
   1.855 +                        } else if (comment_reader.ch == '*') {
   1.856 +                            comment_reader.scanCommentChar();
   1.857 +                            if (comment_reader.ch == '/') {
   1.858 +                                deprecatedFlag = true;
   1.859 +                                return;
   1.860 +                            }
   1.861 +                        }
   1.862 +                    }
   1.863 +
   1.864 +                    // Skip rest of line
   1.865 +                    while (comment_reader.bp < comment_reader.buflen) {
   1.866 +                        switch (comment_reader.ch) {
   1.867 +                            case '*':
   1.868 +                                comment_reader.scanCommentChar();
   1.869 +                                if (comment_reader.ch == '/') {
   1.870 +                                    return;
   1.871 +                                }
   1.872 +                                break;
   1.873 +                            case CR: // (Spec 3.4)
   1.874 +                                comment_reader.scanCommentChar();
   1.875 +                                if (comment_reader.ch != LF) {
   1.876 +                                    continue forEachLine;
   1.877 +                                }
   1.878 +                            /* fall through to LF case */
   1.879 +                            case LF: // (Spec 3.4)
   1.880 +                                comment_reader.scanCommentChar();
   1.881 +                                continue forEachLine;
   1.882 +                            default:
   1.883 +                                comment_reader.scanCommentChar();
   1.884 +                        }
   1.885 +                    } // rest of line
   1.886 +                } // forEachLine
   1.887 +                return;
   1.888 +            } finally {
   1.889 +                scanned = true;
   1.890 +            }
   1.891 +        }
   1.892 +    }
   1.893 +}

mercurial