/*
 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.tools.javac.parser;

import com.sun.tools.javac.code.Source;
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
import com.sun.tools.javac.util.*;

import java.nio.CharBuffer;

import static com.sun.tools.javac.parser.Tokens.*;
import static com.sun.tools.javac.util.LayoutCharacters.*;

/** The lexical analyzer maps an input stream consisting of
 *  ASCII characters and Unicode escapes into a token sequence.
 *
 *  <p><b>This is NOT part of any supported API.
 *  If you write code that depends on this, you do so at your own risk.
 *  This code and its internal interfaces are subject to change or
 *  deletion without notice.</b>
 */
public class JavaTokenizer {

    private static final boolean scannerDebug = false;

    /** Allow hex floating-point literals.
     */
    private boolean allowHexFloats;

    /** Allow binary literals.
     */
    private boolean allowBinaryLiterals;

    /** Allow underscores in literals.
     */
    private boolean allowUnderscoresInLiterals;

    /** The source language setting.
     */
    private Source source;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The token factory. */
    private final Tokens tokens;

    /** The token kind, set by nextToken().
     */
    protected TokenKind tk;

    /** The token's radix, set by nextToken().
     */
    protected int radix;

    /** The token's name, set by nextToken().
     */
    protected Name name;

    /** The position where a lexical error occurred.
     */
    protected int errPos = Position.NOPOS;

    /** The Unicode reader (low-level stream reader).
     */
    protected UnicodeReader reader;

    /** The factory which created this tokenizer.
     */
    protected ScannerFactory fac;

    /** Whether the running platform's {@code Float.valueOf} accepts
     *  hexadecimal floating-point strings; probed once at class load.
     */
    private static final boolean hexFloatsWork = hexFloatsWork();

    /** Probe {@code Float.valueOf} with a hexadecimal floating-point string.
     *
     *  @return true if the string parsed, false if a
     *          {@code NumberFormatException} was thrown
     */
    private static boolean hexFloatsWork() {
        try {
            Float.valueOf("0x1.0p1");
            return true;
        } catch (NumberFormatException ex) {
            return false;
        }
    }

    /**
     * Create a tokenizer reading from the given character buffer.
     * The buffer is wrapped in a new {@code UnicodeReader}.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input
     */
    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
        this(fac, new UnicodeReader(fac, buf));
    }

    /**
     * Create a scanner from the input array. This method might
     * modify the array. To avoid copying the input array, ensure
     * that {@code inputLength < buf.length} or
     * {@code buf[buf.length - 1]} is a white space character.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     * @param inputLength the size of the input.
     *        Must be positive and less than or equal to buf.length.
     */
    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
        this(fac, new UnicodeReader(fac, buf, inputLength));
    }

    /**
     * Create a tokenizer on top of an existing reader. The log, token
     * factory, source level and the language-feature switches are all
     * taken from the factory.
     *
     * @param fac the factory which created this Scanner
     * @param reader the low-level character reader to consume
     */
    protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
        this.fac = fac;
        this.log = fac.log;
        this.tokens = fac.tokens;
        this.source = fac.source;
        this.reader = reader;
        this.allowBinaryLiterals = source.allowBinaryLiterals();
        this.allowHexFloats = source.allowHexFloats();
        this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
    }

    /** Report an error at the given position using the provided arguments.
     *  Also sets the current token kind to ERROR and records the position
     *  in {@link #errPos}.
     */
    protected void lexError(int pos, String key, Object... args) {
        log.error(pos, key, args);
        tk = TokenKind.ERROR;
        errPos = pos;
    }

    /** Read next character in character or string literal and copy into sbuf.
     *  Backslash escapes (octal escapes and the single-letter escapes) are
     *  translated to the character they denote; an unknown escape is
     *  reported as "illegal.esc.char".
     *
     *  @param pos position of the enclosing literal, passed to digit checks
     */
    private void scanLitChar(int pos) {
        if (reader.ch == '\\') {
            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
                reader.skipChar();
                reader.putChar('\\', true);
            } else {
                reader.scanChar();
                switch (reader.ch) {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    // Octal escape: up to three digits, the third one only
                    // when the leading digit is 0..3.
                    char leadch = reader.ch;
                    int oct = reader.digit(pos, 8);
                    reader.scanChar();
                    if ('0' <= reader.ch && reader.ch <= '7') {
                        oct = oct * 8 + reader.digit(pos, 8);
                        reader.scanChar();
                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
                            oct = oct * 8 + reader.digit(pos, 8);
                            reader.scanChar();
                        }
                    }
                    reader.putChar((char)oct);
                    break;
                case 'b':
                    reader.putChar('\b', true); break;
                case 't':
                    reader.putChar('\t', true); break;
                case 'n':
                    reader.putChar('\n', true); break;
                case 'f':
                    reader.putChar('\f', true); break;
                case 'r':
                    reader.putChar('\r', true); break;
                case '\'':
                    reader.putChar('\'', true); break;
                case '\"':
                    reader.putChar('\"', true); break;
                case '\\':
                    reader.putChar('\\', true); break;
                default:
                    lexError(reader.bp, "illegal.esc.char");
                }
            }
        } else if (reader.bp != reader.buflen) {
            reader.putChar(true);
        }
    }

    /** Read a run of digits and underscores, copying the digits (but not
     *  the underscores) into sbuf. Must be called with the reader positioned
     *  on the first character of the run. An underscore is reported as
     *  unsupported if the source level does not allow it (once only, the
     *  switch is then turned on), and a trailing underscore is reported as
     *  "illegal.underscore".
     *
     *  @param pos        position of the enclosing literal
     *  @param digitRadix radix used to decide what counts as a digit
     */
    private void scanDigits(int pos, int digitRadix) {
        char saveCh;
        int savePos;
        do {
            if (reader.ch != '_') {
                reader.putChar(false);
            } else {
                if (!allowUnderscoresInLiterals) {
                    lexError(pos, "unsupported.underscore.lit", source.name);
                    allowUnderscoresInLiterals = true;
                }
            }
            saveCh = reader.ch;
            savePos = reader.bp;
            reader.scanChar();
        } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
        if (saveCh == '_') {
            lexError(savePos, "illegal.underscore");
        }
    }

    /** Read the binary exponent ('p' or 'P', optional sign, decimal digits)
     *  and the optional 'f' or 'd' suffix of a hexadecimal floating point
     *  number. A missing exponent or an exponent without digits is reported
     *  as "malformed.fp.lit". Sets the token kind to FLOATLITERAL or
     *  DOUBLELITERAL and the radix to 16.
     */
    private void scanHexExponentAndSuffix(int pos) {
        if (reader.ch == 'p' || reader.ch == 'P') {
            reader.putChar(true);
            skipIllegalUnderscores();
            if (reader.ch == '+' || reader.ch == '-') {
                reader.putChar(true);
            }
            skipIllegalUnderscores();
            if ('0' <= reader.ch && reader.ch <= '9') {
                scanDigits(pos, 10);
                if (!allowHexFloats) {
                    lexError(pos, "unsupported.fp.lit", source.name);
                    // report the unsupported feature only once
                    allowHexFloats = true;
                } else if (!hexFloatsWork) {
                    lexError(pos, "unsupported.cross.fp.lit");
                }
            } else {
                lexError(pos, "malformed.fp.lit");
            }
        } else {
            lexError(pos, "malformed.fp.lit");
        }
        if (reader.ch == 'f' || reader.ch == 'F') {
            reader.putChar(true);
            tk = TokenKind.FLOATLITERAL;
            radix = 16;
        } else {
            if (reader.ch == 'd' || reader.ch == 'D') {
                reader.putChar(true);
            }
            tk = TokenKind.DOUBLELITERAL;
            radix = 16;
        }
    }

    /** Read fractional part of floating point number: optional decimal
     *  digits followed by an optional exponent ('e' or 'E', optional sign,
     *  digits). If the exponent has no digits, "malformed.fp.lit" is
     *  reported and the sbuf length is reset to what it was before the
     *  exponent.
     */
    private void scanFraction(int pos) {
        skipIllegalUnderscores();
        if ('0' <= reader.ch && reader.ch <= '9') {
            scanDigits(pos, 10);
        }
        int sp1 = reader.sp;
        if (reader.ch == 'e' || reader.ch == 'E') {
            reader.putChar(true);
            skipIllegalUnderscores();
            if (reader.ch == '+' || reader.ch == '-') {
                reader.putChar(true);
            }
            skipIllegalUnderscores();
            if ('0' <= reader.ch && reader.ch <= '9') {
                scanDigits(pos, 10);
                return;
            }
            lexError(pos, "malformed.fp.lit");
            reader.sp = sp1;
        }
    }

    /** Read fractional part and 'd' or 'f' suffix of floating point number.
     *  Sets the radix to 10 and the token kind to FLOATLITERAL or
     *  DOUBLELITERAL.
     */
    private void scanFractionAndSuffix(int pos) {
        radix = 10;
        scanFraction(pos);
        if (reader.ch == 'f' || reader.ch == 'F') {
            reader.putChar(true);
            tk = TokenKind.FLOATLITERAL;
        } else {
            if (reader.ch == 'd' || reader.ch == 'D') {
                reader.putChar(true);
            }
            tk = TokenKind.DOUBLELITERAL;
        }
    }

    /** Read fractional part, exponent and 'd' or 'f' suffix of a
     *  hexadecimal floating point number. Must be called with the reader
     *  positioned on the '.'.
     *
     *  @param seendigit whether a hex digit was already seen before the '.';
     *                   if no digit is seen on either side of the '.',
     *                   "invalid.hex.number" is reported
     */
    private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
        radix = 16;
        Assert.check(reader.ch == '.');
        reader.putChar(true);
        skipIllegalUnderscores();
        if (reader.digit(pos, 16) >= 0) {
            seendigit = true;
            scanDigits(pos, 16);
        }
        if (!seendigit) {
            lexError(pos, "invalid.hex.number");
        } else {
            scanHexExponentAndSuffix(pos);
        }
    }

    /** If the reader is positioned on an underscore, report
     *  "illegal.underscore" once and skip the whole run of underscores.
     */
    private void skipIllegalUnderscores() {
        if (reader.ch == '_') {
            lexError(reader.bp, "illegal.underscore");
            while (reader.ch == '_') {
                reader.scanChar();
            }
        }
    }

    /** Read a number.
     *  @param radix  The radix of the number; one of 2, 8, 10, 16.
     */
    private void scanNumber(int pos, int radix) {
        // for octal, allow base-10 digit in case it's a float literal
        this.radix = radix;
        int digitRadix = (radix == 8 ? 10 : radix);
        boolean seendigit = false;
        if (reader.digit(pos, digitRadix) >= 0) {
            seendigit = true;
            scanDigits(pos, digitRadix);
        }
        if (radix == 16 && reader.ch == '.') {
            scanHexFractionAndSuffix(pos, seendigit);
        } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
            scanHexExponentAndSuffix(pos);
        } else if (digitRadix == 10 && reader.ch == '.') {
            reader.putChar(true);
            scanFractionAndSuffix(pos);
        } else if (digitRadix == 10 &&
                   (reader.ch == 'e' || reader.ch == 'E' ||
                    reader.ch == 'f' || reader.ch == 'F' ||
                    reader.ch == 'd' || reader.ch == 'D')) {
            scanFractionAndSuffix(pos);
        } else {
            if (reader.ch == 'l' || reader.ch == 'L') {
                // the suffix is consumed but not copied into sbuf
                reader.scanChar();
                tk = TokenKind.LONGLITERAL;
            } else {
                tk = TokenKind.INTLITERAL;
            }
        }
    }

    /** Read an identifier. On return, {@link #name} holds the characters
     *  collected in sbuf and {@link #tk} the token kind looked up for that
     *  name.
     */
    private void scanIdent() {
        boolean isJavaIdentifierPart;
        char high;
        reader.putChar(true);
        do {
            switch (reader.ch) {
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '$': case '_':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                break;
            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
            case '\u0015': case '\u0016': case '\u0017':
            case '\u0018': case '\u0019': case '\u001B':
            case '\u007F':
                // these control characters are skipped without being
                // copied into the identifier
                reader.scanChar();
                continue;
            case '\u001A': // EOI is also a legal identifier part
                if (reader.bp >= reader.buflen) {
                    name = reader.name();
                    tk = tokens.lookupKind(name);
                    return;
                }
                reader.scanChar();
                continue;
            default:
                if (reader.ch < '\u0080') {
                    // all ASCII range chars already handled, above
                    isJavaIdentifierPart = false;
                } else {
                    if (Character.isIdentifierIgnorable(reader.ch)) {
                        reader.scanChar();
                        continue;
                    } else {
                        high = reader.scanSurrogates();
                        if (high != 0) {
                            reader.putChar(high);
                            isJavaIdentifierPart = Character.isJavaIdentifierPart(
                                Character.toCodePoint(high, reader.ch));
                        } else {
                            isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
                        }
                    }
                }
                if (!isJavaIdentifierPart) {
                    name = reader.name();
                    tk = tokens.lookupKind(name);
                    return;
                }
            }
            reader.putChar(true);
        } while (true);
    }

    /** Return true if ch can be part of an operator.
     */
    private boolean isSpecial(char ch) {
        switch (ch) {
        case '!': case '%': case '&': case '*': case '?':
        case '+': case '-': case ':': case '<': case '=':
        case '>': case '^': case '|': case '~':
        case '@':
            return true;
        default:
            return false;
        }
    }

    /** Read longest possible sequence of special characters and convert
     *  to token.
     */
    private void scanOperator() {
        while (true) {
            reader.putChar(false);
            Name newname = reader.name();
            TokenKind tk1 = tokens.lookupKind(newname);
            if (tk1 == TokenKind.IDENTIFIER) {
                // the extended sequence is not a known operator:
                // drop the character just added and stop
                reader.sp--;
                break;
            }
            tk = tk1;
            reader.scanChar();
            if (!isSpecial(reader.ch)) {
                break;
            }
        }
    }

    /** Read token. White space, line terminators and comments in front of
     *  the token are consumed first; they are passed to
     *  {@link #processWhiteSpace}, {@link #processLineTerminator} and
     *  {@link #processComment} respectively, and the comments are attached
     *  to the returned token.
     *
     *  @return the token that was read; its concrete class is selected by
     *          the tag of the token kind
     */
    public Token readToken() {

        reader.sp = 0;
        name = null;
        radix = 0;

        int pos = 0;
        int endPos = 0;
        List<Comment> comments = null;

        try {
            loop: while (true) {
                pos = reader.bp;
                switch (reader.ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    do {
                        reader.scanChar();
                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                    processWhiteSpace(pos, reader.bp);
                    break;
                case LF: // (Spec 3.4)
                    reader.scanChar();
                    processLineTerminator(pos, reader.bp);
                    break;
                case CR: // (Spec 3.4)
                    reader.scanChar();
                    if (reader.ch == LF) {
                        reader.scanChar();
                    }
                    processLineTerminator(pos, reader.bp);
                    break;
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    scanIdent();
                    break loop;
                case '0':
                    reader.scanChar();
                    if (reader.ch == 'x' || reader.ch == 'X') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        if (reader.ch == '.') {
                            scanHexFractionAndSuffix(pos, false);
                        } else if (reader.digit(pos, 16) < 0) {
                            lexError(pos, "invalid.hex.number");
                        } else {
                            scanNumber(pos, 16);
                        }
                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        if (!allowBinaryLiterals) {
                            lexError(pos, "unsupported.binary.lit", source.name);
                            // report the unsupported feature only once
                            allowBinaryLiterals = true;
                        }
                        reader.scanChar();
                        skipIllegalUnderscores();
                        if (reader.digit(pos, 2) < 0) {
                            lexError(pos, "invalid.binary.number");
                        } else {
                            scanNumber(pos, 2);
                        }
                    } else {
                        reader.putChar('0');
                        if (reader.ch == '_') {
                            int savePos = reader.bp;
                            do {
                                reader.scanChar();
                            } while (reader.ch == '_');
                            if (reader.digit(pos, 10) < 0) {
                                lexError(savePos, "illegal.underscore");
                            }
                        }
                        scanNumber(pos, 8);
                    }
                    break loop;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(pos, 10);
                    break loop;
                case '.':
                    reader.scanChar();
                    if ('0' <= reader.ch && reader.ch <= '9') {
                        reader.putChar('.');
                        scanFractionAndSuffix(pos);
                    } else if (reader.ch == '.') {
                        int savePos = reader.bp;
                        reader.putChar('.'); reader.putChar('.', true);
                        if (reader.ch == '.') {
                            reader.scanChar();
                            reader.putChar('.');
                            tk = TokenKind.ELLIPSIS;
                        } else {
                            lexError(savePos, "illegal.dot");
                        }
                    } else {
                        tk = TokenKind.DOT;
                    }
                    break loop;
                case ',':
                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
                case ';':
                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
                case '(':
                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
                case ')':
                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
                case '[':
                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
                case ']':
                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
                case '{':
                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
                case '}':
                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
                case '/':
                    reader.scanChar();
                    if (reader.ch == '/') {
                        do {
                            reader.scanCommentChar();
                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        if (reader.bp < reader.buflen) {
                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                        }
                        break;
                    } else if (reader.ch == '*') {
                        boolean isEmpty = false;
                        reader.scanChar();
                        CommentStyle style;
                        if (reader.ch == '*') {
                            style = CommentStyle.JAVADOC;
                            reader.scanCommentChar();
                            if (reader.ch == '/') {
                                isEmpty = true;
                            }
                        } else {
                            style = CommentStyle.BLOCK;
                        }
                        while (!isEmpty && reader.bp < reader.buflen) {
                            if (reader.ch == '*') {
                                reader.scanChar();
                                if (reader.ch == '/') {
                                    break;
                                }
                            } else {
                                reader.scanCommentChar();
                            }
                        }
                        if (reader.ch == '/') {
                            reader.scanChar();
                            comments = addComment(comments, processComment(pos, reader.bp, style));
                            break;
                        } else {
                            lexError(pos, "unclosed.comment");
                            break loop;
                        }
                    } else if (reader.ch == '=') {
                        tk = TokenKind.SLASHEQ;
                        reader.scanChar();
                    } else {
                        tk = TokenKind.SLASH;
                    }
                    break loop;
                case '\'':
                    reader.scanChar();
                    if (reader.ch == '\'') {
                        lexError(pos, "empty.char.lit");
                    } else {
                        if (reader.ch == CR || reader.ch == LF) {
                            lexError(pos, "illegal.line.end.in.char.lit");
                        }
                        scanLitChar(pos);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
                        } else {
                            lexError(pos, "unclosed.char.lit");
                        }
                    }
                    break loop;
                case '\"':
                    reader.scanChar();
                    while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen) {
                        scanLitChar(pos);
                    }
                    if (reader.ch == '\"') {
                        tk = TokenKind.STRINGLITERAL;
                        reader.scanChar();
                    } else {
                        lexError(pos, "unclosed.str.lit");
                    }
                    break loop;
                default:
                    if (isSpecial(reader.ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        if (reader.ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            char high = reader.scanSurrogates();
                            if (high != 0) {
                                reader.putChar(high);

                                isJavaIdentifierStart = Character.isJavaIdentifierStart(
                                    Character.toCodePoint(high, reader.ch));
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            tk = TokenKind.EOF;
                            pos = reader.buflen;
                        } else {
                            String arg = (32 < reader.ch && reader.ch < 127) ?
                                            String.format("%s", reader.ch) :
                                            String.format("\\u%04x", (int)reader.ch);
                            lexError(pos, "illegal.char", arg);
                            reader.scanChar();
                        }
                    }
                    break loop;
                }
            }
            endPos = reader.bp;
            switch (tk.tag) {
                case DEFAULT: return new Token(tk, pos, endPos, comments);
                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
                case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                default: throw new AssertionError();
            }
        }
        finally {
            if (scannerDebug) {
                System.out.println("nextToken(" + pos
                                   + "," + endPos + ")=|" +
                                   new String(reader.getRawCharacters(pos, endPos))
                                   + "|");
            }
        }
    }
    //where
        /** Add a comment to the list collected for the next token. The new
         *  comment is prepended; a null list is replaced by a one-element
         *  list.
         */
        List<Comment> addComment(List<Comment> comments, Comment comment) {
            return comments == null ?
                    List.of(comment) :
                    comments.prepend(comment);
        }

    /** Return the position where a lexical error occurred.
     */
    public int errPos() {
        return errPos;
    }

    /** Set the position where a lexical error occurred.
     */
    public void errPos(int pos) {
        errPos = pos;
    }

    /**
     * Called when a complete comment has been scanned. pos and endPos
     * will mark the comment boundary.
     *
     * @return a {@link BasicComment} reading from a copy of the raw
     *         characters between pos and endPos
     */
    protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
        if (scannerDebug)
            System.out.println("processComment(" + pos
                               + "," + endPos + "," + style + ")=|"
                               + new String(reader.getRawCharacters(pos, endPos))
                               + "|");
        char[] buf = reader.getRawCharacters(pos, endPos);
        return new BasicComment<UnicodeReader>(new UnicodeReader(fac, buf, buf.length), style);
    }

    /**
     * Called when a complete whitespace run has been scanned. pos and endPos
     * will mark the whitespace boundary.
     */
    protected void processWhiteSpace(int pos, int endPos) {
        if (scannerDebug)
            System.out.println("processWhitespace(" + pos
                               + "," + endPos + ")=|" +
                               new String(reader.getRawCharacters(pos, endPos))
                               + "|");
    }

    /**
     * Called when a line terminator has been processed.
     */
    protected void processLineTerminator(int pos, int endPos) {
        if (scannerDebug)
            System.out.println("processTerminator(" + pos
                               + "," + endPos + ")=|" +
                               new String(reader.getRawCharacters(pos, endPos))
                               + "|");
    }

    /** Build a map for translating between line numbers and
     * positions in the input.
     *
     * @return a LineMap */
    public Position.LineMap getLineMap() {
        return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
    }


    /**
     * A basic comment implementation backed by a reader over the comment's
     * characters. It records the comment style and, for comments of style
     * JAVADOC, determines on the first call to {@link #isDeprecated()}
     * whether a deprecated tag is present. This implementation returns
     * null from {@link #getText()} and -1 from {@link #getSourcePos(int)}.
     */
    protected static class BasicComment<U extends UnicodeReader> implements Comment {

        /** The style of this comment. */
        CommentStyle cs;

        /** The reader over the characters of this comment. */
        U comment_reader;

        /** Set by {@link #scanDocComment()} when a deprecated tag was found. */
        protected boolean deprecatedFlag = false;

        /** Set once {@link #scanDocComment()} has run. */
        protected boolean scanned = false;

        protected BasicComment(U comment_reader, CommentStyle cs) {
            this.comment_reader = comment_reader;
            this.cs = cs;
        }

        public String getText() {
            return null;
        }

        public int getSourcePos(int pos) {
            return -1;
        }

        public CommentStyle getStyle() {
            return cs;
        }

        /** Return whether this comment carries a deprecated tag. The comment
         *  is scanned at most once, and only if its style is JAVADOC.
         */
        public boolean isDeprecated() {
            if (!scanned && cs == CommentStyle.JAVADOC) {
                scanDocComment();
            }
            return deprecatedFlag;
        }

        /**
         * Scan a documentation comment; determine if a deprecated tag is
         * present. The three characters of the opening delimiter are skipped
         * first. Then, line by line, leading white space, stars and white
         * space after the stars are skipped, and the line is checked for
         * "@deprecated" followed by white space or by the closing delimiter.
         * Scanning stops at the closing delimiter or at the end of the
         * buffer; in every case {@link #scanned} is set on exit.
         */
        @SuppressWarnings("fallthrough")
        protected void scanDocComment() {
            try {
                boolean deprecatedPrefix = false;

                comment_reader.bp += 3; // '/**'
                comment_reader.ch = comment_reader.buf[comment_reader.bp];

                forEachLine:
                while (comment_reader.bp < comment_reader.buflen) {

                    // Skip optional WhiteSpace at beginning of line
                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
                        comment_reader.scanCommentChar();
                    }

                    // Skip optional consecutive Stars
                    while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
                        comment_reader.scanCommentChar();
                        if (comment_reader.ch == '/') {
                            return;
                        }
                    }

                    // Skip optional WhiteSpace after Stars
                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
                        comment_reader.scanCommentChar();
                    }

                    deprecatedPrefix = false;
                    // At beginning of line in the JavaDoc sense.
                    if (!deprecatedFlag) {
                        String deprecated = "@deprecated";
                        int i = 0;
                        while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
                            comment_reader.scanCommentChar();
                            i++;
                            if (i == deprecated.length()) {
                                deprecatedPrefix = true;
                                break;
                            }
                        }
                    }

                    if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
                        if (Character.isWhitespace(comment_reader.ch)) {
                            deprecatedFlag = true;
                        } else if (comment_reader.ch == '*') {
                            comment_reader.scanCommentChar();
                            if (comment_reader.ch == '/') {
                                deprecatedFlag = true;
                                return;
                            }
                        }
                    }

                    // Skip rest of line
                    while (comment_reader.bp < comment_reader.buflen) {
                        switch (comment_reader.ch) {
                            case '*':
                                comment_reader.scanCommentChar();
                                if (comment_reader.ch == '/') {
                                    return;
                                }
                                break;
                            case CR: // (Spec 3.4)
                                comment_reader.scanCommentChar();
                                if (comment_reader.ch != LF) {
                                    continue forEachLine;
                                }
                            /* fall through to LF case */
                            case LF: // (Spec 3.4)
                                comment_reader.scanCommentChar();
                                continue forEachLine;
                            default:
                                comment_reader.scanCommentChar();
                        }
                    } // rest of line
                } // forEachLine
                return;
            } finally {
                scanned = true;
            }
        }
    }
}