src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java

Thu, 31 Aug 2017 15:17:03 +0800

author
aoqi
date
Thu, 31 Aug 2017 15:17:03 +0800
changeset 2525
2eb010b6cb22
parent 1679
b402b93cbe38
parent 0
959103a6100f
permissions
-rw-r--r--

merge

aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation. Oracle designates this
aoqi@0 8 * particular file as subject to the "Classpath" exception as provided
aoqi@0 9 * by Oracle in the LICENSE file that accompanied this code.
aoqi@0 10 *
aoqi@0 11 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 14 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 15 * accompanied this code).
aoqi@0 16 *
aoqi@0 17 * You should have received a copy of the GNU General Public License version
aoqi@0 18 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 20 *
aoqi@0 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 22 * or visit www.oracle.com if you need additional information or have any
aoqi@0 23 * questions.
aoqi@0 24 */
aoqi@0 25
aoqi@0 26 package com.sun.tools.javac.parser;
aoqi@0 27
aoqi@0 28 import com.sun.tools.javac.code.Source;
aoqi@0 29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
aoqi@0 30 import com.sun.tools.javac.util.*;
aoqi@0 31
aoqi@0 32 import java.nio.CharBuffer;
aoqi@0 33
aoqi@0 34 import static com.sun.tools.javac.parser.Tokens.*;
aoqi@0 35 import static com.sun.tools.javac.util.LayoutCharacters.*;
aoqi@0 36
aoqi@0 37 /** The lexical analyzer maps an input stream consisting of
aoqi@0 38 * ASCII characters and Unicode escapes into a token sequence.
aoqi@0 39 *
aoqi@0 40 * <p><b>This is NOT part of any supported API.
aoqi@0 41 * If you write code that depends on this, you do so at your own risk.
aoqi@0 42 * This code and its internal interfaces are subject to change or
aoqi@0 43 * deletion without notice.</b>
aoqi@0 44 */
aoqi@0 45 public class JavaTokenizer {
aoqi@0 46
aoqi@0 47 private static final boolean scannerDebug = false; // when true, the scanning hooks print trace lines to System.out
aoqi@0 48
aoqi@0 49 /** Allow hex floating-point literals. Initialized from the source level;
 * forced to true after the first "unsupported.fp.lit" error has been
 * reported (presumably so the error is reported only once).
aoqi@0 50 */
aoqi@0 51 private boolean allowHexFloats;
aoqi@0 52
aoqi@0 53 /** Allow binary literals. Initialized from the source level; forced to
 * true after the first "unsupported.binary.lit" error has been reported.
aoqi@0 54 */
aoqi@0 55 private boolean allowBinaryLiterals;
aoqi@0 56
aoqi@0 57 /** Allow underscores in literals. Initialized from the source level;
 * forced to true after the first "unsupported.underscore.lit" error has
 * been reported.
aoqi@0 58 */
aoqi@0 59 private boolean allowUnderscoresInLiterals;
aoqi@0 60
aoqi@0 61 /** The source language setting, taken from the scanner factory.
aoqi@0 62 */
aoqi@0 63 private Source source;
aoqi@0 64
aoqi@0 65 /** The log to be used for error reporting.
aoqi@0 66 */
aoqi@0 67 private final Log log;
aoqi@0 68
aoqi@0 69 /** The token factory. */
aoqi@0 70 private final Tokens tokens;
aoqi@0 71
aoqi@0 72 /** The token kind, set by readToken() (and set to ERROR by lexError()).
aoqi@0 73 */
aoqi@0 74 protected TokenKind tk;
aoqi@0 75
aoqi@0 76 /** The token's radix, set by readToken(); 0 for non-numeric tokens.
aoqi@0 77 */
aoqi@0 78 protected int radix;
aoqi@0 79
aoqi@0 80 /** The token's name, set by readToken(); null unless an identifier was scanned.
aoqi@0 81 */
aoqi@0 82 protected Name name;
aoqi@0 83
aoqi@0 84 /** The position where the most recent lexical error occurred, or
 * Position.NOPOS if none has been recorded.
aoqi@0 85 */
aoqi@0 86 protected int errPos = Position.NOPOS;
aoqi@0 87
aoqi@0 88 /** The Unicode reader (low-level stream reader).
aoqi@0 89 */
aoqi@0 90 protected UnicodeReader reader;
aoqi@0 91
/** The factory that created this tokenizer; also handed to the readers built for comments. */
aoqi@0 92 protected ScannerFactory fac;
aoqi@0 93
aoqi@0 94 private static final boolean hexFloatsWork = hexFloatsWork();
aoqi@0 95 private static boolean hexFloatsWork() {
aoqi@0 96 try {
aoqi@0 97 Float.valueOf("0x1.0p1");
aoqi@0 98 return true;
aoqi@0 99 } catch (NumberFormatException ex) {
aoqi@0 100 return false;
aoqi@0 101 }
aoqi@0 102 }
aoqi@0 103
aoqi@0 104 /**
aoqi@0 105 * Create a tokenizer that reads from a character buffer by wrapping it
aoqi@0 106 * in a new {@link UnicodeReader} and delegating to the reader-based
aoqi@0 107 * constructor. According to the original note the input might be
aoqi@0 108 * modified.
aoqi@0 109 *
aoqi@0 110 * @param fac the factory which created this Scanner
aoqi@0 111 * @param buf the input, might be modified
aoqi@0 112 *
aoqi@0 113 */
aoqi@0 114 protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
aoqi@0 115 this(fac, new UnicodeReader(fac, buf));
aoqi@0 116 }
aoqi@0 117
/**
 * Create a tokenizer from the input array. This method might
 * modify the array. To avoid copying the input array, ensure
 * that {@code inputLength < buf.length} or
 * {@code buf[buf.length - 1]} is a white space character
 * (copying behaviour lives in UnicodeReader and is not visible here).
 *
 * @param fac the factory which created this Scanner
 * @param buf the input, might be modified
 * @param inputLength the number of valid characters in {@code buf};
 *        must be positive and less than or equal to {@code buf.length}
 */
aoqi@0 118 protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
aoqi@0 119 this(fac, new UnicodeReader(fac, buf, inputLength));
aoqi@0 120 }
aoqi@0 121
/**
 * Create a tokenizer on top of an already constructed reader. The log,
 * token factory and source level are taken from the factory, and the
 * literal-feature switches are initialized from that source level.
 *
 * @param fac    the factory which created this tokenizer
 * @param reader the low-level reader supplying the characters
 */
protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
    this.fac = fac;
    this.reader = reader;

    this.log = fac.log;
    this.tokens = fac.tokens;
    this.source = fac.source;

    // Feature switches derived from the language level.
    this.allowBinaryLiterals = this.source.allowBinaryLiterals();
    this.allowHexFloats = this.source.allowHexFloats();
    this.allowUnderscoresInLiterals = this.source.allowUnderscoresInLiterals();
}
aoqi@0 132
aoqi@0 133 /** Report an error at the given position using the provided arguments.
aoqi@0 134 */
aoqi@0 135 protected void lexError(int pos, String key, Object... args) {
aoqi@0 136 log.error(pos, key, args);
aoqi@0 137 tk = TokenKind.ERROR;
aoqi@0 138 errPos = pos;
aoqi@0 139 }
aoqi@0 140
aoqi@0 141 /** Read next character in character or string literal and copy into sbuf.
aoqi@0 142 */
aoqi@0 143 private void scanLitChar(int pos) {
aoqi@0 144 if (reader.ch == '\\') {
aoqi@0 145 if (reader.peekChar() == '\\' && !reader.isUnicode()) {
aoqi@0 146 reader.skipChar();
aoqi@0 147 reader.putChar('\\', true);
aoqi@0 148 } else {
aoqi@0 149 reader.scanChar();
aoqi@0 150 switch (reader.ch) {
aoqi@0 151 case '0': case '1': case '2': case '3':
aoqi@0 152 case '4': case '5': case '6': case '7':
aoqi@0 153 char leadch = reader.ch;
aoqi@0 154 int oct = reader.digit(pos, 8);
aoqi@0 155 reader.scanChar();
aoqi@0 156 if ('0' <= reader.ch && reader.ch <= '7') {
aoqi@0 157 oct = oct * 8 + reader.digit(pos, 8);
aoqi@0 158 reader.scanChar();
aoqi@0 159 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
aoqi@0 160 oct = oct * 8 + reader.digit(pos, 8);
aoqi@0 161 reader.scanChar();
aoqi@0 162 }
aoqi@0 163 }
aoqi@0 164 reader.putChar((char)oct);
aoqi@0 165 break;
aoqi@0 166 case 'b':
aoqi@0 167 reader.putChar('\b', true); break;
aoqi@0 168 case 't':
aoqi@0 169 reader.putChar('\t', true); break;
aoqi@0 170 case 'n':
aoqi@0 171 reader.putChar('\n', true); break;
aoqi@0 172 case 'f':
aoqi@0 173 reader.putChar('\f', true); break;
aoqi@0 174 case 'r':
aoqi@0 175 reader.putChar('\r', true); break;
aoqi@0 176 case '\'':
aoqi@0 177 reader.putChar('\'', true); break;
aoqi@0 178 case '\"':
aoqi@0 179 reader.putChar('\"', true); break;
aoqi@0 180 case '\\':
aoqi@0 181 reader.putChar('\\', true); break;
aoqi@0 182 default:
aoqi@0 183 lexError(reader.bp, "illegal.esc.char");
aoqi@0 184 }
aoqi@0 185 }
aoqi@0 186 } else if (reader.bp != reader.buflen) {
aoqi@0 187 reader.putChar(true);
aoqi@0 188 }
aoqi@0 189 }
aoqi@0 190
aoqi@0 191 private void scanDigits(int pos, int digitRadix) {
aoqi@0 192 char saveCh;
aoqi@0 193 int savePos;
aoqi@0 194 do {
aoqi@0 195 if (reader.ch != '_') {
aoqi@0 196 reader.putChar(false);
aoqi@0 197 } else {
aoqi@0 198 if (!allowUnderscoresInLiterals) {
aoqi@0 199 lexError(pos, "unsupported.underscore.lit", source.name);
aoqi@0 200 allowUnderscoresInLiterals = true;
aoqi@0 201 }
aoqi@0 202 }
aoqi@0 203 saveCh = reader.ch;
aoqi@0 204 savePos = reader.bp;
aoqi@0 205 reader.scanChar();
aoqi@0 206 } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
aoqi@0 207 if (saveCh == '_')
aoqi@0 208 lexError(savePos, "illegal.underscore");
aoqi@0 209 }
aoqi@0 210
aoqi@0 211 /** Read fractional part of hexadecimal floating point number.
aoqi@0 212 */
aoqi@0 213 private void scanHexExponentAndSuffix(int pos) {
aoqi@0 214 if (reader.ch == 'p' || reader.ch == 'P') {
aoqi@0 215 reader.putChar(true);
aoqi@0 216 skipIllegalUnderscores();
aoqi@0 217 if (reader.ch == '+' || reader.ch == '-') {
aoqi@0 218 reader.putChar(true);
aoqi@0 219 }
aoqi@0 220 skipIllegalUnderscores();
aoqi@0 221 if ('0' <= reader.ch && reader.ch <= '9') {
aoqi@0 222 scanDigits(pos, 10);
aoqi@0 223 if (!allowHexFloats) {
aoqi@0 224 lexError(pos, "unsupported.fp.lit", source.name);
aoqi@0 225 allowHexFloats = true;
aoqi@0 226 }
aoqi@0 227 else if (!hexFloatsWork)
aoqi@0 228 lexError(pos, "unsupported.cross.fp.lit");
aoqi@0 229 } else
aoqi@0 230 lexError(pos, "malformed.fp.lit");
aoqi@0 231 } else {
aoqi@0 232 lexError(pos, "malformed.fp.lit");
aoqi@0 233 }
aoqi@0 234 if (reader.ch == 'f' || reader.ch == 'F') {
aoqi@0 235 reader.putChar(true);
aoqi@0 236 tk = TokenKind.FLOATLITERAL;
aoqi@0 237 radix = 16;
aoqi@0 238 } else {
aoqi@0 239 if (reader.ch == 'd' || reader.ch == 'D') {
aoqi@0 240 reader.putChar(true);
aoqi@0 241 }
aoqi@0 242 tk = TokenKind.DOUBLELITERAL;
aoqi@0 243 radix = 16;
aoqi@0 244 }
aoqi@0 245 }
aoqi@0 246
aoqi@0 247 /** Read fractional part of floating point number.
aoqi@0 248 */
aoqi@0 249 private void scanFraction(int pos) {
aoqi@0 250 skipIllegalUnderscores();
aoqi@0 251 if ('0' <= reader.ch && reader.ch <= '9') {
aoqi@0 252 scanDigits(pos, 10);
aoqi@0 253 }
aoqi@0 254 int sp1 = reader.sp;
aoqi@0 255 if (reader.ch == 'e' || reader.ch == 'E') {
aoqi@0 256 reader.putChar(true);
aoqi@0 257 skipIllegalUnderscores();
aoqi@0 258 if (reader.ch == '+' || reader.ch == '-') {
aoqi@0 259 reader.putChar(true);
aoqi@0 260 }
aoqi@0 261 skipIllegalUnderscores();
aoqi@0 262 if ('0' <= reader.ch && reader.ch <= '9') {
aoqi@0 263 scanDigits(pos, 10);
aoqi@0 264 return;
aoqi@0 265 }
aoqi@0 266 lexError(pos, "malformed.fp.lit");
aoqi@0 267 reader.sp = sp1;
aoqi@0 268 }
aoqi@0 269 }
aoqi@0 270
aoqi@0 271 /** Read fractional part and 'd' or 'f' suffix of floating point number.
aoqi@0 272 */
aoqi@0 273 private void scanFractionAndSuffix(int pos) {
aoqi@0 274 radix = 10;
aoqi@0 275 scanFraction(pos);
aoqi@0 276 if (reader.ch == 'f' || reader.ch == 'F') {
aoqi@0 277 reader.putChar(true);
aoqi@0 278 tk = TokenKind.FLOATLITERAL;
aoqi@0 279 } else {
aoqi@0 280 if (reader.ch == 'd' || reader.ch == 'D') {
aoqi@0 281 reader.putChar(true);
aoqi@0 282 }
aoqi@0 283 tk = TokenKind.DOUBLELITERAL;
aoqi@0 284 }
aoqi@0 285 }
aoqi@0 286
aoqi@0 287 /** Read fractional part and 'd' or 'f' suffix of floating point number.
aoqi@0 288 */
aoqi@0 289 private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
aoqi@0 290 radix = 16;
aoqi@0 291 Assert.check(reader.ch == '.');
aoqi@0 292 reader.putChar(true);
aoqi@0 293 skipIllegalUnderscores();
aoqi@0 294 if (reader.digit(pos, 16) >= 0) {
aoqi@0 295 seendigit = true;
aoqi@0 296 scanDigits(pos, 16);
aoqi@0 297 }
aoqi@0 298 if (!seendigit)
aoqi@0 299 lexError(pos, "invalid.hex.number");
aoqi@0 300 else
aoqi@0 301 scanHexExponentAndSuffix(pos);
aoqi@0 302 }
aoqi@0 303
aoqi@0 304 private void skipIllegalUnderscores() {
aoqi@0 305 if (reader.ch == '_') {
aoqi@0 306 lexError(reader.bp, "illegal.underscore");
aoqi@0 307 while (reader.ch == '_')
aoqi@0 308 reader.scanChar();
aoqi@0 309 }
aoqi@0 310 }
aoqi@0 311
aoqi@0 312 /** Read a number.
aoqi@0 313 * @param radix The radix of the number; one of 2, j8, 10, 16.
aoqi@0 314 */
aoqi@0 315 private void scanNumber(int pos, int radix) {
aoqi@0 316 // for octal, allow base-10 digit in case it's a float literal
aoqi@0 317 this.radix = radix;
aoqi@0 318 int digitRadix = (radix == 8 ? 10 : radix);
aoqi@0 319 boolean seendigit = false;
aoqi@0 320 if (reader.digit(pos, digitRadix) >= 0) {
aoqi@0 321 seendigit = true;
aoqi@0 322 scanDigits(pos, digitRadix);
aoqi@0 323 }
aoqi@0 324 if (radix == 16 && reader.ch == '.') {
aoqi@0 325 scanHexFractionAndSuffix(pos, seendigit);
aoqi@0 326 } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
aoqi@0 327 scanHexExponentAndSuffix(pos);
aoqi@0 328 } else if (digitRadix == 10 && reader.ch == '.') {
aoqi@0 329 reader.putChar(true);
aoqi@0 330 scanFractionAndSuffix(pos);
aoqi@0 331 } else if (digitRadix == 10 &&
aoqi@0 332 (reader.ch == 'e' || reader.ch == 'E' ||
aoqi@0 333 reader.ch == 'f' || reader.ch == 'F' ||
aoqi@0 334 reader.ch == 'd' || reader.ch == 'D')) {
aoqi@0 335 scanFractionAndSuffix(pos);
aoqi@0 336 } else {
aoqi@0 337 if (reader.ch == 'l' || reader.ch == 'L') {
aoqi@0 338 reader.scanChar();
aoqi@0 339 tk = TokenKind.LONGLITERAL;
aoqi@0 340 } else {
aoqi@0 341 tk = TokenKind.INTLITERAL;
aoqi@0 342 }
aoqi@0 343 }
aoqi@0 344 }
aoqi@0 345
aoqi@0 346 /** Read an identifier.
aoqi@0 347 */
aoqi@0 348 private void scanIdent() {
aoqi@0 349 boolean isJavaIdentifierPart;
aoqi@0 350 char high;
aoqi@0 351 reader.putChar(true);
aoqi@0 352 do {
aoqi@0 353 switch (reader.ch) {
aoqi@0 354 case 'A': case 'B': case 'C': case 'D': case 'E':
aoqi@0 355 case 'F': case 'G': case 'H': case 'I': case 'J':
aoqi@0 356 case 'K': case 'L': case 'M': case 'N': case 'O':
aoqi@0 357 case 'P': case 'Q': case 'R': case 'S': case 'T':
aoqi@0 358 case 'U': case 'V': case 'W': case 'X': case 'Y':
aoqi@0 359 case 'Z':
aoqi@0 360 case 'a': case 'b': case 'c': case 'd': case 'e':
aoqi@0 361 case 'f': case 'g': case 'h': case 'i': case 'j':
aoqi@0 362 case 'k': case 'l': case 'm': case 'n': case 'o':
aoqi@0 363 case 'p': case 'q': case 'r': case 's': case 't':
aoqi@0 364 case 'u': case 'v': case 'w': case 'x': case 'y':
aoqi@0 365 case 'z':
aoqi@0 366 case '$': case '_':
aoqi@0 367 case '0': case '1': case '2': case '3': case '4':
aoqi@0 368 case '5': case '6': case '7': case '8': case '9':
aoqi@0 369 break;
aoqi@0 370 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
aoqi@0 371 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
aoqi@0 372 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
aoqi@0 373 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
aoqi@0 374 case '\u0015': case '\u0016': case '\u0017':
aoqi@0 375 case '\u0018': case '\u0019': case '\u001B':
aoqi@0 376 case '\u007F':
aoqi@0 377 reader.scanChar();
aoqi@0 378 continue;
aoqi@0 379 case '\u001A': // EOI is also a legal identifier part
aoqi@0 380 if (reader.bp >= reader.buflen) {
aoqi@0 381 name = reader.name();
aoqi@0 382 tk = tokens.lookupKind(name);
aoqi@0 383 return;
aoqi@0 384 }
aoqi@0 385 reader.scanChar();
aoqi@0 386 continue;
aoqi@0 387 default:
aoqi@0 388 if (reader.ch < '\u0080') {
aoqi@0 389 // all ASCII range chars already handled, above
aoqi@0 390 isJavaIdentifierPart = false;
aoqi@0 391 } else {
aoqi@0 392 if (Character.isIdentifierIgnorable(reader.ch)) {
aoqi@0 393 reader.scanChar();
aoqi@0 394 continue;
aoqi@0 395 } else {
aoqi@0 396 high = reader.scanSurrogates();
aoqi@0 397 if (high != 0) {
aoqi@0 398 reader.putChar(high);
aoqi@0 399 isJavaIdentifierPart = Character.isJavaIdentifierPart(
aoqi@0 400 Character.toCodePoint(high, reader.ch));
aoqi@0 401 } else {
aoqi@0 402 isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
aoqi@0 403 }
aoqi@0 404 }
aoqi@0 405 }
aoqi@0 406 if (!isJavaIdentifierPart) {
aoqi@0 407 name = reader.name();
aoqi@0 408 tk = tokens.lookupKind(name);
aoqi@0 409 return;
aoqi@0 410 }
aoqi@0 411 }
aoqi@0 412 reader.putChar(true);
aoqi@0 413 } while (true);
aoqi@0 414 }
aoqi@0 415
aoqi@0 416 /** Return true if reader.ch can be part of an operator.
aoqi@0 417 */
aoqi@0 418 private boolean isSpecial(char ch) {
aoqi@0 419 switch (ch) {
aoqi@0 420 case '!': case '%': case '&': case '*': case '?':
aoqi@0 421 case '+': case '-': case ':': case '<': case '=':
aoqi@0 422 case '>': case '^': case '|': case '~':
aoqi@0 423 case '@':
aoqi@0 424 return true;
aoqi@0 425 default:
aoqi@0 426 return false;
aoqi@0 427 }
aoqi@0 428 }
aoqi@0 429
aoqi@0 430 /** Read longest possible sequence of special characters and convert
aoqi@0 431 * to token.
aoqi@0 432 */
aoqi@0 433 private void scanOperator() {
aoqi@0 434 while (true) {
aoqi@0 435 reader.putChar(false);
aoqi@0 436 Name newname = reader.name();
aoqi@0 437 TokenKind tk1 = tokens.lookupKind(newname);
aoqi@0 438 if (tk1 == TokenKind.IDENTIFIER) {
aoqi@0 439 reader.sp--;
aoqi@0 440 break;
aoqi@0 441 }
aoqi@0 442 tk = tk1;
aoqi@0 443 reader.scanChar();
aoqi@0 444 if (!isSpecial(reader.ch)) break;
aoqi@0 445 }
aoqi@0 446 }
aoqi@0 447
aoqi@0 448 /** Read token.
aoqi@0 449 */
aoqi@0 450 public Token readToken() {
aoqi@0 451
aoqi@0 452 reader.sp = 0;
aoqi@0 453 name = null;
aoqi@0 454 radix = 0;
aoqi@0 455
aoqi@0 456 int pos = 0;
aoqi@0 457 int endPos = 0;
aoqi@0 458 List<Comment> comments = null;
aoqi@0 459
aoqi@0 460 try {
aoqi@0 461 loop: while (true) {
aoqi@0 462 pos = reader.bp;
aoqi@0 463 switch (reader.ch) {
aoqi@0 464 case ' ': // (Spec 3.6)
aoqi@0 465 case '\t': // (Spec 3.6)
aoqi@0 466 case FF: // (Spec 3.6)
aoqi@0 467 do {
aoqi@0 468 reader.scanChar();
aoqi@0 469 } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
aoqi@0 470 processWhiteSpace(pos, reader.bp);
aoqi@0 471 break;
aoqi@0 472 case LF: // (Spec 3.4)
aoqi@0 473 reader.scanChar();
aoqi@0 474 processLineTerminator(pos, reader.bp);
aoqi@0 475 break;
aoqi@0 476 case CR: // (Spec 3.4)
aoqi@0 477 reader.scanChar();
aoqi@0 478 if (reader.ch == LF) {
aoqi@0 479 reader.scanChar();
aoqi@0 480 }
aoqi@0 481 processLineTerminator(pos, reader.bp);
aoqi@0 482 break;
aoqi@0 483 case 'A': case 'B': case 'C': case 'D': case 'E':
aoqi@0 484 case 'F': case 'G': case 'H': case 'I': case 'J':
aoqi@0 485 case 'K': case 'L': case 'M': case 'N': case 'O':
aoqi@0 486 case 'P': case 'Q': case 'R': case 'S': case 'T':
aoqi@0 487 case 'U': case 'V': case 'W': case 'X': case 'Y':
aoqi@0 488 case 'Z':
aoqi@0 489 case 'a': case 'b': case 'c': case 'd': case 'e':
aoqi@0 490 case 'f': case 'g': case 'h': case 'i': case 'j':
aoqi@0 491 case 'k': case 'l': case 'm': case 'n': case 'o':
aoqi@0 492 case 'p': case 'q': case 'r': case 's': case 't':
aoqi@0 493 case 'u': case 'v': case 'w': case 'x': case 'y':
aoqi@0 494 case 'z':
aoqi@0 495 case '$': case '_':
aoqi@0 496 scanIdent();
aoqi@0 497 break loop;
aoqi@0 498 case '0':
aoqi@0 499 reader.scanChar();
aoqi@0 500 if (reader.ch == 'x' || reader.ch == 'X') {
aoqi@0 501 reader.scanChar();
aoqi@0 502 skipIllegalUnderscores();
aoqi@0 503 if (reader.ch == '.') {
aoqi@0 504 scanHexFractionAndSuffix(pos, false);
aoqi@0 505 } else if (reader.digit(pos, 16) < 0) {
aoqi@0 506 lexError(pos, "invalid.hex.number");
aoqi@0 507 } else {
aoqi@0 508 scanNumber(pos, 16);
aoqi@0 509 }
aoqi@0 510 } else if (reader.ch == 'b' || reader.ch == 'B') {
aoqi@0 511 if (!allowBinaryLiterals) {
aoqi@0 512 lexError(pos, "unsupported.binary.lit", source.name);
aoqi@0 513 allowBinaryLiterals = true;
aoqi@0 514 }
aoqi@0 515 reader.scanChar();
aoqi@0 516 skipIllegalUnderscores();
aoqi@0 517 if (reader.digit(pos, 2) < 0) {
aoqi@0 518 lexError(pos, "invalid.binary.number");
aoqi@0 519 } else {
aoqi@0 520 scanNumber(pos, 2);
aoqi@0 521 }
aoqi@0 522 } else {
aoqi@0 523 reader.putChar('0');
aoqi@0 524 if (reader.ch == '_') {
aoqi@0 525 int savePos = reader.bp;
aoqi@0 526 do {
aoqi@0 527 reader.scanChar();
aoqi@0 528 } while (reader.ch == '_');
aoqi@0 529 if (reader.digit(pos, 10) < 0) {
aoqi@0 530 lexError(savePos, "illegal.underscore");
aoqi@0 531 }
aoqi@0 532 }
aoqi@0 533 scanNumber(pos, 8);
aoqi@0 534 }
aoqi@0 535 break loop;
aoqi@0 536 case '1': case '2': case '3': case '4':
aoqi@0 537 case '5': case '6': case '7': case '8': case '9':
aoqi@0 538 scanNumber(pos, 10);
aoqi@0 539 break loop;
aoqi@0 540 case '.':
aoqi@0 541 reader.scanChar();
aoqi@0 542 if ('0' <= reader.ch && reader.ch <= '9') {
aoqi@0 543 reader.putChar('.');
aoqi@0 544 scanFractionAndSuffix(pos);
aoqi@0 545 } else if (reader.ch == '.') {
aoqi@0 546 int savePos = reader.bp;
aoqi@0 547 reader.putChar('.'); reader.putChar('.', true);
aoqi@0 548 if (reader.ch == '.') {
aoqi@0 549 reader.scanChar();
aoqi@0 550 reader.putChar('.');
aoqi@0 551 tk = TokenKind.ELLIPSIS;
aoqi@0 552 } else {
aoqi@0 553 lexError(savePos, "illegal.dot");
aoqi@0 554 }
aoqi@0 555 } else {
aoqi@0 556 tk = TokenKind.DOT;
aoqi@0 557 }
aoqi@0 558 break loop;
aoqi@0 559 case ',':
aoqi@0 560 reader.scanChar(); tk = TokenKind.COMMA; break loop;
aoqi@0 561 case ';':
aoqi@0 562 reader.scanChar(); tk = TokenKind.SEMI; break loop;
aoqi@0 563 case '(':
aoqi@0 564 reader.scanChar(); tk = TokenKind.LPAREN; break loop;
aoqi@0 565 case ')':
aoqi@0 566 reader.scanChar(); tk = TokenKind.RPAREN; break loop;
aoqi@0 567 case '[':
aoqi@0 568 reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
aoqi@0 569 case ']':
aoqi@0 570 reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
aoqi@0 571 case '{':
aoqi@0 572 reader.scanChar(); tk = TokenKind.LBRACE; break loop;
aoqi@0 573 case '}':
aoqi@0 574 reader.scanChar(); tk = TokenKind.RBRACE; break loop;
aoqi@0 575 case '/':
aoqi@0 576 reader.scanChar();
aoqi@0 577 if (reader.ch == '/') {
aoqi@0 578 do {
aoqi@0 579 reader.scanCommentChar();
aoqi@0 580 } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
aoqi@0 581 if (reader.bp < reader.buflen) {
aoqi@0 582 comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
aoqi@0 583 }
aoqi@0 584 break;
aoqi@0 585 } else if (reader.ch == '*') {
aoqi@0 586 boolean isEmpty = false;
aoqi@0 587 reader.scanChar();
aoqi@0 588 CommentStyle style;
aoqi@0 589 if (reader.ch == '*') {
aoqi@0 590 style = CommentStyle.JAVADOC;
aoqi@0 591 reader.scanCommentChar();
aoqi@0 592 if (reader.ch == '/') {
aoqi@0 593 isEmpty = true;
aoqi@0 594 }
aoqi@0 595 } else {
aoqi@0 596 style = CommentStyle.BLOCK;
aoqi@0 597 }
aoqi@0 598 while (!isEmpty && reader.bp < reader.buflen) {
aoqi@0 599 if (reader.ch == '*') {
aoqi@0 600 reader.scanChar();
aoqi@0 601 if (reader.ch == '/') break;
aoqi@0 602 } else {
aoqi@0 603 reader.scanCommentChar();
aoqi@0 604 }
aoqi@0 605 }
aoqi@0 606 if (reader.ch == '/') {
aoqi@0 607 reader.scanChar();
aoqi@0 608 comments = addComment(comments, processComment(pos, reader.bp, style));
aoqi@0 609 break;
aoqi@0 610 } else {
aoqi@0 611 lexError(pos, "unclosed.comment");
aoqi@0 612 break loop;
aoqi@0 613 }
aoqi@0 614 } else if (reader.ch == '=') {
aoqi@0 615 tk = TokenKind.SLASHEQ;
aoqi@0 616 reader.scanChar();
aoqi@0 617 } else {
aoqi@0 618 tk = TokenKind.SLASH;
aoqi@0 619 }
aoqi@0 620 break loop;
aoqi@0 621 case '\'':
aoqi@0 622 reader.scanChar();
aoqi@0 623 if (reader.ch == '\'') {
aoqi@0 624 lexError(pos, "empty.char.lit");
aoqi@0 625 } else {
aoqi@0 626 if (reader.ch == CR || reader.ch == LF)
aoqi@0 627 lexError(pos, "illegal.line.end.in.char.lit");
aoqi@0 628 scanLitChar(pos);
aoqi@0 629 char ch2 = reader.ch;
aoqi@0 630 if (reader.ch == '\'') {
aoqi@0 631 reader.scanChar();
aoqi@0 632 tk = TokenKind.CHARLITERAL;
aoqi@0 633 } else {
aoqi@0 634 lexError(pos, "unclosed.char.lit");
aoqi@0 635 }
aoqi@0 636 }
aoqi@0 637 break loop;
aoqi@0 638 case '\"':
aoqi@0 639 reader.scanChar();
aoqi@0 640 while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
aoqi@0 641 scanLitChar(pos);
aoqi@0 642 if (reader.ch == '\"') {
aoqi@0 643 tk = TokenKind.STRINGLITERAL;
aoqi@0 644 reader.scanChar();
aoqi@0 645 } else {
aoqi@0 646 lexError(pos, "unclosed.str.lit");
aoqi@0 647 }
aoqi@0 648 break loop;
aoqi@0 649 default:
aoqi@0 650 if (isSpecial(reader.ch)) {
aoqi@0 651 scanOperator();
aoqi@0 652 } else {
aoqi@0 653 boolean isJavaIdentifierStart;
aoqi@0 654 if (reader.ch < '\u0080') {
aoqi@0 655 // all ASCII range chars already handled, above
aoqi@0 656 isJavaIdentifierStart = false;
aoqi@0 657 } else {
aoqi@0 658 char high = reader.scanSurrogates();
aoqi@0 659 if (high != 0) {
aoqi@0 660 reader.putChar(high);
aoqi@0 661
aoqi@0 662 isJavaIdentifierStart = Character.isJavaIdentifierStart(
aoqi@0 663 Character.toCodePoint(high, reader.ch));
aoqi@0 664 } else {
aoqi@0 665 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
aoqi@0 666 }
aoqi@0 667 }
aoqi@0 668 if (isJavaIdentifierStart) {
aoqi@0 669 scanIdent();
aoqi@0 670 } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
aoqi@0 671 tk = TokenKind.EOF;
aoqi@0 672 pos = reader.buflen;
aoqi@0 673 } else {
aoqi@0 674 String arg = (32 < reader.ch && reader.ch < 127) ?
aoqi@0 675 String.format("%s", reader.ch) :
aoqi@0 676 String.format("\\u%04x", (int)reader.ch);
aoqi@0 677 lexError(pos, "illegal.char", arg);
aoqi@0 678 reader.scanChar();
aoqi@0 679 }
aoqi@0 680 }
aoqi@0 681 break loop;
aoqi@0 682 }
aoqi@0 683 }
aoqi@0 684 endPos = reader.bp;
aoqi@0 685 switch (tk.tag) {
aoqi@0 686 case DEFAULT: return new Token(tk, pos, endPos, comments);
aoqi@0 687 case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
aoqi@0 688 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
aoqi@0 689 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
aoqi@0 690 default: throw new AssertionError();
aoqi@0 691 }
aoqi@0 692 }
aoqi@0 693 finally {
aoqi@0 694 if (scannerDebug) {
aoqi@0 695 System.out.println("nextToken(" + pos
aoqi@0 696 + "," + endPos + ")=|" +
aoqi@0 697 new String(reader.getRawCharacters(pos, endPos))
aoqi@0 698 + "|");
aoqi@0 699 }
aoqi@0 700 }
aoqi@0 701 }
aoqi@0 702 //where
aoqi@0 703 List<Comment> addComment(List<Comment> comments, Comment comment) {
aoqi@0 704 return comments == null ?
aoqi@0 705 List.of(comment) :
aoqi@0 706 comments.prepend(comment);
aoqi@0 707 }
aoqi@0 708
aoqi@0 709 /** Return the position where a lexical error occurred
 * (the current value of the errPos field).
aoqi@0 710 */
aoqi@0 711 public int errPos() {
aoqi@0 712 return errPos;
aoqi@0 713 }
aoqi@0 714
aoqi@0 715 /** Set the position where a lexical error occurred.
 * @param pos the new error position
aoqi@0 716 */
aoqi@0 717 public void errPos(int pos) {
aoqi@0 718 errPos = pos;
aoqi@0 719 }
aoqi@0 720
aoqi@0 721 /**
aoqi@0 722 * Called when a complete comment has been scanned. pos and endPos
aoqi@0 723 * will mark the comment boundary.
aoqi@0 724 */
aoqi@0 725 protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
aoqi@0 726 if (scannerDebug)
aoqi@0 727 System.out.println("processComment(" + pos
aoqi@0 728 + "," + endPos + "," + style + ")=|"
aoqi@0 729 + new String(reader.getRawCharacters(pos, endPos))
aoqi@0 730 + "|");
aoqi@0 731 char[] buf = reader.getRawCharacters(pos, endPos);
aoqi@0 732 return new BasicComment<UnicodeReader>(new UnicodeReader(fac, buf, buf.length), style);
aoqi@0 733 }
aoqi@0 734
aoqi@0 735 /**
aoqi@0 736 * Called when a complete whitespace run has been scanned. pos and endPos
aoqi@0 737 * will mark the whitespace boundary.
aoqi@0 738 */
aoqi@0 739 protected void processWhiteSpace(int pos, int endPos) {
aoqi@0 740 if (scannerDebug)
aoqi@0 741 System.out.println("processWhitespace(" + pos
aoqi@0 742 + "," + endPos + ")=|" +
aoqi@0 743 new String(reader.getRawCharacters(pos, endPos))
aoqi@0 744 + "|");
aoqi@0 745 }
aoqi@0 746
aoqi@0 747 /**
aoqi@0 748 * Called when a line terminator has been processed.
aoqi@0 749 */
aoqi@0 750 protected void processLineTerminator(int pos, int endPos) {
aoqi@0 751 if (scannerDebug)
aoqi@0 752 System.out.println("processTerminator(" + pos
aoqi@0 753 + "," + endPos + ")=|" +
aoqi@0 754 new String(reader.getRawCharacters(pos, endPos))
aoqi@0 755 + "|");
aoqi@0 756 }
aoqi@0 757
aoqi@0 758 /** Build a map for translating between line numbers and
aoqi@0 759 * positions in the input.
aoqi@0 760 *
aoqi@0 761 * @return a LineMap */
aoqi@0 762 public Position.LineMap getLineMap() {
aoqi@0 763 return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
aoqi@0 764 }
aoqi@0 765
aoqi@0 766
aoqi@0 767 /**
aoqi@0 768 * Scan a documentation comment; determine if a deprecated tag is present.
aoqi@0 769 * Called once the initial /, * have been skipped, positioned at the second *
aoqi@0 770 * (which is treated as the beginning of the first line).
aoqi@0 771 * Stops positioned at the closing '/'.
aoqi@0 772 */
aoqi@0 773 protected static class BasicComment<U extends UnicodeReader> implements Comment {
aoqi@0 774
aoqi@0 775 CommentStyle cs;
aoqi@0 776 U comment_reader;
aoqi@0 777
aoqi@0 778 protected boolean deprecatedFlag = false;
aoqi@0 779 protected boolean scanned = false;
aoqi@0 780
aoqi@0 781 protected BasicComment(U comment_reader, CommentStyle cs) {
aoqi@0 782 this.comment_reader = comment_reader;
aoqi@0 783 this.cs = cs;
aoqi@0 784 }
aoqi@0 785
aoqi@0 786 public String getText() {
aoqi@0 787 return null;
aoqi@0 788 }
aoqi@0 789
aoqi@0 790 public int getSourcePos(int pos) {
aoqi@0 791 return -1;
aoqi@0 792 }
aoqi@0 793
aoqi@0 794 public CommentStyle getStyle() {
aoqi@0 795 return cs;
aoqi@0 796 }
aoqi@0 797
aoqi@0 798 public boolean isDeprecated() {
aoqi@0 799 if (!scanned && cs == CommentStyle.JAVADOC) {
aoqi@0 800 scanDocComment();
aoqi@0 801 }
aoqi@0 802 return deprecatedFlag;
aoqi@0 803 }
aoqi@0 804
aoqi@0 805 @SuppressWarnings("fallthrough")
aoqi@0 806 protected void scanDocComment() {
aoqi@0 807 try {
aoqi@0 808 boolean deprecatedPrefix = false;
aoqi@0 809
aoqi@0 810 comment_reader.bp += 3; // '/**'
aoqi@0 811 comment_reader.ch = comment_reader.buf[comment_reader.bp];
aoqi@0 812
aoqi@0 813 forEachLine:
aoqi@0 814 while (comment_reader.bp < comment_reader.buflen) {
aoqi@0 815
aoqi@0 816 // Skip optional WhiteSpace at beginning of line
aoqi@0 817 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
aoqi@0 818 comment_reader.scanCommentChar();
aoqi@0 819 }
aoqi@0 820
aoqi@0 821 // Skip optional consecutive Stars
aoqi@0 822 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
aoqi@0 823 comment_reader.scanCommentChar();
aoqi@0 824 if (comment_reader.ch == '/') {
aoqi@0 825 return;
aoqi@0 826 }
aoqi@0 827 }
aoqi@0 828
aoqi@0 829 // Skip optional WhiteSpace after Stars
aoqi@0 830 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
aoqi@0 831 comment_reader.scanCommentChar();
aoqi@0 832 }
aoqi@0 833
aoqi@0 834 deprecatedPrefix = false;
aoqi@0 835 // At beginning of line in the JavaDoc sense.
aoqi@0 836 if (!deprecatedFlag) {
aoqi@0 837 String deprecated = "@deprecated";
aoqi@0 838 int i = 0;
aoqi@0 839 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
aoqi@0 840 comment_reader.scanCommentChar();
aoqi@0 841 i++;
aoqi@0 842 if (i == deprecated.length()) {
aoqi@0 843 deprecatedPrefix = true;
aoqi@0 844 break;
aoqi@0 845 }
aoqi@0 846 }
aoqi@0 847 }
aoqi@0 848
aoqi@0 849 if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
aoqi@0 850 if (Character.isWhitespace(comment_reader.ch)) {
aoqi@0 851 deprecatedFlag = true;
aoqi@0 852 } else if (comment_reader.ch == '*') {
aoqi@0 853 comment_reader.scanCommentChar();
aoqi@0 854 if (comment_reader.ch == '/') {
aoqi@0 855 deprecatedFlag = true;
aoqi@0 856 return;
aoqi@0 857 }
aoqi@0 858 }
aoqi@0 859 }
aoqi@0 860
aoqi@0 861 // Skip rest of line
aoqi@0 862 while (comment_reader.bp < comment_reader.buflen) {
aoqi@0 863 switch (comment_reader.ch) {
aoqi@0 864 case '*':
aoqi@0 865 comment_reader.scanCommentChar();
aoqi@0 866 if (comment_reader.ch == '/') {
aoqi@0 867 return;
aoqi@0 868 }
aoqi@0 869 break;
aoqi@0 870 case CR: // (Spec 3.4)
aoqi@0 871 comment_reader.scanCommentChar();
aoqi@0 872 if (comment_reader.ch != LF) {
aoqi@0 873 continue forEachLine;
aoqi@0 874 }
aoqi@0 875 /* fall through to LF case */
aoqi@0 876 case LF: // (Spec 3.4)
aoqi@0 877 comment_reader.scanCommentChar();
aoqi@0 878 continue forEachLine;
aoqi@0 879 default:
aoqi@0 880 comment_reader.scanCommentChar();
aoqi@0 881 }
aoqi@0 882 } // rest of line
aoqi@0 883 } // forEachLine
aoqi@0 884 return;
aoqi@0 885 } finally {
aoqi@0 886 scanned = true;
aoqi@0 887 }
aoqi@0 888 }
aoqi@0 889 }
aoqi@0 890 }

mercurial