src/share/classes/com/sun/tools/javac/parser/Scanner.java

Mon, 14 Mar 2011 11:48:41 -0700

author
jjg
date
Mon, 14 Mar 2011 11:48:41 -0700
changeset 930
cb119107aeea
parent 870
3ce4e1a07e92
child 1074
04f983e3e825
permissions
-rw-r--r--

7026509: Cannot use JavaCompiler to create multiple CompilationTasks for partial compilations
Reviewed-by: mcimadamore

duke@1 1 /*
jjg@816 2 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
duke@1 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@1 4 *
duke@1 5 * This code is free software; you can redistribute it and/or modify it
duke@1 6 * under the terms of the GNU General Public License version 2 only, as
ohair@554 7 * published by the Free Software Foundation. Oracle designates this
duke@1 8 * particular file as subject to the "Classpath" exception as provided
ohair@554 9 * by Oracle in the LICENSE file that accompanied this code.
duke@1 10 *
duke@1 11 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@1 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@1 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@1 14 * version 2 for more details (a copy is included in the LICENSE file that
duke@1 15 * accompanied this code).
duke@1 16 *
duke@1 17 * You should have received a copy of the GNU General Public License version
duke@1 18 * 2 along with this work; if not, write to the Free Software Foundation,
duke@1 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@1 20 *
ohair@554 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
ohair@554 22 * or visit www.oracle.com if you need additional information or have any
ohair@554 23 * questions.
duke@1 24 */
duke@1 25
duke@1 26 package com.sun.tools.javac.parser;
duke@1 27
duke@1 28 import java.nio.*;
duke@1 29
jjg@50 30 import com.sun.tools.javac.code.Source;
jjg@50 31 import com.sun.tools.javac.file.JavacFileManager;
duke@1 32 import com.sun.tools.javac.util.*;
duke@1 33
duke@1 34
duke@1 35 import static com.sun.tools.javac.parser.Token.*;
duke@1 36 import static com.sun.tools.javac.util.LayoutCharacters.*;
duke@1 37
duke@1 38 /** The lexical analyzer maps an input stream consisting of
duke@1 39 * ASCII characters and Unicode escapes into a token sequence.
duke@1 40 *
jjg@581 41 * <p><b>This is NOT part of any supported API.
jjg@581 42 * If you write code that depends on this, you do so at your own risk.
duke@1 43 * This code and its internal interfaces are subject to change or
duke@1 44 * deletion without notice.</b>
duke@1 45 */
duke@1 46 public class Scanner implements Lexer {
duke@1 47
/** Debug switch: when true, nextToken() prints each token's position
 *  range and raw characters to System.out.
 */
duke@1 48 private static boolean scannerDebug = false;
duke@1 49
duke@1 50 /* Output variables; set by nextToken():
duke@1 51 */
duke@1 52
duke@1 53 /** The token, set by nextToken() (and overridable through token(Token)).
duke@1 54 */
duke@1 55 private Token token;
duke@1 56
duke@1 57 /** Allow hex floating-point literals. Initialized from the source level;
 * set to true after the first "unsupported.fp.lit" error has been reported.
duke@1 58 */
duke@1 59 private boolean allowHexFloats;
duke@1 60
jjg@409 61 /** Allow binary literals. Initialized from the source level;
 * set to true after the first "unsupported.binary.lit" error has been reported.
jjg@409 62 */
jjg@409 63 private boolean allowBinaryLiterals;
jjg@409 64
jjg@409 65 /** Allow underscores in literals. Initialized from the source level;
 * set to true after the first "unsupported.underscore.lit" error has been reported.
jjg@409 66 */
jjg@409 67 private boolean allowUnderscoresInLiterals;
jjg@409 68
jjg@409 69 /** The source language setting, taken from the scanner factory.
jjg@409 70 */
jjg@409 71 private Source source;
jjg@409 72
duke@1 73 /** The token's position, 0-based offset from beginning of text.
duke@1 74 */
duke@1 75 private int pos;
duke@1 76
duke@1 77 /** Character position just after the last character of the token.
duke@1 78 */
duke@1 79 private int endPos;
duke@1 80
duke@1 81 /** The end position of the previous token; nextToken() copies endPos
 * here before it starts scanning.
duke@1 82 */
duke@1 83 private int prevEndPos;
duke@1 84
duke@1 85 /** The position last recorded for a lexical error (by lexError or
 * errPos(int)); Position.NOPOS until one is recorded.
duke@1 86 */
duke@1 87 private int errPos = Position.NOPOS;
duke@1 88
duke@1 89 /** The name of an identifier or token:
duke@1 90 */
duke@1 91 private Name name;
duke@1 92
duke@1 93 /** The radix of a numeric literal token.
duke@1 94 */
duke@1 95 private int radix;
duke@1 96
duke@1 97 /** Has a @deprecated been encountered in last doc comment?
duke@1 98 * This is set by scanDocComment() and needs to be reset by the client.
duke@1 99 */
duke@1 100 protected boolean deprecatedFlag = false;
duke@1 101
duke@1 102 /** A character buffer for literals; grown on demand by putChar().
duke@1 103 */
duke@1 104 private char[] sbuf = new char[128];
// Number of characters currently stored in sbuf; reset to 0 by nextToken().
duke@1 105 private int sp;
duke@1 106
duke@1 107 /** The input buffer (buf), the index in buf of the character most
duke@1 108 * recently read by scanChar() (bp), and the index of one past the last
 * character in the buffer (buflen), where the constructor stores EOI.
 * eofPos is the input length given to the constructor; pos and bp are
 * set to it when the EOF token is produced.
duke@1 109 */
duke@1 110 private char[] buf;
duke@1 111 private int bp;
duke@1 112 private int buflen;
duke@1 113 private int eofPos;
duke@1 114
duke@1 115 /** The current character.
duke@1 116 */
duke@1 117 private char ch;
duke@1 118
duke@1 119 /** The buffer index of the last converted unicode character;
 * -1 until the first escape has been converted.
duke@1 120 */
duke@1 121 private int unicodeConversionBp = -1;
duke@1 122
duke@1 123 /** The log to be used for error reporting.
duke@1 124 */
duke@1 125 private final Log log;
duke@1 126
duke@1 127 /** The name table. */
jjg@113 128 private final Names names;
duke@1 129
duke@1 130 /** The keyword table. */
duke@1 131 private final Keywords keywords;
duke@1 132
duke@1 133 /** Common code for constructors. */
jjg@695 134 private Scanner(ScannerFactory fac) {
jjg@409 135 log = fac.log;
jjg@409 136 names = fac.names;
jjg@409 137 keywords = fac.keywords;
jjg@409 138 source = fac.source;
jjg@409 139 allowBinaryLiterals = source.allowBinaryLiterals();
jjg@409 140 allowHexFloats = source.allowHexFloats();
jjg@859 141 allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
duke@1 142 }
duke@1 143
duke@1 144 private static final boolean hexFloatsWork = hexFloatsWork();
duke@1 145 private static boolean hexFloatsWork() {
duke@1 146 try {
duke@1 147 Float.valueOf("0x1.0p1");
duke@1 148 return true;
duke@1 149 } catch (NumberFormatException ex) {
duke@1 150 return false;
duke@1 151 }
duke@1 152 }
duke@1 153
/**
 * Create a scanner that reads from a character buffer. The buffer must
 * implement array() and compact(), and remaining() must be less than
 * limit(). The buffer is converted to a char array and scanned up to
 * its limit.
 *
 * @param fac    the factory which created this Scanner
 * @param buffer the input characters
 */
protected Scanner(ScannerFactory fac, CharBuffer buffer) {
    this(fac,
         JavacFileManager.toArray(buffer),
         buffer.limit());
}
duke@1 160
duke@1 161 /**
duke@1 162 * Create a scanner from the input array. This method might
duke@1 163 * modify the array. To avoid copying the input array, ensure
duke@1 164 * that {@code inputLength < input.length} or
duke@1 165 * {@code input[input.length -1]} is a white space character.
duke@1 166 *
duke@1 167 * @param fac the factory which created this Scanner
duke@1 168 * @param input the input, might be modified
duke@1 169 * @param inputLength the size of the input.
duke@1 170 * Must be positive and less than or equal to input.length.
duke@1 171 */
jjg@695 172 protected Scanner(ScannerFactory fac, char[] input, int inputLength) {
duke@1 173 this(fac);
duke@1 174 eofPos = inputLength;
duke@1 175 if (inputLength == input.length) {
duke@1 176 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
duke@1 177 inputLength--;
duke@1 178 } else {
duke@1 179 char[] newInput = new char[inputLength + 1];
duke@1 180 System.arraycopy(input, 0, newInput, 0, input.length);
duke@1 181 input = newInput;
duke@1 182 }
duke@1 183 }
duke@1 184 buf = input;
duke@1 185 buflen = inputLength;
duke@1 186 buf[buflen] = EOI;
duke@1 187 bp = -1;
duke@1 188 scanChar();
duke@1 189 }
duke@1 190
duke@1 191 /** Report an error at the given position using the provided arguments.
duke@1 192 */
duke@1 193 private void lexError(int pos, String key, Object... args) {
duke@1 194 log.error(pos, key, args);
duke@1 195 token = ERROR;
duke@1 196 errPos = pos;
duke@1 197 }
duke@1 198
duke@1 199 /** Report an error at the current token position using the provided
duke@1 200 * arguments.
duke@1 201 */
duke@1 202 private void lexError(String key, Object... args) {
duke@1 203 lexError(pos, key, args);
duke@1 204 }
duke@1 205
duke@1 206 /** Convert an ASCII digit from its base (8, 10, or 16)
duke@1 207 * to its value.
duke@1 208 */
duke@1 209 private int digit(int base) {
duke@1 210 char c = ch;
duke@1 211 int result = Character.digit(c, base);
duke@1 212 if (result >= 0 && c > 0x7f) {
duke@1 213 lexError(pos+1, "illegal.nonascii.digit");
duke@1 214 ch = "0123456789abcdef".charAt(result);
duke@1 215 }
duke@1 216 return result;
duke@1 217 }
duke@1 218
duke@1 219 /** Convert unicode escape; bp points to initial '\' character
duke@1 220 * (Spec 3.3).
duke@1 221 */
duke@1 222 private void convertUnicode() {
duke@1 223 if (ch == '\\' && unicodeConversionBp != bp) {
duke@1 224 bp++; ch = buf[bp];
duke@1 225 if (ch == 'u') {
duke@1 226 do {
duke@1 227 bp++; ch = buf[bp];
duke@1 228 } while (ch == 'u');
duke@1 229 int limit = bp + 3;
duke@1 230 if (limit < buflen) {
duke@1 231 int d = digit(16);
duke@1 232 int code = d;
duke@1 233 while (bp < limit && d >= 0) {
duke@1 234 bp++; ch = buf[bp];
duke@1 235 d = digit(16);
duke@1 236 code = (code << 4) + d;
duke@1 237 }
duke@1 238 if (d >= 0) {
duke@1 239 ch = (char)code;
duke@1 240 unicodeConversionBp = bp;
duke@1 241 return;
duke@1 242 }
duke@1 243 }
duke@1 244 lexError(bp, "illegal.unicode.esc");
duke@1 245 } else {
duke@1 246 bp--;
duke@1 247 ch = '\\';
duke@1 248 }
duke@1 249 }
duke@1 250 }
duke@1 251
duke@1 252 /** Read next character.
duke@1 253 */
duke@1 254 private void scanChar() {
duke@1 255 ch = buf[++bp];
duke@1 256 if (ch == '\\') {
duke@1 257 convertUnicode();
duke@1 258 }
duke@1 259 }
duke@1 260
duke@1 261 /** Read next character in comment, skipping over double '\' characters.
duke@1 262 */
duke@1 263 private void scanCommentChar() {
duke@1 264 scanChar();
duke@1 265 if (ch == '\\') {
duke@1 266 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1 267 bp++;
duke@1 268 } else {
duke@1 269 convertUnicode();
duke@1 270 }
duke@1 271 }
duke@1 272 }
duke@1 273
duke@1 274 /** Append a character to sbuf.
duke@1 275 */
duke@1 276 private void putChar(char ch) {
duke@1 277 if (sp == sbuf.length) {
duke@1 278 char[] newsbuf = new char[sbuf.length * 2];
duke@1 279 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
duke@1 280 sbuf = newsbuf;
duke@1 281 }
duke@1 282 sbuf[sp++] = ch;
duke@1 283 }
duke@1 284
duke@1 285 /** Read next character in character or string literal and copy into sbuf.
duke@1 286 */
jjg@752 287 private void scanLitChar() {
duke@1 288 if (ch == '\\') {
duke@1 289 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1 290 bp++;
duke@1 291 putChar('\\');
duke@1 292 scanChar();
duke@1 293 } else {
duke@1 294 scanChar();
duke@1 295 switch (ch) {
duke@1 296 case '0': case '1': case '2': case '3':
duke@1 297 case '4': case '5': case '6': case '7':
duke@1 298 char leadch = ch;
duke@1 299 int oct = digit(8);
duke@1 300 scanChar();
duke@1 301 if ('0' <= ch && ch <= '7') {
duke@1 302 oct = oct * 8 + digit(8);
duke@1 303 scanChar();
duke@1 304 if (leadch <= '3' && '0' <= ch && ch <= '7') {
duke@1 305 oct = oct * 8 + digit(8);
duke@1 306 scanChar();
duke@1 307 }
duke@1 308 }
duke@1 309 putChar((char)oct);
duke@1 310 break;
duke@1 311 case 'b':
duke@1 312 putChar('\b'); scanChar(); break;
duke@1 313 case 't':
duke@1 314 putChar('\t'); scanChar(); break;
duke@1 315 case 'n':
duke@1 316 putChar('\n'); scanChar(); break;
duke@1 317 case 'f':
duke@1 318 putChar('\f'); scanChar(); break;
duke@1 319 case 'r':
duke@1 320 putChar('\r'); scanChar(); break;
duke@1 321 case '\'':
duke@1 322 putChar('\''); scanChar(); break;
duke@1 323 case '\"':
duke@1 324 putChar('\"'); scanChar(); break;
duke@1 325 case '\\':
duke@1 326 putChar('\\'); scanChar(); break;
duke@1 327 default:
duke@1 328 lexError(bp, "illegal.esc.char");
duke@1 329 }
duke@1 330 }
duke@1 331 } else if (bp != buflen) {
duke@1 332 putChar(ch); scanChar();
duke@1 333 }
duke@1 334 }
duke@1 335
jjg@409 336 private void scanDigits(int digitRadix) {
jjg@409 337 char saveCh;
jjg@409 338 int savePos;
jjg@409 339 do {
jjg@409 340 if (ch != '_') {
jjg@409 341 putChar(ch);
jjg@409 342 } else {
jjg@409 343 if (!allowUnderscoresInLiterals) {
jjg@597 344 lexError("unsupported.underscore.lit", source.name);
jjg@409 345 allowUnderscoresInLiterals = true;
jjg@409 346 }
jjg@409 347 }
jjg@409 348 saveCh = ch;
jjg@409 349 savePos = bp;
jjg@409 350 scanChar();
jjg@409 351 } while (digit(digitRadix) >= 0 || ch == '_');
jjg@409 352 if (saveCh == '_')
jjg@409 353 lexError(savePos, "illegal.underscore");
jjg@409 354 }
jjg@409 355
duke@1 356 /** Read fractional part of hexadecimal floating point number.
duke@1 357 */
duke@1 358 private void scanHexExponentAndSuffix() {
duke@1 359 if (ch == 'p' || ch == 'P') {
duke@1 360 putChar(ch);
duke@1 361 scanChar();
jjg@409 362 skipIllegalUnderscores();
duke@1 363 if (ch == '+' || ch == '-') {
duke@1 364 putChar(ch);
duke@1 365 scanChar();
duke@1 366 }
jjg@409 367 skipIllegalUnderscores();
duke@1 368 if ('0' <= ch && ch <= '9') {
jjg@409 369 scanDigits(10);
duke@1 370 if (!allowHexFloats) {
jjg@409 371 lexError("unsupported.fp.lit", source.name);
duke@1 372 allowHexFloats = true;
duke@1 373 }
duke@1 374 else if (!hexFloatsWork)
duke@1 375 lexError("unsupported.cross.fp.lit");
duke@1 376 } else
duke@1 377 lexError("malformed.fp.lit");
duke@1 378 } else {
duke@1 379 lexError("malformed.fp.lit");
duke@1 380 }
duke@1 381 if (ch == 'f' || ch == 'F') {
duke@1 382 putChar(ch);
duke@1 383 scanChar();
duke@1 384 token = FLOATLITERAL;
duke@1 385 } else {
duke@1 386 if (ch == 'd' || ch == 'D') {
duke@1 387 putChar(ch);
duke@1 388 scanChar();
duke@1 389 }
duke@1 390 token = DOUBLELITERAL;
duke@1 391 }
duke@1 392 }
duke@1 393
duke@1 394 /** Read fractional part of floating point number.
duke@1 395 */
duke@1 396 private void scanFraction() {
jjg@409 397 skipIllegalUnderscores();
jjg@409 398 if ('0' <= ch && ch <= '9') {
jjg@409 399 scanDigits(10);
duke@1 400 }
duke@1 401 int sp1 = sp;
duke@1 402 if (ch == 'e' || ch == 'E') {
duke@1 403 putChar(ch);
duke@1 404 scanChar();
jjg@409 405 skipIllegalUnderscores();
duke@1 406 if (ch == '+' || ch == '-') {
duke@1 407 putChar(ch);
duke@1 408 scanChar();
duke@1 409 }
jjg@409 410 skipIllegalUnderscores();
duke@1 411 if ('0' <= ch && ch <= '9') {
jjg@409 412 scanDigits(10);
duke@1 413 return;
duke@1 414 }
duke@1 415 lexError("malformed.fp.lit");
duke@1 416 sp = sp1;
duke@1 417 }
duke@1 418 }
duke@1 419
duke@1 420 /** Read fractional part and 'd' or 'f' suffix of floating point number.
duke@1 421 */
duke@1 422 private void scanFractionAndSuffix() {
duke@1 423 this.radix = 10;
duke@1 424 scanFraction();
duke@1 425 if (ch == 'f' || ch == 'F') {
duke@1 426 putChar(ch);
duke@1 427 scanChar();
duke@1 428 token = FLOATLITERAL;
duke@1 429 } else {
duke@1 430 if (ch == 'd' || ch == 'D') {
duke@1 431 putChar(ch);
duke@1 432 scanChar();
duke@1 433 }
duke@1 434 token = DOUBLELITERAL;
duke@1 435 }
duke@1 436 }
duke@1 437
duke@1 438 /** Read fractional part and 'd' or 'f' suffix of floating point number.
duke@1 439 */
duke@1 440 private void scanHexFractionAndSuffix(boolean seendigit) {
duke@1 441 this.radix = 16;
jjg@816 442 Assert.check(ch == '.');
duke@1 443 putChar(ch);
duke@1 444 scanChar();
jjg@409 445 skipIllegalUnderscores();
jjg@409 446 if (digit(16) >= 0) {
duke@1 447 seendigit = true;
jjg@409 448 scanDigits(16);
duke@1 449 }
duke@1 450 if (!seendigit)
duke@1 451 lexError("invalid.hex.number");
duke@1 452 else
duke@1 453 scanHexExponentAndSuffix();
duke@1 454 }
duke@1 455
jjg@409 456 private void skipIllegalUnderscores() {
jjg@409 457 if (ch == '_') {
jjg@409 458 lexError(bp, "illegal.underscore");
jjg@409 459 while (ch == '_')
jjg@409 460 scanChar();
jjg@409 461 }
jjg@409 462 }
jjg@409 463
duke@1 464 /** Read a number.
jjg@409 465 * @param radix The radix of the number; one of 2, j8, 10, 16.
duke@1 466 */
duke@1 467 private void scanNumber(int radix) {
duke@1 468 this.radix = radix;
duke@1 469 // for octal, allow base-10 digit in case it's a float literal
jjg@409 470 int digitRadix = (radix == 8 ? 10 : radix);
duke@1 471 boolean seendigit = false;
jjg@409 472 if (digit(digitRadix) >= 0) {
duke@1 473 seendigit = true;
jjg@409 474 scanDigits(digitRadix);
duke@1 475 }
duke@1 476 if (radix == 16 && ch == '.') {
duke@1 477 scanHexFractionAndSuffix(seendigit);
duke@1 478 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
duke@1 479 scanHexExponentAndSuffix();
jjg@409 480 } else if (digitRadix == 10 && ch == '.') {
duke@1 481 putChar(ch);
duke@1 482 scanChar();
duke@1 483 scanFractionAndSuffix();
jjg@409 484 } else if (digitRadix == 10 &&
duke@1 485 (ch == 'e' || ch == 'E' ||
duke@1 486 ch == 'f' || ch == 'F' ||
duke@1 487 ch == 'd' || ch == 'D')) {
duke@1 488 scanFractionAndSuffix();
duke@1 489 } else {
duke@1 490 if (ch == 'l' || ch == 'L') {
duke@1 491 scanChar();
duke@1 492 token = LONGLITERAL;
duke@1 493 } else {
duke@1 494 token = INTLITERAL;
duke@1 495 }
duke@1 496 }
duke@1 497 }
duke@1 498
duke@1 499 /** Read an identifier.
duke@1 500 */
duke@1 501 private void scanIdent() {
duke@1 502 boolean isJavaIdentifierPart;
duke@1 503 char high;
duke@1 504 do {
duke@1 505 if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
duke@1 506 // optimization, was: putChar(ch);
duke@1 507
duke@1 508 scanChar();
duke@1 509 switch (ch) {
duke@1 510 case 'A': case 'B': case 'C': case 'D': case 'E':
duke@1 511 case 'F': case 'G': case 'H': case 'I': case 'J':
duke@1 512 case 'K': case 'L': case 'M': case 'N': case 'O':
duke@1 513 case 'P': case 'Q': case 'R': case 'S': case 'T':
duke@1 514 case 'U': case 'V': case 'W': case 'X': case 'Y':
duke@1 515 case 'Z':
duke@1 516 case 'a': case 'b': case 'c': case 'd': case 'e':
duke@1 517 case 'f': case 'g': case 'h': case 'i': case 'j':
duke@1 518 case 'k': case 'l': case 'm': case 'n': case 'o':
duke@1 519 case 'p': case 'q': case 'r': case 's': case 't':
duke@1 520 case 'u': case 'v': case 'w': case 'x': case 'y':
duke@1 521 case 'z':
duke@1 522 case '$': case '_':
duke@1 523 case '0': case '1': case '2': case '3': case '4':
duke@1 524 case '5': case '6': case '7': case '8': case '9':
duke@1 525 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
duke@1 526 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
duke@1 527 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
duke@1 528 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
duke@1 529 case '\u0015': case '\u0016': case '\u0017':
duke@1 530 case '\u0018': case '\u0019': case '\u001B':
duke@1 531 case '\u007F':
duke@1 532 break;
duke@1 533 case '\u001A': // EOI is also a legal identifier part
duke@1 534 if (bp >= buflen) {
duke@1 535 name = names.fromChars(sbuf, 0, sp);
duke@1 536 token = keywords.key(name);
duke@1 537 return;
duke@1 538 }
duke@1 539 break;
duke@1 540 default:
duke@1 541 if (ch < '\u0080') {
duke@1 542 // all ASCII range chars already handled, above
duke@1 543 isJavaIdentifierPart = false;
duke@1 544 } else {
duke@1 545 high = scanSurrogates();
duke@1 546 if (high != 0) {
duke@1 547 if (sp == sbuf.length) {
duke@1 548 putChar(high);
duke@1 549 } else {
duke@1 550 sbuf[sp++] = high;
duke@1 551 }
duke@1 552 isJavaIdentifierPart = Character.isJavaIdentifierPart(
duke@1 553 Character.toCodePoint(high, ch));
duke@1 554 } else {
duke@1 555 isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
duke@1 556 }
duke@1 557 }
duke@1 558 if (!isJavaIdentifierPart) {
duke@1 559 name = names.fromChars(sbuf, 0, sp);
duke@1 560 token = keywords.key(name);
duke@1 561 return;
duke@1 562 }
duke@1 563 }
duke@1 564 } while (true);
duke@1 565 }
duke@1 566
duke@1 567 /** Are surrogates supported?
duke@1 568 */
duke@1 569 final static boolean surrogatesSupported = surrogatesSupported();
duke@1 570 private static boolean surrogatesSupported() {
duke@1 571 try {
duke@1 572 Character.isHighSurrogate('a');
duke@1 573 return true;
duke@1 574 } catch (NoSuchMethodError ex) {
duke@1 575 return false;
duke@1 576 }
duke@1 577 }
duke@1 578
duke@1 579 /** Scan surrogate pairs. If 'ch' is a high surrogate and
duke@1 580 * the next character is a low surrogate, then put the low
duke@1 581 * surrogate in 'ch', and return the high surrogate.
duke@1 582 * otherwise, just return 0.
duke@1 583 */
duke@1 584 private char scanSurrogates() {
duke@1 585 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
duke@1 586 char high = ch;
duke@1 587
duke@1 588 scanChar();
duke@1 589
duke@1 590 if (Character.isLowSurrogate(ch)) {
duke@1 591 return high;
duke@1 592 }
duke@1 593
duke@1 594 ch = high;
duke@1 595 }
duke@1 596
duke@1 597 return 0;
duke@1 598 }
duke@1 599
duke@1 600 /** Return true if ch can be part of an operator.
duke@1 601 */
duke@1 602 private boolean isSpecial(char ch) {
duke@1 603 switch (ch) {
duke@1 604 case '!': case '%': case '&': case '*': case '?':
duke@1 605 case '+': case '-': case ':': case '<': case '=':
duke@1 606 case '>': case '^': case '|': case '~':
duke@1 607 case '@':
duke@1 608 return true;
duke@1 609 default:
duke@1 610 return false;
duke@1 611 }
duke@1 612 }
duke@1 613
duke@1 614 /** Read longest possible sequence of special characters and convert
duke@1 615 * to token.
duke@1 616 */
duke@1 617 private void scanOperator() {
duke@1 618 while (true) {
duke@1 619 putChar(ch);
duke@1 620 Name newname = names.fromChars(sbuf, 0, sp);
duke@1 621 if (keywords.key(newname) == IDENTIFIER) {
duke@1 622 sp--;
duke@1 623 break;
duke@1 624 }
duke@1 625 name = newname;
duke@1 626 token = keywords.key(newname);
duke@1 627 scanChar();
duke@1 628 if (!isSpecial(ch)) break;
duke@1 629 }
duke@1 630 }
duke@1 631
duke@1 632 /**
duke@1 633 * Scan a documention comment; determine if a deprecated tag is present.
duke@1 634 * Called once the initial /, * have been skipped, positioned at the second *
duke@1 635 * (which is treated as the beginning of the first line).
duke@1 636 * Stops positioned at the closing '/'.
duke@1 637 */
duke@1 638 @SuppressWarnings("fallthrough")
duke@1 639 private void scanDocComment() {
duke@1 640 boolean deprecatedPrefix = false;
duke@1 641
duke@1 642 forEachLine:
duke@1 643 while (bp < buflen) {
duke@1 644
duke@1 645 // Skip optional WhiteSpace at beginning of line
duke@1 646 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
duke@1 647 scanCommentChar();
duke@1 648 }
duke@1 649
duke@1 650 // Skip optional consecutive Stars
duke@1 651 while (bp < buflen && ch == '*') {
duke@1 652 scanCommentChar();
duke@1 653 if (ch == '/') {
duke@1 654 return;
duke@1 655 }
duke@1 656 }
duke@1 657
duke@1 658 // Skip optional WhiteSpace after Stars
duke@1 659 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
duke@1 660 scanCommentChar();
duke@1 661 }
duke@1 662
duke@1 663 deprecatedPrefix = false;
duke@1 664 // At beginning of line in the JavaDoc sense.
duke@1 665 if (bp < buflen && ch == '@' && !deprecatedFlag) {
duke@1 666 scanCommentChar();
duke@1 667 if (bp < buflen && ch == 'd') {
duke@1 668 scanCommentChar();
duke@1 669 if (bp < buflen && ch == 'e') {
duke@1 670 scanCommentChar();
duke@1 671 if (bp < buflen && ch == 'p') {
duke@1 672 scanCommentChar();
duke@1 673 if (bp < buflen && ch == 'r') {
duke@1 674 scanCommentChar();
duke@1 675 if (bp < buflen && ch == 'e') {
duke@1 676 scanCommentChar();
duke@1 677 if (bp < buflen && ch == 'c') {
duke@1 678 scanCommentChar();
duke@1 679 if (bp < buflen && ch == 'a') {
duke@1 680 scanCommentChar();
duke@1 681 if (bp < buflen && ch == 't') {
duke@1 682 scanCommentChar();
duke@1 683 if (bp < buflen && ch == 'e') {
duke@1 684 scanCommentChar();
duke@1 685 if (bp < buflen && ch == 'd') {
duke@1 686 deprecatedPrefix = true;
duke@1 687 scanCommentChar();
duke@1 688 }}}}}}}}}}}
duke@1 689 if (deprecatedPrefix && bp < buflen) {
duke@1 690 if (Character.isWhitespace(ch)) {
duke@1 691 deprecatedFlag = true;
duke@1 692 } else if (ch == '*') {
duke@1 693 scanCommentChar();
duke@1 694 if (ch == '/') {
duke@1 695 deprecatedFlag = true;
duke@1 696 return;
duke@1 697 }
duke@1 698 }
duke@1 699 }
duke@1 700
duke@1 701 // Skip rest of line
duke@1 702 while (bp < buflen) {
duke@1 703 switch (ch) {
duke@1 704 case '*':
duke@1 705 scanCommentChar();
duke@1 706 if (ch == '/') {
duke@1 707 return;
duke@1 708 }
duke@1 709 break;
duke@1 710 case CR: // (Spec 3.4)
duke@1 711 scanCommentChar();
duke@1 712 if (ch != LF) {
duke@1 713 continue forEachLine;
duke@1 714 }
duke@1 715 /* fall through to LF case */
duke@1 716 case LF: // (Spec 3.4)
duke@1 717 scanCommentChar();
duke@1 718 continue forEachLine;
duke@1 719 default:
duke@1 720 scanCommentChar();
duke@1 721 }
duke@1 722 } // rest of line
duke@1 723 } // forEachLine
duke@1 724 return;
duke@1 725 }
duke@1 726
duke@1 727 /** The value of a literal token, recorded as a string.
duke@1 728 * For integers, leading 0x and 'l' suffixes are suppressed.
duke@1 729 */
duke@1 730 public String stringVal() {
duke@1 731 return new String(sbuf, 0, sp);
duke@1 732 }
duke@1 733
duke@1 734 /** Read token.
 *
 * Saves the end of the previous token in prevEndPos, empties the literal
 * buffer, then dispatches on the current character. White space, line
 * terminators and comments are passed to the processWhiteSpace,
 * processLineTerminator and processComment hooks (defined outside this
 * excerpt) and scanning resumes at the top of the loop; every other
 * branch sets the token (or reports an error) and returns. On every exit
 * endPos is set to the current buffer position.
duke@1 735 */
duke@1 736 public void nextToken() {
duke@1 737
duke@1 738 try {
duke@1 739 prevEndPos = endPos;
duke@1 740 sp = 0;
duke@1 741
duke@1 742 while (true) {
// Start of the candidate token; re-assigned on every pass because
// white space and comments loop back to here.
duke@1 743 pos = bp;
duke@1 744 switch (ch) {
duke@1 745 case ' ': // (Spec 3.6)
duke@1 746 case '\t': // (Spec 3.6)
duke@1 747 case FF: // (Spec 3.6)
duke@1 748 do {
duke@1 749 scanChar();
duke@1 750 } while (ch == ' ' || ch == '\t' || ch == FF);
duke@1 751 endPos = bp;
duke@1 752 processWhiteSpace();
duke@1 753 break;
duke@1 754 case LF: // (Spec 3.4)
duke@1 755 scanChar();
duke@1 756 endPos = bp;
duke@1 757 processLineTerminator();
duke@1 758 break;
duke@1 759 case CR: // (Spec 3.4)
duke@1 760 scanChar();
// CR LF is treated as a single line terminator.
duke@1 761 if (ch == LF) {
duke@1 762 scanChar();
duke@1 763 }
duke@1 764 endPos = bp;
duke@1 765 processLineTerminator();
duke@1 766 break;
duke@1 767 case 'A': case 'B': case 'C': case 'D': case 'E':
duke@1 768 case 'F': case 'G': case 'H': case 'I': case 'J':
duke@1 769 case 'K': case 'L': case 'M': case 'N': case 'O':
duke@1 770 case 'P': case 'Q': case 'R': case 'S': case 'T':
duke@1 771 case 'U': case 'V': case 'W': case 'X': case 'Y':
duke@1 772 case 'Z':
duke@1 773 case 'a': case 'b': case 'c': case 'd': case 'e':
duke@1 774 case 'f': case 'g': case 'h': case 'i': case 'j':
duke@1 775 case 'k': case 'l': case 'm': case 'n': case 'o':
duke@1 776 case 'p': case 'q': case 'r': case 's': case 't':
duke@1 777 case 'u': case 'v': case 'w': case 'x': case 'y':
duke@1 778 case 'z':
duke@1 779 case '$': case '_':
duke@1 780 scanIdent();
duke@1 781 return;
duke@1 782 case '0':
duke@1 783 scanChar();
// Hexadecimal literal: the "0x" prefix is not stored in sbuf.
duke@1 784 if (ch == 'x' || ch == 'X') {
duke@1 785 scanChar();
jjg@409 786 skipIllegalUnderscores();
duke@1 787 if (ch == '.') {
duke@1 788 scanHexFractionAndSuffix(false);
duke@1 789 } else if (digit(16) < 0) {
duke@1 790 lexError("invalid.hex.number");
duke@1 791 } else {
duke@1 792 scanNumber(16);
duke@1 793 }
// Binary literal: the "0b" prefix is not stored in sbuf. If the source
// level does not allow binary literals this is reported once, then the
// literal is scanned anyway.
jjg@409 794 } else if (ch == 'b' || ch == 'B') {
jjg@409 795 if (!allowBinaryLiterals) {
jjg@409 796 lexError("unsupported.binary.lit", source.name);
jjg@409 797 allowBinaryLiterals = true;
jjg@409 798 }
jjg@409 799 scanChar();
jjg@409 800 skipIllegalUnderscores();
jjg@423 801 if (digit(2) < 0) {
jjg@423 802 lexError("invalid.binary.number");
jjg@423 803 } else {
jjg@423 804 scanNumber(2);
jjg@423 805 }
duke@1 806 } else {
// Plain leading zero: keep the '0' and scan as radix 8 (scanNumber
// still accepts decimal digits in case this is a float literal).
duke@1 807 putChar('0');
jjg@409 808 if (ch == '_') {
// Underscores directly after the leading zero are skipped; they are
// only an error when no digit follows them.
jjg@409 809 int savePos = bp;
jjg@409 810 do {
jjg@409 811 scanChar();
jjg@409 812 } while (ch == '_');
jjg@409 813 if (digit(10) < 0) {
jjg@409 814 lexError(savePos, "illegal.underscore");
jjg@409 815 }
jjg@409 816 }
duke@1 817 scanNumber(8);
duke@1 818 }
duke@1 819 return;
duke@1 820 case '1': case '2': case '3': case '4':
duke@1 821 case '5': case '6': case '7': case '8': case '9':
duke@1 822 scanNumber(10);
duke@1 823 return;
duke@1 824 case '.':
duke@1 825 scanChar();
// A '.' followed by a digit starts a floating point literal.
duke@1 826 if ('0' <= ch && ch <= '9') {
duke@1 827 putChar('.');
duke@1 828 scanFractionAndSuffix();
duke@1 829 } else if (ch == '.') {
// Two dots must be followed by a third one to form an ellipsis.
duke@1 830 putChar('.'); putChar('.');
duke@1 831 scanChar();
duke@1 832 if (ch == '.') {
duke@1 833 scanChar();
duke@1 834 putChar('.');
duke@1 835 token = ELLIPSIS;
duke@1 836 } else {
duke@1 837 lexError("malformed.fp.lit");
duke@1 838 }
duke@1 839 } else {
duke@1 840 token = DOT;
duke@1 841 }
duke@1 842 return;
duke@1 843 case ',':
duke@1 844 scanChar(); token = COMMA; return;
duke@1 845 case ';':
duke@1 846 scanChar(); token = SEMI; return;
duke@1 847 case '(':
duke@1 848 scanChar(); token = LPAREN; return;
duke@1 849 case ')':
duke@1 850 scanChar(); token = RPAREN; return;
duke@1 851 case '[':
duke@1 852 scanChar(); token = LBRACKET; return;
duke@1 853 case ']':
duke@1 854 scanChar(); token = RBRACKET; return;
duke@1 855 case '{':
duke@1 856 scanChar(); token = LBRACE; return;
duke@1 857 case '}':
duke@1 858 scanChar(); token = RBRACE; return;
duke@1 859 case '/':
duke@1 860 scanChar();
duke@1 861 if (ch == '/') {
// Line comment: skip up to, but not including, the line terminator.
duke@1 862 do {
duke@1 863 scanCommentChar();
duke@1 864 } while (ch != CR && ch != LF && bp < buflen);
// A line comment that runs to the end of the input is not passed to
// processComment.
duke@1 865 if (bp < buflen) {
duke@1 866 endPos = bp;
duke@1 867 processComment(CommentStyle.LINE);
duke@1 868 }
duke@1 869 break;
duke@1 870 } else if (ch == '*') {
duke@1 871 scanChar();
duke@1 872 CommentStyle style;
duke@1 873 if (ch == '*') {
// A second '*' makes this a documentation comment; scanDocComment
// also records whether a deprecated tag was seen.
duke@1 874 style = CommentStyle.JAVADOC;
duke@1 875 scanDocComment();
duke@1 876 } else {
duke@1 877 style = CommentStyle.BLOCK;
duke@1 878 while (bp < buflen) {
duke@1 879 if (ch == '*') {
duke@1 880 scanChar();
duke@1 881 if (ch == '/') break;
duke@1 882 } else {
duke@1 883 scanCommentChar();
duke@1 884 }
duke@1 885 }
duke@1 886 }
// Both scans stop on the closing '/'; anything else means the input
// ended inside the comment.
duke@1 887 if (ch == '/') {
duke@1 888 scanChar();
duke@1 889 endPos = bp;
duke@1 890 processComment(style);
duke@1 891 break;
duke@1 892 } else {
duke@1 893 lexError("unclosed.comment");
duke@1 894 return;
duke@1 895 }
duke@1 896 } else if (ch == '=') {
duke@1 897 name = names.slashequals;
duke@1 898 token = SLASHEQ;
duke@1 899 scanChar();
duke@1 900 } else {
duke@1 901 name = names.slash;
duke@1 902 token = SLASH;
duke@1 903 }
duke@1 904 return;
duke@1 905 case '\'':
duke@1 906 scanChar();
duke@1 907 if (ch == '\'') {
duke@1 908 lexError("empty.char.lit");
duke@1 909 } else {
duke@1 910 if (ch == CR || ch == LF)
duke@1 911 lexError(pos, "illegal.line.end.in.char.lit");
duke@1 912 scanLitChar();
duke@1 913 if (ch == '\'') {
duke@1 914 scanChar();
duke@1 915 token = CHARLITERAL;
duke@1 916 } else {
duke@1 917 lexError(pos, "unclosed.char.lit");
duke@1 918 }
duke@1 919 }
duke@1 920 return;
duke@1 921 case '\"':
duke@1 922 scanChar();
// A string literal may not span a line terminator or the end of input.
duke@1 923 while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
duke@1 924 scanLitChar();
duke@1 925 if (ch == '\"') {
duke@1 926 token = STRINGLITERAL;
duke@1 927 scanChar();
duke@1 928 } else {
duke@1 929 lexError(pos, "unclosed.str.lit");
duke@1 930 }
duke@1 931 return;
duke@1 932 default:
duke@1 933 if (isSpecial(ch)) {
duke@1 934 scanOperator();
duke@1 935 } else {
duke@1 936 boolean isJavaIdentifierStart;
duke@1 937 if (ch < '\u0080') {
duke@1 938 // all ASCII range chars already handled, above
duke@1 939 isJavaIdentifierStart = false;
duke@1 940 } else {
// For a surrogate pair the high surrogate is stored here; scanIdent
// then appends the low surrogate left in ch.
duke@1 941 char high = scanSurrogates();
duke@1 942 if (high != 0) {
duke@1 943 if (sp == sbuf.length) {
duke@1 944 putChar(high);
duke@1 945 } else {
duke@1 946 sbuf[sp++] = high;
duke@1 947 }
duke@1 948
duke@1 949 isJavaIdentifierStart = Character.isJavaIdentifierStart(
duke@1 950 Character.toCodePoint(high, ch));
duke@1 951 } else {
duke@1 952 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
duke@1 953 }
duke@1 954 }
duke@1 955 if (isJavaIdentifierStart) {
duke@1 956 scanIdent();
// End of input: either the sentinel position has been reached, or an
// EOI character is the last character of the input.
duke@1 957 } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
duke@1 958 token = EOF;
duke@1 959 pos = bp = eofPos;
duke@1 960 } else {
duke@1 961 lexError("illegal.char", String.valueOf((int)ch));
duke@1 962 scanChar();
duke@1 963 }
duke@1 964 }
duke@1 965 return;
duke@1 966 }
duke@1 967 }
duke@1 968 } finally {
// Runs on every exit path, so endPos always reflects where scanning stopped.
duke@1 969 endPos = bp;
duke@1 970 if (scannerDebug)
duke@1 971 System.out.println("nextToken(" + pos
duke@1 972 + "," + endPos + ")=|" +
duke@1 973 new String(getRawCharacters(pos, endPos))
duke@1 974 + "|");
duke@1 975 }
duke@1 976 }
duke@1 977
duke@1 978 /** Return the current token, set by nextToken().
duke@1 979 */
duke@1 980 public Token token() {
duke@1 981 return token;
duke@1 982 }
duke@1 983
duke@1 984 /** Sets the current token.
duke@1 985 */
duke@1 986 public void token(Token token) {
duke@1 987 this.token = token;
duke@1 988 }
duke@1 989
duke@1 990 /** Return the current token's position: a 0-based
duke@1 991 * offset from beginning of the raw input stream
duke@1 992 * (before unicode translation)
duke@1 993 */
duke@1 994 public int pos() {
duke@1 995 return pos;
duke@1 996 }
duke@1 997
duke@1 998 /** Return the last character position of the current token.
duke@1 999 */
duke@1 1000 public int endPos() {
duke@1 1001 return endPos;
duke@1 1002 }
duke@1 1003
duke@1 1004 /** Return the last character position of the previous token.
duke@1 1005 */
duke@1 1006 public int prevEndPos() {
duke@1 1007 return prevEndPos;
duke@1 1008 }
duke@1 1009
duke@1 1010 /** Return the position where a lexical error occurred;
duke@1 1011 */
duke@1 1012 public int errPos() {
duke@1 1013 return errPos;
duke@1 1014 }
duke@1 1015
duke@1 1016 /** Set the position where a lexical error occurred;
duke@1 1017 */
duke@1 1018 public void errPos(int pos) {
duke@1 1019 errPos = pos;
duke@1 1020 }
duke@1 1021
duke@1 1022 /** Return the name of an identifier or token for the current token.
duke@1 1023 */
duke@1 1024 public Name name() {
duke@1 1025 return name;
duke@1 1026 }
duke@1 1027
duke@1 1028 /** Return the radix of a numeric literal token.
duke@1 1029 */
duke@1 1030 public int radix() {
duke@1 1031 return radix;
duke@1 1032 }
duke@1 1033
duke@1 1034 /** Has a @deprecated been encountered in last doc comment?
duke@1 1035 * This needs to be reset by client with resetDeprecatedFlag.
duke@1 1036 */
duke@1 1037 public boolean deprecatedFlag() {
duke@1 1038 return deprecatedFlag;
duke@1 1039 }
duke@1 1040
duke@1 1041 public void resetDeprecatedFlag() {
duke@1 1042 deprecatedFlag = false;
duke@1 1043 }
duke@1 1044
duke@1 1045 /**
duke@1 1046 * Returns the documentation string of the current token.
duke@1 1047 */
duke@1 1048 public String docComment() {
duke@1 1049 return null;
duke@1 1050 }
duke@1 1051
duke@1 1052 /**
duke@1 1053 * Returns a copy of the input buffer, up to its inputLength.
duke@1 1054 * Unicode escape sequences are not translated.
duke@1 1055 */
duke@1 1056 public char[] getRawCharacters() {
duke@1 1057 char[] chars = new char[buflen];
duke@1 1058 System.arraycopy(buf, 0, chars, 0, buflen);
duke@1 1059 return chars;
duke@1 1060 }
duke@1 1061
duke@1 1062 /**
duke@1 1063 * Returns a copy of a character array subset of the input buffer.
duke@1 1064 * The returned array begins at the <code>beginIndex</code> and
duke@1 1065 * extends to the character at index <code>endIndex - 1</code>.
duke@1 1066 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
duke@1 1067 * This behavior is like
duke@1 1068 * <code>String.substring(beginIndex, endIndex)</code>.
duke@1 1069 * Unicode escape sequences are not translated.
duke@1 1070 *
duke@1 1071 * @param beginIndex the beginning index, inclusive.
duke@1 1072 * @param endIndex the ending index, exclusive.
duke@1 1073 * @throws IndexOutOfBounds if either offset is outside of the
duke@1 1074 * array bounds
duke@1 1075 */
duke@1 1076 public char[] getRawCharacters(int beginIndex, int endIndex) {
duke@1 1077 int length = endIndex - beginIndex;
duke@1 1078 char[] chars = new char[length];
duke@1 1079 System.arraycopy(buf, beginIndex, chars, 0, length);
duke@1 1080 return chars;
duke@1 1081 }
duke@1 1082
/**
 * The kinds of comment that are reported to processComment.
 * The declaration order (LINE, BLOCK, JAVADOC) determines the ordinals.
 */
public enum CommentStyle {
    /** A line comment. */
    LINE,

    /** A block comment. */
    BLOCK,

    /** A javadoc comment. */
    JAVADOC
}
duke@1 1088
duke@1 1089 /**
duke@1 1090 * Called when a complete comment has been scanned. pos and endPos
duke@1 1091 * will mark the comment boundary.
duke@1 1092 */
duke@1 1093 protected void processComment(CommentStyle style) {
duke@1 1094 if (scannerDebug)
duke@1 1095 System.out.println("processComment(" + pos
duke@1 1096 + "," + endPos + "," + style + ")=|"
duke@1 1097 + new String(getRawCharacters(pos, endPos))
duke@1 1098 + "|");
duke@1 1099 }
duke@1 1100
duke@1 1101 /**
duke@1 1102 * Called when a complete whitespace run has been scanned. pos and endPos
duke@1 1103 * will mark the whitespace boundary.
duke@1 1104 */
duke@1 1105 protected void processWhiteSpace() {
duke@1 1106 if (scannerDebug)
duke@1 1107 System.out.println("processWhitespace(" + pos
duke@1 1108 + "," + endPos + ")=|" +
duke@1 1109 new String(getRawCharacters(pos, endPos))
duke@1 1110 + "|");
duke@1 1111 }
duke@1 1112
duke@1 1113 /**
duke@1 1114 * Called when a line terminator has been processed.
duke@1 1115 */
duke@1 1116 protected void processLineTerminator() {
duke@1 1117 if (scannerDebug)
duke@1 1118 System.out.println("processTerminator(" + pos
duke@1 1119 + "," + endPos + ")=|" +
duke@1 1120 new String(getRawCharacters(pos, endPos))
duke@1 1121 + "|");
duke@1 1122 }
duke@1 1123
duke@1 1124 /** Build a map for translating between line numbers and
duke@1 1125 * positions in the input.
duke@1 1126 *
duke@1 1127 * @return a LineMap */
duke@1 1128 public Position.LineMap getLineMap() {
duke@1 1129 return Position.makeLineMap(buf, buflen, false);
duke@1 1130 }
duke@1 1131
duke@1 1132 }

mercurial