src/share/classes/com/sun/tools/javac/parser/Scanner.java

Tue, 28 Dec 2010 15:54:52 -0800

author
ohair
date
Tue, 28 Dec 2010 15:54:52 -0800
changeset 798
4868a36f6fd8
parent 752
03177f49411d
child 816
7c537f4298fb
permissions
-rw-r--r--

6962318: Update copyright year
Reviewed-by: xdono

duke@1 1 /*
jjg@695 2 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
duke@1 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@1 4 *
duke@1 5 * This code is free software; you can redistribute it and/or modify it
duke@1 6 * under the terms of the GNU General Public License version 2 only, as
ohair@554 7 * published by the Free Software Foundation. Oracle designates this
duke@1 8 * particular file as subject to the "Classpath" exception as provided
ohair@554 9 * by Oracle in the LICENSE file that accompanied this code.
duke@1 10 *
duke@1 11 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@1 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@1 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@1 14 * version 2 for more details (a copy is included in the LICENSE file that
duke@1 15 * accompanied this code).
duke@1 16 *
duke@1 17 * You should have received a copy of the GNU General Public License version
duke@1 18 * 2 along with this work; if not, write to the Free Software Foundation,
duke@1 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@1 20 *
ohair@554 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
ohair@554 22 * or visit www.oracle.com if you need additional information or have any
ohair@554 23 * questions.
duke@1 24 */
duke@1 25
duke@1 26 package com.sun.tools.javac.parser;
duke@1 27
duke@1 28 import java.nio.*;
duke@1 29
jjg@50 30 import com.sun.tools.javac.code.Source;
jjg@50 31 import com.sun.tools.javac.file.JavacFileManager;
duke@1 32 import com.sun.tools.javac.util.*;
duke@1 33
duke@1 34
duke@1 35 import static com.sun.tools.javac.parser.Token.*;
duke@1 36 import static com.sun.tools.javac.util.LayoutCharacters.*;
duke@1 37
duke@1 38 /** The lexical analyzer maps an input stream consisting of
duke@1 39 * ASCII characters and Unicode escapes into a token sequence.
duke@1 40 *
jjg@581 41 * <p><b>This is NOT part of any supported API.
jjg@581 42 * If you write code that depends on this, you do so at your own risk.
duke@1 43 * This code and its internal interfaces are subject to change or
duke@1 44 * deletion without notice.</b>
duke@1 45 */
duke@1 46 public class Scanner implements Lexer {
duke@1 47
duke@1 48 private static boolean scannerDebug = false;
duke@1 49
duke@1 50 /* Output variables; set by nextToken():
duke@1 51 */
duke@1 52
duke@1 53 /** The token, set by nextToken().
duke@1 54 */
duke@1 55 private Token token;
duke@1 56
duke@1 57 /** Allow hex floating-point literals.
duke@1 58 */
duke@1 59 private boolean allowHexFloats;
duke@1 60
jjg@409 61 /** Allow binary literals.
jjg@409 62 */
jjg@409 63 private boolean allowBinaryLiterals;
jjg@409 64
jjg@409 65 /** Allow underscores in literals.
jjg@409 66 */
jjg@409 67 private boolean allowUnderscoresInLiterals;
jjg@409 68
jjg@409 69 /** The source language setting.
jjg@409 70 */
jjg@409 71 private Source source;
jjg@409 72
duke@1 73 /** The token's position, 0-based offset from beginning of text.
duke@1 74 */
duke@1 75 private int pos;
duke@1 76
duke@1 77 /** Character position just after the last character of the token.
duke@1 78 */
duke@1 79 private int endPos;
duke@1 80
duke@1 81 /** The last character position of the previous token.
duke@1 82 */
duke@1 83 private int prevEndPos;
duke@1 84
duke@1 85 /** The position where a lexical error occurred;
duke@1 86 */
duke@1 87 private int errPos = Position.NOPOS;
duke@1 88
duke@1 89 /** The name of an identifier or token:
duke@1 90 */
duke@1 91 private Name name;
duke@1 92
duke@1 93 /** The radix of a numeric literal token.
duke@1 94 */
duke@1 95 private int radix;
duke@1 96
duke@1 97 /** Has a @deprecated been encountered in last doc comment?
duke@1 98 * this needs to be reset by client.
duke@1 99 */
duke@1 100 protected boolean deprecatedFlag = false;
duke@1 101
duke@1 102 /** A character buffer for literals.
duke@1 103 */
duke@1 104 private char[] sbuf = new char[128];
duke@1 105 private int sp;
duke@1 106
duke@1 107 /** The input buffer, index of next chacter to be read,
duke@1 108 * index of one past last character in buffer.
duke@1 109 */
duke@1 110 private char[] buf;
duke@1 111 private int bp;
duke@1 112 private int buflen;
duke@1 113 private int eofPos;
duke@1 114
duke@1 115 /** The current character.
duke@1 116 */
duke@1 117 private char ch;
duke@1 118
duke@1 119 /** The buffer index of the last converted unicode character
duke@1 120 */
duke@1 121 private int unicodeConversionBp = -1;
duke@1 122
duke@1 123 /** The log to be used for error reporting.
duke@1 124 */
duke@1 125 private final Log log;
duke@1 126
duke@1 127 /** The name table. */
jjg@113 128 private final Names names;
duke@1 129
duke@1 130 /** The keyword table. */
duke@1 131 private final Keywords keywords;
duke@1 132
duke@1 133 /** Common code for constructors. */
private Scanner(ScannerFactory fac) {
    // Collaborators shared by every scanner created from the same factory.
    log = fac.log;
    names = fac.names;
    keywords = fac.keywords;
    source = fac.source;

    // Language-feature switches derived from the source level.
    allowBinaryLiterals = source.allowBinaryLiterals();
    allowHexFloats = source.allowHexFloats();
    // Ask the dedicated predicate instead of reusing the binary-literal one,
    // so the two features stay independent if their source levels diverge.
    // NOTE(review): assumes Source declares allowUnderscoresInLiterals() - confirm.
    allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
}
duke@1 143
duke@1 144 private static final boolean hexFloatsWork = hexFloatsWork();
duke@1 145 private static boolean hexFloatsWork() {
duke@1 146 try {
duke@1 147 Float.valueOf("0x1.0p1");
duke@1 148 return true;
duke@1 149 } catch (NumberFormatException ex) {
duke@1 150 return false;
duke@1 151 }
duke@1 152 }
duke@1 153
duke@1 154 /** Create a scanner from the input buffer. buffer must implement
duke@1 155 * array() and compact(), and remaining() must be less than limit().
duke@1 156 */
protected Scanner(ScannerFactory fac, CharBuffer buffer) {
    // Delegate to the array-based constructor, using the buffer's limit
    // as the input length.
    this(fac,
         JavacFileManager.toArray(buffer),
         buffer.limit());
}
duke@1 160
duke@1 161 /**
duke@1 162 * Create a scanner from the input array. This method might
duke@1 163 * modify the array. To avoid copying the input array, ensure
duke@1 164 * that {@code inputLength < input.length} or
duke@1 165 * {@code input[input.length -1]} is a white space character.
duke@1 166 *
duke@1 167 * @param fac the factory which created this Scanner
duke@1 168 * @param input the input, might be modified
duke@1 169 * @param inputLength the size of the input.
duke@1 170 * Must be positive and less than or equal to input.length.
duke@1 171 */
jjg@695 172 protected Scanner(ScannerFactory fac, char[] input, int inputLength) {
duke@1 173 this(fac);
duke@1 174 eofPos = inputLength;
duke@1 175 if (inputLength == input.length) {
duke@1 176 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
duke@1 177 inputLength--;
duke@1 178 } else {
duke@1 179 char[] newInput = new char[inputLength + 1];
duke@1 180 System.arraycopy(input, 0, newInput, 0, input.length);
duke@1 181 input = newInput;
duke@1 182 }
duke@1 183 }
duke@1 184 buf = input;
duke@1 185 buflen = inputLength;
duke@1 186 buf[buflen] = EOI;
duke@1 187 bp = -1;
duke@1 188 scanChar();
duke@1 189 }
duke@1 190
duke@1 191 /** Report an error at the given position using the provided arguments.
duke@1 192 */
duke@1 193 private void lexError(int pos, String key, Object... args) {
duke@1 194 log.error(pos, key, args);
duke@1 195 token = ERROR;
duke@1 196 errPos = pos;
duke@1 197 }
duke@1 198
duke@1 199 /** Report an error at the current token position using the provided
duke@1 200 * arguments.
duke@1 201 */
duke@1 202 private void lexError(String key, Object... args) {
duke@1 203 lexError(pos, key, args);
duke@1 204 }
duke@1 205
duke@1 206 /** Convert the current character, read as an ASCII digit in the given base
duke@1 207 * (2, 8, 10, or 16), to its value; the result is negative if it is not a digit.
duke@1 208 */
duke@1 209 private int digit(int base) {
duke@1 210 char c = ch;
duke@1 211 int result = Character.digit(c, base);
duke@1 212 if (result >= 0 && c > 0x7f) {
duke@1 213 lexError(pos+1, "illegal.nonascii.digit");
duke@1 214 ch = "0123456789abcdef".charAt(result);
duke@1 215 }
duke@1 216 return result;
duke@1 217 }
duke@1 218
duke@1 219 /** Convert unicode escape; bp points to initial '\' character
duke@1 220 * (Spec 3.3).
duke@1 221 */
duke@1 222 private void convertUnicode() {
duke@1 223 if (ch == '\\' && unicodeConversionBp != bp) {
duke@1 224 bp++; ch = buf[bp];
duke@1 225 if (ch == 'u') {
duke@1 226 do {
duke@1 227 bp++; ch = buf[bp];
duke@1 228 } while (ch == 'u');
duke@1 229 int limit = bp + 3;
duke@1 230 if (limit < buflen) {
duke@1 231 int d = digit(16);
duke@1 232 int code = d;
duke@1 233 while (bp < limit && d >= 0) {
duke@1 234 bp++; ch = buf[bp];
duke@1 235 d = digit(16);
duke@1 236 code = (code << 4) + d;
duke@1 237 }
duke@1 238 if (d >= 0) {
duke@1 239 ch = (char)code;
duke@1 240 unicodeConversionBp = bp;
duke@1 241 return;
duke@1 242 }
duke@1 243 }
duke@1 244 lexError(bp, "illegal.unicode.esc");
duke@1 245 } else {
duke@1 246 bp--;
duke@1 247 ch = '\\';
duke@1 248 }
duke@1 249 }
duke@1 250 }
duke@1 251
duke@1 252 /** Read next character.
duke@1 253 */
duke@1 254 private void scanChar() {
duke@1 255 ch = buf[++bp];
duke@1 256 if (ch == '\\') {
duke@1 257 convertUnicode();
duke@1 258 }
duke@1 259 }
duke@1 260
duke@1 261 /** Read next character in comment, skipping over double '\' characters.
duke@1 262 */
duke@1 263 private void scanCommentChar() {
duke@1 264 scanChar();
duke@1 265 if (ch == '\\') {
duke@1 266 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1 267 bp++;
duke@1 268 } else {
duke@1 269 convertUnicode();
duke@1 270 }
duke@1 271 }
duke@1 272 }
duke@1 273
duke@1 274 /** Append a character to sbuf.
duke@1 275 */
duke@1 276 private void putChar(char ch) {
duke@1 277 if (sp == sbuf.length) {
duke@1 278 char[] newsbuf = new char[sbuf.length * 2];
duke@1 279 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
duke@1 280 sbuf = newsbuf;
duke@1 281 }
duke@1 282 sbuf[sp++] = ch;
duke@1 283 }
duke@1 284
duke@1 285 /** For debugging purposes: print character.
duke@1 286 */
duke@1 287 private void dch() {
duke@1 288 System.err.print(ch); System.out.flush();
duke@1 289 }
duke@1 290
duke@1 291 /** Read next character in character or string literal and copy into sbuf.
duke@1 292 */
jjg@752 293 private void scanLitChar() {
duke@1 294 if (ch == '\\') {
duke@1 295 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1 296 bp++;
duke@1 297 putChar('\\');
duke@1 298 scanChar();
duke@1 299 } else {
duke@1 300 scanChar();
duke@1 301 switch (ch) {
duke@1 302 case '0': case '1': case '2': case '3':
duke@1 303 case '4': case '5': case '6': case '7':
duke@1 304 char leadch = ch;
duke@1 305 int oct = digit(8);
duke@1 306 scanChar();
duke@1 307 if ('0' <= ch && ch <= '7') {
duke@1 308 oct = oct * 8 + digit(8);
duke@1 309 scanChar();
duke@1 310 if (leadch <= '3' && '0' <= ch && ch <= '7') {
duke@1 311 oct = oct * 8 + digit(8);
duke@1 312 scanChar();
duke@1 313 }
duke@1 314 }
duke@1 315 putChar((char)oct);
duke@1 316 break;
duke@1 317 case 'b':
duke@1 318 putChar('\b'); scanChar(); break;
duke@1 319 case 't':
duke@1 320 putChar('\t'); scanChar(); break;
duke@1 321 case 'n':
duke@1 322 putChar('\n'); scanChar(); break;
duke@1 323 case 'f':
duke@1 324 putChar('\f'); scanChar(); break;
duke@1 325 case 'r':
duke@1 326 putChar('\r'); scanChar(); break;
duke@1 327 case '\'':
duke@1 328 putChar('\''); scanChar(); break;
duke@1 329 case '\"':
duke@1 330 putChar('\"'); scanChar(); break;
duke@1 331 case '\\':
duke@1 332 putChar('\\'); scanChar(); break;
duke@1 333 default:
duke@1 334 lexError(bp, "illegal.esc.char");
duke@1 335 }
duke@1 336 }
duke@1 337 } else if (bp != buflen) {
duke@1 338 putChar(ch); scanChar();
duke@1 339 }
duke@1 340 }
duke@1 341
jjg@409 342 private void scanDigits(int digitRadix) {
jjg@409 343 char saveCh;
jjg@409 344 int savePos;
jjg@409 345 do {
jjg@409 346 if (ch != '_') {
jjg@409 347 putChar(ch);
jjg@409 348 } else {
jjg@409 349 if (!allowUnderscoresInLiterals) {
jjg@597 350 lexError("unsupported.underscore.lit", source.name);
jjg@409 351 allowUnderscoresInLiterals = true;
jjg@409 352 }
jjg@409 353 }
jjg@409 354 saveCh = ch;
jjg@409 355 savePos = bp;
jjg@409 356 scanChar();
jjg@409 357 } while (digit(digitRadix) >= 0 || ch == '_');
jjg@409 358 if (saveCh == '_')
jjg@409 359 lexError(savePos, "illegal.underscore");
jjg@409 360 }
jjg@409 361
duke@1 362 /** Read binary exponent and optional 'f' or 'd' suffix of hexadecimal floating point number.
duke@1 363 */
duke@1 364 private void scanHexExponentAndSuffix() {
duke@1 365 if (ch == 'p' || ch == 'P') {
duke@1 366 putChar(ch);
duke@1 367 scanChar();
jjg@409 368 skipIllegalUnderscores();
duke@1 369 if (ch == '+' || ch == '-') {
duke@1 370 putChar(ch);
duke@1 371 scanChar();
duke@1 372 }
jjg@409 373 skipIllegalUnderscores();
duke@1 374 if ('0' <= ch && ch <= '9') {
jjg@409 375 scanDigits(10);
duke@1 376 if (!allowHexFloats) {
jjg@409 377 lexError("unsupported.fp.lit", source.name);
duke@1 378 allowHexFloats = true;
duke@1 379 }
duke@1 380 else if (!hexFloatsWork)
duke@1 381 lexError("unsupported.cross.fp.lit");
duke@1 382 } else
duke@1 383 lexError("malformed.fp.lit");
duke@1 384 } else {
duke@1 385 lexError("malformed.fp.lit");
duke@1 386 }
duke@1 387 if (ch == 'f' || ch == 'F') {
duke@1 388 putChar(ch);
duke@1 389 scanChar();
duke@1 390 token = FLOATLITERAL;
duke@1 391 } else {
duke@1 392 if (ch == 'd' || ch == 'D') {
duke@1 393 putChar(ch);
duke@1 394 scanChar();
duke@1 395 }
duke@1 396 token = DOUBLELITERAL;
duke@1 397 }
duke@1 398 }
duke@1 399
duke@1 400 /** Read fractional part of floating point number.
duke@1 401 */
duke@1 402 private void scanFraction() {
jjg@409 403 skipIllegalUnderscores();
jjg@409 404 if ('0' <= ch && ch <= '9') {
jjg@409 405 scanDigits(10);
duke@1 406 }
duke@1 407 int sp1 = sp;
duke@1 408 if (ch == 'e' || ch == 'E') {
duke@1 409 putChar(ch);
duke@1 410 scanChar();
jjg@409 411 skipIllegalUnderscores();
duke@1 412 if (ch == '+' || ch == '-') {
duke@1 413 putChar(ch);
duke@1 414 scanChar();
duke@1 415 }
jjg@409 416 skipIllegalUnderscores();
duke@1 417 if ('0' <= ch && ch <= '9') {
jjg@409 418 scanDigits(10);
duke@1 419 return;
duke@1 420 }
duke@1 421 lexError("malformed.fp.lit");
duke@1 422 sp = sp1;
duke@1 423 }
duke@1 424 }
duke@1 425
duke@1 426 /** Read fractional part and 'd' or 'f' suffix of floating point number.
duke@1 427 */
duke@1 428 private void scanFractionAndSuffix() {
duke@1 429 this.radix = 10;
duke@1 430 scanFraction();
duke@1 431 if (ch == 'f' || ch == 'F') {
duke@1 432 putChar(ch);
duke@1 433 scanChar();
duke@1 434 token = FLOATLITERAL;
duke@1 435 } else {
duke@1 436 if (ch == 'd' || ch == 'D') {
duke@1 437 putChar(ch);
duke@1 438 scanChar();
duke@1 439 }
duke@1 440 token = DOUBLELITERAL;
duke@1 441 }
duke@1 442 }
duke@1 443
duke@1 444 /** Read fractional part, exponent and 'd' or 'f' suffix of hexadecimal floating point number.
duke@1 445 */
duke@1 446 private void scanHexFractionAndSuffix(boolean seendigit) {
duke@1 447 this.radix = 16;
duke@1 448 assert ch == '.';
duke@1 449 putChar(ch);
duke@1 450 scanChar();
jjg@409 451 skipIllegalUnderscores();
jjg@409 452 if (digit(16) >= 0) {
duke@1 453 seendigit = true;
jjg@409 454 scanDigits(16);
duke@1 455 }
duke@1 456 if (!seendigit)
duke@1 457 lexError("invalid.hex.number");
duke@1 458 else
duke@1 459 scanHexExponentAndSuffix();
duke@1 460 }
duke@1 461
jjg@409 462 private void skipIllegalUnderscores() {
jjg@409 463 if (ch == '_') {
jjg@409 464 lexError(bp, "illegal.underscore");
jjg@409 465 while (ch == '_')
jjg@409 466 scanChar();
jjg@409 467 }
jjg@409 468 }
jjg@409 469
duke@1 470 /** Read a number.
jjg@409 471 * @param radix The radix of the number; one of 2, 8, 10, 16.
duke@1 472 */
duke@1 473 private void scanNumber(int radix) {
duke@1 474 this.radix = radix;
duke@1 475 // for octal, allow base-10 digit in case it's a float literal
jjg@409 476 int digitRadix = (radix == 8 ? 10 : radix);
duke@1 477 boolean seendigit = false;
jjg@409 478 if (digit(digitRadix) >= 0) {
duke@1 479 seendigit = true;
jjg@409 480 scanDigits(digitRadix);
duke@1 481 }
duke@1 482 if (radix == 16 && ch == '.') {
duke@1 483 scanHexFractionAndSuffix(seendigit);
duke@1 484 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
duke@1 485 scanHexExponentAndSuffix();
jjg@409 486 } else if (digitRadix == 10 && ch == '.') {
duke@1 487 putChar(ch);
duke@1 488 scanChar();
duke@1 489 scanFractionAndSuffix();
jjg@409 490 } else if (digitRadix == 10 &&
duke@1 491 (ch == 'e' || ch == 'E' ||
duke@1 492 ch == 'f' || ch == 'F' ||
duke@1 493 ch == 'd' || ch == 'D')) {
duke@1 494 scanFractionAndSuffix();
duke@1 495 } else {
duke@1 496 if (ch == 'l' || ch == 'L') {
duke@1 497 scanChar();
duke@1 498 token = LONGLITERAL;
duke@1 499 } else {
duke@1 500 token = INTLITERAL;
duke@1 501 }
duke@1 502 }
duke@1 503 }
duke@1 504
duke@1 505 /** Read an identifier.
duke@1 506 */
duke@1 507 private void scanIdent() {
duke@1 508 boolean isJavaIdentifierPart;
duke@1 509 char high;
duke@1 510 do {
duke@1 511 if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
duke@1 512 // optimization, was: putChar(ch);
duke@1 513
duke@1 514 scanChar();
duke@1 515 switch (ch) {
duke@1 516 case 'A': case 'B': case 'C': case 'D': case 'E':
duke@1 517 case 'F': case 'G': case 'H': case 'I': case 'J':
duke@1 518 case 'K': case 'L': case 'M': case 'N': case 'O':
duke@1 519 case 'P': case 'Q': case 'R': case 'S': case 'T':
duke@1 520 case 'U': case 'V': case 'W': case 'X': case 'Y':
duke@1 521 case 'Z':
duke@1 522 case 'a': case 'b': case 'c': case 'd': case 'e':
duke@1 523 case 'f': case 'g': case 'h': case 'i': case 'j':
duke@1 524 case 'k': case 'l': case 'm': case 'n': case 'o':
duke@1 525 case 'p': case 'q': case 'r': case 's': case 't':
duke@1 526 case 'u': case 'v': case 'w': case 'x': case 'y':
duke@1 527 case 'z':
duke@1 528 case '$': case '_':
duke@1 529 case '0': case '1': case '2': case '3': case '4':
duke@1 530 case '5': case '6': case '7': case '8': case '9':
duke@1 531 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
duke@1 532 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
duke@1 533 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
duke@1 534 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
duke@1 535 case '\u0015': case '\u0016': case '\u0017':
duke@1 536 case '\u0018': case '\u0019': case '\u001B':
duke@1 537 case '\u007F':
duke@1 538 break;
duke@1 539 case '\u001A': // EOI is also a legal identifier part
duke@1 540 if (bp >= buflen) {
duke@1 541 name = names.fromChars(sbuf, 0, sp);
duke@1 542 token = keywords.key(name);
duke@1 543 return;
duke@1 544 }
duke@1 545 break;
duke@1 546 default:
duke@1 547 if (ch < '\u0080') {
duke@1 548 // all ASCII range chars already handled, above
duke@1 549 isJavaIdentifierPart = false;
duke@1 550 } else {
duke@1 551 high = scanSurrogates();
duke@1 552 if (high != 0) {
duke@1 553 if (sp == sbuf.length) {
duke@1 554 putChar(high);
duke@1 555 } else {
duke@1 556 sbuf[sp++] = high;
duke@1 557 }
duke@1 558 isJavaIdentifierPart = Character.isJavaIdentifierPart(
duke@1 559 Character.toCodePoint(high, ch));
duke@1 560 } else {
duke@1 561 isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
duke@1 562 }
duke@1 563 }
duke@1 564 if (!isJavaIdentifierPart) {
duke@1 565 name = names.fromChars(sbuf, 0, sp);
duke@1 566 token = keywords.key(name);
duke@1 567 return;
duke@1 568 }
duke@1 569 }
duke@1 570 } while (true);
duke@1 571 }
duke@1 572
duke@1 573 /** Are surrogates supported?
duke@1 574 */
duke@1 575 final static boolean surrogatesSupported = surrogatesSupported();
duke@1 576 private static boolean surrogatesSupported() {
duke@1 577 try {
duke@1 578 Character.isHighSurrogate('a');
duke@1 579 return true;
duke@1 580 } catch (NoSuchMethodError ex) {
duke@1 581 return false;
duke@1 582 }
duke@1 583 }
duke@1 584
duke@1 585 /** Scan surrogate pairs. If 'ch' is a high surrogate and
duke@1 586 * the next character is a low surrogate, then put the low
duke@1 587 * surrogate in 'ch', and return the high surrogate.
duke@1 588 * otherwise, just return 0.
duke@1 589 */
duke@1 590 private char scanSurrogates() {
duke@1 591 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
duke@1 592 char high = ch;
duke@1 593
duke@1 594 scanChar();
duke@1 595
duke@1 596 if (Character.isLowSurrogate(ch)) {
duke@1 597 return high;
duke@1 598 }
duke@1 599
duke@1 600 ch = high;
duke@1 601 }
duke@1 602
duke@1 603 return 0;
duke@1 604 }
duke@1 605
duke@1 606 /** Return true if ch can be part of an operator.
duke@1 607 */
duke@1 608 private boolean isSpecial(char ch) {
duke@1 609 switch (ch) {
duke@1 610 case '!': case '%': case '&': case '*': case '?':
duke@1 611 case '+': case '-': case ':': case '<': case '=':
duke@1 612 case '>': case '^': case '|': case '~':
duke@1 613 case '@':
duke@1 614 return true;
duke@1 615 default:
duke@1 616 return false;
duke@1 617 }
duke@1 618 }
duke@1 619
duke@1 620 /** Read longest possible sequence of special characters and convert
duke@1 621 * to token.
duke@1 622 */
duke@1 623 private void scanOperator() {
duke@1 624 while (true) {
duke@1 625 putChar(ch);
duke@1 626 Name newname = names.fromChars(sbuf, 0, sp);
duke@1 627 if (keywords.key(newname) == IDENTIFIER) {
duke@1 628 sp--;
duke@1 629 break;
duke@1 630 }
duke@1 631 name = newname;
duke@1 632 token = keywords.key(newname);
duke@1 633 scanChar();
duke@1 634 if (!isSpecial(ch)) break;
duke@1 635 }
duke@1 636 }
duke@1 637
duke@1 638 /**
duke@1 639 * Scan a documention comment; determine if a deprecated tag is present.
duke@1 640 * Called once the initial /, * have been skipped, positioned at the second *
duke@1 641 * (which is treated as the beginning of the first line).
duke@1 642 * Stops positioned at the closing '/'.
duke@1 643 */
duke@1 644 @SuppressWarnings("fallthrough")
duke@1 645 private void scanDocComment() {
duke@1 646 boolean deprecatedPrefix = false;
duke@1 647
duke@1 648 forEachLine:
duke@1 649 while (bp < buflen) {
duke@1 650
duke@1 651 // Skip optional WhiteSpace at beginning of line
duke@1 652 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
duke@1 653 scanCommentChar();
duke@1 654 }
duke@1 655
duke@1 656 // Skip optional consecutive Stars
duke@1 657 while (bp < buflen && ch == '*') {
duke@1 658 scanCommentChar();
duke@1 659 if (ch == '/') {
duke@1 660 return;
duke@1 661 }
duke@1 662 }
duke@1 663
duke@1 664 // Skip optional WhiteSpace after Stars
duke@1 665 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
duke@1 666 scanCommentChar();
duke@1 667 }
duke@1 668
duke@1 669 deprecatedPrefix = false;
duke@1 670 // At beginning of line in the JavaDoc sense.
duke@1 671 if (bp < buflen && ch == '@' && !deprecatedFlag) {
duke@1 672 scanCommentChar();
duke@1 673 if (bp < buflen && ch == 'd') {
duke@1 674 scanCommentChar();
duke@1 675 if (bp < buflen && ch == 'e') {
duke@1 676 scanCommentChar();
duke@1 677 if (bp < buflen && ch == 'p') {
duke@1 678 scanCommentChar();
duke@1 679 if (bp < buflen && ch == 'r') {
duke@1 680 scanCommentChar();
duke@1 681 if (bp < buflen && ch == 'e') {
duke@1 682 scanCommentChar();
duke@1 683 if (bp < buflen && ch == 'c') {
duke@1 684 scanCommentChar();
duke@1 685 if (bp < buflen && ch == 'a') {
duke@1 686 scanCommentChar();
duke@1 687 if (bp < buflen && ch == 't') {
duke@1 688 scanCommentChar();
duke@1 689 if (bp < buflen && ch == 'e') {
duke@1 690 scanCommentChar();
duke@1 691 if (bp < buflen && ch == 'd') {
duke@1 692 deprecatedPrefix = true;
duke@1 693 scanCommentChar();
duke@1 694 }}}}}}}}}}}
duke@1 695 if (deprecatedPrefix && bp < buflen) {
duke@1 696 if (Character.isWhitespace(ch)) {
duke@1 697 deprecatedFlag = true;
duke@1 698 } else if (ch == '*') {
duke@1 699 scanCommentChar();
duke@1 700 if (ch == '/') {
duke@1 701 deprecatedFlag = true;
duke@1 702 return;
duke@1 703 }
duke@1 704 }
duke@1 705 }
duke@1 706
duke@1 707 // Skip rest of line
duke@1 708 while (bp < buflen) {
duke@1 709 switch (ch) {
duke@1 710 case '*':
duke@1 711 scanCommentChar();
duke@1 712 if (ch == '/') {
duke@1 713 return;
duke@1 714 }
duke@1 715 break;
duke@1 716 case CR: // (Spec 3.4)
duke@1 717 scanCommentChar();
duke@1 718 if (ch != LF) {
duke@1 719 continue forEachLine;
duke@1 720 }
duke@1 721 /* fall through to LF case */
duke@1 722 case LF: // (Spec 3.4)
duke@1 723 scanCommentChar();
duke@1 724 continue forEachLine;
duke@1 725 default:
duke@1 726 scanCommentChar();
duke@1 727 }
duke@1 728 } // rest of line
duke@1 729 } // forEachLine
duke@1 730 return;
duke@1 731 }
duke@1 732
duke@1 733 /** The value of a literal token, recorded as a string.
duke@1 734 * For integers, leading 0x and 'l' suffixes are suppressed.
duke@1 735 */
duke@1 736 public String stringVal() {
duke@1 737 return new String(sbuf, 0, sp);
duke@1 738 }
duke@1 739
duke@1 740 /** Read token.
duke@1 741 */
duke@1 742 public void nextToken() {
duke@1 743
duke@1 744 try {
duke@1 745 prevEndPos = endPos;
duke@1 746 sp = 0;
duke@1 747
duke@1 748 while (true) {
duke@1 749 pos = bp;
duke@1 750 switch (ch) {
duke@1 751 case ' ': // (Spec 3.6)
duke@1 752 case '\t': // (Spec 3.6)
duke@1 753 case FF: // (Spec 3.6)
duke@1 754 do {
duke@1 755 scanChar();
duke@1 756 } while (ch == ' ' || ch == '\t' || ch == FF);
duke@1 757 endPos = bp;
duke@1 758 processWhiteSpace();
duke@1 759 break;
duke@1 760 case LF: // (Spec 3.4)
duke@1 761 scanChar();
duke@1 762 endPos = bp;
duke@1 763 processLineTerminator();
duke@1 764 break;
duke@1 765 case CR: // (Spec 3.4)
duke@1 766 scanChar();
duke@1 767 if (ch == LF) {
duke@1 768 scanChar();
duke@1 769 }
duke@1 770 endPos = bp;
duke@1 771 processLineTerminator();
duke@1 772 break;
duke@1 773 case 'A': case 'B': case 'C': case 'D': case 'E':
duke@1 774 case 'F': case 'G': case 'H': case 'I': case 'J':
duke@1 775 case 'K': case 'L': case 'M': case 'N': case 'O':
duke@1 776 case 'P': case 'Q': case 'R': case 'S': case 'T':
duke@1 777 case 'U': case 'V': case 'W': case 'X': case 'Y':
duke@1 778 case 'Z':
duke@1 779 case 'a': case 'b': case 'c': case 'd': case 'e':
duke@1 780 case 'f': case 'g': case 'h': case 'i': case 'j':
duke@1 781 case 'k': case 'l': case 'm': case 'n': case 'o':
duke@1 782 case 'p': case 'q': case 'r': case 's': case 't':
duke@1 783 case 'u': case 'v': case 'w': case 'x': case 'y':
duke@1 784 case 'z':
duke@1 785 case '$': case '_':
duke@1 786 scanIdent();
duke@1 787 return;
duke@1 788 case '0':
duke@1 789 scanChar();
duke@1 790 if (ch == 'x' || ch == 'X') {
duke@1 791 scanChar();
jjg@409 792 skipIllegalUnderscores();
duke@1 793 if (ch == '.') {
duke@1 794 scanHexFractionAndSuffix(false);
duke@1 795 } else if (digit(16) < 0) {
duke@1 796 lexError("invalid.hex.number");
duke@1 797 } else {
duke@1 798 scanNumber(16);
duke@1 799 }
jjg@409 800 } else if (ch == 'b' || ch == 'B') {
jjg@409 801 if (!allowBinaryLiterals) {
jjg@409 802 lexError("unsupported.binary.lit", source.name);
jjg@409 803 allowBinaryLiterals = true;
jjg@409 804 }
jjg@409 805 scanChar();
jjg@409 806 skipIllegalUnderscores();
jjg@423 807 if (digit(2) < 0) {
jjg@423 808 lexError("invalid.binary.number");
jjg@423 809 } else {
jjg@423 810 scanNumber(2);
jjg@423 811 }
duke@1 812 } else {
duke@1 813 putChar('0');
jjg@409 814 if (ch == '_') {
jjg@409 815 int savePos = bp;
jjg@409 816 do {
jjg@409 817 scanChar();
jjg@409 818 } while (ch == '_');
jjg@409 819 if (digit(10) < 0) {
jjg@409 820 lexError(savePos, "illegal.underscore");
jjg@409 821 }
jjg@409 822 }
duke@1 823 scanNumber(8);
duke@1 824 }
duke@1 825 return;
duke@1 826 case '1': case '2': case '3': case '4':
duke@1 827 case '5': case '6': case '7': case '8': case '9':
duke@1 828 scanNumber(10);
duke@1 829 return;
duke@1 830 case '.':
duke@1 831 scanChar();
duke@1 832 if ('0' <= ch && ch <= '9') {
duke@1 833 putChar('.');
duke@1 834 scanFractionAndSuffix();
duke@1 835 } else if (ch == '.') {
duke@1 836 putChar('.'); putChar('.');
duke@1 837 scanChar();
duke@1 838 if (ch == '.') {
duke@1 839 scanChar();
duke@1 840 putChar('.');
duke@1 841 token = ELLIPSIS;
duke@1 842 } else {
duke@1 843 lexError("malformed.fp.lit");
duke@1 844 }
duke@1 845 } else {
duke@1 846 token = DOT;
duke@1 847 }
duke@1 848 return;
duke@1 849 case ',':
duke@1 850 scanChar(); token = COMMA; return;
duke@1 851 case ';':
duke@1 852 scanChar(); token = SEMI; return;
duke@1 853 case '(':
duke@1 854 scanChar(); token = LPAREN; return;
duke@1 855 case ')':
duke@1 856 scanChar(); token = RPAREN; return;
duke@1 857 case '[':
duke@1 858 scanChar(); token = LBRACKET; return;
duke@1 859 case ']':
duke@1 860 scanChar(); token = RBRACKET; return;
duke@1 861 case '{':
duke@1 862 scanChar(); token = LBRACE; return;
duke@1 863 case '}':
duke@1 864 scanChar(); token = RBRACE; return;
duke@1 865 case '/':
duke@1 866 scanChar();
duke@1 867 if (ch == '/') {
duke@1 868 do {
duke@1 869 scanCommentChar();
duke@1 870 } while (ch != CR && ch != LF && bp < buflen);
duke@1 871 if (bp < buflen) {
duke@1 872 endPos = bp;
duke@1 873 processComment(CommentStyle.LINE);
duke@1 874 }
duke@1 875 break;
duke@1 876 } else if (ch == '*') {
duke@1 877 scanChar();
duke@1 878 CommentStyle style;
duke@1 879 if (ch == '*') {
duke@1 880 style = CommentStyle.JAVADOC;
duke@1 881 scanDocComment();
duke@1 882 } else {
duke@1 883 style = CommentStyle.BLOCK;
duke@1 884 while (bp < buflen) {
duke@1 885 if (ch == '*') {
duke@1 886 scanChar();
duke@1 887 if (ch == '/') break;
duke@1 888 } else {
duke@1 889 scanCommentChar();
duke@1 890 }
duke@1 891 }
duke@1 892 }
duke@1 893 if (ch == '/') {
duke@1 894 scanChar();
duke@1 895 endPos = bp;
duke@1 896 processComment(style);
duke@1 897 break;
duke@1 898 } else {
duke@1 899 lexError("unclosed.comment");
duke@1 900 return;
duke@1 901 }
duke@1 902 } else if (ch == '=') {
duke@1 903 name = names.slashequals;
duke@1 904 token = SLASHEQ;
duke@1 905 scanChar();
duke@1 906 } else {
duke@1 907 name = names.slash;
duke@1 908 token = SLASH;
duke@1 909 }
duke@1 910 return;
duke@1 911 case '\'':
duke@1 912 scanChar();
duke@1 913 if (ch == '\'') {
duke@1 914 lexError("empty.char.lit");
duke@1 915 } else {
duke@1 916 if (ch == CR || ch == LF)
duke@1 917 lexError(pos, "illegal.line.end.in.char.lit");
duke@1 918 scanLitChar();
duke@1 919 if (ch == '\'') {
duke@1 920 scanChar();
duke@1 921 token = CHARLITERAL;
duke@1 922 } else {
duke@1 923 lexError(pos, "unclosed.char.lit");
duke@1 924 }
duke@1 925 }
duke@1 926 return;
duke@1 927 case '\"':
duke@1 928 scanChar();
duke@1 929 while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
duke@1 930 scanLitChar();
duke@1 931 if (ch == '\"') {
duke@1 932 token = STRINGLITERAL;
duke@1 933 scanChar();
duke@1 934 } else {
duke@1 935 lexError(pos, "unclosed.str.lit");
duke@1 936 }
duke@1 937 return;
duke@1 938 default:
duke@1 939 if (isSpecial(ch)) {
duke@1 940 scanOperator();
duke@1 941 } else {
duke@1 942 boolean isJavaIdentifierStart;
duke@1 943 if (ch < '\u0080') {
duke@1 944 // all ASCII range chars already handled, above
duke@1 945 isJavaIdentifierStart = false;
duke@1 946 } else {
duke@1 947 char high = scanSurrogates();
duke@1 948 if (high != 0) {
duke@1 949 if (sp == sbuf.length) {
duke@1 950 putChar(high);
duke@1 951 } else {
duke@1 952 sbuf[sp++] = high;
duke@1 953 }
duke@1 954
duke@1 955 isJavaIdentifierStart = Character.isJavaIdentifierStart(
duke@1 956 Character.toCodePoint(high, ch));
duke@1 957 } else {
duke@1 958 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
duke@1 959 }
duke@1 960 }
duke@1 961 if (isJavaIdentifierStart) {
duke@1 962 scanIdent();
duke@1 963 } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
duke@1 964 token = EOF;
duke@1 965 pos = bp = eofPos;
duke@1 966 } else {
duke@1 967 lexError("illegal.char", String.valueOf((int)ch));
duke@1 968 scanChar();
duke@1 969 }
duke@1 970 }
duke@1 971 return;
duke@1 972 }
duke@1 973 }
duke@1 974 } finally {
duke@1 975 endPos = bp;
duke@1 976 if (scannerDebug)
duke@1 977 System.out.println("nextToken(" + pos
duke@1 978 + "," + endPos + ")=|" +
duke@1 979 new String(getRawCharacters(pos, endPos))
duke@1 980 + "|");
duke@1 981 }
duke@1 982 }
duke@1 983
duke@1 984 /** Return the current token, set by nextToken().
duke@1 985 */
duke@1 986 public Token token() {
duke@1 987 return token;
duke@1 988 }
duke@1 989
duke@1 990 /** Sets the current token.
duke@1 991 */
duke@1 992 public void token(Token token) {
duke@1 993 this.token = token;
duke@1 994 }
duke@1 995
duke@1 996 /** Return the current token's position: a 0-based
duke@1 997 * offset from beginning of the raw input stream
duke@1 998 * (before unicode translation)
duke@1 999 */
duke@1 1000 public int pos() {
duke@1 1001 return pos;
duke@1 1002 }
duke@1 1003
duke@1 1004 /** Return the last character position of the current token.
duke@1 1005 */
duke@1 1006 public int endPos() {
duke@1 1007 return endPos;
duke@1 1008 }
duke@1 1009
duke@1 1010 /** Return the last character position of the previous token.
duke@1 1011 */
duke@1 1012 public int prevEndPos() {
duke@1 1013 return prevEndPos;
duke@1 1014 }
duke@1 1015
duke@1 1016 /** Return the position where a lexical error occurred;
duke@1 1017 */
duke@1 1018 public int errPos() {
duke@1 1019 return errPos;
duke@1 1020 }
duke@1 1021
duke@1 1022 /** Set the position where a lexical error occurred;
duke@1 1023 */
duke@1 1024 public void errPos(int pos) {
duke@1 1025 errPos = pos;
duke@1 1026 }
duke@1 1027
duke@1 1028 /** Return the name of an identifier or token for the current token.
duke@1 1029 */
duke@1 1030 public Name name() {
duke@1 1031 return name;
duke@1 1032 }
duke@1 1033
duke@1 1034 /** Return the radix of a numeric literal token.
duke@1 1035 */
duke@1 1036 public int radix() {
duke@1 1037 return radix;
duke@1 1038 }
duke@1 1039
duke@1 1040 /** Has a @deprecated been encountered in last doc comment?
duke@1 1041 * This needs to be reset by client with resetDeprecatedFlag.
duke@1 1042 */
duke@1 1043 public boolean deprecatedFlag() {
duke@1 1044 return deprecatedFlag;
duke@1 1045 }
duke@1 1046
duke@1 1047 public void resetDeprecatedFlag() {
duke@1 1048 deprecatedFlag = false;
duke@1 1049 }
duke@1 1050
duke@1 1051 /**
duke@1 1052 * Returns the documentation string of the current token.
duke@1 1053 */
duke@1 1054 public String docComment() {
duke@1 1055 return null;
duke@1 1056 }
duke@1 1057
duke@1 1058 /**
duke@1 1059 * Returns a copy of the input buffer, up to its inputLength.
duke@1 1060 * Unicode escape sequences are not translated.
duke@1 1061 */
duke@1 1062 public char[] getRawCharacters() {
duke@1 1063 char[] chars = new char[buflen];
duke@1 1064 System.arraycopy(buf, 0, chars, 0, buflen);
duke@1 1065 return chars;
duke@1 1066 }
duke@1 1067
duke@1 1068 /**
duke@1 1069 * Returns a copy of a character array subset of the input buffer.
duke@1 1070 * The returned array begins at the <code>beginIndex</code> and
duke@1 1071 * extends to the character at index <code>endIndex - 1</code>.
duke@1 1072 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
duke@1 1073 * This behavior is like
duke@1 1074 * <code>String.substring(beginIndex, endIndex)</code>.
duke@1 1075 * Unicode escape sequences are not translated.
duke@1 1076 *
duke@1 1077 * @param beginIndex the beginning index, inclusive.
duke@1 1078 * @param endIndex the ending index, exclusive.
duke@1 1079 * @throws IndexOutOfBounds if either offset is outside of the
duke@1 1080 * array bounds
duke@1 1081 */
duke@1 1082 public char[] getRawCharacters(int beginIndex, int endIndex) {
duke@1 1083 int length = endIndex - beginIndex;
duke@1 1084 char[] chars = new char[length];
duke@1 1085 System.arraycopy(buf, beginIndex, chars, 0, length);
duke@1 1086 return chars;
duke@1 1087 }
duke@1 1088
/**
 * The kinds of comment that can be reported to processComment.
 */
public enum CommentStyle {
    /** Line comment style. */
    LINE,

    /** Block comment style. */
    BLOCK,

    /** Javadoc comment style. */
    JAVADOC
}
duke@1 1094
duke@1 1095 /**
duke@1 1096 * Called when a complete comment has been scanned. pos and endPos
duke@1 1097 * will mark the comment boundary.
duke@1 1098 */
duke@1 1099 protected void processComment(CommentStyle style) {
duke@1 1100 if (scannerDebug)
duke@1 1101 System.out.println("processComment(" + pos
duke@1 1102 + "," + endPos + "," + style + ")=|"
duke@1 1103 + new String(getRawCharacters(pos, endPos))
duke@1 1104 + "|");
duke@1 1105 }
duke@1 1106
duke@1 1107 /**
duke@1 1108 * Called when a complete whitespace run has been scanned. pos and endPos
duke@1 1109 * will mark the whitespace boundary.
duke@1 1110 */
duke@1 1111 protected void processWhiteSpace() {
duke@1 1112 if (scannerDebug)
duke@1 1113 System.out.println("processWhitespace(" + pos
duke@1 1114 + "," + endPos + ")=|" +
duke@1 1115 new String(getRawCharacters(pos, endPos))
duke@1 1116 + "|");
duke@1 1117 }
duke@1 1118
duke@1 1119 /**
duke@1 1120 * Called when a line terminator has been processed.
duke@1 1121 */
duke@1 1122 protected void processLineTerminator() {
duke@1 1123 if (scannerDebug)
duke@1 1124 System.out.println("processTerminator(" + pos
duke@1 1125 + "," + endPos + ")=|" +
duke@1 1126 new String(getRawCharacters(pos, endPos))
duke@1 1127 + "|");
duke@1 1128 }
duke@1 1129
duke@1 1130 /** Build a map for translating between line numbers and
duke@1 1131 * positions in the input.
duke@1 1132 *
duke@1 1133 * @return a LineMap */
duke@1 1134 public Position.LineMap getLineMap() {
duke@1 1135 return Position.makeLineMap(buf, buflen, false);
duke@1 1136 }
duke@1 1137
duke@1 1138 }

mercurial