src/share/classes/com/sun/tools/javac/parser/Scanner.java

Thu, 09 Oct 2008 16:19:13 +0100

author
mcimadamore
date
Thu, 09 Oct 2008 16:19:13 +0100
changeset 137
e4eaddca54b7
parent 113
eff38cc97183
child 267
e2722bd43f3a
permissions
-rw-r--r--

6731573: diagnostic output should optionally include source line
Summary: Added an -XD option to optionally prints out source lines in error messages
Reviewed-by: jjg

duke@1 1 /*
xdono@54 2 * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved.
duke@1 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@1 4 *
duke@1 5 * This code is free software; you can redistribute it and/or modify it
duke@1 6 * under the terms of the GNU General Public License version 2 only, as
duke@1 7 * published by the Free Software Foundation. Sun designates this
duke@1 8 * particular file as subject to the "Classpath" exception as provided
duke@1 9 * by Sun in the LICENSE file that accompanied this code.
duke@1 10 *
duke@1 11 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@1 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@1 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@1 14 * version 2 for more details (a copy is included in the LICENSE file that
duke@1 15 * accompanied this code).
duke@1 16 *
duke@1 17 * You should have received a copy of the GNU General Public License version
duke@1 18 * 2 along with this work; if not, write to the Free Software Foundation,
duke@1 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@1 20 *
duke@1 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
duke@1 22 * CA 95054 USA or visit www.sun.com if you need additional information or
duke@1 23 * have any questions.
duke@1 24 */
duke@1 25
duke@1 26 package com.sun.tools.javac.parser;
duke@1 27
duke@1 28 import java.nio.*;
duke@1 29
jjg@50 30 import com.sun.tools.javac.code.Source;
jjg@50 31 import com.sun.tools.javac.file.JavacFileManager;
duke@1 32 import com.sun.tools.javac.util.*;
duke@1 33
duke@1 34
duke@1 35 import static com.sun.tools.javac.parser.Token.*;
duke@1 36 import static com.sun.tools.javac.util.LayoutCharacters.*;
duke@1 37
duke@1 38 /** The lexical analyzer maps an input stream consisting of
duke@1 39 * ASCII characters and Unicode escapes into a token sequence.
duke@1 40 *
duke@1 41 * <p><b>This is NOT part of any API supported by Sun Microsystems. If
duke@1 42 * you write code that depends on this, you do so at your own risk.
duke@1 43 * This code and its internal interfaces are subject to change or
duke@1 44 * deletion without notice.</b>
duke@1 45 */
duke@1 46 public class Scanner implements Lexer {
duke@1 47
duke@1 48 private static boolean scannerDebug = false;
duke@1 49
duke@1 50 /** A factory for creating scanners. */
duke@1 51 public static class Factory {
duke@1 52 /** The context key for the scanner factory. */
duke@1 53 public static final Context.Key<Scanner.Factory> scannerFactoryKey =
duke@1 54 new Context.Key<Scanner.Factory>();
duke@1 55
duke@1 56 /** Get the Factory instance for this context. */
duke@1 57 public static Factory instance(Context context) {
duke@1 58 Factory instance = context.get(scannerFactoryKey);
duke@1 59 if (instance == null)
duke@1 60 instance = new Factory(context);
duke@1 61 return instance;
duke@1 62 }
duke@1 63
duke@1 64 final Log log;
jjg@113 65 final Names names;
duke@1 66 final Source source;
duke@1 67 final Keywords keywords;
duke@1 68
duke@1 69 /** Create a new scanner factory. */
duke@1 70 protected Factory(Context context) {
duke@1 71 context.put(scannerFactoryKey, this);
duke@1 72 this.log = Log.instance(context);
jjg@113 73 this.names = Names.instance(context);
duke@1 74 this.source = Source.instance(context);
duke@1 75 this.keywords = Keywords.instance(context);
duke@1 76 }
duke@1 77
duke@1 78 public Scanner newScanner(CharSequence input) {
duke@1 79 if (input instanceof CharBuffer) {
duke@1 80 return new Scanner(this, (CharBuffer)input);
duke@1 81 } else {
duke@1 82 char[] array = input.toString().toCharArray();
duke@1 83 return newScanner(array, array.length);
duke@1 84 }
duke@1 85 }
duke@1 86
duke@1 87 public Scanner newScanner(char[] input, int inputLength) {
duke@1 88 return new Scanner(this, input, inputLength);
duke@1 89 }
duke@1 90 }
duke@1 91
duke@1 92 /* Output variables; set by nextToken():
duke@1 93 */
duke@1 94
duke@1 95 /** The token, set by nextToken().
duke@1 96 */
duke@1 97 private Token token;
duke@1 98
duke@1 99 /** Allow hex floating-point literals.
duke@1 100 */
duke@1 101 private boolean allowHexFloats;
duke@1 102
duke@1 103 /** The token's position, 0-based offset from beginning of text.
duke@1 104 */
duke@1 105 private int pos;
duke@1 106
duke@1 107 /** Character position just after the last character of the token.
duke@1 108 */
duke@1 109 private int endPos;
duke@1 110
duke@1 111 /** The last character position of the previous token.
duke@1 112 */
duke@1 113 private int prevEndPos;
duke@1 114
duke@1 115 /** The position where a lexical error occurred;
duke@1 116 */
duke@1 117 private int errPos = Position.NOPOS;
duke@1 118
duke@1 119 /** The name of an identifier or token:
duke@1 120 */
duke@1 121 private Name name;
duke@1 122
duke@1 123 /** The radix of a numeric literal token.
duke@1 124 */
duke@1 125 private int radix;
duke@1 126
duke@1 127 /** Has a @deprecated been encountered in last doc comment?
duke@1 128 * this needs to be reset by client.
duke@1 129 */
duke@1 130 protected boolean deprecatedFlag = false;
duke@1 131
duke@1 132 /** A character buffer for literals.
duke@1 133 */
duke@1 134 private char[] sbuf = new char[128];
duke@1 135 private int sp;
duke@1 136
duke@1 137 /** The input buffer, index of next character to be read,
duke@1 138 * index of one past last character in buffer.
duke@1 139 */
duke@1 140 private char[] buf;
duke@1 141 private int bp;
duke@1 142 private int buflen;
duke@1 143 private int eofPos;
duke@1 144
duke@1 145 /** The current character.
duke@1 146 */
duke@1 147 private char ch;
duke@1 148
duke@1 149 /** The buffer index of the last converted unicode character
duke@1 150 */
duke@1 151 private int unicodeConversionBp = -1;
duke@1 152
duke@1 153 /** The log to be used for error reporting.
duke@1 154 */
duke@1 155 private final Log log;
duke@1 156
duke@1 157 /** The name table. */
jjg@113 158 private final Names names;
duke@1 159
duke@1 160 /** The keyword table. */
duke@1 161 private final Keywords keywords;
duke@1 162
/** Initialization shared by all constructors: copy the log, name table
 *  and keyword table from the factory, and ask the factory's source
 *  level whether hex floating-point literals are allowed.
 */
private Scanner(Factory fac) {
    log = fac.log;
    names = fac.names;
    keywords = fac.keywords;
    allowHexFloats = fac.source.allowHexFloats();
}
duke@1 170
duke@1 171 private static final boolean hexFloatsWork = hexFloatsWork();
duke@1 172 private static boolean hexFloatsWork() {
duke@1 173 try {
duke@1 174 Float.valueOf("0x1.0p1");
duke@1 175 return true;
duke@1 176 } catch (NumberFormatException ex) {
duke@1 177 return false;
duke@1 178 }
duke@1 179 }
duke@1 180
duke@1 181 /** Create a scanner from the input buffer. buffer must implement
duke@1 182 * array() and compact(), and remaining() must be less than limit().
 * The buffer is converted to a char array with JavacFileManager.toArray
 * and buffer.limit() is passed on as the input length to the
 * (Factory, char[], int) constructor.
 *
 * @param fac the factory which created this Scanner
 * @param buffer the characters to scan
duke@1 183 */
duke@1 184 protected Scanner(Factory fac, CharBuffer buffer) {
duke@1 185 this(fac, JavacFileManager.toArray(buffer), buffer.limit());
duke@1 186 }
duke@1 187
duke@1 188 /**
duke@1 189 * Create a scanner from the input array. This method might
duke@1 190 * modify the array. To avoid copying the input array, ensure
duke@1 191 * that {@code inputLength < input.length} or
duke@1 192 * {@code input[input.length -1]} is a white space character.
duke@1 193 *
duke@1 194 * @param fac the factory which created this Scanner
duke@1 195 * @param input the input, might be modified
duke@1 196 * @param inputLength the size of the input.
duke@1 197 * Must be positive and less than or equal to input.length.
duke@1 198 */
duke@1 199 protected Scanner(Factory fac, char[] input, int inputLength) {
duke@1 200 this(fac);
duke@1 201 eofPos = inputLength;
duke@1 202 if (inputLength == input.length) {
duke@1 203 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
duke@1 204 inputLength--;
duke@1 205 } else {
duke@1 206 char[] newInput = new char[inputLength + 1];
duke@1 207 System.arraycopy(input, 0, newInput, 0, input.length);
duke@1 208 input = newInput;
duke@1 209 }
duke@1 210 }
duke@1 211 buf = input;
duke@1 212 buflen = inputLength;
duke@1 213 buf[buflen] = EOI;
duke@1 214 bp = -1;
duke@1 215 scanChar();
duke@1 216 }
duke@1 217
duke@1 218 /** Report an error at the given position using the provided arguments.
duke@1 219 */
duke@1 220 private void lexError(int pos, String key, Object... args) {
duke@1 221 log.error(pos, key, args);
duke@1 222 token = ERROR;
duke@1 223 errPos = pos;
duke@1 224 }
duke@1 225
duke@1 226 /** Report an error at the current token position using the provided
duke@1 227 * arguments.
duke@1 228 */
duke@1 229 private void lexError(String key, Object... args) {
duke@1 230 lexError(pos, key, args);
duke@1 231 }
duke@1 232
duke@1 233 /** Convert an ASCII digit from its base (8, 10, or 16)
duke@1 234 * to its value.
duke@1 235 */
duke@1 236 private int digit(int base) {
duke@1 237 char c = ch;
duke@1 238 int result = Character.digit(c, base);
duke@1 239 if (result >= 0 && c > 0x7f) {
duke@1 240 lexError(pos+1, "illegal.nonascii.digit");
duke@1 241 ch = "0123456789abcdef".charAt(result);
duke@1 242 }
duke@1 243 return result;
duke@1 244 }
duke@1 245
duke@1 246 /** Convert unicode escape; bp points to initial '\' character
duke@1 247 * (Spec 3.3).
duke@1 248 */
duke@1 249 private void convertUnicode() {
duke@1 250 if (ch == '\\' && unicodeConversionBp != bp) {
duke@1 251 bp++; ch = buf[bp];
duke@1 252 if (ch == 'u') {
duke@1 253 do {
duke@1 254 bp++; ch = buf[bp];
duke@1 255 } while (ch == 'u');
duke@1 256 int limit = bp + 3;
duke@1 257 if (limit < buflen) {
duke@1 258 int d = digit(16);
duke@1 259 int code = d;
duke@1 260 while (bp < limit && d >= 0) {
duke@1 261 bp++; ch = buf[bp];
duke@1 262 d = digit(16);
duke@1 263 code = (code << 4) + d;
duke@1 264 }
duke@1 265 if (d >= 0) {
duke@1 266 ch = (char)code;
duke@1 267 unicodeConversionBp = bp;
duke@1 268 return;
duke@1 269 }
duke@1 270 }
duke@1 271 lexError(bp, "illegal.unicode.esc");
duke@1 272 } else {
duke@1 273 bp--;
duke@1 274 ch = '\\';
duke@1 275 }
duke@1 276 }
duke@1 277 }
duke@1 278
duke@1 279 /** Read next character.
duke@1 280 */
duke@1 281 private void scanChar() {
duke@1 282 ch = buf[++bp];
duke@1 283 if (ch == '\\') {
duke@1 284 convertUnicode();
duke@1 285 }
duke@1 286 }
duke@1 287
duke@1 288 /** Read next character in comment, skipping over double '\' characters.
duke@1 289 */
duke@1 290 private void scanCommentChar() {
duke@1 291 scanChar();
duke@1 292 if (ch == '\\') {
duke@1 293 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1 294 bp++;
duke@1 295 } else {
duke@1 296 convertUnicode();
duke@1 297 }
duke@1 298 }
duke@1 299 }
duke@1 300
duke@1 301 /** Append a character to sbuf.
duke@1 302 */
duke@1 303 private void putChar(char ch) {
duke@1 304 if (sp == sbuf.length) {
duke@1 305 char[] newsbuf = new char[sbuf.length * 2];
duke@1 306 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
duke@1 307 sbuf = newsbuf;
duke@1 308 }
duke@1 309 sbuf[sp++] = ch;
duke@1 310 }
duke@1 311
duke@1 312 /** For debugging purposes: print character.
duke@1 313 */
duke@1 314 private void dch() {
duke@1 315 System.err.print(ch); System.out.flush();
duke@1 316 }
duke@1 317
duke@1 318 /** Read next character in character or string literal and copy into sbuf.
duke@1 319 */
duke@1 320 private void scanLitChar() {
duke@1 321 if (ch == '\\') {
duke@1 322 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1 323 bp++;
duke@1 324 putChar('\\');
duke@1 325 scanChar();
duke@1 326 } else {
duke@1 327 scanChar();
duke@1 328 switch (ch) {
duke@1 329 case '0': case '1': case '2': case '3':
duke@1 330 case '4': case '5': case '6': case '7':
duke@1 331 char leadch = ch;
duke@1 332 int oct = digit(8);
duke@1 333 scanChar();
duke@1 334 if ('0' <= ch && ch <= '7') {
duke@1 335 oct = oct * 8 + digit(8);
duke@1 336 scanChar();
duke@1 337 if (leadch <= '3' && '0' <= ch && ch <= '7') {
duke@1 338 oct = oct * 8 + digit(8);
duke@1 339 scanChar();
duke@1 340 }
duke@1 341 }
duke@1 342 putChar((char)oct);
duke@1 343 break;
duke@1 344 case 'b':
duke@1 345 putChar('\b'); scanChar(); break;
duke@1 346 case 't':
duke@1 347 putChar('\t'); scanChar(); break;
duke@1 348 case 'n':
duke@1 349 putChar('\n'); scanChar(); break;
duke@1 350 case 'f':
duke@1 351 putChar('\f'); scanChar(); break;
duke@1 352 case 'r':
duke@1 353 putChar('\r'); scanChar(); break;
duke@1 354 case '\'':
duke@1 355 putChar('\''); scanChar(); break;
duke@1 356 case '\"':
duke@1 357 putChar('\"'); scanChar(); break;
duke@1 358 case '\\':
duke@1 359 putChar('\\'); scanChar(); break;
duke@1 360 default:
duke@1 361 lexError(bp, "illegal.esc.char");
duke@1 362 }
duke@1 363 }
duke@1 364 } else if (bp != buflen) {
duke@1 365 putChar(ch); scanChar();
duke@1 366 }
duke@1 367 }
duke@1 368
duke@1 369 /** Read fractional part of hexadecimal floating point number.
duke@1 370 */
duke@1 371 private void scanHexExponentAndSuffix() {
duke@1 372 if (ch == 'p' || ch == 'P') {
duke@1 373 putChar(ch);
duke@1 374 scanChar();
duke@1 375 if (ch == '+' || ch == '-') {
duke@1 376 putChar(ch);
duke@1 377 scanChar();
duke@1 378 }
duke@1 379 if ('0' <= ch && ch <= '9') {
duke@1 380 do {
duke@1 381 putChar(ch);
duke@1 382 scanChar();
duke@1 383 } while ('0' <= ch && ch <= '9');
duke@1 384 if (!allowHexFloats) {
duke@1 385 lexError("unsupported.fp.lit");
duke@1 386 allowHexFloats = true;
duke@1 387 }
duke@1 388 else if (!hexFloatsWork)
duke@1 389 lexError("unsupported.cross.fp.lit");
duke@1 390 } else
duke@1 391 lexError("malformed.fp.lit");
duke@1 392 } else {
duke@1 393 lexError("malformed.fp.lit");
duke@1 394 }
duke@1 395 if (ch == 'f' || ch == 'F') {
duke@1 396 putChar(ch);
duke@1 397 scanChar();
duke@1 398 token = FLOATLITERAL;
duke@1 399 } else {
duke@1 400 if (ch == 'd' || ch == 'D') {
duke@1 401 putChar(ch);
duke@1 402 scanChar();
duke@1 403 }
duke@1 404 token = DOUBLELITERAL;
duke@1 405 }
duke@1 406 }
duke@1 407
duke@1 408 /** Read fractional part of floating point number.
duke@1 409 */
duke@1 410 private void scanFraction() {
duke@1 411 while (digit(10) >= 0) {
duke@1 412 putChar(ch);
duke@1 413 scanChar();
duke@1 414 }
duke@1 415 int sp1 = sp;
duke@1 416 if (ch == 'e' || ch == 'E') {
duke@1 417 putChar(ch);
duke@1 418 scanChar();
duke@1 419 if (ch == '+' || ch == '-') {
duke@1 420 putChar(ch);
duke@1 421 scanChar();
duke@1 422 }
duke@1 423 if ('0' <= ch && ch <= '9') {
duke@1 424 do {
duke@1 425 putChar(ch);
duke@1 426 scanChar();
duke@1 427 } while ('0' <= ch && ch <= '9');
duke@1 428 return;
duke@1 429 }
duke@1 430 lexError("malformed.fp.lit");
duke@1 431 sp = sp1;
duke@1 432 }
duke@1 433 }
duke@1 434
duke@1 435 /** Read fractional part and 'd' or 'f' suffix of floating point number.
duke@1 436 */
duke@1 437 private void scanFractionAndSuffix() {
duke@1 438 this.radix = 10;
duke@1 439 scanFraction();
duke@1 440 if (ch == 'f' || ch == 'F') {
duke@1 441 putChar(ch);
duke@1 442 scanChar();
duke@1 443 token = FLOATLITERAL;
duke@1 444 } else {
duke@1 445 if (ch == 'd' || ch == 'D') {
duke@1 446 putChar(ch);
duke@1 447 scanChar();
duke@1 448 }
duke@1 449 token = DOUBLELITERAL;
duke@1 450 }
duke@1 451 }
duke@1 452
duke@1 453 /** Read fractional part and 'd' or 'f' suffix of floating point number.
duke@1 454 */
duke@1 455 private void scanHexFractionAndSuffix(boolean seendigit) {
duke@1 456 this.radix = 16;
duke@1 457 assert ch == '.';
duke@1 458 putChar(ch);
duke@1 459 scanChar();
duke@1 460 while (digit(16) >= 0) {
duke@1 461 seendigit = true;
duke@1 462 putChar(ch);
duke@1 463 scanChar();
duke@1 464 }
duke@1 465 if (!seendigit)
duke@1 466 lexError("invalid.hex.number");
duke@1 467 else
duke@1 468 scanHexExponentAndSuffix();
duke@1 469 }
duke@1 470
duke@1 471 /** Read a number.
duke@1 472 * @param radix The radix of the number; one of 8, 10, 16.
duke@1 473 */
duke@1 474 private void scanNumber(int radix) {
duke@1 475 this.radix = radix;
duke@1 476 // for octal, allow base-10 digit in case it's a float literal
duke@1 477 int digitRadix = (radix <= 10) ? 10 : 16;
duke@1 478 boolean seendigit = false;
duke@1 479 while (digit(digitRadix) >= 0) {
duke@1 480 seendigit = true;
duke@1 481 putChar(ch);
duke@1 482 scanChar();
duke@1 483 }
duke@1 484 if (radix == 16 && ch == '.') {
duke@1 485 scanHexFractionAndSuffix(seendigit);
duke@1 486 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
duke@1 487 scanHexExponentAndSuffix();
duke@1 488 } else if (radix <= 10 && ch == '.') {
duke@1 489 putChar(ch);
duke@1 490 scanChar();
duke@1 491 scanFractionAndSuffix();
duke@1 492 } else if (radix <= 10 &&
duke@1 493 (ch == 'e' || ch == 'E' ||
duke@1 494 ch == 'f' || ch == 'F' ||
duke@1 495 ch == 'd' || ch == 'D')) {
duke@1 496 scanFractionAndSuffix();
duke@1 497 } else {
duke@1 498 if (ch == 'l' || ch == 'L') {
duke@1 499 scanChar();
duke@1 500 token = LONGLITERAL;
duke@1 501 } else {
duke@1 502 token = INTLITERAL;
duke@1 503 }
duke@1 504 }
duke@1 505 }
duke@1 506
duke@1 507 /** Read an identifier.
 * Appends the current character, and every following character that can
 * continue an identifier, to sbuf. When a character that cannot continue
 * the identifier is reached (or the end-of-input marker at the end of the
 * buffer), sets name from the collected characters and token from the
 * keyword table lookup of that name, and returns with that character
 * still in ch.
duke@1 508 */
duke@1 509 private void scanIdent() {
duke@1 510 boolean isJavaIdentifierPart;
duke@1 511 char high;
duke@1 512 do {
duke@1 513 if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
duke@1 514 // optimization, was: putChar(ch);
duke@1 515
duke@1 516 scanChar();
// ASCII characters that continue an identifier are listed explicitly
// so that they do not need a Character.isJavaIdentifierPart call.
duke@1 517 switch (ch) {
duke@1 518 case 'A': case 'B': case 'C': case 'D': case 'E':
duke@1 519 case 'F': case 'G': case 'H': case 'I': case 'J':
duke@1 520 case 'K': case 'L': case 'M': case 'N': case 'O':
duke@1 521 case 'P': case 'Q': case 'R': case 'S': case 'T':
duke@1 522 case 'U': case 'V': case 'W': case 'X': case 'Y':
duke@1 523 case 'Z':
duke@1 524 case 'a': case 'b': case 'c': case 'd': case 'e':
duke@1 525 case 'f': case 'g': case 'h': case 'i': case 'j':
duke@1 526 case 'k': case 'l': case 'm': case 'n': case 'o':
duke@1 527 case 'p': case 'q': case 'r': case 's': case 't':
duke@1 528 case 'u': case 'v': case 'w': case 'x': case 'y':
duke@1 529 case 'z':
duke@1 530 case '$': case '_':
duke@1 531 case '0': case '1': case '2': case '3': case '4':
duke@1 532 case '5': case '6': case '7': case '8': case '9':
duke@1 533 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
duke@1 534 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
duke@1 535 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
duke@1 536 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
duke@1 537 case '\u0015': case '\u0016': case '\u0017':
duke@1 538 case '\u0018': case '\u0019': case '\u001B':
duke@1 539 case '\u007F':
duke@1 540 break;
duke@1 541 case '\u001A': // EOI is also a legal identifier part
// At or past buflen this character is the end-of-input sentinel, so the
// identifier ends here; earlier in the buffer it is kept as part of it.
duke@1 542 if (bp >= buflen) {
duke@1 543 name = names.fromChars(sbuf, 0, sp);
duke@1 544 token = keywords.key(name);
duke@1 545 return;
duke@1 546 }
duke@1 547 break;
duke@1 548 default:
duke@1 549 if (ch < '\u0080') {
duke@1 550 // all ASCII range chars already handled, above
duke@1 551 isJavaIdentifierPart = false;
duke@1 552 } else {
// For a surrogate pair, scanSurrogates leaves the low surrogate in ch and
// returns the high one; the high surrogate is stored here and the low one
// is stored at the top of the next loop iteration.
duke@1 553 high = scanSurrogates();
duke@1 554 if (high != 0) {
duke@1 555 if (sp == sbuf.length) {
duke@1 556 putChar(high);
duke@1 557 } else {
duke@1 558 sbuf[sp++] = high;
duke@1 559 }
duke@1 560 isJavaIdentifierPart = Character.isJavaIdentifierPart(
duke@1 561 Character.toCodePoint(high, ch));
duke@1 562 } else {
duke@1 563 isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
duke@1 564 }
duke@1 565 }
duke@1 566 if (!isJavaIdentifierPart) {
duke@1 567 name = names.fromChars(sbuf, 0, sp);
duke@1 568 token = keywords.key(name);
duke@1 569 return;
duke@1 570 }
duke@1 571 }
duke@1 572 } while (true);
duke@1 573 }
duke@1 574
duke@1 575 /** Are surrogates supported?
duke@1 576 */
duke@1 577 final static boolean surrogatesSupported = surrogatesSupported();
duke@1 578 private static boolean surrogatesSupported() {
duke@1 579 try {
duke@1 580 Character.isHighSurrogate('a');
duke@1 581 return true;
duke@1 582 } catch (NoSuchMethodError ex) {
duke@1 583 return false;
duke@1 584 }
duke@1 585 }
duke@1 586
duke@1 587 /** Scan surrogate pairs. If 'ch' is a high surrogate and
duke@1 588 * the next character is a low surrogate, then put the low
duke@1 589 * surrogate in 'ch', and return the high surrogate.
duke@1 590 * otherwise, just return 0.
duke@1 591 */
duke@1 592 private char scanSurrogates() {
duke@1 593 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
duke@1 594 char high = ch;
duke@1 595
duke@1 596 scanChar();
duke@1 597
duke@1 598 if (Character.isLowSurrogate(ch)) {
duke@1 599 return high;
duke@1 600 }
duke@1 601
duke@1 602 ch = high;
duke@1 603 }
duke@1 604
duke@1 605 return 0;
duke@1 606 }
duke@1 607
duke@1 608 /** Return true if ch can be part of an operator.
duke@1 609 */
duke@1 610 private boolean isSpecial(char ch) {
duke@1 611 switch (ch) {
duke@1 612 case '!': case '%': case '&': case '*': case '?':
duke@1 613 case '+': case '-': case ':': case '<': case '=':
duke@1 614 case '>': case '^': case '|': case '~':
duke@1 615 case '@':
duke@1 616 return true;
duke@1 617 default:
duke@1 618 return false;
duke@1 619 }
duke@1 620 }
duke@1 621
duke@1 622 /** Read longest possible sequence of special characters and convert
duke@1 623 * to token.
duke@1 624 */
duke@1 625 private void scanOperator() {
duke@1 626 while (true) {
duke@1 627 putChar(ch);
duke@1 628 Name newname = names.fromChars(sbuf, 0, sp);
duke@1 629 if (keywords.key(newname) == IDENTIFIER) {
duke@1 630 sp--;
duke@1 631 break;
duke@1 632 }
duke@1 633 name = newname;
duke@1 634 token = keywords.key(newname);
duke@1 635 scanChar();
duke@1 636 if (!isSpecial(ch)) break;
duke@1 637 }
duke@1 638 }
duke@1 639
duke@1 640 /**
duke@1 641 * Scan a documentation comment; determine if a deprecated tag is present.
duke@1 642 * Called once the initial /, * have been skipped, positioned at the second *
duke@1 643 * (which is treated as the beginning of the first line).
duke@1 644 * Stops positioned at the closing '/'.
 * Sets deprecatedFlag when a line starts (after optional white space and
 * stars) with "@deprecated" followed by white space or by the end of the
 * comment. If the end of the buffer is reached first, returns without
 * having found the closing '/'.
duke@1 645 */
duke@1 646 @SuppressWarnings("fallthrough")
duke@1 647 private void scanDocComment() {
duke@1 648 boolean deprecatedPrefix = false;
duke@1 649
duke@1 650 forEachLine:
duke@1 651 while (bp < buflen) {
duke@1 652
duke@1 653 // Skip optional WhiteSpace at beginning of line
duke@1 654 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
duke@1 655 scanCommentChar();
duke@1 656 }
duke@1 657
duke@1 658 // Skip optional consecutive Stars
duke@1 659 while (bp < buflen && ch == '*') {
duke@1 660 scanCommentChar();
duke@1 661 if (ch == '/') {
duke@1 662 return;
duke@1 663 }
duke@1 664 }
duke@1 665
duke@1 666 // Skip optional WhiteSpace after Stars
duke@1 667 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
duke@1 668 scanCommentChar();
duke@1 669 }
duke@1 670
duke@1 671 deprecatedPrefix = false;
duke@1 672 // At beginning of line in the JavaDoc sense.
// Match "@deprecated" one character at a time; the search is skipped
// once deprecatedFlag is already set.
duke@1 673 if (bp < buflen && ch == '@' && !deprecatedFlag) {
duke@1 674 scanCommentChar();
duke@1 675 if (bp < buflen && ch == 'd') {
duke@1 676 scanCommentChar();
duke@1 677 if (bp < buflen && ch == 'e') {
duke@1 678 scanCommentChar();
duke@1 679 if (bp < buflen && ch == 'p') {
duke@1 680 scanCommentChar();
duke@1 681 if (bp < buflen && ch == 'r') {
duke@1 682 scanCommentChar();
duke@1 683 if (bp < buflen && ch == 'e') {
duke@1 684 scanCommentChar();
duke@1 685 if (bp < buflen && ch == 'c') {
duke@1 686 scanCommentChar();
duke@1 687 if (bp < buflen && ch == 'a') {
duke@1 688 scanCommentChar();
duke@1 689 if (bp < buflen && ch == 't') {
duke@1 690 scanCommentChar();
duke@1 691 if (bp < buflen && ch == 'e') {
duke@1 692 scanCommentChar();
duke@1 693 if (bp < buflen && ch == 'd') {
duke@1 694 deprecatedPrefix = true;
duke@1 695 scanCommentChar();
duke@1 696 }}}}}}}}}}}
// The tag only counts when it is followed by white space or by the
// closing "*/" of the comment.
duke@1 697 if (deprecatedPrefix && bp < buflen) {
duke@1 698 if (Character.isWhitespace(ch)) {
duke@1 699 deprecatedFlag = true;
duke@1 700 } else if (ch == '*') {
duke@1 701 scanCommentChar();
duke@1 702 if (ch == '/') {
duke@1 703 deprecatedFlag = true;
duke@1 704 return;
duke@1 705 }
duke@1 706 }
duke@1 707 }
duke@1 708
duke@1 709 // Skip rest of line
duke@1 710 while (bp < buflen) {
duke@1 711 switch (ch) {
duke@1 712 case '*':
duke@1 713 scanCommentChar();
duke@1 714 if (ch == '/') {
duke@1 715 return;
duke@1 716 }
duke@1 717 break;
duke@1 718 case CR: // (Spec 3.4)
duke@1 719 scanCommentChar();
duke@1 720 if (ch != LF) {
duke@1 721 continue forEachLine;
duke@1 722 }
duke@1 723 /* fall through to LF case */
duke@1 724 case LF: // (Spec 3.4)
duke@1 725 scanCommentChar();
duke@1 726 continue forEachLine;
duke@1 727 default:
duke@1 728 scanCommentChar();
duke@1 729 }
duke@1 730 } // rest of line
duke@1 731 } // forEachLine
duke@1 732 return;
duke@1 733 }
duke@1 734
duke@1 735 /** The value of a literal token, recorded as a string.
duke@1 736 * For integers, leading 0x and 'l' suffixes are suppressed.
duke@1 737 */
duke@1 738 public String stringVal() {
duke@1 739 return new String(sbuf, 0, sp);
duke@1 740 }
duke@1 741
duke@1 742 /** Read token.
 * Saves the previous end position in prevEndPos, clears the literal
 * buffer, then skips white space, line terminators and comments until a
 * token is recognized. On return token is set, pos holds the buffer index
 * at which the token started and endPos the buffer index just after it.
 * Lexical errors are reported through lexError, which sets token to ERROR.
duke@1 743 */
duke@1 744 public void nextToken() {
duke@1 745
duke@1 746 try {
duke@1 747 prevEndPos = endPos;
duke@1 748 sp = 0;
duke@1 749
// Cases that only skip input (white space, line terminators, comments)
// 'break' out of the switch and loop again; every case that produces a
// token returns directly.
duke@1 750 while (true) {
duke@1 751 pos = bp;
duke@1 752 switch (ch) {
duke@1 753 case ' ': // (Spec 3.6)
duke@1 754 case '\t': // (Spec 3.6)
duke@1 755 case FF: // (Spec 3.6)
duke@1 756 do {
duke@1 757 scanChar();
duke@1 758 } while (ch == ' ' || ch == '\t' || ch == FF);
duke@1 759 endPos = bp;
duke@1 760 processWhiteSpace();
duke@1 761 break;
duke@1 762 case LF: // (Spec 3.4)
duke@1 763 scanChar();
duke@1 764 endPos = bp;
duke@1 765 processLineTerminator();
duke@1 766 break;
duke@1 767 case CR: // (Spec 3.4)
duke@1 768 scanChar();
duke@1 769 if (ch == LF) {
duke@1 770 scanChar();
duke@1 771 }
duke@1 772 endPos = bp;
duke@1 773 processLineTerminator();
duke@1 774 break;
duke@1 775 case 'A': case 'B': case 'C': case 'D': case 'E':
duke@1 776 case 'F': case 'G': case 'H': case 'I': case 'J':
duke@1 777 case 'K': case 'L': case 'M': case 'N': case 'O':
duke@1 778 case 'P': case 'Q': case 'R': case 'S': case 'T':
duke@1 779 case 'U': case 'V': case 'W': case 'X': case 'Y':
duke@1 780 case 'Z':
duke@1 781 case 'a': case 'b': case 'c': case 'd': case 'e':
duke@1 782 case 'f': case 'g': case 'h': case 'i': case 'j':
duke@1 783 case 'k': case 'l': case 'm': case 'n': case 'o':
duke@1 784 case 'p': case 'q': case 'r': case 's': case 't':
duke@1 785 case 'u': case 'v': case 'w': case 'x': case 'y':
duke@1 786 case 'z':
duke@1 787 case '$': case '_':
duke@1 788 scanIdent();
duke@1 789 return;
// A leading '0' starts either a hex literal (the "0x" prefix is consumed
// but not copied into sbuf) or an octal/decimal-float literal (the '0'
// is copied into sbuf before the remaining digits are scanned).
duke@1 790 case '0':
duke@1 791 scanChar();
duke@1 792 if (ch == 'x' || ch == 'X') {
duke@1 793 scanChar();
duke@1 794 if (ch == '.') {
duke@1 795 scanHexFractionAndSuffix(false);
duke@1 796 } else if (digit(16) < 0) {
duke@1 797 lexError("invalid.hex.number");
duke@1 798 } else {
duke@1 799 scanNumber(16);
duke@1 800 }
duke@1 801 } else {
duke@1 802 putChar('0');
duke@1 803 scanNumber(8);
duke@1 804 }
duke@1 805 return;
duke@1 806 case '1': case '2': case '3': case '4':
duke@1 807 case '5': case '6': case '7': case '8': case '9':
duke@1 808 scanNumber(10);
duke@1 809 return;
// '.' starts a floating point literal when a digit follows, an ellipsis
// when two more dots follow, and is the DOT token otherwise.
duke@1 810 case '.':
duke@1 811 scanChar();
duke@1 812 if ('0' <= ch && ch <= '9') {
duke@1 813 putChar('.');
duke@1 814 scanFractionAndSuffix();
duke@1 815 } else if (ch == '.') {
duke@1 816 putChar('.'); putChar('.');
duke@1 817 scanChar();
duke@1 818 if (ch == '.') {
duke@1 819 scanChar();
duke@1 820 putChar('.');
duke@1 821 token = ELLIPSIS;
duke@1 822 } else {
duke@1 823 lexError("malformed.fp.lit");
duke@1 824 }
duke@1 825 } else {
duke@1 826 token = DOT;
duke@1 827 }
duke@1 828 return;
duke@1 829 case ',':
duke@1 830 scanChar(); token = COMMA; return;
duke@1 831 case ';':
duke@1 832 scanChar(); token = SEMI; return;
duke@1 833 case '(':
duke@1 834 scanChar(); token = LPAREN; return;
duke@1 835 case ')':
duke@1 836 scanChar(); token = RPAREN; return;
duke@1 837 case '[':
duke@1 838 scanChar(); token = LBRACKET; return;
duke@1 839 case ']':
duke@1 840 scanChar(); token = RBRACKET; return;
duke@1 841 case '{':
duke@1 842 scanChar(); token = LBRACE; return;
duke@1 843 case '}':
duke@1 844 scanChar(); token = RBRACE; return;
// '/' starts a line comment, a block or doc comment, the "/=" operator,
// or is the plain SLASH token.
duke@1 845 case '/':
duke@1 846 scanChar();
duke@1 847 if (ch == '/') {
duke@1 848 do {
duke@1 849 scanCommentChar();
duke@1 850 } while (ch != CR && ch != LF && bp < buflen);
// processComment is only called when the line comment ended before the
// end of the buffer.
duke@1 851 if (bp < buflen) {
duke@1 852 endPos = bp;
duke@1 853 processComment(CommentStyle.LINE);
duke@1 854 }
duke@1 855 break;
duke@1 856 } else if (ch == '*') {
duke@1 857 scanChar();
duke@1 858 CommentStyle style;
duke@1 859 if (ch == '*') {
duke@1 860 style = CommentStyle.JAVADOC;
duke@1 861 scanDocComment();
duke@1 862 } else {
duke@1 863 style = CommentStyle.BLOCK;
duke@1 864 while (bp < buflen) {
duke@1 865 if (ch == '*') {
duke@1 866 scanChar();
duke@1 867 if (ch == '/') break;
duke@1 868 } else {
duke@1 869 scanCommentChar();
duke@1 870 }
duke@1 871 }
duke@1 872 }
// Both scanning paths stop on the closing '/' when the comment is
// terminated; anything else means the input ended inside the comment.
duke@1 873 if (ch == '/') {
duke@1 874 scanChar();
duke@1 875 endPos = bp;
duke@1 876 processComment(style);
duke@1 877 break;
duke@1 878 } else {
duke@1 879 lexError("unclosed.comment");
duke@1 880 return;
duke@1 881 }
duke@1 882 } else if (ch == '=') {
duke@1 883 name = names.slashequals;
duke@1 884 token = SLASHEQ;
duke@1 885 scanChar();
duke@1 886 } else {
duke@1 887 name = names.slash;
duke@1 888 token = SLASH;
duke@1 889 }
duke@1 890 return;
duke@1 891 case '\'':
duke@1 892 scanChar();
duke@1 893 if (ch == '\'') {
duke@1 894 lexError("empty.char.lit");
duke@1 895 } else {
duke@1 896 if (ch == CR || ch == LF)
duke@1 897 lexError(pos, "illegal.line.end.in.char.lit");
duke@1 898 scanLitChar();
duke@1 899 if (ch == '\'') {
duke@1 900 scanChar();
duke@1 901 token = CHARLITERAL;
duke@1 902 } else {
duke@1 903 lexError(pos, "unclosed.char.lit");
duke@1 904 }
duke@1 905 }
duke@1 906 return;
duke@1 907 case '\"':
duke@1 908 scanChar();
duke@1 909 while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
duke@1 910 scanLitChar();
duke@1 911 if (ch == '\"') {
duke@1 912 token = STRINGLITERAL;
duke@1 913 scanChar();
duke@1 914 } else {
duke@1 915 lexError(pos, "unclosed.str.lit");
duke@1 916 }
duke@1 917 return;
// Everything else: an operator, an identifier starting with a non-ASCII
// character, the end of input, or an illegal character.
duke@1 918 default:
duke@1 919 if (isSpecial(ch)) {
duke@1 920 scanOperator();
duke@1 921 } else {
duke@1 922 boolean isJavaIdentifierStart;
duke@1 923 if (ch < '\u0080') {
duke@1 924 // all ASCII range chars already handled, above
duke@1 925 isJavaIdentifierStart = false;
duke@1 926 } else {
duke@1 927 char high = scanSurrogates();
duke@1 928 if (high != 0) {
duke@1 929 if (sp == sbuf.length) {
duke@1 930 putChar(high);
duke@1 931 } else {
duke@1 932 sbuf[sp++] = high;
duke@1 933 }
duke@1 934
duke@1 935 isJavaIdentifierStart = Character.isJavaIdentifierStart(
duke@1 936 Character.toCodePoint(high, ch));
duke@1 937 } else {
duke@1 938 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
duke@1 939 }
duke@1 940 }
duke@1 941 if (isJavaIdentifierStart) {
duke@1 942 scanIdent();
duke@1 943 } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
// End of input: both pos and bp are moved to eofPos, the input length
// recorded by the constructor.
duke@1 944 token = EOF;
duke@1 945 pos = bp = eofPos;
duke@1 946 } else {
duke@1 947 lexError("illegal.char", String.valueOf((int)ch));
duke@1 948 scanChar();
duke@1 949 }
duke@1 950 }
duke@1 951 return;
duke@1 952 }
duke@1 953 }
duke@1 954 } finally {
// Runs on every exit path, so endPos always reflects where scanning
// stopped for this token.
duke@1 955 endPos = bp;
duke@1 956 if (scannerDebug)
duke@1 957 System.out.println("nextToken(" + pos
duke@1 958 + "," + endPos + ")=|" +
duke@1 959 new String(getRawCharacters(pos, endPos))
duke@1 960 + "|");
duke@1 961 }
duke@1 962 }
duke@1 963
duke@1 964 /** Return the current token, set by nextToken().
duke@1 965 */
duke@1 966 public Token token() {
duke@1 967 return token;
duke@1 968 }
duke@1 969
duke@1 970 /** Sets the current token, replacing the value stored by the scanner.
 * No other scanner state (positions, name, literal buffer) is changed.
 *
 * @param token the token that token() should return from now on
duke@1 971 */
duke@1 972 public void token(Token token) {
duke@1 973 this.token = token;
duke@1 974 }
duke@1 975
duke@1 976 /** Return the current token's position: a 0-based
duke@1 977 * offset from the beginning of the raw input stream
duke@1 978 * (before unicode translation).
 *
 * @return the current value of the <code>pos</code> field.
duke@1 979 */
duke@1 980 public int pos() {
duke@1 981 return pos;
duke@1 982 }
duke@1 983
duke@1 984 /** Return the last character position of the current token.
 *
 * NOTE(review): nextToken() assigns <code>endPos</code> from
 * <code>bp</code> in its finally clause and then passes it to
 * <code>getRawCharacters(pos, endPos)</code> as the exclusive end index,
 * so this value looks like the offset just past the token's last
 * character rather than the offset of that character -- confirm.
 *
 * @return the current value of the <code>endPos</code> field.
duke@1 985 */
duke@1 986 public int endPos() {
duke@1 987 return endPos;
duke@1 988 }
duke@1 989
duke@1 990 /** Return the last character position of the previous token.
 *
 * @return the current value of the <code>prevEndPos</code> field.
duke@1 991 */
duke@1 992 public int prevEndPos() {
duke@1 993 return prevEndPos;
duke@1 994 }
duke@1 995
duke@1 996 /** Return the position where a lexical error occurred.
 *
 * @return the current value of the <code>errPos</code> field.
duke@1 997 */
duke@1 998 public int errPos() {
duke@1 999 return errPos;
duke@1 1000 }
duke@1 1001
duke@1 1002 /** Set the position where a lexical error occurred.
 * The parameter is named <code>pos</code> but it is stored in the
 * <code>errPos</code> field; the scanner's own <code>pos</code> field is
 * not modified.
 *
 * @param pos the error position to record.
duke@1 1003 */
duke@1 1004 public void errPos(int pos) {
duke@1 1005 errPos = pos;
duke@1 1006 }
duke@1 1007
duke@1 1008 /** Return the name of an identifier or token for the current token.
 *
 * @return the current value of the <code>name</code> field.
duke@1 1009 */
duke@1 1010 public Name name() {
duke@1 1011 return name;
duke@1 1012 }
duke@1 1013
duke@1 1014 /** Return the radix of a numeric literal token.
 *
 * @return the current value of the <code>radix</code> field.
duke@1 1015 */
duke@1 1016 public int radix() {
duke@1 1017 return radix;
duke@1 1018 }
duke@1 1019
duke@1 1020 /** Has a @deprecated been encountered in last doc comment?
duke@1 1021 * This needs to be reset by client with resetDeprecatedFlag().
 *
 * @return the current value of the <code>deprecatedFlag</code> field.
duke@1 1022 */
duke@1 1023 public boolean deprecatedFlag() {
duke@1 1024 return deprecatedFlag;
duke@1 1025 }
duke@1 1026
/** Clear the flag reported by deprecatedFlag() by setting the
 * <code>deprecatedFlag</code> field to false.
 */
duke@1 1027 public void resetDeprecatedFlag() {
duke@1 1028 deprecatedFlag = false;
duke@1 1029 }
duke@1 1030
duke@1 1031 /**
duke@1 1032 * Returns the documentation string of the current token.
 * This implementation does not record doc comments and always
 * returns <code>null</code>.
 * NOTE(review): presumably a subclass overrides this to supply the
 * comment text -- confirm.
 *
 * @return always <code>null</code> in this class.
duke@1 1033 */
duke@1 1034 public String docComment() {
duke@1 1035 return null;
duke@1 1036 }
duke@1 1037
duke@1 1038 /**
duke@1 1039 * Returns a copy of the input buffer, up to its length <code>buflen</code>.
duke@1 1040 * Unicode escape sequences are not translated.
 *
 * @return a newly allocated array holding the first
 * <code>buflen</code> characters of <code>buf</code>.
duke@1 1041 */
duke@1 1042 public char[] getRawCharacters() {
duke@1 1043 char[] chars = new char[buflen];
duke@1 1044 System.arraycopy(buf, 0, chars, 0, buflen);
duke@1 1045 return chars;
duke@1 1046 }
duke@1 1047
duke@1 1048 /**
duke@1 1049 * Returns a copy of a character array subset of the input buffer.
duke@1 1050 * The returned array begins at the <code>beginIndex</code> and
duke@1 1051 * extends to the character at index <code>endIndex - 1</code>.
duke@1 1052 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
duke@1 1053 * This behavior is like
duke@1 1054 * <code>String.substring(beginIndex, endIndex)</code>.
duke@1 1055 * Unicode escape sequences are not translated.
duke@1 1056 *
duke@1 1057 * @param beginIndex the beginning index, inclusive.
duke@1 1058 * @param endIndex the ending index, exclusive.
 * @return a newly allocated array of length
 * <code>endIndex-beginIndex</code> copied from <code>buf</code>.
duke@1 1059 * @throws IndexOutOfBoundsException if either offset is outside of the
duke@1 1060 * array bounds
 * @throws NegativeArraySizeException if <code>endIndex</code> is less
 * than <code>beginIndex</code>, because the result array is allocated
 * before the copy is attempted.
duke@1 1061 */
duke@1 1062 public char[] getRawCharacters(int beginIndex, int endIndex) {
duke@1 1063 int length = endIndex - beginIndex;
duke@1 1064 char[] chars = new char[length];
duke@1 1065 System.arraycopy(buf, beginIndex, chars, 0, length);
duke@1 1066 return chars;
duke@1 1067 }
duke@1 1068
/**
 * The kinds of comment that can be reported to
 * processComment(CommentStyle).
 * NOTE(review): presumably LINE, BLOCK and JAVADOC stand for end-of-line,
 * traditional and documentation comments respectively -- confirm against
 * the comment-scanning code.
 */
duke@1 1069 public enum CommentStyle {
duke@1 1070 LINE,
duke@1 1071 BLOCK,
duke@1 1072 JAVADOC,
duke@1 1073 }
duke@1 1074
duke@1 1075 /**
duke@1 1076 * Called when a complete comment has been scanned. pos and endPos
duke@1 1077 * will mark the comment boundary.
 * This implementation does nothing unless <code>scannerDebug</code> is
 * set, in which case it prints the positions, the style and the raw
 * text between pos and endPos to System.out.
 *
 * @param style the kind of comment that was scanned.
duke@1 1078 */
duke@1 1079 protected void processComment(CommentStyle style) {
duke@1 1080 if (scannerDebug)
duke@1 1081 System.out.println("processComment(" + pos
duke@1 1082 + "," + endPos + "," + style + ")=|"
duke@1 1083 + new String(getRawCharacters(pos, endPos))
duke@1 1084 + "|");
duke@1 1085 }
duke@1 1086
duke@1 1087 /**
duke@1 1088 * Called when a complete whitespace run has been scanned. pos and endPos
duke@1 1089 * will mark the whitespace boundary.
 * This implementation does nothing unless <code>scannerDebug</code> is
 * set, in which case it prints the positions and the raw text between
 * pos and endPos to System.out.
duke@1 1090 */
duke@1 1091 protected void processWhiteSpace() {
duke@1 1092 if (scannerDebug)
duke@1 1093 System.out.println("processWhitespace(" + pos
duke@1 1094 + "," + endPos + ")=|" +
duke@1 1095 new String(getRawCharacters(pos, endPos))
duke@1 1096 + "|");
duke@1 1097 }
duke@1 1098
duke@1 1099 /**
duke@1 1100 * Called when a line terminator has been processed.
 * This implementation does nothing unless <code>scannerDebug</code> is
 * set, in which case it prints the positions and the raw text between
 * pos and endPos to System.out.
 * NOTE(review): the trace label printed below reads "processTerminator",
 * not the method name processLineTerminator; keep that in mind when
 * searching debug output.
duke@1 1101 */
duke@1 1102 protected void processLineTerminator() {
duke@1 1103 if (scannerDebug)
duke@1 1104 System.out.println("processTerminator(" + pos
duke@1 1105 + "," + endPos + ")=|" +
duke@1 1106 new String(getRawCharacters(pos, endPos))
duke@1 1107 + "|");
duke@1 1108 }
duke@1 1109
duke@1 1110 /** Build a map for translating between line numbers and
duke@1 1111 * positions in the input.
 * Delegates to <code>Position.makeLineMap</code>, passing the input
 * buffer <code>buf</code> and its length <code>buflen</code>.
 * NOTE(review): the meaning of the third argument (<code>false</code>)
 * is defined by Position.makeLineMap and is not visible here -- confirm.
duke@1 1112 *
duke@1 1113 * @return a LineMap */
duke@1 1114 public Position.LineMap getLineMap() {
duke@1 1115 return Position.makeLineMap(buf, buflen, false);
duke@1 1116 }
duke@1 1117
duke@1 1118 }

mercurial