src/share/classes/com/sun/tools/javac/parser/Scanner.java

Fri, 16 Jul 2010 19:35:24 -0700

author
darcy
date
Fri, 16 Jul 2010 19:35:24 -0700
changeset 609
13354e1abba7
parent 597
d2b7ecf33b35
child 674
584365f256a7
permissions
-rw-r--r--

6911256: Project Coin: Support Automatic Resource Management (ARM) blocks in the compiler
6964740: Project Coin: More tests for ARM compiler changes
6965277: Project Coin: Correctness issues in ARM implementation
6967065: add -Xlint warning category for Automatic Resource Management (ARM)
Reviewed-by: jjb, darcy, mcimadamore, jjg, briangoetz
Contributed-by: tball@google.com

duke@1 1 /*
ohair@554 2 * Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
duke@1 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@1 4 *
duke@1 5 * This code is free software; you can redistribute it and/or modify it
duke@1 6 * under the terms of the GNU General Public License version 2 only, as
ohair@554 7 * published by the Free Software Foundation. Oracle designates this
duke@1 8 * particular file as subject to the "Classpath" exception as provided
ohair@554 9 * by Oracle in the LICENSE file that accompanied this code.
duke@1 10 *
duke@1 11 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@1 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@1 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@1 14 * version 2 for more details (a copy is included in the LICENSE file that
duke@1 15 * accompanied this code).
duke@1 16 *
duke@1 17 * You should have received a copy of the GNU General Public License version
duke@1 18 * 2 along with this work; if not, write to the Free Software Foundation,
duke@1 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@1 20 *
ohair@554 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
ohair@554 22 * or visit www.oracle.com if you need additional information or have any
ohair@554 23 * questions.
duke@1 24 */
duke@1 25
duke@1 26 package com.sun.tools.javac.parser;
duke@1 27
duke@1 28 import java.nio.*;
duke@1 29
jjg@50 30 import com.sun.tools.javac.code.Source;
jjg@50 31 import com.sun.tools.javac.file.JavacFileManager;
duke@1 32 import com.sun.tools.javac.util.*;
duke@1 33
duke@1 34
duke@1 35 import static com.sun.tools.javac.parser.Token.*;
duke@1 36 import static com.sun.tools.javac.util.LayoutCharacters.*;
duke@1 37
duke@1 38 /** The lexical analyzer maps an input stream consisting of
duke@1 39 * ASCII characters and Unicode escapes into a token sequence.
duke@1 40 *
jjg@581 41 * <p><b>This is NOT part of any supported API.
jjg@581 42 * If you write code that depends on this, you do so at your own risk.
duke@1 43 * This code and its internal interfaces are subject to change or
duke@1 44 * deletion without notice.</b>
duke@1 45 */
duke@1 46 public class Scanner implements Lexer {
duke@1 47
duke@1 48 private static boolean scannerDebug = false;
duke@1 49
duke@1 50 /** A factory for creating scanners. */
duke@1 51 public static class Factory {
duke@1 52 /** The context key for the scanner factory. */
duke@1 53 public static final Context.Key<Scanner.Factory> scannerFactoryKey =
duke@1 54 new Context.Key<Scanner.Factory>();
duke@1 55
duke@1 56 /** Get the Factory instance for this context. */
duke@1 57 public static Factory instance(Context context) {
duke@1 58 Factory instance = context.get(scannerFactoryKey);
duke@1 59 if (instance == null)
duke@1 60 instance = new Factory(context);
duke@1 61 return instance;
duke@1 62 }
duke@1 63
duke@1 64 final Log log;
jjg@113 65 final Names names;
duke@1 66 final Source source;
duke@1 67 final Keywords keywords;
duke@1 68
duke@1 69 /** Create a new scanner factory. */
duke@1 70 protected Factory(Context context) {
duke@1 71 context.put(scannerFactoryKey, this);
duke@1 72 this.log = Log.instance(context);
jjg@113 73 this.names = Names.instance(context);
duke@1 74 this.source = Source.instance(context);
duke@1 75 this.keywords = Keywords.instance(context);
duke@1 76 }
duke@1 77
duke@1 78 public Scanner newScanner(CharSequence input) {
duke@1 79 if (input instanceof CharBuffer) {
duke@1 80 return new Scanner(this, (CharBuffer)input);
duke@1 81 } else {
duke@1 82 char[] array = input.toString().toCharArray();
duke@1 83 return newScanner(array, array.length);
duke@1 84 }
duke@1 85 }
duke@1 86
duke@1 87 public Scanner newScanner(char[] input, int inputLength) {
duke@1 88 return new Scanner(this, input, inputLength);
duke@1 89 }
duke@1 90 }
duke@1 91
duke@1 92 /* Output variables; set by nextToken():
duke@1 93 */
duke@1 94
duke@1 95 /** The token, set by nextToken().
duke@1 96 */
duke@1 97 private Token token;
duke@1 98
duke@1 99 /** Allow hex floating-point literals.
duke@1 100 */
duke@1 101 private boolean allowHexFloats;
duke@1 102
jjg@409 103 /** Allow binary literals.
jjg@409 104 */
jjg@409 105 private boolean allowBinaryLiterals;
jjg@409 106
jjg@409 107 /** Allow underscores in literals.
jjg@409 108 */
jjg@409 109 private boolean allowUnderscoresInLiterals;
jjg@409 110
jjg@409 111 /** The source language setting.
jjg@409 112 */
jjg@409 113 private Source source;
jjg@409 114
duke@1 115 /** The token's position, 0-based offset from beginning of text.
duke@1 116 */
duke@1 117 private int pos;
duke@1 118
duke@1 119 /** Character position just after the last character of the token.
duke@1 120 */
duke@1 121 private int endPos;
duke@1 122
duke@1 123 /** The last character position of the previous token.
duke@1 124 */
duke@1 125 private int prevEndPos;
duke@1 126
duke@1 127 /** The position where a lexical error occurred; initially Position.NOPOS.
duke@1 128 */
duke@1 129 private int errPos = Position.NOPOS;
duke@1 130
duke@1 131 /** The name of an identifier or token:
duke@1 132 */
duke@1 133 private Name name;
duke@1 134
duke@1 135 /** The radix of a numeric literal token.
duke@1 136 */
duke@1 137 private int radix;
duke@1 138
duke@1 139 /** Has a @deprecated been encountered in last doc comment?
duke@1 140 * this needs to be reset by client.
duke@1 141 */
duke@1 142 protected boolean deprecatedFlag = false;
duke@1 143
duke@1 144 /** A character buffer for the text of the current token; sp is the number of characters stored in it.
duke@1 145 */
duke@1 146 private char[] sbuf = new char[128];
duke@1 147 private int sp;
duke@1 148
duke@1 149 /** The input buffer, index of the character currently held in ch,
duke@1 150 * index of one past last character in buffer.
duke@1 151 */
duke@1 152 private char[] buf;
duke@1 153 private int bp;
duke@1 154 private int buflen;
duke@1 155 private int eofPos;
duke@1 156
duke@1 157 /** The current character.
duke@1 158 */
duke@1 159 private char ch;
duke@1 160
duke@1 161 /** The buffer index of the last converted unicode character
duke@1 162 */
duke@1 163 private int unicodeConversionBp = -1;
duke@1 164
duke@1 165 /** The log to be used for error reporting.
duke@1 166 */
duke@1 167 private final Log log;
duke@1 168
duke@1 169 /** The name table. */
jjg@113 170 private final Names names;
duke@1 171
duke@1 172 /** The keyword table. */
duke@1 173 private final Keywords keywords;
duke@1 174
/** Common code for constructors: takes the shared services from the
 *  factory and caches the language-feature switches of the configured
 *  source level.
 *
 *  @param fac the factory which created this Scanner
 */
private Scanner(Factory fac) {
    log = fac.log;
    names = fac.names;
    keywords = fac.keywords;
    source = fac.source;
    allowBinaryLiterals = source.allowBinaryLiterals();
    allowHexFloats = source.allowHexFloats();
    // Each feature has its own source-level switch; this flag was
    // previously initialised from allowBinaryLiterals() by mistake.
    allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
}
duke@1 185
duke@1 186 private static final boolean hexFloatsWork = hexFloatsWork();
duke@1 187 private static boolean hexFloatsWork() {
duke@1 188 try {
duke@1 189 Float.valueOf("0x1.0p1");
duke@1 190 return true;
duke@1 191 } catch (NumberFormatException ex) {
duke@1 192 return false;
duke@1 193 }
duke@1 194 }
duke@1 195
duke@1 196 /** Create a scanner from the input buffer. buffer must implement
duke@1 197 * array() and compact(), and remaining() must be less than limit().
* The buffer is turned into a char array with JavacFileManager.toArray
* and buffer.limit() is passed on as the input length.
duke@1 198 */
duke@1 199 protected Scanner(Factory fac, CharBuffer buffer) {
duke@1 200 this(fac, JavacFileManager.toArray(buffer), buffer.limit());
duke@1 201 }
duke@1 202
/**
 * Create a scanner from the input array. The array may be modified:
 * an end-of-input sentinel is stored at index {@code inputLength}
 * (or over a trailing white space character). To avoid copying the
 * input array, ensure that {@code inputLength < input.length} or
 * {@code input[input.length -1]} is a white space character.
 *
 * @param fac the factory which created this Scanner
 * @param input the input, might be modified
 * @param inputLength the size of the input.
 * Must be positive and less than or equal to input.length.
 */
protected Scanner(Factory fac, char[] input, int inputLength) {
    this(fac);
    eofPos = inputLength;
    if (inputLength == input.length) {
        boolean endsInWhitespace =
            input.length > 0 && Character.isWhitespace(input[input.length - 1]);
        if (!endsInWhitespace) {
            // No spare slot for the sentinel: copy into an array one longer.
            char[] grown = new char[inputLength + 1];
            System.arraycopy(input, 0, grown, 0, input.length);
            input = grown;
        } else {
            // The trailing white space is sacrificed for the sentinel.
            inputLength--;
        }
    }
    buflen = inputLength;
    buf = input;
    buf[buflen] = EOI;
    bp = -1;
    scanChar();
}
duke@1 232
duke@1 233 /** Report an error at the given position using the provided arguments.
duke@1 234 */
duke@1 235 private void lexError(int pos, String key, Object... args) {
duke@1 236 log.error(pos, key, args);
duke@1 237 token = ERROR;
duke@1 238 errPos = pos;
duke@1 239 }
duke@1 240
duke@1 241 /** Report an error at the current token position using the provided
duke@1 242 * arguments.
duke@1 243 */
duke@1 244 private void lexError(String key, Object... args) {
duke@1 245 lexError(pos, key, args);
duke@1 246 }
duke@1 247
duke@1 248 /** Convert an ASCII digit from its base (8, 10, or 16)
duke@1 249 * to its value.
duke@1 250 */
duke@1 251 private int digit(int base) {
duke@1 252 char c = ch;
duke@1 253 int result = Character.digit(c, base);
duke@1 254 if (result >= 0 && c > 0x7f) {
duke@1 255 lexError(pos+1, "illegal.nonascii.digit");
duke@1 256 ch = "0123456789abcdef".charAt(result);
duke@1 257 }
duke@1 258 return result;
duke@1 259 }
duke@1 260
duke@1 261 /** Convert unicode escape; bp points to initial '\' character
duke@1 262 * (Spec 3.3).
duke@1 263 */
duke@1 264 private void convertUnicode() {
duke@1 265 if (ch == '\\' && unicodeConversionBp != bp) {
duke@1 266 bp++; ch = buf[bp];
duke@1 267 if (ch == 'u') {
duke@1 268 do {
duke@1 269 bp++; ch = buf[bp];
duke@1 270 } while (ch == 'u');
duke@1 271 int limit = bp + 3;
duke@1 272 if (limit < buflen) {
duke@1 273 int d = digit(16);
duke@1 274 int code = d;
duke@1 275 while (bp < limit && d >= 0) {
duke@1 276 bp++; ch = buf[bp];
duke@1 277 d = digit(16);
duke@1 278 code = (code << 4) + d;
duke@1 279 }
duke@1 280 if (d >= 0) {
duke@1 281 ch = (char)code;
duke@1 282 unicodeConversionBp = bp;
duke@1 283 return;
duke@1 284 }
duke@1 285 }
duke@1 286 lexError(bp, "illegal.unicode.esc");
duke@1 287 } else {
duke@1 288 bp--;
duke@1 289 ch = '\\';
duke@1 290 }
duke@1 291 }
duke@1 292 }
duke@1 293
duke@1 294 /** Read next character.
duke@1 295 */
duke@1 296 private void scanChar() {
duke@1 297 ch = buf[++bp];
duke@1 298 if (ch == '\\') {
duke@1 299 convertUnicode();
duke@1 300 }
duke@1 301 }
duke@1 302
duke@1 303 /** Read next character in comment, skipping over double '\' characters.
duke@1 304 */
duke@1 305 private void scanCommentChar() {
duke@1 306 scanChar();
duke@1 307 if (ch == '\\') {
duke@1 308 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1 309 bp++;
duke@1 310 } else {
duke@1 311 convertUnicode();
duke@1 312 }
duke@1 313 }
duke@1 314 }
duke@1 315
duke@1 316 /** Append a character to sbuf.
duke@1 317 */
duke@1 318 private void putChar(char ch) {
duke@1 319 if (sp == sbuf.length) {
duke@1 320 char[] newsbuf = new char[sbuf.length * 2];
duke@1 321 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
duke@1 322 sbuf = newsbuf;
duke@1 323 }
duke@1 324 sbuf[sp++] = ch;
duke@1 325 }
duke@1 326
duke@1 327 /** For debugging purposes: print character.
duke@1 328 */
duke@1 329 private void dch() {
duke@1 330 System.err.print(ch); System.out.flush();
duke@1 331 }
duke@1 332
duke@1 333 /** Read next character in character or string literal and copy into sbuf.
duke@1 334 */
jrose@267 335 private void scanLitChar(boolean forBytecodeName) {
duke@1 336 if (ch == '\\') {
duke@1 337 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1 338 bp++;
duke@1 339 putChar('\\');
duke@1 340 scanChar();
duke@1 341 } else {
duke@1 342 scanChar();
duke@1 343 switch (ch) {
duke@1 344 case '0': case '1': case '2': case '3':
duke@1 345 case '4': case '5': case '6': case '7':
duke@1 346 char leadch = ch;
duke@1 347 int oct = digit(8);
duke@1 348 scanChar();
duke@1 349 if ('0' <= ch && ch <= '7') {
duke@1 350 oct = oct * 8 + digit(8);
duke@1 351 scanChar();
duke@1 352 if (leadch <= '3' && '0' <= ch && ch <= '7') {
duke@1 353 oct = oct * 8 + digit(8);
duke@1 354 scanChar();
duke@1 355 }
duke@1 356 }
duke@1 357 putChar((char)oct);
duke@1 358 break;
duke@1 359 case 'b':
duke@1 360 putChar('\b'); scanChar(); break;
duke@1 361 case 't':
duke@1 362 putChar('\t'); scanChar(); break;
duke@1 363 case 'n':
duke@1 364 putChar('\n'); scanChar(); break;
duke@1 365 case 'f':
duke@1 366 putChar('\f'); scanChar(); break;
duke@1 367 case 'r':
duke@1 368 putChar('\r'); scanChar(); break;
duke@1 369 case '\'':
duke@1 370 putChar('\''); scanChar(); break;
duke@1 371 case '\"':
duke@1 372 putChar('\"'); scanChar(); break;
duke@1 373 case '\\':
duke@1 374 putChar('\\'); scanChar(); break;
jrose@267 375 case '|': case ',': case '?': case '%':
jrose@267 376 case '^': case '_': case '{': case '}':
jrose@267 377 case '!': case '-': case '=':
jrose@267 378 if (forBytecodeName) {
jrose@267 379 // Accept escape sequences for dangerous bytecode chars.
jrose@267 380 // This is illegal in normal Java string or character literals.
jrose@267 381 // Note that the escape sequence itself is passed through.
jrose@267 382 putChar('\\'); putChar(ch); scanChar();
jrose@267 383 } else {
jrose@267 384 lexError(bp, "illegal.esc.char");
jrose@267 385 }
jrose@267 386 break;
duke@1 387 default:
duke@1 388 lexError(bp, "illegal.esc.char");
duke@1 389 }
duke@1 390 }
duke@1 391 } else if (bp != buflen) {
duke@1 392 putChar(ch); scanChar();
duke@1 393 }
duke@1 394 }
/** Read next character in an ordinary character or string literal and
 * copy it into sbuf; same as scanLitChar(false), so the escapes reserved
 * for bytecode names are rejected.
 */
jrose@267 395 private void scanLitChar() {
jrose@267 396 scanLitChar(false);
jrose@267 397 }
jrose@267 398
jrose@267 399 /** Read next character in an exotic name #"foo"
jrose@267 400 */
jrose@267 401 private void scanBytecodeNameChar() {
jrose@267 402 switch (ch) {
jrose@267 403 // reject any "dangerous" char which is illegal somewhere in the JVM spec
jrose@267 404 // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm
jrose@267 405 case '/': case '.': case ';': // illegal everywhere
jrose@267 406 case '<': case '>': // illegal in methods, dangerous in classes
jrose@267 407 case '[': // illegal in classes
jrose@267 408 lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch));
jrose@267 409 break;
jrose@267 410 }
jrose@267 411 scanLitChar(true);
jrose@267 412 }
duke@1 413
jjg@409 414 private void scanDigits(int digitRadix) {
jjg@409 415 char saveCh;
jjg@409 416 int savePos;
jjg@409 417 do {
jjg@409 418 if (ch != '_') {
jjg@409 419 putChar(ch);
jjg@409 420 } else {
jjg@409 421 if (!allowUnderscoresInLiterals) {
jjg@597 422 lexError("unsupported.underscore.lit", source.name);
jjg@409 423 allowUnderscoresInLiterals = true;
jjg@409 424 }
jjg@409 425 }
jjg@409 426 saveCh = ch;
jjg@409 427 savePos = bp;
jjg@409 428 scanChar();
jjg@409 429 } while (digit(digitRadix) >= 0 || ch == '_');
jjg@409 430 if (saveCh == '_')
jjg@409 431 lexError(savePos, "illegal.underscore");
jjg@409 432 }
jjg@409 433
duke@1 434 /** Read fractional part of hexadecimal floating point number.
duke@1 435 */
duke@1 436 private void scanHexExponentAndSuffix() {
duke@1 437 if (ch == 'p' || ch == 'P') {
duke@1 438 putChar(ch);
duke@1 439 scanChar();
jjg@409 440 skipIllegalUnderscores();
duke@1 441 if (ch == '+' || ch == '-') {
duke@1 442 putChar(ch);
duke@1 443 scanChar();
duke@1 444 }
jjg@409 445 skipIllegalUnderscores();
duke@1 446 if ('0' <= ch && ch <= '9') {
jjg@409 447 scanDigits(10);
duke@1 448 if (!allowHexFloats) {
jjg@409 449 lexError("unsupported.fp.lit", source.name);
duke@1 450 allowHexFloats = true;
duke@1 451 }
duke@1 452 else if (!hexFloatsWork)
duke@1 453 lexError("unsupported.cross.fp.lit");
duke@1 454 } else
duke@1 455 lexError("malformed.fp.lit");
duke@1 456 } else {
duke@1 457 lexError("malformed.fp.lit");
duke@1 458 }
duke@1 459 if (ch == 'f' || ch == 'F') {
duke@1 460 putChar(ch);
duke@1 461 scanChar();
duke@1 462 token = FLOATLITERAL;
duke@1 463 } else {
duke@1 464 if (ch == 'd' || ch == 'D') {
duke@1 465 putChar(ch);
duke@1 466 scanChar();
duke@1 467 }
duke@1 468 token = DOUBLELITERAL;
duke@1 469 }
duke@1 470 }
duke@1 471
duke@1 472 /** Read fractional part of floating point number.
duke@1 473 */
duke@1 474 private void scanFraction() {
jjg@409 475 skipIllegalUnderscores();
jjg@409 476 if ('0' <= ch && ch <= '9') {
jjg@409 477 scanDigits(10);
duke@1 478 }
duke@1 479 int sp1 = sp;
duke@1 480 if (ch == 'e' || ch == 'E') {
duke@1 481 putChar(ch);
duke@1 482 scanChar();
jjg@409 483 skipIllegalUnderscores();
duke@1 484 if (ch == '+' || ch == '-') {
duke@1 485 putChar(ch);
duke@1 486 scanChar();
duke@1 487 }
jjg@409 488 skipIllegalUnderscores();
duke@1 489 if ('0' <= ch && ch <= '9') {
jjg@409 490 scanDigits(10);
duke@1 491 return;
duke@1 492 }
duke@1 493 lexError("malformed.fp.lit");
duke@1 494 sp = sp1;
duke@1 495 }
duke@1 496 }
duke@1 497
duke@1 498 /** Read fractional part and 'd' or 'f' suffix of floating point number.
duke@1 499 */
duke@1 500 private void scanFractionAndSuffix() {
duke@1 501 this.radix = 10;
duke@1 502 scanFraction();
duke@1 503 if (ch == 'f' || ch == 'F') {
duke@1 504 putChar(ch);
duke@1 505 scanChar();
duke@1 506 token = FLOATLITERAL;
duke@1 507 } else {
duke@1 508 if (ch == 'd' || ch == 'D') {
duke@1 509 putChar(ch);
duke@1 510 scanChar();
duke@1 511 }
duke@1 512 token = DOUBLELITERAL;
duke@1 513 }
duke@1 514 }
duke@1 515
duke@1 516 /** Read fractional part and 'd' or 'f' suffix of floating point number.
duke@1 517 */
duke@1 518 private void scanHexFractionAndSuffix(boolean seendigit) {
duke@1 519 this.radix = 16;
duke@1 520 assert ch == '.';
duke@1 521 putChar(ch);
duke@1 522 scanChar();
jjg@409 523 skipIllegalUnderscores();
jjg@409 524 if (digit(16) >= 0) {
duke@1 525 seendigit = true;
jjg@409 526 scanDigits(16);
duke@1 527 }
duke@1 528 if (!seendigit)
duke@1 529 lexError("invalid.hex.number");
duke@1 530 else
duke@1 531 scanHexExponentAndSuffix();
duke@1 532 }
duke@1 533
jjg@409 534 private void skipIllegalUnderscores() {
jjg@409 535 if (ch == '_') {
jjg@409 536 lexError(bp, "illegal.underscore");
jjg@409 537 while (ch == '_')
jjg@409 538 scanChar();
jjg@409 539 }
jjg@409 540 }
jjg@409 541
duke@1 542 /** Read a number.
jjg@409 543 * @param radix The radix of the number; one of 2, j8, 10, 16.
duke@1 544 */
duke@1 545 private void scanNumber(int radix) {
duke@1 546 this.radix = radix;
duke@1 547 // for octal, allow base-10 digit in case it's a float literal
jjg@409 548 int digitRadix = (radix == 8 ? 10 : radix);
duke@1 549 boolean seendigit = false;
jjg@409 550 if (digit(digitRadix) >= 0) {
duke@1 551 seendigit = true;
jjg@409 552 scanDigits(digitRadix);
duke@1 553 }
duke@1 554 if (radix == 16 && ch == '.') {
duke@1 555 scanHexFractionAndSuffix(seendigit);
duke@1 556 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
duke@1 557 scanHexExponentAndSuffix();
jjg@409 558 } else if (digitRadix == 10 && ch == '.') {
duke@1 559 putChar(ch);
duke@1 560 scanChar();
duke@1 561 scanFractionAndSuffix();
jjg@409 562 } else if (digitRadix == 10 &&
duke@1 563 (ch == 'e' || ch == 'E' ||
duke@1 564 ch == 'f' || ch == 'F' ||
duke@1 565 ch == 'd' || ch == 'D')) {
duke@1 566 scanFractionAndSuffix();
duke@1 567 } else {
duke@1 568 if (ch == 'l' || ch == 'L') {
duke@1 569 scanChar();
duke@1 570 token = LONGLITERAL;
duke@1 571 } else {
duke@1 572 token = INTLITERAL;
duke@1 573 }
duke@1 574 }
duke@1 575 }
duke@1 576
duke@1 577 /** Read an identifier.
duke@1 578 */
duke@1 579 private void scanIdent() {
duke@1 580 boolean isJavaIdentifierPart;
duke@1 581 char high;
duke@1 582 do {
duke@1 583 if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
duke@1 584 // optimization, was: putChar(ch);
duke@1 585
duke@1 586 scanChar();
duke@1 587 switch (ch) {
duke@1 588 case 'A': case 'B': case 'C': case 'D': case 'E':
duke@1 589 case 'F': case 'G': case 'H': case 'I': case 'J':
duke@1 590 case 'K': case 'L': case 'M': case 'N': case 'O':
duke@1 591 case 'P': case 'Q': case 'R': case 'S': case 'T':
duke@1 592 case 'U': case 'V': case 'W': case 'X': case 'Y':
duke@1 593 case 'Z':
duke@1 594 case 'a': case 'b': case 'c': case 'd': case 'e':
duke@1 595 case 'f': case 'g': case 'h': case 'i': case 'j':
duke@1 596 case 'k': case 'l': case 'm': case 'n': case 'o':
duke@1 597 case 'p': case 'q': case 'r': case 's': case 't':
duke@1 598 case 'u': case 'v': case 'w': case 'x': case 'y':
duke@1 599 case 'z':
duke@1 600 case '$': case '_':
duke@1 601 case '0': case '1': case '2': case '3': case '4':
duke@1 602 case '5': case '6': case '7': case '8': case '9':
duke@1 603 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
duke@1 604 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
duke@1 605 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
duke@1 606 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
duke@1 607 case '\u0015': case '\u0016': case '\u0017':
duke@1 608 case '\u0018': case '\u0019': case '\u001B':
duke@1 609 case '\u007F':
duke@1 610 break;
duke@1 611 case '\u001A': // EOI is also a legal identifier part
duke@1 612 if (bp >= buflen) {
duke@1 613 name = names.fromChars(sbuf, 0, sp);
duke@1 614 token = keywords.key(name);
duke@1 615 return;
duke@1 616 }
duke@1 617 break;
duke@1 618 default:
duke@1 619 if (ch < '\u0080') {
duke@1 620 // all ASCII range chars already handled, above
duke@1 621 isJavaIdentifierPart = false;
duke@1 622 } else {
duke@1 623 high = scanSurrogates();
duke@1 624 if (high != 0) {
duke@1 625 if (sp == sbuf.length) {
duke@1 626 putChar(high);
duke@1 627 } else {
duke@1 628 sbuf[sp++] = high;
duke@1 629 }
duke@1 630 isJavaIdentifierPart = Character.isJavaIdentifierPart(
duke@1 631 Character.toCodePoint(high, ch));
duke@1 632 } else {
duke@1 633 isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
duke@1 634 }
duke@1 635 }
duke@1 636 if (!isJavaIdentifierPart) {
duke@1 637 name = names.fromChars(sbuf, 0, sp);
duke@1 638 token = keywords.key(name);
duke@1 639 return;
duke@1 640 }
duke@1 641 }
duke@1 642 } while (true);
duke@1 643 }
duke@1 644
duke@1 645 /** Are surrogates supported?
duke@1 646 */
duke@1 647 final static boolean surrogatesSupported = surrogatesSupported();
duke@1 648 private static boolean surrogatesSupported() {
duke@1 649 try {
duke@1 650 Character.isHighSurrogate('a');
duke@1 651 return true;
duke@1 652 } catch (NoSuchMethodError ex) {
duke@1 653 return false;
duke@1 654 }
duke@1 655 }
duke@1 656
duke@1 657 /** Scan surrogate pairs. If 'ch' is a high surrogate and
duke@1 658 * the next character is a low surrogate, then put the low
duke@1 659 * surrogate in 'ch', and return the high surrogate.
duke@1 660 * otherwise, just return 0.
duke@1 661 */
duke@1 662 private char scanSurrogates() {
duke@1 663 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
duke@1 664 char high = ch;
duke@1 665
duke@1 666 scanChar();
duke@1 667
duke@1 668 if (Character.isLowSurrogate(ch)) {
duke@1 669 return high;
duke@1 670 }
duke@1 671
duke@1 672 ch = high;
duke@1 673 }
duke@1 674
duke@1 675 return 0;
duke@1 676 }
duke@1 677
duke@1 678 /** Return true if ch can be part of an operator.
duke@1 679 */
duke@1 680 private boolean isSpecial(char ch) {
duke@1 681 switch (ch) {
duke@1 682 case '!': case '%': case '&': case '*': case '?':
duke@1 683 case '+': case '-': case ':': case '<': case '=':
duke@1 684 case '>': case '^': case '|': case '~':
duke@1 685 case '@':
duke@1 686 return true;
duke@1 687 default:
duke@1 688 return false;
duke@1 689 }
duke@1 690 }
duke@1 691
duke@1 692 /** Read longest possible sequence of special characters and convert
duke@1 693 * to token.
duke@1 694 */
duke@1 695 private void scanOperator() {
duke@1 696 while (true) {
duke@1 697 putChar(ch);
duke@1 698 Name newname = names.fromChars(sbuf, 0, sp);
duke@1 699 if (keywords.key(newname) == IDENTIFIER) {
duke@1 700 sp--;
duke@1 701 break;
duke@1 702 }
duke@1 703 name = newname;
duke@1 704 token = keywords.key(newname);
duke@1 705 scanChar();
duke@1 706 if (!isSpecial(ch)) break;
duke@1 707 }
duke@1 708 }
duke@1 709
duke@1 710 /**
duke@1 711 * Scan a documention comment; determine if a deprecated tag is present.
duke@1 712 * Called once the initial /, * have been skipped, positioned at the second *
duke@1 713 * (which is treated as the beginning of the first line).
duke@1 714 * Stops positioned at the closing '/'.
duke@1 715 */
duke@1 716 @SuppressWarnings("fallthrough")
duke@1 717 private void scanDocComment() {
duke@1 718 boolean deprecatedPrefix = false;
duke@1 719
duke@1 720 forEachLine:
duke@1 721 while (bp < buflen) {
duke@1 722
duke@1 723 // Skip optional WhiteSpace at beginning of line
duke@1 724 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
duke@1 725 scanCommentChar();
duke@1 726 }
duke@1 727
duke@1 728 // Skip optional consecutive Stars
duke@1 729 while (bp < buflen && ch == '*') {
duke@1 730 scanCommentChar();
duke@1 731 if (ch == '/') {
duke@1 732 return;
duke@1 733 }
duke@1 734 }
duke@1 735
duke@1 736 // Skip optional WhiteSpace after Stars
duke@1 737 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
duke@1 738 scanCommentChar();
duke@1 739 }
duke@1 740
duke@1 741 deprecatedPrefix = false;
duke@1 742 // At beginning of line in the JavaDoc sense.
duke@1 743 if (bp < buflen && ch == '@' && !deprecatedFlag) {
duke@1 744 scanCommentChar();
duke@1 745 if (bp < buflen && ch == 'd') {
duke@1 746 scanCommentChar();
duke@1 747 if (bp < buflen && ch == 'e') {
duke@1 748 scanCommentChar();
duke@1 749 if (bp < buflen && ch == 'p') {
duke@1 750 scanCommentChar();
duke@1 751 if (bp < buflen && ch == 'r') {
duke@1 752 scanCommentChar();
duke@1 753 if (bp < buflen && ch == 'e') {
duke@1 754 scanCommentChar();
duke@1 755 if (bp < buflen && ch == 'c') {
duke@1 756 scanCommentChar();
duke@1 757 if (bp < buflen && ch == 'a') {
duke@1 758 scanCommentChar();
duke@1 759 if (bp < buflen && ch == 't') {
duke@1 760 scanCommentChar();
duke@1 761 if (bp < buflen && ch == 'e') {
duke@1 762 scanCommentChar();
duke@1 763 if (bp < buflen && ch == 'd') {
duke@1 764 deprecatedPrefix = true;
duke@1 765 scanCommentChar();
duke@1 766 }}}}}}}}}}}
duke@1 767 if (deprecatedPrefix && bp < buflen) {
duke@1 768 if (Character.isWhitespace(ch)) {
duke@1 769 deprecatedFlag = true;
duke@1 770 } else if (ch == '*') {
duke@1 771 scanCommentChar();
duke@1 772 if (ch == '/') {
duke@1 773 deprecatedFlag = true;
duke@1 774 return;
duke@1 775 }
duke@1 776 }
duke@1 777 }
duke@1 778
duke@1 779 // Skip rest of line
duke@1 780 while (bp < buflen) {
duke@1 781 switch (ch) {
duke@1 782 case '*':
duke@1 783 scanCommentChar();
duke@1 784 if (ch == '/') {
duke@1 785 return;
duke@1 786 }
duke@1 787 break;
duke@1 788 case CR: // (Spec 3.4)
duke@1 789 scanCommentChar();
duke@1 790 if (ch != LF) {
duke@1 791 continue forEachLine;
duke@1 792 }
duke@1 793 /* fall through to LF case */
duke@1 794 case LF: // (Spec 3.4)
duke@1 795 scanCommentChar();
duke@1 796 continue forEachLine;
duke@1 797 default:
duke@1 798 scanCommentChar();
duke@1 799 }
duke@1 800 } // rest of line
duke@1 801 } // forEachLine
duke@1 802 return;
duke@1 803 }
duke@1 804
duke@1 805 /** The value of a literal token, recorded as a string.
duke@1 806 * For integers, leading 0x and 'l' suffixes are suppressed.
duke@1 807 */
duke@1 808 public String stringVal() {
duke@1 809 return new String(sbuf, 0, sp);
duke@1 810 }
duke@1 811
duke@1 812 /** Read token.
duke@1 813 */
duke@1 814 public void nextToken() {
duke@1 815
duke@1 816 try {
duke@1 817 prevEndPos = endPos;
duke@1 818 sp = 0;
duke@1 819
duke@1 820 while (true) {
duke@1 821 pos = bp;
duke@1 822 switch (ch) {
duke@1 823 case ' ': // (Spec 3.6)
duke@1 824 case '\t': // (Spec 3.6)
duke@1 825 case FF: // (Spec 3.6)
duke@1 826 do {
duke@1 827 scanChar();
duke@1 828 } while (ch == ' ' || ch == '\t' || ch == FF);
duke@1 829 endPos = bp;
duke@1 830 processWhiteSpace();
duke@1 831 break;
duke@1 832 case LF: // (Spec 3.4)
duke@1 833 scanChar();
duke@1 834 endPos = bp;
duke@1 835 processLineTerminator();
duke@1 836 break;
duke@1 837 case CR: // (Spec 3.4)
duke@1 838 scanChar();
duke@1 839 if (ch == LF) {
duke@1 840 scanChar();
duke@1 841 }
duke@1 842 endPos = bp;
duke@1 843 processLineTerminator();
duke@1 844 break;
duke@1 845 case 'A': case 'B': case 'C': case 'D': case 'E':
duke@1 846 case 'F': case 'G': case 'H': case 'I': case 'J':
duke@1 847 case 'K': case 'L': case 'M': case 'N': case 'O':
duke@1 848 case 'P': case 'Q': case 'R': case 'S': case 'T':
duke@1 849 case 'U': case 'V': case 'W': case 'X': case 'Y':
duke@1 850 case 'Z':
duke@1 851 case 'a': case 'b': case 'c': case 'd': case 'e':
duke@1 852 case 'f': case 'g': case 'h': case 'i': case 'j':
duke@1 853 case 'k': case 'l': case 'm': case 'n': case 'o':
duke@1 854 case 'p': case 'q': case 'r': case 's': case 't':
duke@1 855 case 'u': case 'v': case 'w': case 'x': case 'y':
duke@1 856 case 'z':
duke@1 857 case '$': case '_':
duke@1 858 scanIdent();
duke@1 859 return;
duke@1 860 case '0':
duke@1 861 scanChar();
duke@1 862 if (ch == 'x' || ch == 'X') {
duke@1 863 scanChar();
jjg@409 864 skipIllegalUnderscores();
duke@1 865 if (ch == '.') {
duke@1 866 scanHexFractionAndSuffix(false);
duke@1 867 } else if (digit(16) < 0) {
duke@1 868 lexError("invalid.hex.number");
duke@1 869 } else {
duke@1 870 scanNumber(16);
duke@1 871 }
jjg@409 872 } else if (ch == 'b' || ch == 'B') {
jjg@409 873 if (!allowBinaryLiterals) {
jjg@409 874 lexError("unsupported.binary.lit", source.name);
jjg@409 875 allowBinaryLiterals = true;
jjg@409 876 }
jjg@409 877 scanChar();
jjg@409 878 skipIllegalUnderscores();
jjg@423 879 if (digit(2) < 0) {
jjg@423 880 lexError("invalid.binary.number");
jjg@423 881 } else {
jjg@423 882 scanNumber(2);
jjg@423 883 }
duke@1 884 } else {
duke@1 885 putChar('0');
jjg@409 886 if (ch == '_') {
jjg@409 887 int savePos = bp;
jjg@409 888 do {
jjg@409 889 scanChar();
jjg@409 890 } while (ch == '_');
jjg@409 891 if (digit(10) < 0) {
jjg@409 892 lexError(savePos, "illegal.underscore");
jjg@409 893 }
jjg@409 894 }
duke@1 895 scanNumber(8);
duke@1 896 }
duke@1 897 return;
duke@1 898 case '1': case '2': case '3': case '4':
duke@1 899 case '5': case '6': case '7': case '8': case '9':
duke@1 900 scanNumber(10);
duke@1 901 return;
duke@1 902 case '.':
duke@1 903 scanChar();
duke@1 904 if ('0' <= ch && ch <= '9') {
duke@1 905 putChar('.');
duke@1 906 scanFractionAndSuffix();
duke@1 907 } else if (ch == '.') {
duke@1 908 putChar('.'); putChar('.');
duke@1 909 scanChar();
duke@1 910 if (ch == '.') {
duke@1 911 scanChar();
duke@1 912 putChar('.');
duke@1 913 token = ELLIPSIS;
duke@1 914 } else {
duke@1 915 lexError("malformed.fp.lit");
duke@1 916 }
duke@1 917 } else {
duke@1 918 token = DOT;
duke@1 919 }
duke@1 920 return;
duke@1 921 case ',':
duke@1 922 scanChar(); token = COMMA; return;
duke@1 923 case ';':
duke@1 924 scanChar(); token = SEMI; return;
duke@1 925 case '(':
duke@1 926 scanChar(); token = LPAREN; return;
duke@1 927 case ')':
duke@1 928 scanChar(); token = RPAREN; return;
duke@1 929 case '[':
duke@1 930 scanChar(); token = LBRACKET; return;
duke@1 931 case ']':
duke@1 932 scanChar(); token = RBRACKET; return;
duke@1 933 case '{':
duke@1 934 scanChar(); token = LBRACE; return;
duke@1 935 case '}':
duke@1 936 scanChar(); token = RBRACE; return;
duke@1 937 case '/':
duke@1 938 scanChar();
duke@1 939 if (ch == '/') {
duke@1 940 do {
duke@1 941 scanCommentChar();
duke@1 942 } while (ch != CR && ch != LF && bp < buflen);
duke@1 943 if (bp < buflen) {
duke@1 944 endPos = bp;
duke@1 945 processComment(CommentStyle.LINE);
duke@1 946 }
duke@1 947 break;
duke@1 948 } else if (ch == '*') {
duke@1 949 scanChar();
duke@1 950 CommentStyle style;
duke@1 951 if (ch == '*') {
duke@1 952 style = CommentStyle.JAVADOC;
duke@1 953 scanDocComment();
duke@1 954 } else {
duke@1 955 style = CommentStyle.BLOCK;
duke@1 956 while (bp < buflen) {
duke@1 957 if (ch == '*') {
duke@1 958 scanChar();
duke@1 959 if (ch == '/') break;
duke@1 960 } else {
duke@1 961 scanCommentChar();
duke@1 962 }
duke@1 963 }
duke@1 964 }
duke@1 965 if (ch == '/') {
duke@1 966 scanChar();
duke@1 967 endPos = bp;
duke@1 968 processComment(style);
duke@1 969 break;
duke@1 970 } else {
duke@1 971 lexError("unclosed.comment");
duke@1 972 return;
duke@1 973 }
duke@1 974 } else if (ch == '=') {
duke@1 975 name = names.slashequals;
duke@1 976 token = SLASHEQ;
duke@1 977 scanChar();
duke@1 978 } else {
duke@1 979 name = names.slash;
duke@1 980 token = SLASH;
duke@1 981 }
duke@1 982 return;
duke@1 983 case '\'':
duke@1 984 scanChar();
duke@1 985 if (ch == '\'') {
duke@1 986 lexError("empty.char.lit");
duke@1 987 } else {
duke@1 988 if (ch == CR || ch == LF)
duke@1 989 lexError(pos, "illegal.line.end.in.char.lit");
duke@1 990 scanLitChar();
duke@1 991 if (ch == '\'') {
duke@1 992 scanChar();
duke@1 993 token = CHARLITERAL;
duke@1 994 } else {
duke@1 995 lexError(pos, "unclosed.char.lit");
duke@1 996 }
duke@1 997 }
duke@1 998 return;
duke@1 999 case '\"':
duke@1 1000 scanChar();
duke@1 1001 while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
duke@1 1002 scanLitChar();
duke@1 1003 if (ch == '\"') {
duke@1 1004 token = STRINGLITERAL;
duke@1 1005 scanChar();
duke@1 1006 } else {
duke@1 1007 lexError(pos, "unclosed.str.lit");
duke@1 1008 }
duke@1 1009 return;
jrose@267 1010 case '#':
jrose@267 1011 scanChar();
jrose@267 1012 if (ch == '\"') {
jrose@267 1013 scanChar();
jrose@267 1014 if (ch == '\"')
jrose@267 1015 lexError(pos, "empty.bytecode.ident");
jrose@267 1016 while (ch != '\"' && ch != CR && ch != LF && bp < buflen) {
jrose@267 1017 scanBytecodeNameChar();
jrose@267 1018 }
jrose@267 1019 if (ch == '\"') {
jrose@267 1020 name = names.fromChars(sbuf, 0, sp);
jrose@267 1021 token = IDENTIFIER; // even if #"int" or #"do"
jrose@267 1022 scanChar();
jrose@267 1023 } else {
jrose@267 1024 lexError(pos, "unclosed.bytecode.ident");
jrose@267 1025 }
jrose@267 1026 } else {
jrose@267 1027 lexError("illegal.char", String.valueOf((int)'#'));
jrose@267 1028 }
jrose@267 1029 return;
duke@1 1030 default:
duke@1 1031 if (isSpecial(ch)) {
duke@1 1032 scanOperator();
duke@1 1033 } else {
duke@1 1034 boolean isJavaIdentifierStart;
duke@1 1035 if (ch < '\u0080') {
duke@1 1036 // all ASCII range chars already handled, above
duke@1 1037 isJavaIdentifierStart = false;
duke@1 1038 } else {
duke@1 1039 char high = scanSurrogates();
duke@1 1040 if (high != 0) {
duke@1 1041 if (sp == sbuf.length) {
duke@1 1042 putChar(high);
duke@1 1043 } else {
duke@1 1044 sbuf[sp++] = high;
duke@1 1045 }
duke@1 1046
duke@1 1047 isJavaIdentifierStart = Character.isJavaIdentifierStart(
duke@1 1048 Character.toCodePoint(high, ch));
duke@1 1049 } else {
duke@1 1050 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
duke@1 1051 }
duke@1 1052 }
duke@1 1053 if (isJavaIdentifierStart) {
duke@1 1054 scanIdent();
duke@1 1055 } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
duke@1 1056 token = EOF;
duke@1 1057 pos = bp = eofPos;
duke@1 1058 } else {
duke@1 1059 lexError("illegal.char", String.valueOf((int)ch));
duke@1 1060 scanChar();
duke@1 1061 }
duke@1 1062 }
duke@1 1063 return;
duke@1 1064 }
duke@1 1065 }
duke@1 1066 } finally {
duke@1 1067 endPos = bp;
duke@1 1068 if (scannerDebug)
duke@1 1069 System.out.println("nextToken(" + pos
duke@1 1070 + "," + endPos + ")=|" +
duke@1 1071 new String(getRawCharacters(pos, endPos))
duke@1 1072 + "|");
duke@1 1073 }
duke@1 1074 }
duke@1 1075
duke@1 1076 /** Return the current token, set by nextToken().
duke@1 1077 */
duke@1 1078 public Token token() {
duke@1 1079 return token;
duke@1 1080 }
duke@1 1081
duke@1 1082 /** Sets the current token.
duke@1 1083 */
duke@1 1084 public void token(Token token) {
duke@1 1085 this.token = token;
duke@1 1086 }
duke@1 1087
duke@1 1088 /** Return the current token's position: a 0-based
duke@1 1089 * offset from beginning of the raw input stream
duke@1 1090 * (before unicode translation)
duke@1 1091 */
duke@1 1092 public int pos() {
duke@1 1093 return pos;
duke@1 1094 }
duke@1 1095
duke@1 1096 /** Return the last character position of the current token.
duke@1 1097 */
duke@1 1098 public int endPos() {
duke@1 1099 return endPos;
duke@1 1100 }
duke@1 1101
duke@1 1102 /** Return the last character position of the previous token.
duke@1 1103 */
duke@1 1104 public int prevEndPos() {
duke@1 1105 return prevEndPos;
duke@1 1106 }
duke@1 1107
duke@1 1108 /** Return the position where a lexical error occurred;
duke@1 1109 */
duke@1 1110 public int errPos() {
duke@1 1111 return errPos;
duke@1 1112 }
duke@1 1113
duke@1 1114 /** Set the position where a lexical error occurred;
duke@1 1115 */
duke@1 1116 public void errPos(int pos) {
duke@1 1117 errPos = pos;
duke@1 1118 }
duke@1 1119
duke@1 1120 /** Return the name of an identifier or token for the current token.
duke@1 1121 */
duke@1 1122 public Name name() {
duke@1 1123 return name;
duke@1 1124 }
duke@1 1125
duke@1 1126 /** Return the radix of a numeric literal token.
duke@1 1127 */
duke@1 1128 public int radix() {
duke@1 1129 return radix;
duke@1 1130 }
duke@1 1131
duke@1 1132 /** Has a @deprecated been encountered in last doc comment?
duke@1 1133 * This needs to be reset by client with resetDeprecatedFlag.
duke@1 1134 */
duke@1 1135 public boolean deprecatedFlag() {
duke@1 1136 return deprecatedFlag;
duke@1 1137 }
duke@1 1138
duke@1 1139 public void resetDeprecatedFlag() {
duke@1 1140 deprecatedFlag = false;
duke@1 1141 }
duke@1 1142
duke@1 1143 /**
duke@1 1144 * Returns the documentation string of the current token.
duke@1 1145 */
duke@1 1146 public String docComment() {
duke@1 1147 return null;
duke@1 1148 }
duke@1 1149
duke@1 1150 /**
duke@1 1151 * Returns a copy of the input buffer, up to its inputLength.
duke@1 1152 * Unicode escape sequences are not translated.
duke@1 1153 */
duke@1 1154 public char[] getRawCharacters() {
duke@1 1155 char[] chars = new char[buflen];
duke@1 1156 System.arraycopy(buf, 0, chars, 0, buflen);
duke@1 1157 return chars;
duke@1 1158 }
duke@1 1159
duke@1 1160 /**
duke@1 1161 * Returns a copy of a character array subset of the input buffer.
duke@1 1162 * The returned array begins at the <code>beginIndex</code> and
duke@1 1163 * extends to the character at index <code>endIndex - 1</code>.
duke@1 1164 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
duke@1 1165 * This behavior is like
duke@1 1166 * <code>String.substring(beginIndex, endIndex)</code>.
duke@1 1167 * Unicode escape sequences are not translated.
duke@1 1168 *
duke@1 1169 * @param beginIndex the beginning index, inclusive.
duke@1 1170 * @param endIndex the ending index, exclusive.
duke@1 1171 * @throws IndexOutOfBounds if either offset is outside of the
duke@1 1172 * array bounds
duke@1 1173 */
duke@1 1174 public char[] getRawCharacters(int beginIndex, int endIndex) {
duke@1 1175 int length = endIndex - beginIndex;
duke@1 1176 char[] chars = new char[length];
duke@1 1177 System.arraycopy(buf, beginIndex, chars, 0, length);
duke@1 1178 return chars;
duke@1 1179 }
duke@1 1180
/** The kinds of comment that are reported to processComment(CommentStyle). */
public enum CommentStyle {
    /** A comment introduced by two slashes and running to the end of the line. */
    LINE,

    /** A slash-star comment that is not a doc comment. */
    BLOCK,

    /** A comment whose opening slash-star is followed by a second star. */
    JAVADOC
}
duke@1 1186
duke@1 1187 /**
duke@1 1188 * Called when a complete comment has been scanned. pos and endPos
duke@1 1189 * will mark the comment boundary.
duke@1 1190 */
duke@1 1191 protected void processComment(CommentStyle style) {
duke@1 1192 if (scannerDebug)
duke@1 1193 System.out.println("processComment(" + pos
duke@1 1194 + "," + endPos + "," + style + ")=|"
duke@1 1195 + new String(getRawCharacters(pos, endPos))
duke@1 1196 + "|");
duke@1 1197 }
duke@1 1198
duke@1 1199 /**
duke@1 1200 * Called when a complete whitespace run has been scanned. pos and endPos
duke@1 1201 * will mark the whitespace boundary.
duke@1 1202 */
duke@1 1203 protected void processWhiteSpace() {
duke@1 1204 if (scannerDebug)
duke@1 1205 System.out.println("processWhitespace(" + pos
duke@1 1206 + "," + endPos + ")=|" +
duke@1 1207 new String(getRawCharacters(pos, endPos))
duke@1 1208 + "|");
duke@1 1209 }
duke@1 1210
duke@1 1211 /**
duke@1 1212 * Called when a line terminator has been processed.
duke@1 1213 */
duke@1 1214 protected void processLineTerminator() {
duke@1 1215 if (scannerDebug)
duke@1 1216 System.out.println("processTerminator(" + pos
duke@1 1217 + "," + endPos + ")=|" +
duke@1 1218 new String(getRawCharacters(pos, endPos))
duke@1 1219 + "|");
duke@1 1220 }
duke@1 1221
duke@1 1222 /** Build a map for translating between line numbers and
duke@1 1223 * positions in the input.
duke@1 1224 *
duke@1 1225 * @return a LineMap */
duke@1 1226 public Position.LineMap getLineMap() {
duke@1 1227 return Position.makeLineMap(buf, buflen, false);
duke@1 1228 }
duke@1 1229
duke@1 1230 }

mercurial