Mon, 26 Jul 2010 14:25:56 -0700
6957438: improve code for generating warning messages containing option names
Reviewed-by: mcimadamore
1 /*
2 * Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package com.sun.tools.javac.parser;
28 import java.nio.*;
30 import com.sun.tools.javac.code.Source;
31 import com.sun.tools.javac.file.JavacFileManager;
32 import com.sun.tools.javac.util.*;
35 import static com.sun.tools.javac.parser.Token.*;
36 import static com.sun.tools.javac.util.LayoutCharacters.*;
38 /** The lexical analyzer maps an input stream consisting of
39 * ASCII characters and Unicode escapes into a token sequence.
40 *
41 * <p><b>This is NOT part of any supported API.
42 * If you write code that depends on this, you do so at your own risk.
43 * This code and its internal interfaces are subject to change or
44 * deletion without notice.</b>
45 */
46 public class Scanner implements Lexer {
48 private static boolean scannerDebug = false;
50 /** A factory for creating scanners. */
51 public static class Factory {
52 /** The context key for the scanner factory. */
53 public static final Context.Key<Scanner.Factory> scannerFactoryKey =
54 new Context.Key<Scanner.Factory>();
56 /** Get the Factory instance for this context. */
57 public static Factory instance(Context context) {
58 Factory instance = context.get(scannerFactoryKey);
59 if (instance == null)
60 instance = new Factory(context);
61 return instance;
62 }
64 final Log log;
65 final Names names;
66 final Source source;
67 final Keywords keywords;
69 /** Create a new scanner factory. */
70 protected Factory(Context context) {
71 context.put(scannerFactoryKey, this);
72 this.log = Log.instance(context);
73 this.names = Names.instance(context);
74 this.source = Source.instance(context);
75 this.keywords = Keywords.instance(context);
76 }
78 public Scanner newScanner(CharSequence input) {
79 if (input instanceof CharBuffer) {
80 return new Scanner(this, (CharBuffer)input);
81 } else {
82 char[] array = input.toString().toCharArray();
83 return newScanner(array, array.length);
84 }
85 }
87 public Scanner newScanner(char[] input, int inputLength) {
88 return new Scanner(this, input, inputLength);
89 }
90 }
92 /* Output variables; set by nextToken():
93 */
95 /** The token, set by nextToken().
96 */
97 private Token token;
99 /** Allow hex floating-point literals.
100 */
101 private boolean allowHexFloats;
103 /** Allow binary literals.
104 */
105 private boolean allowBinaryLiterals;
107 /** Allow underscores in literals.
108 */
109 private boolean allowUnderscoresInLiterals;
111 /** The source language setting.
112 */
113 private Source source;
115 /** The token's position, 0-based offset from beginning of text.
116 */
117 private int pos;
119 /** Character position just after the last character of the token.
120 */
121 private int endPos;
123 /** The last character position of the previous token.
124 */
125 private int prevEndPos;
127 /** The position where a lexical error occurred;
128 */
129 private int errPos = Position.NOPOS;
131 /** The name of an identifier or token:
132 */
133 private Name name;
135 /** The radix of a numeric literal token.
136 */
137 private int radix;
139 /** Has a @deprecated been encountered in last doc comment?
140 * this needs to be reset by client.
141 */
142 protected boolean deprecatedFlag = false;
144 /** A character buffer for literals.
145 */
146 private char[] sbuf = new char[128];
147 private int sp;
149 /** The input buffer, index of next chacter to be read,
150 * index of one past last character in buffer.
151 */
152 private char[] buf;
153 private int bp;
154 private int buflen;
155 private int eofPos;
157 /** The current character.
158 */
159 private char ch;
161 /** The buffer index of the last converted unicode character
162 */
163 private int unicodeConversionBp = -1;
165 /** The log to be used for error reporting.
166 */
167 private final Log log;
169 /** The name table. */
170 private final Names names;
172 /** The keyword table. */
173 private final Keywords keywords;
/** Common code for constructors: take the shared services from the
 *  factory and cache the language-feature switches of the source level.
 *
 *  @param fac the factory which created this Scanner
 */
private Scanner(Factory fac) {
    log = fac.log;
    names = fac.names;
    keywords = fac.keywords;
    source = fac.source;
    allowBinaryLiterals = source.allowBinaryLiterals();
    allowHexFloats = source.allowHexFloats();
    // Previously initialised from allowBinaryLiterals() (copy/paste slip);
    // each switch must be driven by its own feature predicate.
    // NOTE(review): relies on Source declaring allowUnderscoresInLiterals().
    allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
}
186 private static final boolean hexFloatsWork = hexFloatsWork();
187 private static boolean hexFloatsWork() {
188 try {
189 Float.valueOf("0x1.0p1");
190 return true;
191 } catch (NumberFormatException ex) {
192 return false;
193 }
194 }
/** Create a scanner over the contents of a character buffer.
 *  The characters are taken from {@code JavacFileManager.toArray(buffer)}
 *  and {@code buffer.limit()} is used as the input length.
 *  The buffer must implement array() and compact(), and remaining()
 *  must be less than limit().
 *
 *  @param fac    the factory which created this Scanner
 *  @param buffer the input characters
 */
protected Scanner(Factory fac, CharBuffer buffer) {
    this(fac, JavacFileManager.toArray(buffer), buffer.limit());
}
203 /**
204 * Create a scanner from the input array. This method might
205 * modify the array. To avoid copying the input array, ensure
206 * that {@code inputLength < input.length} or
207 * {@code input[input.length -1]} is a white space character.
208 *
209 * @param fac the factory which created this Scanner
210 * @param input the input, might be modified
211 * @param inputLength the size of the input.
212 * Must be positive and less than or equal to input.length.
213 */
214 protected Scanner(Factory fac, char[] input, int inputLength) {
215 this(fac);
216 eofPos = inputLength;
217 if (inputLength == input.length) {
218 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
219 inputLength--;
220 } else {
221 char[] newInput = new char[inputLength + 1];
222 System.arraycopy(input, 0, newInput, 0, input.length);
223 input = newInput;
224 }
225 }
226 buf = input;
227 buflen = inputLength;
228 buf[buflen] = EOI;
229 bp = -1;
230 scanChar();
231 }
233 /** Report an error at the given position using the provided arguments.
234 */
235 private void lexError(int pos, String key, Object... args) {
236 log.error(pos, key, args);
237 token = ERROR;
238 errPos = pos;
239 }
241 /** Report an error at the current token position using the provided
242 * arguments.
243 */
244 private void lexError(String key, Object... args) {
245 lexError(pos, key, args);
246 }
248 /** Convert an ASCII digit from its base (8, 10, or 16)
249 * to its value.
250 */
251 private int digit(int base) {
252 char c = ch;
253 int result = Character.digit(c, base);
254 if (result >= 0 && c > 0x7f) {
255 lexError(pos+1, "illegal.nonascii.digit");
256 ch = "0123456789abcdef".charAt(result);
257 }
258 return result;
259 }
261 /** Convert unicode escape; bp points to initial '\' character
262 * (Spec 3.3).
263 */
264 private void convertUnicode() {
265 if (ch == '\\' && unicodeConversionBp != bp) {
266 bp++; ch = buf[bp];
267 if (ch == 'u') {
268 do {
269 bp++; ch = buf[bp];
270 } while (ch == 'u');
271 int limit = bp + 3;
272 if (limit < buflen) {
273 int d = digit(16);
274 int code = d;
275 while (bp < limit && d >= 0) {
276 bp++; ch = buf[bp];
277 d = digit(16);
278 code = (code << 4) + d;
279 }
280 if (d >= 0) {
281 ch = (char)code;
282 unicodeConversionBp = bp;
283 return;
284 }
285 }
286 lexError(bp, "illegal.unicode.esc");
287 } else {
288 bp--;
289 ch = '\\';
290 }
291 }
292 }
294 /** Read next character.
295 */
296 private void scanChar() {
297 ch = buf[++bp];
298 if (ch == '\\') {
299 convertUnicode();
300 }
301 }
303 /** Read next character in comment, skipping over double '\' characters.
304 */
305 private void scanCommentChar() {
306 scanChar();
307 if (ch == '\\') {
308 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
309 bp++;
310 } else {
311 convertUnicode();
312 }
313 }
314 }
316 /** Append a character to sbuf.
317 */
318 private void putChar(char ch) {
319 if (sp == sbuf.length) {
320 char[] newsbuf = new char[sbuf.length * 2];
321 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
322 sbuf = newsbuf;
323 }
324 sbuf[sp++] = ch;
325 }
327 /** For debugging purposes: print character.
328 */
329 private void dch() {
330 System.err.print(ch); System.out.flush();
331 }
333 /** Read next character in character or string literal and copy into sbuf.
334 */
335 private void scanLitChar(boolean forBytecodeName) {
336 if (ch == '\\') {
337 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
338 bp++;
339 putChar('\\');
340 scanChar();
341 } else {
342 scanChar();
343 switch (ch) {
344 case '0': case '1': case '2': case '3':
345 case '4': case '5': case '6': case '7':
346 char leadch = ch;
347 int oct = digit(8);
348 scanChar();
349 if ('0' <= ch && ch <= '7') {
350 oct = oct * 8 + digit(8);
351 scanChar();
352 if (leadch <= '3' && '0' <= ch && ch <= '7') {
353 oct = oct * 8 + digit(8);
354 scanChar();
355 }
356 }
357 putChar((char)oct);
358 break;
359 case 'b':
360 putChar('\b'); scanChar(); break;
361 case 't':
362 putChar('\t'); scanChar(); break;
363 case 'n':
364 putChar('\n'); scanChar(); break;
365 case 'f':
366 putChar('\f'); scanChar(); break;
367 case 'r':
368 putChar('\r'); scanChar(); break;
369 case '\'':
370 putChar('\''); scanChar(); break;
371 case '\"':
372 putChar('\"'); scanChar(); break;
373 case '\\':
374 putChar('\\'); scanChar(); break;
375 case '|': case ',': case '?': case '%':
376 case '^': case '_': case '{': case '}':
377 case '!': case '-': case '=':
378 if (forBytecodeName) {
379 // Accept escape sequences for dangerous bytecode chars.
380 // This is illegal in normal Java string or character literals.
381 // Note that the escape sequence itself is passed through.
382 putChar('\\'); putChar(ch); scanChar();
383 } else {
384 lexError(bp, "illegal.esc.char");
385 }
386 break;
387 default:
388 lexError(bp, "illegal.esc.char");
389 }
390 }
391 } else if (bp != buflen) {
392 putChar(ch); scanChar();
393 }
394 }
395 private void scanLitChar() {
396 scanLitChar(false);
397 }
399 /** Read next character in an exotic name #"foo"
400 */
401 private void scanBytecodeNameChar() {
402 switch (ch) {
403 // reject any "dangerous" char which is illegal somewhere in the JVM spec
404 // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm
405 case '/': case '.': case ';': // illegal everywhere
406 case '<': case '>': // illegal in methods, dangerous in classes
407 case '[': // illegal in classes
408 lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch));
409 break;
410 }
411 scanLitChar(true);
412 }
414 private void scanDigits(int digitRadix) {
415 char saveCh;
416 int savePos;
417 do {
418 if (ch != '_') {
419 putChar(ch);
420 } else {
421 if (!allowUnderscoresInLiterals) {
422 lexError("unsupported.underscore.lit", source.name);
423 allowUnderscoresInLiterals = true;
424 }
425 }
426 saveCh = ch;
427 savePos = bp;
428 scanChar();
429 } while (digit(digitRadix) >= 0 || ch == '_');
430 if (saveCh == '_')
431 lexError(savePos, "illegal.underscore");
432 }
434 /** Read fractional part of hexadecimal floating point number.
435 */
436 private void scanHexExponentAndSuffix() {
437 if (ch == 'p' || ch == 'P') {
438 putChar(ch);
439 scanChar();
440 skipIllegalUnderscores();
441 if (ch == '+' || ch == '-') {
442 putChar(ch);
443 scanChar();
444 }
445 skipIllegalUnderscores();
446 if ('0' <= ch && ch <= '9') {
447 scanDigits(10);
448 if (!allowHexFloats) {
449 lexError("unsupported.fp.lit", source.name);
450 allowHexFloats = true;
451 }
452 else if (!hexFloatsWork)
453 lexError("unsupported.cross.fp.lit");
454 } else
455 lexError("malformed.fp.lit");
456 } else {
457 lexError("malformed.fp.lit");
458 }
459 if (ch == 'f' || ch == 'F') {
460 putChar(ch);
461 scanChar();
462 token = FLOATLITERAL;
463 } else {
464 if (ch == 'd' || ch == 'D') {
465 putChar(ch);
466 scanChar();
467 }
468 token = DOUBLELITERAL;
469 }
470 }
472 /** Read fractional part of floating point number.
473 */
474 private void scanFraction() {
475 skipIllegalUnderscores();
476 if ('0' <= ch && ch <= '9') {
477 scanDigits(10);
478 }
479 int sp1 = sp;
480 if (ch == 'e' || ch == 'E') {
481 putChar(ch);
482 scanChar();
483 skipIllegalUnderscores();
484 if (ch == '+' || ch == '-') {
485 putChar(ch);
486 scanChar();
487 }
488 skipIllegalUnderscores();
489 if ('0' <= ch && ch <= '9') {
490 scanDigits(10);
491 return;
492 }
493 lexError("malformed.fp.lit");
494 sp = sp1;
495 }
496 }
498 /** Read fractional part and 'd' or 'f' suffix of floating point number.
499 */
500 private void scanFractionAndSuffix() {
501 this.radix = 10;
502 scanFraction();
503 if (ch == 'f' || ch == 'F') {
504 putChar(ch);
505 scanChar();
506 token = FLOATLITERAL;
507 } else {
508 if (ch == 'd' || ch == 'D') {
509 putChar(ch);
510 scanChar();
511 }
512 token = DOUBLELITERAL;
513 }
514 }
516 /** Read fractional part and 'd' or 'f' suffix of floating point number.
517 */
518 private void scanHexFractionAndSuffix(boolean seendigit) {
519 this.radix = 16;
520 assert ch == '.';
521 putChar(ch);
522 scanChar();
523 skipIllegalUnderscores();
524 if (digit(16) >= 0) {
525 seendigit = true;
526 scanDigits(16);
527 }
528 if (!seendigit)
529 lexError("invalid.hex.number");
530 else
531 scanHexExponentAndSuffix();
532 }
534 private void skipIllegalUnderscores() {
535 if (ch == '_') {
536 lexError(bp, "illegal.underscore");
537 while (ch == '_')
538 scanChar();
539 }
540 }
542 /** Read a number.
543 * @param radix The radix of the number; one of 2, j8, 10, 16.
544 */
545 private void scanNumber(int radix) {
546 this.radix = radix;
547 // for octal, allow base-10 digit in case it's a float literal
548 int digitRadix = (radix == 8 ? 10 : radix);
549 boolean seendigit = false;
550 if (digit(digitRadix) >= 0) {
551 seendigit = true;
552 scanDigits(digitRadix);
553 }
554 if (radix == 16 && ch == '.') {
555 scanHexFractionAndSuffix(seendigit);
556 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
557 scanHexExponentAndSuffix();
558 } else if (digitRadix == 10 && ch == '.') {
559 putChar(ch);
560 scanChar();
561 scanFractionAndSuffix();
562 } else if (digitRadix == 10 &&
563 (ch == 'e' || ch == 'E' ||
564 ch == 'f' || ch == 'F' ||
565 ch == 'd' || ch == 'D')) {
566 scanFractionAndSuffix();
567 } else {
568 if (ch == 'l' || ch == 'L') {
569 scanChar();
570 token = LONGLITERAL;
571 } else {
572 token = INTLITERAL;
573 }
574 }
575 }
577 /** Read an identifier.
578 */
579 private void scanIdent() {
580 boolean isJavaIdentifierPart;
581 char high;
582 do {
583 if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
584 // optimization, was: putChar(ch);
586 scanChar();
587 switch (ch) {
588 case 'A': case 'B': case 'C': case 'D': case 'E':
589 case 'F': case 'G': case 'H': case 'I': case 'J':
590 case 'K': case 'L': case 'M': case 'N': case 'O':
591 case 'P': case 'Q': case 'R': case 'S': case 'T':
592 case 'U': case 'V': case 'W': case 'X': case 'Y':
593 case 'Z':
594 case 'a': case 'b': case 'c': case 'd': case 'e':
595 case 'f': case 'g': case 'h': case 'i': case 'j':
596 case 'k': case 'l': case 'm': case 'n': case 'o':
597 case 'p': case 'q': case 'r': case 's': case 't':
598 case 'u': case 'v': case 'w': case 'x': case 'y':
599 case 'z':
600 case '$': case '_':
601 case '0': case '1': case '2': case '3': case '4':
602 case '5': case '6': case '7': case '8': case '9':
603 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
604 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
605 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
606 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
607 case '\u0015': case '\u0016': case '\u0017':
608 case '\u0018': case '\u0019': case '\u001B':
609 case '\u007F':
610 break;
611 case '\u001A': // EOI is also a legal identifier part
612 if (bp >= buflen) {
613 name = names.fromChars(sbuf, 0, sp);
614 token = keywords.key(name);
615 return;
616 }
617 break;
618 default:
619 if (ch < '\u0080') {
620 // all ASCII range chars already handled, above
621 isJavaIdentifierPart = false;
622 } else {
623 high = scanSurrogates();
624 if (high != 0) {
625 if (sp == sbuf.length) {
626 putChar(high);
627 } else {
628 sbuf[sp++] = high;
629 }
630 isJavaIdentifierPart = Character.isJavaIdentifierPart(
631 Character.toCodePoint(high, ch));
632 } else {
633 isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
634 }
635 }
636 if (!isJavaIdentifierPart) {
637 name = names.fromChars(sbuf, 0, sp);
638 token = keywords.key(name);
639 return;
640 }
641 }
642 } while (true);
643 }
645 /** Are surrogates supported?
646 */
647 final static boolean surrogatesSupported = surrogatesSupported();
648 private static boolean surrogatesSupported() {
649 try {
650 Character.isHighSurrogate('a');
651 return true;
652 } catch (NoSuchMethodError ex) {
653 return false;
654 }
655 }
657 /** Scan surrogate pairs. If 'ch' is a high surrogate and
658 * the next character is a low surrogate, then put the low
659 * surrogate in 'ch', and return the high surrogate.
660 * otherwise, just return 0.
661 */
662 private char scanSurrogates() {
663 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
664 char high = ch;
666 scanChar();
668 if (Character.isLowSurrogate(ch)) {
669 return high;
670 }
672 ch = high;
673 }
675 return 0;
676 }
678 /** Return true if ch can be part of an operator.
679 */
680 private boolean isSpecial(char ch) {
681 switch (ch) {
682 case '!': case '%': case '&': case '*': case '?':
683 case '+': case '-': case ':': case '<': case '=':
684 case '>': case '^': case '|': case '~':
685 case '@':
686 return true;
687 default:
688 return false;
689 }
690 }
692 /** Read longest possible sequence of special characters and convert
693 * to token.
694 */
695 private void scanOperator() {
696 while (true) {
697 putChar(ch);
698 Name newname = names.fromChars(sbuf, 0, sp);
699 if (keywords.key(newname) == IDENTIFIER) {
700 sp--;
701 break;
702 }
703 name = newname;
704 token = keywords.key(newname);
705 scanChar();
706 if (!isSpecial(ch)) break;
707 }
708 }
710 /**
711 * Scan a documention comment; determine if a deprecated tag is present.
712 * Called once the initial /, * have been skipped, positioned at the second *
713 * (which is treated as the beginning of the first line).
714 * Stops positioned at the closing '/'.
715 */
716 @SuppressWarnings("fallthrough")
717 private void scanDocComment() {
718 boolean deprecatedPrefix = false;
720 forEachLine:
721 while (bp < buflen) {
723 // Skip optional WhiteSpace at beginning of line
724 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
725 scanCommentChar();
726 }
728 // Skip optional consecutive Stars
729 while (bp < buflen && ch == '*') {
730 scanCommentChar();
731 if (ch == '/') {
732 return;
733 }
734 }
736 // Skip optional WhiteSpace after Stars
737 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
738 scanCommentChar();
739 }
741 deprecatedPrefix = false;
742 // At beginning of line in the JavaDoc sense.
743 if (bp < buflen && ch == '@' && !deprecatedFlag) {
744 scanCommentChar();
745 if (bp < buflen && ch == 'd') {
746 scanCommentChar();
747 if (bp < buflen && ch == 'e') {
748 scanCommentChar();
749 if (bp < buflen && ch == 'p') {
750 scanCommentChar();
751 if (bp < buflen && ch == 'r') {
752 scanCommentChar();
753 if (bp < buflen && ch == 'e') {
754 scanCommentChar();
755 if (bp < buflen && ch == 'c') {
756 scanCommentChar();
757 if (bp < buflen && ch == 'a') {
758 scanCommentChar();
759 if (bp < buflen && ch == 't') {
760 scanCommentChar();
761 if (bp < buflen && ch == 'e') {
762 scanCommentChar();
763 if (bp < buflen && ch == 'd') {
764 deprecatedPrefix = true;
765 scanCommentChar();
766 }}}}}}}}}}}
767 if (deprecatedPrefix && bp < buflen) {
768 if (Character.isWhitespace(ch)) {
769 deprecatedFlag = true;
770 } else if (ch == '*') {
771 scanCommentChar();
772 if (ch == '/') {
773 deprecatedFlag = true;
774 return;
775 }
776 }
777 }
779 // Skip rest of line
780 while (bp < buflen) {
781 switch (ch) {
782 case '*':
783 scanCommentChar();
784 if (ch == '/') {
785 return;
786 }
787 break;
788 case CR: // (Spec 3.4)
789 scanCommentChar();
790 if (ch != LF) {
791 continue forEachLine;
792 }
793 /* fall through to LF case */
794 case LF: // (Spec 3.4)
795 scanCommentChar();
796 continue forEachLine;
797 default:
798 scanCommentChar();
799 }
800 } // rest of line
801 } // forEachLine
802 return;
803 }
805 /** The value of a literal token, recorded as a string.
806 * For integers, leading 0x and 'l' suffixes are suppressed.
807 */
808 public String stringVal() {
809 return new String(sbuf, 0, sp);
810 }
812 /** Read token.
813 */
814 public void nextToken() {
816 try {
817 prevEndPos = endPos;
818 sp = 0;
820 while (true) {
821 pos = bp;
822 switch (ch) {
823 case ' ': // (Spec 3.6)
824 case '\t': // (Spec 3.6)
825 case FF: // (Spec 3.6)
826 do {
827 scanChar();
828 } while (ch == ' ' || ch == '\t' || ch == FF);
829 endPos = bp;
830 processWhiteSpace();
831 break;
832 case LF: // (Spec 3.4)
833 scanChar();
834 endPos = bp;
835 processLineTerminator();
836 break;
837 case CR: // (Spec 3.4)
838 scanChar();
839 if (ch == LF) {
840 scanChar();
841 }
842 endPos = bp;
843 processLineTerminator();
844 break;
845 case 'A': case 'B': case 'C': case 'D': case 'E':
846 case 'F': case 'G': case 'H': case 'I': case 'J':
847 case 'K': case 'L': case 'M': case 'N': case 'O':
848 case 'P': case 'Q': case 'R': case 'S': case 'T':
849 case 'U': case 'V': case 'W': case 'X': case 'Y':
850 case 'Z':
851 case 'a': case 'b': case 'c': case 'd': case 'e':
852 case 'f': case 'g': case 'h': case 'i': case 'j':
853 case 'k': case 'l': case 'm': case 'n': case 'o':
854 case 'p': case 'q': case 'r': case 's': case 't':
855 case 'u': case 'v': case 'w': case 'x': case 'y':
856 case 'z':
857 case '$': case '_':
858 scanIdent();
859 return;
860 case '0':
861 scanChar();
862 if (ch == 'x' || ch == 'X') {
863 scanChar();
864 skipIllegalUnderscores();
865 if (ch == '.') {
866 scanHexFractionAndSuffix(false);
867 } else if (digit(16) < 0) {
868 lexError("invalid.hex.number");
869 } else {
870 scanNumber(16);
871 }
872 } else if (ch == 'b' || ch == 'B') {
873 if (!allowBinaryLiterals) {
874 lexError("unsupported.binary.lit", source.name);
875 allowBinaryLiterals = true;
876 }
877 scanChar();
878 skipIllegalUnderscores();
879 if (digit(2) < 0) {
880 lexError("invalid.binary.number");
881 } else {
882 scanNumber(2);
883 }
884 } else {
885 putChar('0');
886 if (ch == '_') {
887 int savePos = bp;
888 do {
889 scanChar();
890 } while (ch == '_');
891 if (digit(10) < 0) {
892 lexError(savePos, "illegal.underscore");
893 }
894 }
895 scanNumber(8);
896 }
897 return;
898 case '1': case '2': case '3': case '4':
899 case '5': case '6': case '7': case '8': case '9':
900 scanNumber(10);
901 return;
902 case '.':
903 scanChar();
904 if ('0' <= ch && ch <= '9') {
905 putChar('.');
906 scanFractionAndSuffix();
907 } else if (ch == '.') {
908 putChar('.'); putChar('.');
909 scanChar();
910 if (ch == '.') {
911 scanChar();
912 putChar('.');
913 token = ELLIPSIS;
914 } else {
915 lexError("malformed.fp.lit");
916 }
917 } else {
918 token = DOT;
919 }
920 return;
921 case ',':
922 scanChar(); token = COMMA; return;
923 case ';':
924 scanChar(); token = SEMI; return;
925 case '(':
926 scanChar(); token = LPAREN; return;
927 case ')':
928 scanChar(); token = RPAREN; return;
929 case '[':
930 scanChar(); token = LBRACKET; return;
931 case ']':
932 scanChar(); token = RBRACKET; return;
933 case '{':
934 scanChar(); token = LBRACE; return;
935 case '}':
936 scanChar(); token = RBRACE; return;
937 case '/':
938 scanChar();
939 if (ch == '/') {
940 do {
941 scanCommentChar();
942 } while (ch != CR && ch != LF && bp < buflen);
943 if (bp < buflen) {
944 endPos = bp;
945 processComment(CommentStyle.LINE);
946 }
947 break;
948 } else if (ch == '*') {
949 scanChar();
950 CommentStyle style;
951 if (ch == '*') {
952 style = CommentStyle.JAVADOC;
953 scanDocComment();
954 } else {
955 style = CommentStyle.BLOCK;
956 while (bp < buflen) {
957 if (ch == '*') {
958 scanChar();
959 if (ch == '/') break;
960 } else {
961 scanCommentChar();
962 }
963 }
964 }
965 if (ch == '/') {
966 scanChar();
967 endPos = bp;
968 processComment(style);
969 break;
970 } else {
971 lexError("unclosed.comment");
972 return;
973 }
974 } else if (ch == '=') {
975 name = names.slashequals;
976 token = SLASHEQ;
977 scanChar();
978 } else {
979 name = names.slash;
980 token = SLASH;
981 }
982 return;
983 case '\'':
984 scanChar();
985 if (ch == '\'') {
986 lexError("empty.char.lit");
987 } else {
988 if (ch == CR || ch == LF)
989 lexError(pos, "illegal.line.end.in.char.lit");
990 scanLitChar();
991 if (ch == '\'') {
992 scanChar();
993 token = CHARLITERAL;
994 } else {
995 lexError(pos, "unclosed.char.lit");
996 }
997 }
998 return;
999 case '\"':
1000 scanChar();
1001 while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
1002 scanLitChar();
1003 if (ch == '\"') {
1004 token = STRINGLITERAL;
1005 scanChar();
1006 } else {
1007 lexError(pos, "unclosed.str.lit");
1008 }
1009 return;
1010 case '#':
1011 scanChar();
1012 if (ch == '\"') {
1013 scanChar();
1014 if (ch == '\"')
1015 lexError(pos, "empty.bytecode.ident");
1016 while (ch != '\"' && ch != CR && ch != LF && bp < buflen) {
1017 scanBytecodeNameChar();
1018 }
1019 if (ch == '\"') {
1020 name = names.fromChars(sbuf, 0, sp);
1021 token = IDENTIFIER; // even if #"int" or #"do"
1022 scanChar();
1023 } else {
1024 lexError(pos, "unclosed.bytecode.ident");
1025 }
1026 } else {
1027 lexError("illegal.char", String.valueOf((int)'#'));
1028 }
1029 return;
1030 default:
1031 if (isSpecial(ch)) {
1032 scanOperator();
1033 } else {
1034 boolean isJavaIdentifierStart;
1035 if (ch < '\u0080') {
1036 // all ASCII range chars already handled, above
1037 isJavaIdentifierStart = false;
1038 } else {
1039 char high = scanSurrogates();
1040 if (high != 0) {
1041 if (sp == sbuf.length) {
1042 putChar(high);
1043 } else {
1044 sbuf[sp++] = high;
1045 }
1047 isJavaIdentifierStart = Character.isJavaIdentifierStart(
1048 Character.toCodePoint(high, ch));
1049 } else {
1050 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
1051 }
1052 }
1053 if (isJavaIdentifierStart) {
1054 scanIdent();
1055 } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
1056 token = EOF;
1057 pos = bp = eofPos;
1058 } else {
1059 lexError("illegal.char", String.valueOf((int)ch));
1060 scanChar();
1061 }
1062 }
1063 return;
1064 }
1065 }
1066 } finally {
1067 endPos = bp;
1068 if (scannerDebug)
1069 System.out.println("nextToken(" + pos
1070 + "," + endPos + ")=|" +
1071 new String(getRawCharacters(pos, endPos))
1072 + "|");
1073 }
1074 }
1076 /** Return the current token, set by nextToken().
1077 */
1078 public Token token() {
1079 return token;
1080 }
1082 /** Sets the current token.
1083 */
1084 public void token(Token token) {
1085 this.token = token;
1086 }
1088 /** Return the current token's position: a 0-based
1089 * offset from beginning of the raw input stream
1090 * (before unicode translation)
1091 */
1092 public int pos() {
1093 return pos;
1094 }
1096 /** Return the last character position of the current token.
1097 */
1098 public int endPos() {
1099 return endPos;
1100 }
1102 /** Return the last character position of the previous token.
1103 */
1104 public int prevEndPos() {
1105 return prevEndPos;
1106 }
1108 /** Return the position where a lexical error occurred;
1109 */
1110 public int errPos() {
1111 return errPos;
1112 }
1114 /** Set the position where a lexical error occurred;
1115 */
1116 public void errPos(int pos) {
1117 errPos = pos;
1118 }
1120 /** Return the name of an identifier or token for the current token.
1121 */
1122 public Name name() {
1123 return name;
1124 }
1126 /** Return the radix of a numeric literal token.
1127 */
1128 public int radix() {
1129 return radix;
1130 }
1132 /** Has a @deprecated been encountered in last doc comment?
1133 * This needs to be reset by client with resetDeprecatedFlag.
1134 */
1135 public boolean deprecatedFlag() {
1136 return deprecatedFlag;
1137 }
1139 public void resetDeprecatedFlag() {
1140 deprecatedFlag = false;
1141 }
1143 /**
1144 * Returns the documentation string of the current token.
1145 */
1146 public String docComment() {
1147 return null;
1148 }
1150 /**
1151 * Returns a copy of the input buffer, up to its inputLength.
1152 * Unicode escape sequences are not translated.
1153 */
1154 public char[] getRawCharacters() {
1155 char[] chars = new char[buflen];
1156 System.arraycopy(buf, 0, chars, 0, buflen);
1157 return chars;
1158 }
1160 /**
1161 * Returns a copy of a character array subset of the input buffer.
1162 * The returned array begins at the <code>beginIndex</code> and
1163 * extends to the character at index <code>endIndex - 1</code>.
1164 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
1165 * This behavior is like
1166 * <code>String.substring(beginIndex, endIndex)</code>.
1167 * Unicode escape sequences are not translated.
1168 *
1169 * @param beginIndex the beginning index, inclusive.
1170 * @param endIndex the ending index, exclusive.
1171 * @throws IndexOutOfBounds if either offset is outside of the
1172 * array bounds
1173 */
1174 public char[] getRawCharacters(int beginIndex, int endIndex) {
1175 int length = endIndex - beginIndex;
1176 char[] chars = new char[length];
1177 System.arraycopy(buf, beginIndex, chars, 0, length);
1178 return chars;
1179 }
/** The kind of comment passed to {@code processComment}. */
public enum CommentStyle {
    /** A comment introduced by two slashes, running to the end of the line. */
    LINE,
    /** A comment opened by a slash and a single star. */
    BLOCK,
    /** A comment opened by a slash and two stars. */
    JAVADOC
}
1187 /**
1188 * Called when a complete comment has been scanned. pos and endPos
1189 * will mark the comment boundary.
1190 */
1191 protected void processComment(CommentStyle style) {
1192 if (scannerDebug)
1193 System.out.println("processComment(" + pos
1194 + "," + endPos + "," + style + ")=|"
1195 + new String(getRawCharacters(pos, endPos))
1196 + "|");
1197 }
1199 /**
1200 * Called when a complete whitespace run has been scanned. pos and endPos
1201 * will mark the whitespace boundary.
1202 */
1203 protected void processWhiteSpace() {
1204 if (scannerDebug)
1205 System.out.println("processWhitespace(" + pos
1206 + "," + endPos + ")=|" +
1207 new String(getRawCharacters(pos, endPos))
1208 + "|");
1209 }
1211 /**
1212 * Called when a line terminator has been processed.
1213 */
1214 protected void processLineTerminator() {
1215 if (scannerDebug)
1216 System.out.println("processTerminator(" + pos
1217 + "," + endPos + ")=|" +
1218 new String(getRawCharacters(pos, endPos))
1219 + "|");
1220 }
1222 /** Build a map for translating between line numbers and
1223 * positions in the input.
1224 *
1225 * @return a LineMap */
1226 public Position.LineMap getLineMap() {
1227 return Position.makeLineMap(buf, buflen, false);
1228 }
1230 }