Tue, 11 Aug 2009 01:13:14 +0100
6521805: Regression: JDK5/JDK6 javac allows write access to outer class reference
Summary: javac should warn/complain about identifiers with the same name as synthetic symbol
Reviewed-by: jjg
1 /*
2 * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
26 package com.sun.tools.javac.parser;
28 import java.nio.*;
30 import com.sun.tools.javac.code.Source;
31 import com.sun.tools.javac.file.JavacFileManager;
32 import com.sun.tools.javac.util.*;
35 import static com.sun.tools.javac.parser.Token.*;
36 import static com.sun.tools.javac.util.LayoutCharacters.*;
38 /** The lexical analyzer maps an input stream consisting of
39 * ASCII characters and Unicode escapes into a token sequence.
40 *
41 * <p><b>This is NOT part of any API supported by Sun Microsystems. If
42 * you write code that depends on this, you do so at your own risk.
43 * This code and its internal interfaces are subject to change or
44 * deletion without notice.</b>
45 */
46 public class Scanner implements Lexer {
/** When set, nextToken() and the process* hooks echo the text they scanned to System.out. */
48 private static boolean scannerDebug = false;
50 /** A factory for creating scanners. */
51 public static class Factory {
52 /** The context key for the scanner factory. */
53 public static final Context.Key<Scanner.Factory> scannerFactoryKey =
54 new Context.Key<Scanner.Factory>();
56 /** Get the Factory instance for this context. */
57 public static Factory instance(Context context) {
58 Factory instance = context.get(scannerFactoryKey);
59 if (instance == null)
60 instance = new Factory(context);
61 return instance;
62 }
64 final Log log;
65 final Names names;
66 final Source source;
67 final Keywords keywords;
69 /** Create a new scanner factory. */
70 protected Factory(Context context) {
71 context.put(scannerFactoryKey, this);
72 this.log = Log.instance(context);
73 this.names = Names.instance(context);
74 this.source = Source.instance(context);
75 this.keywords = Keywords.instance(context);
76 }
78 public Scanner newScanner(CharSequence input) {
79 if (input instanceof CharBuffer) {
80 return new Scanner(this, (CharBuffer)input);
81 } else {
82 char[] array = input.toString().toCharArray();
83 return newScanner(array, array.length);
84 }
85 }
87 public Scanner newScanner(char[] input, int inputLength) {
88 return new Scanner(this, input, inputLength);
89 }
90 }
92 /* Output variables; set by nextToken(): */

95 /** The token, set by nextToken(). */
97 private Token token;

99 /** Allow hex floating-point literals. Initialised from the source level in
 *  the constructor, and switched on after the first "unsupported.fp.lit"
 *  error has been reported.
 */
101 private boolean allowHexFloats;

103 /** The token's position, 0-based offset from beginning of text. */
105 private int pos;

107 /** Character position just after the last character of the token. */
109 private int endPos;

111 /** The last character position of the previous token. */
113 private int prevEndPos;

115 /** The position where a lexical error occurred; Position.NOPOS until
 *  lexError or errPos(int) records one.
 */
117 private int errPos = Position.NOPOS;

119 /** The name of an identifier or token. */
121 private Name name;

123 /** The radix of a numeric literal token. */
125 private int radix;

127 /** Has a {@code @deprecated} tag been encountered in the last doc comment?
 *  This needs to be reset by the client.
 */
130 protected boolean deprecatedFlag = false;

132 /** A character buffer for literals; putChar doubles it when it is full. */
134 private char[] sbuf = new char[128];
/** Number of characters currently stored in sbuf. */
135 private int sp;

137 /** The input buffer. */
140 private char[] buf;
/** Index into buf of the current character (scanChar advances it before reading). */
141 private int bp;
/** Index of the EOI sentinel that the constructor writes into buf. */
142 private int buflen;
/** The input length as passed to the constructor; nextToken() reports EOF at this position. */
143 private int eofPos;

145 /** The current character. */
147 private char ch;

149 /** The buffer index of the last converted unicode character. */
151 private int unicodeConversionBp = -1;

153 /** The log to be used for error reporting. */
155 private final Log log;

157 /** The name table. */
158 private final Names names;

160 /** The keyword table. */
161 private final Keywords keywords;
/** Shared constructor prologue: copies the collaborators held by the
 *  factory and reads the hex-float setting from its source level.
 */
private Scanner(Factory fac) {
    log = fac.log;
    names = fac.names;
    keywords = fac.keywords;
    allowHexFloats = fac.source.allowHexFloats();
}
171 private static final boolean hexFloatsWork = hexFloatsWork();
172 private static boolean hexFloatsWork() {
173 try {
174 Float.valueOf("0x1.0p1");
175 return true;
176 } catch (NumberFormatException ex) {
177 return false;
178 }
179 }
/** Create a scanner from the input buffer. The buffer is converted to a
 *  char array via JavacFileManager.toArray and its limit() is used as the
 *  input length. As stated by the original contract, the buffer must
 *  implement array() and compact(), and remaining() must be less than
 *  limit().
 */
protected Scanner(Factory fac, CharBuffer buffer) {
    this(fac, JavacFileManager.toArray(buffer), buffer.limit());
}
/**
 * Create a scanner from the input array. The scanner needs one slot
 * after the scanned text for its EOI sentinel. When
 * {@code inputLength == input.length} that slot is obtained either by
 * giving up a trailing white space character or, failing that, by
 * copying the input into an array one element longer. To avoid the
 * copy, ensure that {@code inputLength < input.length} or that
 * {@code input[input.length - 1]} is a white space character.
 * The array handed in might be modified.
 *
 * @param fac the factory which created this Scanner
 * @param input the input, might be modified
 * @param inputLength the size of the input; expected to be no greater
 *        than input.length.
 */
protected Scanner(Factory fac, char[] input, int inputLength) {
    this(fac);
    eofPos = inputLength;

    if (inputLength == input.length) {
        boolean endsInWhitespace =
            input.length > 0 && Character.isWhitespace(input[input.length - 1]);
        if (endsInWhitespace) {
            // Reuse the trailing whitespace slot for the sentinel.
            inputLength--;
        } else {
            char[] grown = new char[inputLength + 1];
            System.arraycopy(input, 0, grown, 0, input.length);
            input = grown;
        }
    }

    buf = input;
    buflen = inputLength;
    buf[buflen] = EOI;

    // Position before the first character, then load it into ch.
    bp = -1;
    scanChar();
}
218 /** Report an error at the given position using the provided arguments.
219 */
220 private void lexError(int pos, String key, Object... args) {
221 log.error(pos, key, args);
222 token = ERROR;
223 errPos = pos;
224 }
226 /** Report an error at the current token position using the provided
227 * arguments.
228 */
229 private void lexError(String key, Object... args) {
230 lexError(pos, key, args);
231 }
233 /** Convert an ASCII digit from its base (8, 10, or 16)
234 * to its value.
235 */
236 private int digit(int base) {
237 char c = ch;
238 int result = Character.digit(c, base);
239 if (result >= 0 && c > 0x7f) {
240 lexError(pos+1, "illegal.nonascii.digit");
241 ch = "0123456789abcdef".charAt(result);
242 }
243 return result;
244 }
246 /** Convert unicode escape; bp points to initial '\' character
247 * (Spec 3.3).
248 */
249 private void convertUnicode() {
250 if (ch == '\\' && unicodeConversionBp != bp) {
251 bp++; ch = buf[bp];
252 if (ch == 'u') {
253 do {
254 bp++; ch = buf[bp];
255 } while (ch == 'u');
256 int limit = bp + 3;
257 if (limit < buflen) {
258 int d = digit(16);
259 int code = d;
260 while (bp < limit && d >= 0) {
261 bp++; ch = buf[bp];
262 d = digit(16);
263 code = (code << 4) + d;
264 }
265 if (d >= 0) {
266 ch = (char)code;
267 unicodeConversionBp = bp;
268 return;
269 }
270 }
271 lexError(bp, "illegal.unicode.esc");
272 } else {
273 bp--;
274 ch = '\\';
275 }
276 }
277 }
279 /** Read next character.
280 */
281 private void scanChar() {
282 ch = buf[++bp];
283 if (ch == '\\') {
284 convertUnicode();
285 }
286 }
288 /** Read next character in comment, skipping over double '\' characters.
289 */
290 private void scanCommentChar() {
291 scanChar();
292 if (ch == '\\') {
293 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
294 bp++;
295 } else {
296 convertUnicode();
297 }
298 }
299 }
301 /** Append a character to sbuf.
302 */
303 private void putChar(char ch) {
304 if (sp == sbuf.length) {
305 char[] newsbuf = new char[sbuf.length * 2];
306 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
307 sbuf = newsbuf;
308 }
309 sbuf[sp++] = ch;
310 }
312 /** For debugging purposes: print character.
313 */
314 private void dch() {
315 System.err.print(ch); System.out.flush();
316 }
318 /** Read next character in character or string literal and copy into sbuf.
319 */
320 private void scanLitChar(boolean forBytecodeName) {
321 if (ch == '\\') {
322 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
323 bp++;
324 putChar('\\');
325 scanChar();
326 } else {
327 scanChar();
328 switch (ch) {
329 case '0': case '1': case '2': case '3':
330 case '4': case '5': case '6': case '7':
331 char leadch = ch;
332 int oct = digit(8);
333 scanChar();
334 if ('0' <= ch && ch <= '7') {
335 oct = oct * 8 + digit(8);
336 scanChar();
337 if (leadch <= '3' && '0' <= ch && ch <= '7') {
338 oct = oct * 8 + digit(8);
339 scanChar();
340 }
341 }
342 putChar((char)oct);
343 break;
344 case 'b':
345 putChar('\b'); scanChar(); break;
346 case 't':
347 putChar('\t'); scanChar(); break;
348 case 'n':
349 putChar('\n'); scanChar(); break;
350 case 'f':
351 putChar('\f'); scanChar(); break;
352 case 'r':
353 putChar('\r'); scanChar(); break;
354 case '\'':
355 putChar('\''); scanChar(); break;
356 case '\"':
357 putChar('\"'); scanChar(); break;
358 case '\\':
359 putChar('\\'); scanChar(); break;
360 case '|': case ',': case '?': case '%':
361 case '^': case '_': case '{': case '}':
362 case '!': case '-': case '=':
363 if (forBytecodeName) {
364 // Accept escape sequences for dangerous bytecode chars.
365 // This is illegal in normal Java string or character literals.
366 // Note that the escape sequence itself is passed through.
367 putChar('\\'); putChar(ch); scanChar();
368 } else {
369 lexError(bp, "illegal.esc.char");
370 }
371 break;
372 default:
373 lexError(bp, "illegal.esc.char");
374 }
375 }
376 } else if (bp != buflen) {
377 putChar(ch); scanChar();
378 }
379 }
380 private void scanLitChar() {
381 scanLitChar(false);
382 }
384 /** Read next character in an exotic name #"foo"
385 */
386 private void scanBytecodeNameChar() {
387 switch (ch) {
388 // reject any "dangerous" char which is illegal somewhere in the JVM spec
389 // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm
390 case '/': case '.': case ';': // illegal everywhere
391 case '<': case '>': // illegal in methods, dangerous in classes
392 case '[': // illegal in classes
393 lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch));
394 break;
395 }
396 scanLitChar(true);
397 }
399 /** Read fractional part of hexadecimal floating point number.
400 */
401 private void scanHexExponentAndSuffix() {
402 if (ch == 'p' || ch == 'P') {
403 putChar(ch);
404 scanChar();
405 if (ch == '+' || ch == '-') {
406 putChar(ch);
407 scanChar();
408 }
409 if ('0' <= ch && ch <= '9') {
410 do {
411 putChar(ch);
412 scanChar();
413 } while ('0' <= ch && ch <= '9');
414 if (!allowHexFloats) {
415 lexError("unsupported.fp.lit");
416 allowHexFloats = true;
417 }
418 else if (!hexFloatsWork)
419 lexError("unsupported.cross.fp.lit");
420 } else
421 lexError("malformed.fp.lit");
422 } else {
423 lexError("malformed.fp.lit");
424 }
425 if (ch == 'f' || ch == 'F') {
426 putChar(ch);
427 scanChar();
428 token = FLOATLITERAL;
429 } else {
430 if (ch == 'd' || ch == 'D') {
431 putChar(ch);
432 scanChar();
433 }
434 token = DOUBLELITERAL;
435 }
436 }
438 /** Read fractional part of floating point number.
439 */
440 private void scanFraction() {
441 while (digit(10) >= 0) {
442 putChar(ch);
443 scanChar();
444 }
445 int sp1 = sp;
446 if (ch == 'e' || ch == 'E') {
447 putChar(ch);
448 scanChar();
449 if (ch == '+' || ch == '-') {
450 putChar(ch);
451 scanChar();
452 }
453 if ('0' <= ch && ch <= '9') {
454 do {
455 putChar(ch);
456 scanChar();
457 } while ('0' <= ch && ch <= '9');
458 return;
459 }
460 lexError("malformed.fp.lit");
461 sp = sp1;
462 }
463 }
465 /** Read fractional part and 'd' or 'f' suffix of floating point number.
466 */
467 private void scanFractionAndSuffix() {
468 this.radix = 10;
469 scanFraction();
470 if (ch == 'f' || ch == 'F') {
471 putChar(ch);
472 scanChar();
473 token = FLOATLITERAL;
474 } else {
475 if (ch == 'd' || ch == 'D') {
476 putChar(ch);
477 scanChar();
478 }
479 token = DOUBLELITERAL;
480 }
481 }
483 /** Read fractional part and 'd' or 'f' suffix of floating point number.
484 */
485 private void scanHexFractionAndSuffix(boolean seendigit) {
486 this.radix = 16;
487 assert ch == '.';
488 putChar(ch);
489 scanChar();
490 while (digit(16) >= 0) {
491 seendigit = true;
492 putChar(ch);
493 scanChar();
494 }
495 if (!seendigit)
496 lexError("invalid.hex.number");
497 else
498 scanHexExponentAndSuffix();
499 }
501 /** Read a number.
502 * @param radix The radix of the number; one of 8, 10, 16.
503 */
504 private void scanNumber(int radix) {
505 this.radix = radix;
506 // for octal, allow base-10 digit in case it's a float literal
507 int digitRadix = (radix <= 10) ? 10 : 16;
508 boolean seendigit = false;
509 while (digit(digitRadix) >= 0) {
510 seendigit = true;
511 putChar(ch);
512 scanChar();
513 }
514 if (radix == 16 && ch == '.') {
515 scanHexFractionAndSuffix(seendigit);
516 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
517 scanHexExponentAndSuffix();
518 } else if (radix <= 10 && ch == '.') {
519 putChar(ch);
520 scanChar();
521 scanFractionAndSuffix();
522 } else if (radix <= 10 &&
523 (ch == 'e' || ch == 'E' ||
524 ch == 'f' || ch == 'F' ||
525 ch == 'd' || ch == 'D')) {
526 scanFractionAndSuffix();
527 } else {
528 if (ch == 'l' || ch == 'L') {
529 scanChar();
530 token = LONGLITERAL;
531 } else {
532 token = INTLITERAL;
533 }
534 }
535 }
537 /** Read an identifier.
538 */
539 private void scanIdent() {
540 boolean isJavaIdentifierPart;
541 char high;
542 do {
543 if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
544 // optimization, was: putChar(ch);
546 scanChar();
547 switch (ch) {
548 case 'A': case 'B': case 'C': case 'D': case 'E':
549 case 'F': case 'G': case 'H': case 'I': case 'J':
550 case 'K': case 'L': case 'M': case 'N': case 'O':
551 case 'P': case 'Q': case 'R': case 'S': case 'T':
552 case 'U': case 'V': case 'W': case 'X': case 'Y':
553 case 'Z':
554 case 'a': case 'b': case 'c': case 'd': case 'e':
555 case 'f': case 'g': case 'h': case 'i': case 'j':
556 case 'k': case 'l': case 'm': case 'n': case 'o':
557 case 'p': case 'q': case 'r': case 's': case 't':
558 case 'u': case 'v': case 'w': case 'x': case 'y':
559 case 'z':
560 case '$': case '_':
561 case '0': case '1': case '2': case '3': case '4':
562 case '5': case '6': case '7': case '8': case '9':
563 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
564 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
565 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
566 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
567 case '\u0015': case '\u0016': case '\u0017':
568 case '\u0018': case '\u0019': case '\u001B':
569 case '\u007F':
570 break;
571 case '\u001A': // EOI is also a legal identifier part
572 if (bp >= buflen) {
573 name = names.fromChars(sbuf, 0, sp);
574 token = keywords.key(name);
575 return;
576 }
577 break;
578 default:
579 if (ch < '\u0080') {
580 // all ASCII range chars already handled, above
581 isJavaIdentifierPart = false;
582 } else {
583 high = scanSurrogates();
584 if (high != 0) {
585 if (sp == sbuf.length) {
586 putChar(high);
587 } else {
588 sbuf[sp++] = high;
589 }
590 isJavaIdentifierPart = Character.isJavaIdentifierPart(
591 Character.toCodePoint(high, ch));
592 } else {
593 isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
594 }
595 }
596 if (!isJavaIdentifierPart) {
597 name = names.fromChars(sbuf, 0, sp);
598 token = keywords.key(name);
599 return;
600 }
601 }
602 } while (true);
603 }
605 /** Are surrogates supported?
606 */
607 final static boolean surrogatesSupported = surrogatesSupported();
608 private static boolean surrogatesSupported() {
609 try {
610 Character.isHighSurrogate('a');
611 return true;
612 } catch (NoSuchMethodError ex) {
613 return false;
614 }
615 }
617 /** Scan surrogate pairs. If 'ch' is a high surrogate and
618 * the next character is a low surrogate, then put the low
619 * surrogate in 'ch', and return the high surrogate.
620 * otherwise, just return 0.
621 */
622 private char scanSurrogates() {
623 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
624 char high = ch;
626 scanChar();
628 if (Character.isLowSurrogate(ch)) {
629 return high;
630 }
632 ch = high;
633 }
635 return 0;
636 }
638 /** Return true if ch can be part of an operator.
639 */
640 private boolean isSpecial(char ch) {
641 switch (ch) {
642 case '!': case '%': case '&': case '*': case '?':
643 case '+': case '-': case ':': case '<': case '=':
644 case '>': case '^': case '|': case '~':
645 case '@':
646 return true;
647 default:
648 return false;
649 }
650 }
652 /** Read longest possible sequence of special characters and convert
653 * to token.
654 */
655 private void scanOperator() {
656 while (true) {
657 putChar(ch);
658 Name newname = names.fromChars(sbuf, 0, sp);
659 if (keywords.key(newname) == IDENTIFIER) {
660 sp--;
661 break;
662 }
663 name = newname;
664 token = keywords.key(newname);
665 scanChar();
666 if (!isSpecial(ch)) break;
667 }
668 }
670 /**
671 * Scan a documention comment; determine if a deprecated tag is present.
672 * Called once the initial /, * have been skipped, positioned at the second *
673 * (which is treated as the beginning of the first line).
674 * Stops positioned at the closing '/'.
675 */
676 @SuppressWarnings("fallthrough")
677 private void scanDocComment() {
678 boolean deprecatedPrefix = false;
680 forEachLine:
681 while (bp < buflen) {
683 // Skip optional WhiteSpace at beginning of line
684 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
685 scanCommentChar();
686 }
688 // Skip optional consecutive Stars
689 while (bp < buflen && ch == '*') {
690 scanCommentChar();
691 if (ch == '/') {
692 return;
693 }
694 }
696 // Skip optional WhiteSpace after Stars
697 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
698 scanCommentChar();
699 }
701 deprecatedPrefix = false;
702 // At beginning of line in the JavaDoc sense.
703 if (bp < buflen && ch == '@' && !deprecatedFlag) {
704 scanCommentChar();
705 if (bp < buflen && ch == 'd') {
706 scanCommentChar();
707 if (bp < buflen && ch == 'e') {
708 scanCommentChar();
709 if (bp < buflen && ch == 'p') {
710 scanCommentChar();
711 if (bp < buflen && ch == 'r') {
712 scanCommentChar();
713 if (bp < buflen && ch == 'e') {
714 scanCommentChar();
715 if (bp < buflen && ch == 'c') {
716 scanCommentChar();
717 if (bp < buflen && ch == 'a') {
718 scanCommentChar();
719 if (bp < buflen && ch == 't') {
720 scanCommentChar();
721 if (bp < buflen && ch == 'e') {
722 scanCommentChar();
723 if (bp < buflen && ch == 'd') {
724 deprecatedPrefix = true;
725 scanCommentChar();
726 }}}}}}}}}}}
727 if (deprecatedPrefix && bp < buflen) {
728 if (Character.isWhitespace(ch)) {
729 deprecatedFlag = true;
730 } else if (ch == '*') {
731 scanCommentChar();
732 if (ch == '/') {
733 deprecatedFlag = true;
734 return;
735 }
736 }
737 }
739 // Skip rest of line
740 while (bp < buflen) {
741 switch (ch) {
742 case '*':
743 scanCommentChar();
744 if (ch == '/') {
745 return;
746 }
747 break;
748 case CR: // (Spec 3.4)
749 scanCommentChar();
750 if (ch != LF) {
751 continue forEachLine;
752 }
753 /* fall through to LF case */
754 case LF: // (Spec 3.4)
755 scanCommentChar();
756 continue forEachLine;
757 default:
758 scanCommentChar();
759 }
760 } // rest of line
761 } // forEachLine
762 return;
763 }
765 /** The value of a literal token, recorded as a string.
766 * For integers, leading 0x and 'l' suffixes are suppressed.
767 */
768 public String stringVal() {
769 return new String(sbuf, 0, sp);
770 }
772 /** Read the next token.
 *  White space, line terminators and comments are consumed first, each
 *  being reported through processWhiteSpace(), processLineTerminator()
 *  or processComment(). The first real token is then scanned and
 *  recorded in the token field, together with pos and endPos and, where
 *  the token has them, name, radix and the literal text in sbuf.
 *  prevEndPos receives the endPos of the token that was current on entry.
 *  Lexical errors are reported through lexError, which sets the token to
 *  ERROR.
 */
774 public void nextToken() {
776 try {
// Remember where the previous token ended and empty the literal buffer.
777 prevEndPos = endPos;
778 sp = 0;
// Loop until a token is produced: the white space, line terminator and
// comment cases 'break' back to here, every token case returns.
780 while (true) {
781 pos = bp;
782 switch (ch) {
783 case ' ': // (Spec 3.6)
784 case '\t': // (Spec 3.6)
785 case FF: // (Spec 3.6)
786 do {
787 scanChar();
788 } while (ch == ' ' || ch == '\t' || ch == FF);
789 endPos = bp;
790 processWhiteSpace();
791 break;
792 case LF: // (Spec 3.4)
793 scanChar();
794 endPos = bp;
795 processLineTerminator();
796 break;
797 case CR: // (Spec 3.4)
798 scanChar();
// CR LF is reported as a single line terminator.
799 if (ch == LF) {
800 scanChar();
801 }
802 endPos = bp;
803 processLineTerminator();
804 break;
805 case 'A': case 'B': case 'C': case 'D': case 'E':
806 case 'F': case 'G': case 'H': case 'I': case 'J':
807 case 'K': case 'L': case 'M': case 'N': case 'O':
808 case 'P': case 'Q': case 'R': case 'S': case 'T':
809 case 'U': case 'V': case 'W': case 'X': case 'Y':
810 case 'Z':
811 case 'a': case 'b': case 'c': case 'd': case 'e':
812 case 'f': case 'g': case 'h': case 'i': case 'j':
813 case 'k': case 'l': case 'm': case 'n': case 'o':
814 case 'p': case 'q': case 'r': case 's': case 't':
815 case 'u': case 'v': case 'w': case 'x': case 'y':
816 case 'z':
817 case '$': case '_':
818 scanIdent();
819 return;
820 case '0':
// A leading 0 starts either a hex literal (0x...), whose prefix is not
// stored in sbuf, or a number that is scanned with radix 8.
821 scanChar();
822 if (ch == 'x' || ch == 'X') {
823 scanChar();
824 if (ch == '.') {
825 scanHexFractionAndSuffix(false);
826 } else if (digit(16) < 0) {
827 lexError("invalid.hex.number");
828 } else {
829 scanNumber(16);
830 }
831 } else {
832 putChar('0');
833 scanNumber(8);
834 }
835 return;
836 case '1': case '2': case '3': case '4':
837 case '5': case '6': case '7': case '8': case '9':
838 scanNumber(10);
839 return;
840 case '.':
// A dot starts a fraction (".5"), an ellipsis ("...") or is the DOT token.
841 scanChar();
842 if ('0' <= ch && ch <= '9') {
843 putChar('.');
844 scanFractionAndSuffix();
845 } else if (ch == '.') {
846 putChar('.'); putChar('.');
847 scanChar();
848 if (ch == '.') {
849 scanChar();
850 putChar('.');
851 token = ELLIPSIS;
852 } else {
// Exactly two dots: reported with the "malformed.fp.lit" key.
853 lexError("malformed.fp.lit");
854 }
855 } else {
856 token = DOT;
857 }
858 return;
859 case ',':
860 scanChar(); token = COMMA; return;
861 case ';':
862 scanChar(); token = SEMI; return;
863 case '(':
864 scanChar(); token = LPAREN; return;
865 case ')':
866 scanChar(); token = RPAREN; return;
867 case '[':
868 scanChar(); token = LBRACKET; return;
869 case ']':
870 scanChar(); token = RBRACKET; return;
871 case '{':
872 scanChar(); token = LBRACE; return;
873 case '}':
874 scanChar(); token = RBRACE; return;
875 case '/':
// A slash starts a line comment, a block or doc comment, "/=" or "/".
876 scanChar();
877 if (ch == '/') {
878 do {
879 scanCommentChar();
880 } while (ch != CR && ch != LF && bp < buflen);
// A line comment that runs into the end of the input is not passed to
// processComment.
881 if (bp < buflen) {
882 endPos = bp;
883 processComment(CommentStyle.LINE);
884 }
885 break;
886 } else if (ch == '*') {
887 scanChar();
888 CommentStyle style;
889 if (ch == '*') {
890 style = CommentStyle.JAVADOC;
891 scanDocComment();
892 } else {
893 style = CommentStyle.BLOCK;
894 while (bp < buflen) {
895 if (ch == '*') {
896 scanChar();
897 if (ch == '/') break;
898 } else {
899 scanCommentChar();
900 }
901 }
902 }
// Both comment scanners stop on the closing '/'; anything else here means
// the input ended before the comment was closed.
903 if (ch == '/') {
904 scanChar();
905 endPos = bp;
906 processComment(style);
907 break;
908 } else {
909 lexError("unclosed.comment");
910 return;
911 }
912 } else if (ch == '=') {
913 name = names.slashequals;
914 token = SLASHEQ;
915 scanChar();
916 } else {
917 name = names.slash;
918 token = SLASH;
919 }
920 return;
921 case '\'':
922 scanChar();
923 if (ch == '\'') {
924 lexError("empty.char.lit");
925 } else {
926 if (ch == CR || ch == LF)
927 lexError(pos, "illegal.line.end.in.char.lit");
928 scanLitChar();
929 if (ch == '\'') {
930 scanChar();
931 token = CHARLITERAL;
932 } else {
933 lexError(pos, "unclosed.char.lit");
934 }
935 }
936 return;
937 case '\"':
938 scanChar();
// Collect literal characters up to the closing quote, a line end or the
// end of the input.
939 while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
940 scanLitChar();
941 if (ch == '\"') {
942 token = STRINGLITERAL;
943 scanChar();
944 } else {
945 lexError(pos, "unclosed.str.lit");
946 }
947 return;
948 case '#':
// '#' is only legal as the start of an exotic identifier #"...".
949 scanChar();
950 if (ch == '\"') {
951 scanChar();
952 if (ch == '\"')
953 lexError(pos, "empty.bytecode.ident");
954 while (ch != '\"' && ch != CR && ch != LF && bp < buflen) {
955 scanBytecodeNameChar();
956 }
957 if (ch == '\"') {
958 name = names.fromChars(sbuf, 0, sp);
959 token = IDENTIFIER; // even if #"int" or #"do"
960 scanChar();
961 } else {
962 lexError(pos, "unclosed.bytecode.ident");
963 }
964 } else {
965 lexError("illegal.char", String.valueOf((int)'#'));
966 }
967 return;
968 default:
// Everything else: an operator, an identifier starting with a non-ASCII
// character, the end of the input, or an illegal character.
969 if (isSpecial(ch)) {
970 scanOperator();
971 } else {
972 boolean isJavaIdentifierStart;
973 if (ch < '\u0080') {
974 // all ASCII range chars already handled, above
975 isJavaIdentifierStart = false;
976 } else {
977 char high = scanSurrogates();
978 if (high != 0) {
979 if (sp == sbuf.length) {
980 putChar(high);
981 } else {
982 sbuf[sp++] = high;
983 }
985 isJavaIdentifierStart = Character.isJavaIdentifierStart(
986 Character.toCodePoint(high, ch));
987 } else {
988 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
989 }
990 }
991 if (isJavaIdentifierStart) {
992 scanIdent();
993 } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
994 token = EOF;
995 pos = bp = eofPos;
996 } else {
997 lexError("illegal.char", String.valueOf((int)ch));
998 scanChar();
999 }
1000 }
1001 return;
1002 }
1003 }
1004 } finally {
// Runs on every exit path, so endPos always reflects where scanning stopped.
1005 endPos = bp;
1006 if (scannerDebug)
1007 System.out.println("nextToken(" + pos
1008 + "," + endPos + ")=|" +
1009 new String(getRawCharacters(pos, endPos))
1010 + "|");
1011 }
1012 }
1014 /** Return the current token, set by nextToken().
1015 */
1016 public Token token() {
1017 return token;
1018 }
1020 /** Sets the current token.
1021 */
1022 public void token(Token token) {
1023 this.token = token;
1024 }
1026 /** Return the current token's position: a 0-based
1027 * offset from beginning of the raw input stream
1028 * (before unicode translation)
1029 */
1030 public int pos() {
1031 return pos;
1032 }
1034 /** Return the last character position of the current token.
1035 */
1036 public int endPos() {
1037 return endPos;
1038 }
1040 /** Return the last character position of the previous token.
1041 */
1042 public int prevEndPos() {
1043 return prevEndPos;
1044 }
1046 /** Return the position where a lexical error occurred;
1047 */
1048 public int errPos() {
1049 return errPos;
1050 }
1052 /** Set the position where a lexical error occurred;
1053 */
1054 public void errPos(int pos) {
1055 errPos = pos;
1056 }
1058 /** Return the name of an identifier or token for the current token.
1059 */
1060 public Name name() {
1061 return name;
1062 }
1064 /** Return the radix of a numeric literal token.
1065 */
1066 public int radix() {
1067 return radix;
1068 }
1070 /** Has a @deprecated been encountered in last doc comment?
1071 * This needs to be reset by client with resetDeprecatedFlag.
1072 */
1073 public boolean deprecatedFlag() {
1074 return deprecatedFlag;
1075 }
1077 public void resetDeprecatedFlag() {
1078 deprecatedFlag = false;
1079 }
1081 /**
1082 * Returns the documentation string of the current token.
1083 */
1084 public String docComment() {
1085 return null;
1086 }
1088 /**
1089 * Returns a copy of the input buffer, up to its inputLength.
1090 * Unicode escape sequences are not translated.
1091 */
1092 public char[] getRawCharacters() {
1093 char[] chars = new char[buflen];
1094 System.arraycopy(buf, 0, chars, 0, buflen);
1095 return chars;
1096 }
1098 /**
1099 * Returns a copy of a character array subset of the input buffer.
1100 * The returned array begins at the <code>beginIndex</code> and
1101 * extends to the character at index <code>endIndex - 1</code>.
1102 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
1103 * This behavior is like
1104 * <code>String.substring(beginIndex, endIndex)</code>.
1105 * Unicode escape sequences are not translated.
1106 *
1107 * @param beginIndex the beginning index, inclusive.
1108 * @param endIndex the ending index, exclusive.
1109 * @throws IndexOutOfBounds if either offset is outside of the
1110 * array bounds
1111 */
1112 public char[] getRawCharacters(int beginIndex, int endIndex) {
1113 int length = endIndex - beginIndex;
1114 char[] chars = new char[length];
1115 System.arraycopy(buf, beginIndex, chars, 0, length);
1116 return chars;
1117 }
/** The kinds of comment that nextToken() reports to processComment. */
public enum CommentStyle {
    /** A comment introduced by two slashes. */
    LINE,
    /** A slash-star comment that is not a documentation comment. */
    BLOCK,
    /** A slash-star-star documentation comment. */
    JAVADOC,
}
1125 /**
1126 * Called when a complete comment has been scanned. pos and endPos
1127 * will mark the comment boundary.
1128 */
1129 protected void processComment(CommentStyle style) {
1130 if (scannerDebug)
1131 System.out.println("processComment(" + pos
1132 + "," + endPos + "," + style + ")=|"
1133 + new String(getRawCharacters(pos, endPos))
1134 + "|");
1135 }
1137 /**
1138 * Called when a complete whitespace run has been scanned. pos and endPos
1139 * will mark the whitespace boundary.
1140 */
1141 protected void processWhiteSpace() {
1142 if (scannerDebug)
1143 System.out.println("processWhitespace(" + pos
1144 + "," + endPos + ")=|" +
1145 new String(getRawCharacters(pos, endPos))
1146 + "|");
1147 }
1149 /**
1150 * Called when a line terminator has been processed.
1151 */
1152 protected void processLineTerminator() {
1153 if (scannerDebug)
1154 System.out.println("processTerminator(" + pos
1155 + "," + endPos + ")=|" +
1156 new String(getRawCharacters(pos, endPos))
1157 + "|");
1158 }
1160 /** Build a map for translating between line numbers and
1161 * positions in the input.
1162 *
1163 * @return a LineMap */
1164 public Position.LineMap getLineMap() {
1165 return Position.makeLineMap(buf, buflen, false);
1166 }
1168 }