Tue, 16 Sep 2008 18:35:18 -0700
6574134: Allow for alternative implementation of Name Table with garbage collection of name bytes
Reviewed-by: darcy, mcimadamore
1 /*
2 * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
26 package com.sun.tools.javac.parser;
28 import java.nio.*;
30 import com.sun.tools.javac.code.Source;
31 import com.sun.tools.javac.file.JavacFileManager;
32 import com.sun.tools.javac.util.*;
35 import static com.sun.tools.javac.parser.Token.*;
36 import static com.sun.tools.javac.util.LayoutCharacters.*;
38 /** The lexical analyzer maps an input stream consisting of
39 * ASCII characters and Unicode escapes into a token sequence.
40 *
41 * <p><b>This is NOT part of any API supported by Sun Microsystems. If
42 * you write code that depends on this, you do so at your own risk.
43 * This code and its internal interfaces are subject to change or
44 * deletion without notice.</b>
45 */
46 public class Scanner implements Lexer {
/** Debug switch: when true, nextToken() and the process* hooks print
 *  the position range and raw text of what was scanned to System.out.
 */
private static boolean scannerDebug = false;
50 /** A factory for creating scanners. */
51 public static class Factory {
52 /** The context key for the scanner factory. */
53 public static final Context.Key<Scanner.Factory> scannerFactoryKey =
54 new Context.Key<Scanner.Factory>();
56 /** Get the Factory instance for this context. */
57 public static Factory instance(Context context) {
58 Factory instance = context.get(scannerFactoryKey);
59 if (instance == null)
60 instance = new Factory(context);
61 return instance;
62 }
64 final Log log;
65 final Names names;
66 final Source source;
67 final Keywords keywords;
69 /** Create a new scanner factory. */
70 protected Factory(Context context) {
71 context.put(scannerFactoryKey, this);
72 this.log = Log.instance(context);
73 this.names = Names.instance(context);
74 this.source = Source.instance(context);
75 this.keywords = Keywords.instance(context);
76 }
78 public Scanner newScanner(CharSequence input) {
79 if (input instanceof CharBuffer) {
80 return new Scanner(this, (CharBuffer)input);
81 } else {
82 char[] array = input.toString().toCharArray();
83 return newScanner(array, array.length);
84 }
85 }
87 public Scanner newScanner(char[] input, int inputLength) {
88 return new Scanner(this, input, inputLength);
89 }
90 }
/* Output variables; set by nextToken():
 */

/** The token, set by nextToken().
 */
private Token token;

/** Allow hex floating-point literals.
 *  Initialized from the factory's Source; scanHexExponentAndSuffix
 *  switches it on after reporting the first unsupported literal.
 */
private boolean allowHexFloats;

/** The token's position, 0-based offset from beginning of text.
 */
private int pos;

/** Character position just after the last character of the token.
 */
private int endPos;

/** The value endPos had for the previous token; saved at the start
 *  of nextToken().
 */
private int prevEndPos;

/** The position where a lexical error occurred;
 *  Position.NOPOS until lexError or errPos(int) sets it.
 */
private int errPos = Position.NOPOS;

/** The name of an identifier or token:
 */
private Name name;

/** The radix of a numeric literal token.
 */
private int radix;

/** Has a @deprecated been encountered in last doc comment?
 *  This needs to be reset by client.
 */
protected boolean deprecatedFlag = false;

/** A character buffer for literals, and the number of characters
 *  currently stored in it (sp). putChar doubles the buffer when full.
 */
private char[] sbuf = new char[128];
private int sp;

/** The input buffer (buf), the index in buf of the current character
 *  ch (bp), the index of one past the last character in the buffer
 *  (buflen; buf[buflen] holds the EOI sentinel), and the position
 *  reported for the EOF token (eofPos, the input length passed to the
 *  constructor).
 */
private char[] buf;
private int bp;
private int buflen;
private int eofPos;

/** The current character.
 */
private char ch;

/** The buffer index of the last converted unicode character;
 *  -1 while no unicode escape has been converted.
 */
private int unicodeConversionBp = -1;

/** The log to be used for error reporting.
 */
private final Log log;

/** The name table. */
private final Names names;

/** The keyword table. */
private final Keywords keywords;
/** Shared constructor prologue: take the log, name table, keyword table
 *  and the hex-float language setting from the creating factory.
 */
private Scanner(Factory fac) {
    log = fac.log;
    names = fac.names;
    keywords = fac.keywords;
    allowHexFloats = fac.source.allowHexFloats();
}
171 private static final boolean hexFloatsWork = hexFloatsWork();
172 private static boolean hexFloatsWork() {
173 try {
174 Float.valueOf("0x1.0p1");
175 return true;
176 } catch (NumberFormatException ex) {
177 return false;
178 }
179 }
/** Create a scanner from the input buffer. buffer must implement
 *  array() and compact(), and remaining() must be less than limit().
 *  Delegates to the char-array constructor, using
 *  JavacFileManager.toArray(buffer) as the input and buffer.limit()
 *  as the input length.
 *
 *  @param fac     the factory which created this Scanner
 *  @param buffer  the characters to scan
 */
protected Scanner(Factory fac, CharBuffer buffer) {
    this(fac, JavacFileManager.toArray(buffer), buffer.limit());
}
/**
 * Create a scanner from the input array. One slot past the scanned
 * text is needed for the EOI sentinel, so this constructor might
 * modify the array: when the array is exactly full and ends in a
 * white space character, that last character is overwritten by the
 * sentinel; when it is full and does not end in white space, the
 * contents are copied into a new array that is one element longer.
 * To avoid the copy, ensure that {@code inputLength < input.length}
 * or that {@code input[input.length - 1]} is a white space character.
 *
 * @param fac the factory which created this Scanner
 * @param input the input, might be modified
 * @param inputLength the size of the input.
 * Must be positive and less than or equal to input.length.
 */
protected Scanner(Factory fac, char[] input, int inputLength) {
    this(fac);
    eofPos = inputLength;
    if (inputLength == input.length) {
        boolean endsInWhitespace =
            input.length > 0 && Character.isWhitespace(input[input.length - 1]);
        if (endsInWhitespace) {
            // reuse the trailing whitespace slot for the sentinel
            inputLength--;
        } else {
            char[] grown = new char[inputLength + 1];
            System.arraycopy(input, 0, grown, 0, input.length);
            input = grown;
        }
    }
    buf = input;
    buflen = inputLength;
    buf[buflen] = EOI;
    // position before the first character, then load it into ch
    bp = -1;
    scanChar();
}
218 /** Report an error at the given position using the provided arguments.
219 */
220 private void lexError(int pos, String key, Object... args) {
221 log.error(pos, key, args);
222 token = ERROR;
223 errPos = pos;
224 }
226 /** Report an error at the current token position using the provided
227 * arguments.
228 */
229 private void lexError(String key, Object... args) {
230 lexError(pos, key, args);
231 }
233 /** Convert an ASCII digit from its base (8, 10, or 16)
234 * to its value.
235 */
236 private int digit(int base) {
237 char c = ch;
238 int result = Character.digit(c, base);
239 if (result >= 0 && c > 0x7f) {
240 lexError(pos+1, "illegal.nonascii.digit");
241 ch = "0123456789abcdef".charAt(result);
242 }
243 return result;
244 }
/** Convert unicode escape; bp points to initial '\' character
 *  (Spec 3.3).
 *  On success ch holds the decoded character, bp is left on the last
 *  hex digit, and unicodeConversionBp records that index so the
 *  decoded character is not converted again. If the backslash is not
 *  followed by 'u', bp and ch are restored to the backslash. A
 *  malformed escape is reported as "illegal.unicode.esc".
 */
private void convertUnicode() {
    // nothing to do unless ch is a backslash that was not itself
    // produced by a conversion ending at this index
    if (ch == '\\' && unicodeConversionBp != bp) {
        bp++; ch = buf[bp];
        if (ch == 'u') {
            // any number of 'u' characters may follow the backslash
            do {
                bp++; ch = buf[bp];
            } while (ch == 'u');
            // index of the fourth hex digit; it must lie inside the input
            int limit = bp + 3;
            if (limit < buflen) {
                int d = digit(16);
                int code = d;
                while (bp < limit && d >= 0) {
                    bp++; ch = buf[bp];
                    d = digit(16);
                    code = (code << 4) + d;
                }
                // d < 0 means one of the four characters was not a hex digit
                if (d >= 0) {
                    ch = (char)code;
                    unicodeConversionBp = bp;
                    return;
                }
            }
            lexError(bp, "illegal.unicode.esc");
        } else {
            // not a unicode escape: step back onto the backslash
            bp--;
            ch = '\\';
        }
    }
}
279 /** Read next character.
280 */
281 private void scanChar() {
282 ch = buf[++bp];
283 if (ch == '\\') {
284 convertUnicode();
285 }
286 }
288 /** Read next character in comment, skipping over double '\' characters.
289 */
290 private void scanCommentChar() {
291 scanChar();
292 if (ch == '\\') {
293 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
294 bp++;
295 } else {
296 convertUnicode();
297 }
298 }
299 }
301 /** Append a character to sbuf.
302 */
303 private void putChar(char ch) {
304 if (sp == sbuf.length) {
305 char[] newsbuf = new char[sbuf.length * 2];
306 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
307 sbuf = newsbuf;
308 }
309 sbuf[sp++] = ch;
310 }
312 /** For debugging purposes: print character.
313 */
314 private void dch() {
315 System.err.print(ch); System.out.flush();
316 }
/** Read next character in character or string literal and copy into sbuf.
 *  Escape sequences are translated: the decoded character is appended
 *  to sbuf and the scanner is left on the character that follows the
 *  escape. An unknown escape is reported as "illegal.esc.char".
 */
private void scanLitChar() {
    if (ch == '\\') {
        // two consecutive backslashes, the first of which was not
        // produced by a unicode escape: store a single backslash
        if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
            bp++;
            putChar('\\');
            scanChar();
        } else {
            scanChar();
            switch (ch) {
            // octal escape: up to three digits, the third only when
            // the leading digit is at most '3'
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
                char leadch = ch;
                int oct = digit(8);
                scanChar();
                if ('0' <= ch && ch <= '7') {
                    oct = oct * 8 + digit(8);
                    scanChar();
                    if (leadch <= '3' && '0' <= ch && ch <= '7') {
                        oct = oct * 8 + digit(8);
                        scanChar();
                    }
                }
                putChar((char)oct);
                break;
            case 'b':
                putChar('\b'); scanChar(); break;
            case 't':
                putChar('\t'); scanChar(); break;
            case 'n':
                putChar('\n'); scanChar(); break;
            case 'f':
                putChar('\f'); scanChar(); break;
            case 'r':
                putChar('\r'); scanChar(); break;
            case '\'':
                putChar('\''); scanChar(); break;
            case '\"':
                putChar('\"'); scanChar(); break;
            case '\\':
                putChar('\\'); scanChar(); break;
            default:
                lexError(bp, "illegal.esc.char");
            }
        }
    } else if (bp != buflen) {
        // ordinary character: copy it unless the end of input was reached
        putChar(ch); scanChar();
    }
}
369 /** Read fractional part of hexadecimal floating point number.
370 */
371 private void scanHexExponentAndSuffix() {
372 if (ch == 'p' || ch == 'P') {
373 putChar(ch);
374 scanChar();
375 if (ch == '+' || ch == '-') {
376 putChar(ch);
377 scanChar();
378 }
379 if ('0' <= ch && ch <= '9') {
380 do {
381 putChar(ch);
382 scanChar();
383 } while ('0' <= ch && ch <= '9');
384 if (!allowHexFloats) {
385 lexError("unsupported.fp.lit");
386 allowHexFloats = true;
387 }
388 else if (!hexFloatsWork)
389 lexError("unsupported.cross.fp.lit");
390 } else
391 lexError("malformed.fp.lit");
392 } else {
393 lexError("malformed.fp.lit");
394 }
395 if (ch == 'f' || ch == 'F') {
396 putChar(ch);
397 scanChar();
398 token = FLOATLITERAL;
399 } else {
400 if (ch == 'd' || ch == 'D') {
401 putChar(ch);
402 scanChar();
403 }
404 token = DOUBLELITERAL;
405 }
406 }
408 /** Read fractional part of floating point number.
409 */
410 private void scanFraction() {
411 while (digit(10) >= 0) {
412 putChar(ch);
413 scanChar();
414 }
415 int sp1 = sp;
416 if (ch == 'e' || ch == 'E') {
417 putChar(ch);
418 scanChar();
419 if (ch == '+' || ch == '-') {
420 putChar(ch);
421 scanChar();
422 }
423 if ('0' <= ch && ch <= '9') {
424 do {
425 putChar(ch);
426 scanChar();
427 } while ('0' <= ch && ch <= '9');
428 return;
429 }
430 lexError("malformed.fp.lit");
431 sp = sp1;
432 }
433 }
435 /** Read fractional part and 'd' or 'f' suffix of floating point number.
436 */
437 private void scanFractionAndSuffix() {
438 this.radix = 10;
439 scanFraction();
440 if (ch == 'f' || ch == 'F') {
441 putChar(ch);
442 scanChar();
443 token = FLOATLITERAL;
444 } else {
445 if (ch == 'd' || ch == 'D') {
446 putChar(ch);
447 scanChar();
448 }
449 token = DOUBLELITERAL;
450 }
451 }
453 /** Read fractional part and 'd' or 'f' suffix of floating point number.
454 */
455 private void scanHexFractionAndSuffix(boolean seendigit) {
456 this.radix = 16;
457 assert ch == '.';
458 putChar(ch);
459 scanChar();
460 while (digit(16) >= 0) {
461 seendigit = true;
462 putChar(ch);
463 scanChar();
464 }
465 if (!seendigit)
466 lexError("invalid.hex.number");
467 else
468 scanHexExponentAndSuffix();
469 }
471 /** Read a number.
472 * @param radix The radix of the number; one of 8, 10, 16.
473 */
474 private void scanNumber(int radix) {
475 this.radix = radix;
476 // for octal, allow base-10 digit in case it's a float literal
477 int digitRadix = (radix <= 10) ? 10 : 16;
478 boolean seendigit = false;
479 while (digit(digitRadix) >= 0) {
480 seendigit = true;
481 putChar(ch);
482 scanChar();
483 }
484 if (radix == 16 && ch == '.') {
485 scanHexFractionAndSuffix(seendigit);
486 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
487 scanHexExponentAndSuffix();
488 } else if (radix <= 10 && ch == '.') {
489 putChar(ch);
490 scanChar();
491 scanFractionAndSuffix();
492 } else if (radix <= 10 &&
493 (ch == 'e' || ch == 'E' ||
494 ch == 'f' || ch == 'F' ||
495 ch == 'd' || ch == 'D')) {
496 scanFractionAndSuffix();
497 } else {
498 if (ch == 'l' || ch == 'L') {
499 scanChar();
500 token = LONGLITERAL;
501 } else {
502 token = INTLITERAL;
503 }
504 }
505 }
/** Read an identifier.
 *  Each iteration stores the current character in sbuf and reads the
 *  next one; scanning stops at the first character that cannot be
 *  part of an identifier. The collected characters are then turned
 *  into a Name, and the token is whatever the keyword table maps that
 *  name to.
 */
private void scanIdent() {
    boolean isJavaIdentifierPart;
    char high;
    do {
        if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
        // optimization, was: putChar(ch);

        scanChar();
        switch (ch) {
        // ASCII characters accepted as identifier parts: keep scanning
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z':
        case '$': case '_':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case '\u0000': case '\u0001': case '\u0002': case '\u0003':
        case '\u0004': case '\u0005': case '\u0006': case '\u0007':
        case '\u0008': case '\u000E': case '\u000F': case '\u0010':
        case '\u0011': case '\u0012': case '\u0013': case '\u0014':
        case '\u0015': case '\u0016': case '\u0017':
        case '\u0018': case '\u0019': case '\u001B':
        case '\u007F':
            break;
        case '\u001A': // EOI is also a legal identifier part
            // ... but at or past buflen it is the end-of-input sentinel,
            // which terminates the identifier
            if (bp >= buflen) {
                name = names.fromChars(sbuf, 0, sp);
                token = keywords.key(name);
                return;
            }
            break;
        default:
            if (ch < '\u0080') {
                // all ASCII range chars already handled, above
                isJavaIdentifierPart = false;
            } else {
                // non-ASCII: a surrogate pair is tested as one code point
                // and its high half is stored in sbuf here
                high = scanSurrogates();
                if (high != 0) {
                    if (sp == sbuf.length) {
                        putChar(high);
                    } else {
                        sbuf[sp++] = high;
                    }
                    isJavaIdentifierPart = Character.isJavaIdentifierPart(
                        Character.toCodePoint(high, ch));
                } else {
                    isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
                }
            }
            if (!isJavaIdentifierPart) {
                name = names.fromChars(sbuf, 0, sp);
                token = keywords.key(name);
                return;
            }
        }
    } while (true);
}
575 /** Are surrogates supported?
576 */
577 final static boolean surrogatesSupported = surrogatesSupported();
578 private static boolean surrogatesSupported() {
579 try {
580 Character.isHighSurrogate('a');
581 return true;
582 } catch (NoSuchMethodError ex) {
583 return false;
584 }
585 }
587 /** Scan surrogate pairs. If 'ch' is a high surrogate and
588 * the next character is a low surrogate, then put the low
589 * surrogate in 'ch', and return the high surrogate.
590 * otherwise, just return 0.
591 */
592 private char scanSurrogates() {
593 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
594 char high = ch;
596 scanChar();
598 if (Character.isLowSurrogate(ch)) {
599 return high;
600 }
602 ch = high;
603 }
605 return 0;
606 }
608 /** Return true if ch can be part of an operator.
609 */
610 private boolean isSpecial(char ch) {
611 switch (ch) {
612 case '!': case '%': case '&': case '*': case '?':
613 case '+': case '-': case ':': case '<': case '=':
614 case '>': case '^': case '|': case '~':
615 case '@':
616 return true;
617 default:
618 return false;
619 }
620 }
622 /** Read longest possible sequence of special characters and convert
623 * to token.
624 */
625 private void scanOperator() {
626 while (true) {
627 putChar(ch);
628 Name newname = names.fromChars(sbuf, 0, sp);
629 if (keywords.key(newname) == IDENTIFIER) {
630 sp--;
631 break;
632 }
633 name = newname;
634 token = keywords.key(newname);
635 scanChar();
636 if (!isSpecial(ch)) break;
637 }
638 }
/**
 * Scan a documentation comment; determine if a deprecated tag is present.
 * Called once the initial /, * have been skipped, positioned at the second *
 * (which is treated as the beginning of the first line).
 * Stops positioned at the closing '/'.
 * If the end of the input is reached first, the method returns
 * without being positioned on a '/'.
 */
@SuppressWarnings("fallthrough")
private void scanDocComment() {
    // true once the characters "@deprecated" were matched at the start of a line
    boolean deprecatedPrefix = false;

    forEachLine:
    while (bp < buflen) {

        // Skip optional WhiteSpace at beginning of line
        while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
            scanCommentChar();
        }

        // Skip optional consecutive Stars
        while (bp < buflen && ch == '*') {
            scanCommentChar();
            if (ch == '/') {
                return;
            }
        }

        // Skip optional WhiteSpace after Stars
        while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
            scanCommentChar();
        }

        deprecatedPrefix = false;
        // At beginning of line in the JavaDoc sense.
        // Match "@deprecated" one character at a time; the search is
        // skipped once the flag has already been set.
        if (bp < buflen && ch == '@' && !deprecatedFlag) {
            scanCommentChar();
            if (bp < buflen && ch == 'd') {
                scanCommentChar();
                if (bp < buflen && ch == 'e') {
                    scanCommentChar();
                    if (bp < buflen && ch == 'p') {
                        scanCommentChar();
                        if (bp < buflen && ch == 'r') {
                            scanCommentChar();
                            if (bp < buflen && ch == 'e') {
                                scanCommentChar();
                                if (bp < buflen && ch == 'c') {
                                    scanCommentChar();
                                    if (bp < buflen && ch == 'a') {
                                        scanCommentChar();
                                        if (bp < buflen && ch == 't') {
                                            scanCommentChar();
                                            if (bp < buflen && ch == 'e') {
                                                scanCommentChar();
                                                if (bp < buflen && ch == 'd') {
                                                    deprecatedPrefix = true;
                                                    scanCommentChar();
                                                }}}}}}}}}}}
        // The tag only counts when followed by whitespace or by the
        // end of the comment.
        if (deprecatedPrefix && bp < buflen) {
            if (Character.isWhitespace(ch)) {
                deprecatedFlag = true;
            } else if (ch == '*') {
                scanCommentChar();
                if (ch == '/') {
                    deprecatedFlag = true;
                    return;
                }
            }
        }

        // Skip rest of line
        while (bp < buflen) {
            switch (ch) {
            case '*':
                scanCommentChar();
                if (ch == '/') {
                    return;
                }
                break;
            case CR: // (Spec 3.4)
                scanCommentChar();
                if (ch != LF) {
                    continue forEachLine;
                }
                /* fall through to LF case */
            case LF: // (Spec 3.4)
                scanCommentChar();
                continue forEachLine;
            default:
                scanCommentChar();
            }
        } // rest of line
    } // forEachLine
    return;
}
735 /** The value of a literal token, recorded as a string.
736 * For integers, leading 0x and 'l' suffixes are suppressed.
737 */
738 public String stringVal() {
739 return new String(sbuf, 0, sp);
740 }
/** Read token.
 *  Skips white space, line terminators and comments (reporting each
 *  to the corresponding process* hook), then scans one token and
 *  records it in token, pos and endPos, plus name or sbuf where
 *  applicable. prevEndPos receives the endPos of the previous token.
 */
public void nextToken() {

    try {
        prevEndPos = endPos;
        // start with an empty literal buffer
        sp = 0;

        // Each iteration handles one lexical element; white space and
        // comments 'break' to loop again, tokens 'return'.
        while (true) {
            pos = bp;
            switch (ch) {
            case ' ': // (Spec 3.6)
            case '\t': // (Spec 3.6)
            case FF: // (Spec 3.6)
                do {
                    scanChar();
                } while (ch == ' ' || ch == '\t' || ch == FF);
                endPos = bp;
                processWhiteSpace();
                break;
            case LF: // (Spec 3.4)
                scanChar();
                endPos = bp;
                processLineTerminator();
                break;
            case CR: // (Spec 3.4)
                scanChar();
                // CR LF counts as a single line terminator
                if (ch == LF) {
                    scanChar();
                }
                endPos = bp;
                processLineTerminator();
                break;
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '$': case '_':
                scanIdent();
                return;
            case '0':
                scanChar();
                if (ch == 'x' || ch == 'X') {
                    // hexadecimal: the "0x" prefix is not stored in sbuf
                    scanChar();
                    if (ch == '.') {
                        scanHexFractionAndSuffix(false);
                    } else if (digit(16) < 0) {
                        lexError("invalid.hex.number");
                    } else {
                        scanNumber(16);
                    }
                } else {
                    // octal, or a decimal floating point literal with a leading 0
                    putChar('0');
                    scanNumber(8);
                }
                return;
            case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                scanNumber(10);
                return;
            case '.':
                scanChar();
                if ('0' <= ch && ch <= '9') {
                    // floating point literal without an integer part
                    putChar('.');
                    scanFractionAndSuffix();
                } else if (ch == '.') {
                    putChar('.'); putChar('.');
                    scanChar();
                    if (ch == '.') {
                        scanChar();
                        putChar('.');
                        token = ELLIPSIS;
                    } else {
                        // exactly two dots
                        lexError("malformed.fp.lit");
                    }
                } else {
                    token = DOT;
                }
                return;
            case ',':
                scanChar(); token = COMMA; return;
            case ';':
                scanChar(); token = SEMI; return;
            case '(':
                scanChar(); token = LPAREN; return;
            case ')':
                scanChar(); token = RPAREN; return;
            case '[':
                scanChar(); token = LBRACKET; return;
            case ']':
                scanChar(); token = RBRACKET; return;
            case '{':
                scanChar(); token = LBRACE; return;
            case '}':
                scanChar(); token = RBRACE; return;
            case '/':
                scanChar();
                if (ch == '/') {
                    // line comment: runs up to, not including, the line terminator
                    do {
                        scanCommentChar();
                    } while (ch != CR && ch != LF && bp < buflen);
                    // a comment that ran into the end of input is not reported
                    if (bp < buflen) {
                        endPos = bp;
                        processComment(CommentStyle.LINE);
                    }
                    break;
                } else if (ch == '*') {
                    scanChar();
                    CommentStyle style;
                    if (ch == '*') {
                        style = CommentStyle.JAVADOC;
                        scanDocComment();
                    } else {
                        style = CommentStyle.BLOCK;
                        while (bp < buflen) {
                            if (ch == '*') {
                                scanChar();
                                if (ch == '/') break;
                            } else {
                                scanCommentChar();
                            }
                        }
                    }
                    // both branches stop on the closing '/' unless the
                    // input ended first
                    if (ch == '/') {
                        scanChar();
                        endPos = bp;
                        processComment(style);
                        break;
                    } else {
                        lexError("unclosed.comment");
                        return;
                    }
                } else if (ch == '=') {
                    name = names.slashequals;
                    token = SLASHEQ;
                    scanChar();
                } else {
                    name = names.slash;
                    token = SLASH;
                }
                return;
            case '\'':
                scanChar();
                if (ch == '\'') {
                    lexError("empty.char.lit");
                } else {
                    if (ch == CR || ch == LF)
                        lexError(pos, "illegal.line.end.in.char.lit");
                    scanLitChar();
                    if (ch == '\'') {
                        scanChar();
                        token = CHARLITERAL;
                    } else {
                        lexError(pos, "unclosed.char.lit");
                    }
                }
                return;
            case '\"':
                scanChar();
                // collect characters up to the closing quote, a line
                // terminator, or the end of input
                while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
                    scanLitChar();
                if (ch == '\"') {
                    token = STRINGLITERAL;
                    scanChar();
                } else {
                    lexError(pos, "unclosed.str.lit");
                }
                return;
            default:
                if (isSpecial(ch)) {
                    scanOperator();
                } else {
                    boolean isJavaIdentifierStart;
                    if (ch < '\u0080') {
                        // all ASCII range chars already handled, above
                        isJavaIdentifierStart = false;
                    } else {
                        // non-ASCII: a surrogate pair is tested as one
                        // code point and its high half is stored in sbuf here
                        char high = scanSurrogates();
                        if (high != 0) {
                            if (sp == sbuf.length) {
                                putChar(high);
                            } else {
                                sbuf[sp++] = high;
                            }

                            isJavaIdentifierStart = Character.isJavaIdentifierStart(
                                Character.toCodePoint(high, ch));
                        } else {
                            isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
                        }
                    }
                    if (isJavaIdentifierStart) {
                        scanIdent();
                    } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
                        // end of input; the EOF token is positioned at eofPos
                        token = EOF;
                        pos = bp = eofPos;
                    } else {
                        lexError("illegal.char", String.valueOf((int)ch));
                        scanChar();
                    }
                }
                return;
            }
        }
    } finally {
        // runs on every exit path: the token ends where scanning stopped
        endPos = bp;
        if (scannerDebug)
            System.out.println("nextToken(" + pos
                               + "," + endPos + ")=|" +
                               new String(getRawCharacters(pos, endPos))
                               + "|");
    }
}
964 /** Return the current token, set by nextToken().
965 */
966 public Token token() {
967 return token;
968 }
970 /** Sets the current token.
971 */
972 public void token(Token token) {
973 this.token = token;
974 }
976 /** Return the current token's position: a 0-based
977 * offset from beginning of the raw input stream
978 * (before unicode translation)
979 */
980 public int pos() {
981 return pos;
982 }
984 /** Return the last character position of the current token.
985 */
986 public int endPos() {
987 return endPos;
988 }
990 /** Return the last character position of the previous token.
991 */
992 public int prevEndPos() {
993 return prevEndPos;
994 }
996 /** Return the position where a lexical error occurred;
997 */
998 public int errPos() {
999 return errPos;
1000 }
1002 /** Set the position where a lexical error occurred;
1003 */
1004 public void errPos(int pos) {
1005 errPos = pos;
1006 }
1008 /** Return the name of an identifier or token for the current token.
1009 */
1010 public Name name() {
1011 return name;
1012 }
1014 /** Return the radix of a numeric literal token.
1015 */
1016 public int radix() {
1017 return radix;
1018 }
1020 /** Has a @deprecated been encountered in last doc comment?
1021 * This needs to be reset by client with resetDeprecatedFlag.
1022 */
1023 public boolean deprecatedFlag() {
1024 return deprecatedFlag;
1025 }
1027 public void resetDeprecatedFlag() {
1028 deprecatedFlag = false;
1029 }
1031 /**
1032 * Returns the documentation string of the current token.
1033 */
1034 public String docComment() {
1035 return null;
1036 }
1038 /**
1039 * Returns a copy of the input buffer, up to its inputLength.
1040 * Unicode escape sequences are not translated.
1041 */
1042 public char[] getRawCharacters() {
1043 char[] chars = new char[buflen];
1044 System.arraycopy(buf, 0, chars, 0, buflen);
1045 return chars;
1046 }
1048 /**
1049 * Returns a copy of a character array subset of the input buffer.
1050 * The returned array begins at the <code>beginIndex</code> and
1051 * extends to the character at index <code>endIndex - 1</code>.
1052 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
1053 * This behavior is like
1054 * <code>String.substring(beginIndex, endIndex)</code>.
1055 * Unicode escape sequences are not translated.
1056 *
1057 * @param beginIndex the beginning index, inclusive.
1058 * @param endIndex the ending index, exclusive.
1059 * @throws IndexOutOfBounds if either offset is outside of the
1060 * array bounds
1061 */
1062 public char[] getRawCharacters(int beginIndex, int endIndex) {
1063 int length = endIndex - beginIndex;
1064 char[] chars = new char[length];
1065 System.arraycopy(buf, beginIndex, chars, 0, length);
1066 return chars;
1067 }
/** The kind of comment that is passed to processComment. */
public enum CommentStyle {
    /** A comment introduced by two slashes. */
    LINE,
    /** A slash-star comment whose opening is not directly followed by a second star. */
    BLOCK,
    /** A slash-star comment whose opening is directly followed by a second star. */
    JAVADOC,
}
1075 /**
1076 * Called when a complete comment has been scanned. pos and endPos
1077 * will mark the comment boundary.
1078 */
1079 protected void processComment(CommentStyle style) {
1080 if (scannerDebug)
1081 System.out.println("processComment(" + pos
1082 + "," + endPos + "," + style + ")=|"
1083 + new String(getRawCharacters(pos, endPos))
1084 + "|");
1085 }
1087 /**
1088 * Called when a complete whitespace run has been scanned. pos and endPos
1089 * will mark the whitespace boundary.
1090 */
1091 protected void processWhiteSpace() {
1092 if (scannerDebug)
1093 System.out.println("processWhitespace(" + pos
1094 + "," + endPos + ")=|" +
1095 new String(getRawCharacters(pos, endPos))
1096 + "|");
1097 }
1099 /**
1100 * Called when a line terminator has been processed.
1101 */
1102 protected void processLineTerminator() {
1103 if (scannerDebug)
1104 System.out.println("processTerminator(" + pos
1105 + "," + endPos + ")=|" +
1106 new String(getRawCharacters(pos, endPos))
1107 + "|");
1108 }
1110 /** Build a map for translating between line numbers and
1111 * positions in the input.
1112 *
1113 * @return a LineMap */
1114 public Position.LineMap getLineMap() {
1115 return Position.makeLineMap(buf, buflen, false);
1116 }
1118 }