Tue, 28 Dec 2010 15:54:52 -0800
6962318: Update copyright year
Reviewed-by: xdono
1 /*
2 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package com.sun.tools.javac.parser;
28 import java.nio.*;
30 import com.sun.tools.javac.code.Source;
31 import com.sun.tools.javac.file.JavacFileManager;
32 import com.sun.tools.javac.util.*;
35 import static com.sun.tools.javac.parser.Token.*;
36 import static com.sun.tools.javac.util.LayoutCharacters.*;
38 /** The lexical analyzer maps an input stream consisting of
39 * ASCII characters and Unicode escapes into a token sequence.
40 *
41 * <p><b>This is NOT part of any supported API.
42 * If you write code that depends on this, you do so at your own risk.
43 * This code and its internal interfaces are subject to change or
44 * deletion without notice.</b>
45 */
46 public class Scanner implements Lexer {
/** Debug switch: when true, nextToken() and the process* hooks
 *  print a trace of each scanned region to System.out.
 */
48 private static boolean scannerDebug = false;
50 /* Output variables; set by nextToken():
 */
53 /** The token, set by nextToken() (or to ERROR by lexError()).
 */
55 private Token token;
57 /** Allow hex floating-point literals.
 *  Initialized from the source level; set to true once an
 *  "unsupported.fp.lit" error has been reported.
 */
59 private boolean allowHexFloats;
61 /** Allow binary literals.
 *  Initialized from the source level; set to true once an
 *  "unsupported.binary.lit" error has been reported.
 */
63 private boolean allowBinaryLiterals;
65 /** Allow underscores in literals.
 *  Set to true once an "unsupported.underscore.lit" error has
 *  been reported.
 */
67 private boolean allowUnderscoresInLiterals;
69 /** The source language setting.
 */
71 private Source source;
73 /** The token's position, 0-based offset from beginning of text.
 */
75 private int pos;
77 /** Character position just after the last character of the token.
 */
79 private int endPos;
81 /** The value endPos had before the current call to nextToken(),
 *  i.e. the end position of the previous token.
 */
83 private int prevEndPos;
85 /** The position where a lexical error occurred, or Position.NOPOS
 *  if lexError() has not recorded one.
 */
87 private int errPos = Position.NOPOS;
89 /** The name of an identifier or token.
 */
91 private Name name;
93 /** The radix of a numeric literal token.
 */
95 private int radix;
97 /** Has a @deprecated been encountered in last doc comment?
 *  This needs to be reset by the client.
 */
100 protected boolean deprecatedFlag = false;
102 /** A character buffer for literals; sp is the number of
 *  characters currently stored in it.
 */
104 private char[] sbuf = new char[128];
105 private int sp;
107 /** The input buffer (buf); the index in buf of the character
 *  currently held in ch (bp; scanChar() pre-increments it before
 *  reading); the index of the EOI sentinel, one past the last input
 *  character (buflen); and the input length originally passed to
 *  the constructor, used as the position of the EOF token (eofPos).
 */
110 private char[] buf;
111 private int bp;
112 private int buflen;
113 private int eofPos;
115 /** The current character.
 */
117 private char ch;
119 /** The buffer index of the last converted unicode character,
 *  or -1 if no unicode escape has been converted yet.
 */
121 private int unicodeConversionBp = -1;
123 /** The log to be used for error reporting.
 */
125 private final Log log;
127 /** The name table. */
128 private final Names names;
130 /** The keyword table. */
131 private final Keywords keywords;
/** Common code for constructors: takes the log, name table, keyword
 *  table and source level from the factory, and derives the
 *  language-feature switches from the source level.
 */
private Scanner(ScannerFactory fac) {
    log = fac.log;
    names = fac.names;
    keywords = fac.keywords;
    source = fac.source;
    allowBinaryLiterals = source.allowBinaryLiterals();
    allowHexFloats = source.allowHexFloats();
    // Previously initialized from allowBinaryLiterals() (copy-paste slip);
    // each feature switch must query its own Source predicate.
    allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
}
144 private static final boolean hexFloatsWork = hexFloatsWork();
145 private static boolean hexFloatsWork() {
146 try {
147 Float.valueOf("0x1.0p1");
148 return true;
149 } catch (NumberFormatException ex) {
150 return false;
151 }
152 }
154 /** Create a scanner from the input buffer. The characters are
 *  obtained with JavacFileManager.toArray(buffer) and buffer.limit()
 *  is used as the input length. buffer must implement
 *  array() and compact(), and remaining() must be less than limit().
 *
 *  @param fac    the factory which created this Scanner
 *  @param buffer the input characters
 */
157 protected Scanner(ScannerFactory fac, CharBuffer buffer) {
158 this(fac, JavacFileManager.toArray(buffer), buffer.limit());
159 }
161 /**
162 * Create a scanner from the input array. This method might
163 * modify the array. To avoid copying the input array, ensure
164 * that {@code inputLength < input.length} or
165 * {@code input[input.length -1]} is a white space character.
166 *
167 * @param fac the factory which created this Scanner
168 * @param input the input, might be modified
169 * @param inputLength the size of the input.
170 * Must be positive and less than or equal to input.length.
171 */
172 protected Scanner(ScannerFactory fac, char[] input, int inputLength) {
173 this(fac);
174 eofPos = inputLength;
175 if (inputLength == input.length) {
176 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
177 inputLength--;
178 } else {
179 char[] newInput = new char[inputLength + 1];
180 System.arraycopy(input, 0, newInput, 0, input.length);
181 input = newInput;
182 }
183 }
184 buf = input;
185 buflen = inputLength;
186 buf[buflen] = EOI;
187 bp = -1;
188 scanChar();
189 }
191 /** Report an error at the given position using the provided arguments.
 *  Logs the error, sets the current token to ERROR and records the
 *  position in errPos.
 *
 *  @param pos  the position to report and to store in errPos
 *  @param key  the error key passed to the log
 *  @param args the arguments passed to the log along with the key
 */
193 private void lexError(int pos, String key, Object... args) {
194 log.error(pos, key, args);
195 token = ERROR;
196 errPos = pos;
197 }
199 /** Report an error at the current token position (the pos field)
 *  using the provided arguments; delegates to
 *  lexError(int, String, Object...).
 */
202 private void lexError(String key, Object... args) {
203 lexError(pos, key, args);
204 }
206 /** Convert an ASCII digit from its base (8, 10, or 16)
207 * to its value.
208 */
209 private int digit(int base) {
210 char c = ch;
211 int result = Character.digit(c, base);
212 if (result >= 0 && c > 0x7f) {
213 lexError(pos+1, "illegal.nonascii.digit");
214 ch = "0123456789abcdef".charAt(result);
215 }
216 return result;
217 }
219 /** Convert unicode escape; bp points to initial '\' character
 *  (Spec 3.3).
 *  On success ch holds the decoded character, bp points at the last
 *  hex digit of the escape and unicodeConversionBp is set to bp.
 *  If the backslash is not followed by 'u', bp and ch are left
 *  pointing at the backslash. A malformed escape is reported as
 *  "illegal.unicode.esc".
 */
222 private void convertUnicode() {
// Do nothing if the backslash at bp was itself produced by an escape.
223 if (ch == '\\' && unicodeConversionBp != bp) {
224 bp++; ch = buf[bp];
225 if (ch == 'u') {
// Any number of 'u' characters may follow the backslash.
226 do {
227 bp++; ch = buf[bp];
228 } while (ch == 'u');
// bp is at the first hex digit; limit is the index of the fourth one.
229 int limit = bp + 3;
230 if (limit < buflen) {
231 int d = digit(16);
232 int code = d;
233 while (bp < limit && d >= 0) {
234 bp++; ch = buf[bp];
235 d = digit(16);
236 code = (code << 4) + d;
237 }
// d is negative if any of the four characters was not a hex digit.
238 if (d >= 0) {
239 ch = (char)code;
240 unicodeConversionBp = bp;
241 return;
242 }
243 }
244 lexError(bp, "illegal.unicode.esc");
245 } else {
// Not a unicode escape: step back onto the backslash.
246 bp--;
247 ch = '\\';
248 }
249 }
250 }
252 /** Read next character.
253 */
254 private void scanChar() {
255 ch = buf[++bp];
256 if (ch == '\\') {
257 convertUnicode();
258 }
259 }
261 /** Read next character in comment, skipping over double '\' characters.
262 */
263 private void scanCommentChar() {
264 scanChar();
265 if (ch == '\\') {
266 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
267 bp++;
268 } else {
269 convertUnicode();
270 }
271 }
272 }
274 /** Append a character to sbuf.
275 */
276 private void putChar(char ch) {
277 if (sp == sbuf.length) {
278 char[] newsbuf = new char[sbuf.length * 2];
279 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
280 sbuf = newsbuf;
281 }
282 sbuf[sp++] = ch;
283 }
285 /** For debugging purposes: print character.
286 */
287 private void dch() {
288 System.err.print(ch); System.out.flush();
289 }
291 /** Read next character in character or string literal and copy into sbuf.
292 */
293 private void scanLitChar() {
294 if (ch == '\\') {
295 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
296 bp++;
297 putChar('\\');
298 scanChar();
299 } else {
300 scanChar();
301 switch (ch) {
302 case '0': case '1': case '2': case '3':
303 case '4': case '5': case '6': case '7':
304 char leadch = ch;
305 int oct = digit(8);
306 scanChar();
307 if ('0' <= ch && ch <= '7') {
308 oct = oct * 8 + digit(8);
309 scanChar();
310 if (leadch <= '3' && '0' <= ch && ch <= '7') {
311 oct = oct * 8 + digit(8);
312 scanChar();
313 }
314 }
315 putChar((char)oct);
316 break;
317 case 'b':
318 putChar('\b'); scanChar(); break;
319 case 't':
320 putChar('\t'); scanChar(); break;
321 case 'n':
322 putChar('\n'); scanChar(); break;
323 case 'f':
324 putChar('\f'); scanChar(); break;
325 case 'r':
326 putChar('\r'); scanChar(); break;
327 case '\'':
328 putChar('\''); scanChar(); break;
329 case '\"':
330 putChar('\"'); scanChar(); break;
331 case '\\':
332 putChar('\\'); scanChar(); break;
333 default:
334 lexError(bp, "illegal.esc.char");
335 }
336 }
337 } else if (bp != buflen) {
338 putChar(ch); scanChar();
339 }
340 }
342 private void scanDigits(int digitRadix) {
343 char saveCh;
344 int savePos;
345 do {
346 if (ch != '_') {
347 putChar(ch);
348 } else {
349 if (!allowUnderscoresInLiterals) {
350 lexError("unsupported.underscore.lit", source.name);
351 allowUnderscoresInLiterals = true;
352 }
353 }
354 saveCh = ch;
355 savePos = bp;
356 scanChar();
357 } while (digit(digitRadix) >= 0 || ch == '_');
358 if (saveCh == '_')
359 lexError(savePos, "illegal.underscore");
360 }
362 /** Read fractional part of hexadecimal floating point number.
363 */
364 private void scanHexExponentAndSuffix() {
365 if (ch == 'p' || ch == 'P') {
366 putChar(ch);
367 scanChar();
368 skipIllegalUnderscores();
369 if (ch == '+' || ch == '-') {
370 putChar(ch);
371 scanChar();
372 }
373 skipIllegalUnderscores();
374 if ('0' <= ch && ch <= '9') {
375 scanDigits(10);
376 if (!allowHexFloats) {
377 lexError("unsupported.fp.lit", source.name);
378 allowHexFloats = true;
379 }
380 else if (!hexFloatsWork)
381 lexError("unsupported.cross.fp.lit");
382 } else
383 lexError("malformed.fp.lit");
384 } else {
385 lexError("malformed.fp.lit");
386 }
387 if (ch == 'f' || ch == 'F') {
388 putChar(ch);
389 scanChar();
390 token = FLOATLITERAL;
391 } else {
392 if (ch == 'd' || ch == 'D') {
393 putChar(ch);
394 scanChar();
395 }
396 token = DOUBLELITERAL;
397 }
398 }
400 /** Read fractional part of floating point number.
401 */
402 private void scanFraction() {
403 skipIllegalUnderscores();
404 if ('0' <= ch && ch <= '9') {
405 scanDigits(10);
406 }
407 int sp1 = sp;
408 if (ch == 'e' || ch == 'E') {
409 putChar(ch);
410 scanChar();
411 skipIllegalUnderscores();
412 if (ch == '+' || ch == '-') {
413 putChar(ch);
414 scanChar();
415 }
416 skipIllegalUnderscores();
417 if ('0' <= ch && ch <= '9') {
418 scanDigits(10);
419 return;
420 }
421 lexError("malformed.fp.lit");
422 sp = sp1;
423 }
424 }
426 /** Read fractional part and 'd' or 'f' suffix of floating point number.
427 */
428 private void scanFractionAndSuffix() {
429 this.radix = 10;
430 scanFraction();
431 if (ch == 'f' || ch == 'F') {
432 putChar(ch);
433 scanChar();
434 token = FLOATLITERAL;
435 } else {
436 if (ch == 'd' || ch == 'D') {
437 putChar(ch);
438 scanChar();
439 }
440 token = DOUBLELITERAL;
441 }
442 }
444 /** Read fractional part and 'd' or 'f' suffix of floating point number.
445 */
446 private void scanHexFractionAndSuffix(boolean seendigit) {
447 this.radix = 16;
448 assert ch == '.';
449 putChar(ch);
450 scanChar();
451 skipIllegalUnderscores();
452 if (digit(16) >= 0) {
453 seendigit = true;
454 scanDigits(16);
455 }
456 if (!seendigit)
457 lexError("invalid.hex.number");
458 else
459 scanHexExponentAndSuffix();
460 }
462 private void skipIllegalUnderscores() {
463 if (ch == '_') {
464 lexError(bp, "illegal.underscore");
465 while (ch == '_')
466 scanChar();
467 }
468 }
470 /** Read a number.
471 * @param radix The radix of the number; one of 2, j8, 10, 16.
472 */
473 private void scanNumber(int radix) {
474 this.radix = radix;
475 // for octal, allow base-10 digit in case it's a float literal
476 int digitRadix = (radix == 8 ? 10 : radix);
477 boolean seendigit = false;
478 if (digit(digitRadix) >= 0) {
479 seendigit = true;
480 scanDigits(digitRadix);
481 }
482 if (radix == 16 && ch == '.') {
483 scanHexFractionAndSuffix(seendigit);
484 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
485 scanHexExponentAndSuffix();
486 } else if (digitRadix == 10 && ch == '.') {
487 putChar(ch);
488 scanChar();
489 scanFractionAndSuffix();
490 } else if (digitRadix == 10 &&
491 (ch == 'e' || ch == 'E' ||
492 ch == 'f' || ch == 'F' ||
493 ch == 'd' || ch == 'D')) {
494 scanFractionAndSuffix();
495 } else {
496 if (ch == 'l' || ch == 'L') {
497 scanChar();
498 token = LONGLITERAL;
499 } else {
500 token = INTLITERAL;
501 }
502 }
503 }
505 /** Read an identifier, starting at the current character.
 *  Characters are appended to sbuf until one that cannot be part of
 *  an identifier is reached; then name is set from the collected
 *  characters and token to whatever the keyword table maps that
 *  name to.
 */
507 private void scanIdent() {
508 boolean isJavaIdentifierPart;
509 char high;
510 do {
511 if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
512 // optimization, was: putChar(ch);
514 scanChar();
515 switch (ch) {
// Fast path: ASCII characters that are always identifier parts.
516 case 'A': case 'B': case 'C': case 'D': case 'E':
517 case 'F': case 'G': case 'H': case 'I': case 'J':
518 case 'K': case 'L': case 'M': case 'N': case 'O':
519 case 'P': case 'Q': case 'R': case 'S': case 'T':
520 case 'U': case 'V': case 'W': case 'X': case 'Y':
521 case 'Z':
522 case 'a': case 'b': case 'c': case 'd': case 'e':
523 case 'f': case 'g': case 'h': case 'i': case 'j':
524 case 'k': case 'l': case 'm': case 'n': case 'o':
525 case 'p': case 'q': case 'r': case 's': case 't':
526 case 'u': case 'v': case 'w': case 'x': case 'y':
527 case 'z':
528 case '$': case '_':
529 case '0': case '1': case '2': case '3': case '4':
530 case '5': case '6': case '7': case '8': case '9':
531 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
532 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
533 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
534 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
535 case '\u0015': case '\u0016': case '\u0017':
536 case '\u0018': case '\u0019': case '\u001B':
537 case '\u007F':
538 break;
539 case '\u001A': // EOI is also a legal identifier part
// At or past the end of the buffer this is the sentinel: finish the identifier.
540 if (bp >= buflen) {
541 name = names.fromChars(sbuf, 0, sp);
542 token = keywords.key(name);
543 return;
544 }
545 break;
546 default:
547 if (ch < '\u0080') {
548 // all ASCII range chars already handled, above
549 isJavaIdentifierPart = false;
550 } else {
// Non-ASCII: may be a surrogate pair forming one code point.
551 high = scanSurrogates();
552 if (high != 0) {
553 if (sp == sbuf.length) {
554 putChar(high);
555 } else {
556 sbuf[sp++] = high;
557 }
558 isJavaIdentifierPart = Character.isJavaIdentifierPart(
559 Character.toCodePoint(high, ch));
560 } else {
561 isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
562 }
563 }
564 if (!isJavaIdentifierPart) {
565 name = names.fromChars(sbuf, 0, sp);
566 token = keywords.key(name);
567 return;
568 }
569 }
570 } while (true);
571 }
573 /** Are surrogates supported?
574 */
575 final static boolean surrogatesSupported = surrogatesSupported();
576 private static boolean surrogatesSupported() {
577 try {
578 Character.isHighSurrogate('a');
579 return true;
580 } catch (NoSuchMethodError ex) {
581 return false;
582 }
583 }
585 /** Scan surrogate pairs. If 'ch' is a high surrogate and
586 * the next character is a low surrogate, then put the low
587 * surrogate in 'ch', and return the high surrogate.
588 * otherwise, just return 0.
589 */
590 private char scanSurrogates() {
591 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
592 char high = ch;
594 scanChar();
596 if (Character.isLowSurrogate(ch)) {
597 return high;
598 }
600 ch = high;
601 }
603 return 0;
604 }
606 /** Return true if ch can be part of an operator.
607 */
608 private boolean isSpecial(char ch) {
609 switch (ch) {
610 case '!': case '%': case '&': case '*': case '?':
611 case '+': case '-': case ':': case '<': case '=':
612 case '>': case '^': case '|': case '~':
613 case '@':
614 return true;
615 default:
616 return false;
617 }
618 }
620 /** Read longest possible sequence of special characters and convert
621 * to token.
622 */
623 private void scanOperator() {
624 while (true) {
625 putChar(ch);
626 Name newname = names.fromChars(sbuf, 0, sp);
627 if (keywords.key(newname) == IDENTIFIER) {
628 sp--;
629 break;
630 }
631 name = newname;
632 token = keywords.key(newname);
633 scanChar();
634 if (!isSpecial(ch)) break;
635 }
636 }
638 /**
639 * Scan a documention comment; determine if a deprecated tag is present.
640 * Called once the initial /, * have been skipped, positioned at the second *
641 * (which is treated as the beginning of the first line).
642 * Stops positioned at the closing '/'.
643 */
644 @SuppressWarnings("fallthrough")
645 private void scanDocComment() {
646 boolean deprecatedPrefix = false;
648 forEachLine:
649 while (bp < buflen) {
651 // Skip optional WhiteSpace at beginning of line
652 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
653 scanCommentChar();
654 }
656 // Skip optional consecutive Stars
657 while (bp < buflen && ch == '*') {
658 scanCommentChar();
659 if (ch == '/') {
660 return;
661 }
662 }
664 // Skip optional WhiteSpace after Stars
665 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
666 scanCommentChar();
667 }
669 deprecatedPrefix = false;
670 // At beginning of line in the JavaDoc sense.
671 if (bp < buflen && ch == '@' && !deprecatedFlag) {
672 scanCommentChar();
673 if (bp < buflen && ch == 'd') {
674 scanCommentChar();
675 if (bp < buflen && ch == 'e') {
676 scanCommentChar();
677 if (bp < buflen && ch == 'p') {
678 scanCommentChar();
679 if (bp < buflen && ch == 'r') {
680 scanCommentChar();
681 if (bp < buflen && ch == 'e') {
682 scanCommentChar();
683 if (bp < buflen && ch == 'c') {
684 scanCommentChar();
685 if (bp < buflen && ch == 'a') {
686 scanCommentChar();
687 if (bp < buflen && ch == 't') {
688 scanCommentChar();
689 if (bp < buflen && ch == 'e') {
690 scanCommentChar();
691 if (bp < buflen && ch == 'd') {
692 deprecatedPrefix = true;
693 scanCommentChar();
694 }}}}}}}}}}}
695 if (deprecatedPrefix && bp < buflen) {
696 if (Character.isWhitespace(ch)) {
697 deprecatedFlag = true;
698 } else if (ch == '*') {
699 scanCommentChar();
700 if (ch == '/') {
701 deprecatedFlag = true;
702 return;
703 }
704 }
705 }
707 // Skip rest of line
708 while (bp < buflen) {
709 switch (ch) {
710 case '*':
711 scanCommentChar();
712 if (ch == '/') {
713 return;
714 }
715 break;
716 case CR: // (Spec 3.4)
717 scanCommentChar();
718 if (ch != LF) {
719 continue forEachLine;
720 }
721 /* fall through to LF case */
722 case LF: // (Spec 3.4)
723 scanCommentChar();
724 continue forEachLine;
725 default:
726 scanCommentChar();
727 }
728 } // rest of line
729 } // forEachLine
730 return;
731 }
733 /** The value of a literal token, recorded as a string.
734 * For integers, leading 0x and 'l' suffixes are suppressed.
735 */
736 public String stringVal() {
737 return new String(sbuf, 0, sp);
738 }
740 /** Read the next token.
 *  White space, line terminators and comments are skipped (each is
 *  handed to the matching process* hook); scanning then dispatches on
 *  the current character and sets token, and for some token kinds
 *  name, radix and the literal buffer. pos is set to the start of
 *  the token, prevEndPos to the previous value of endPos, and endPos
 *  to the position just after the token. Lexical errors are reported
 *  through lexError(), which records errPos.
 */
742 public void nextToken() {
744 try {
745 prevEndPos = endPos;
746 sp = 0;
// Each iteration either skips white space / a comment (break) or
// scans one token (return).
748 while (true) {
749 pos = bp;
750 switch (ch) {
751 case ' ': // (Spec 3.6)
752 case '\t': // (Spec 3.6)
753 case FF: // (Spec 3.6)
754 do {
755 scanChar();
756 } while (ch == ' ' || ch == '\t' || ch == FF);
757 endPos = bp;
758 processWhiteSpace();
759 break;
760 case LF: // (Spec 3.4)
761 scanChar();
762 endPos = bp;
763 processLineTerminator();
764 break;
765 case CR: // (Spec 3.4)
766 scanChar();
767 if (ch == LF) {
768 scanChar();
769 }
770 endPos = bp;
771 processLineTerminator();
772 break;
773 case 'A': case 'B': case 'C': case 'D': case 'E':
774 case 'F': case 'G': case 'H': case 'I': case 'J':
775 case 'K': case 'L': case 'M': case 'N': case 'O':
776 case 'P': case 'Q': case 'R': case 'S': case 'T':
777 case 'U': case 'V': case 'W': case 'X': case 'Y':
778 case 'Z':
779 case 'a': case 'b': case 'c': case 'd': case 'e':
780 case 'f': case 'g': case 'h': case 'i': case 'j':
781 case 'k': case 'l': case 'm': case 'n': case 'o':
782 case 'p': case 'q': case 'r': case 's': case 't':
783 case 'u': case 'v': case 'w': case 'x': case 'y':
784 case 'z':
785 case '$': case '_':
786 scanIdent();
787 return;
788 case '0':
// A leading zero introduces a hex (0x), binary (0b) or octal literal;
// the prefix itself is not stored in the literal buffer.
789 scanChar();
790 if (ch == 'x' || ch == 'X') {
791 scanChar();
792 skipIllegalUnderscores();
793 if (ch == '.') {
794 scanHexFractionAndSuffix(false);
795 } else if (digit(16) < 0) {
796 lexError("invalid.hex.number");
797 } else {
798 scanNumber(16);
799 }
800 } else if (ch == 'b' || ch == 'B') {
801 if (!allowBinaryLiterals) {
802 lexError("unsupported.binary.lit", source.name);
803 allowBinaryLiterals = true;
804 }
805 scanChar();
806 skipIllegalUnderscores();
807 if (digit(2) < 0) {
808 lexError("invalid.binary.number");
809 } else {
810 scanNumber(2);
811 }
812 } else {
813 putChar('0');
814 if (ch == '_') {
// Underscores directly after the leading zero are only legal when
// a digit follows them.
815 int savePos = bp;
816 do {
817 scanChar();
818 } while (ch == '_');
819 if (digit(10) < 0) {
820 lexError(savePos, "illegal.underscore");
821 }
822 }
823 scanNumber(8);
824 }
825 return;
826 case '1': case '2': case '3': case '4':
827 case '5': case '6': case '7': case '8': case '9':
828 scanNumber(10);
829 return;
830 case '.':
// Either a fraction (".5"), an ellipsis ("...") or a plain dot.
831 scanChar();
832 if ('0' <= ch && ch <= '9') {
833 putChar('.');
834 scanFractionAndSuffix();
835 } else if (ch == '.') {
836 putChar('.'); putChar('.');
837 scanChar();
838 if (ch == '.') {
839 scanChar();
840 putChar('.');
841 token = ELLIPSIS;
842 } else {
843 lexError("malformed.fp.lit");
844 }
845 } else {
846 token = DOT;
847 }
848 return;
849 case ',':
850 scanChar(); token = COMMA; return;
851 case ';':
852 scanChar(); token = SEMI; return;
853 case '(':
854 scanChar(); token = LPAREN; return;
855 case ')':
856 scanChar(); token = RPAREN; return;
857 case '[':
858 scanChar(); token = LBRACKET; return;
859 case ']':
860 scanChar(); token = RBRACKET; return;
861 case '{':
862 scanChar(); token = LBRACE; return;
863 case '}':
864 scanChar(); token = RBRACE; return;
865 case '/':
// Line comment, block/doc comment, "/=" or "/".
866 scanChar();
867 if (ch == '/') {
868 do {
869 scanCommentChar();
870 } while (ch != CR && ch != LF && bp < buflen);
// processComment is only called when the line comment ended before
// the end of the input.
871 if (bp < buflen) {
872 endPos = bp;
873 processComment(CommentStyle.LINE);
874 }
875 break;
876 } else if (ch == '*') {
877 scanChar();
878 CommentStyle style;
879 if (ch == '*') {
880 style = CommentStyle.JAVADOC;
881 scanDocComment();
882 } else {
883 style = CommentStyle.BLOCK;
884 while (bp < buflen) {
885 if (ch == '*') {
886 scanChar();
887 if (ch == '/') break;
888 } else {
889 scanCommentChar();
890 }
891 }
892 }
// Both comment scanners stop on the closing '/' when there is one.
893 if (ch == '/') {
894 scanChar();
895 endPos = bp;
896 processComment(style);
897 break;
898 } else {
899 lexError("unclosed.comment");
900 return;
901 }
902 } else if (ch == '=') {
903 name = names.slashequals;
904 token = SLASHEQ;
905 scanChar();
906 } else {
907 name = names.slash;
908 token = SLASH;
909 }
910 return;
911 case '\'':
912 scanChar();
913 if (ch == '\'') {
914 lexError("empty.char.lit");
915 } else {
916 if (ch == CR || ch == LF)
917 lexError(pos, "illegal.line.end.in.char.lit");
918 scanLitChar();
919 if (ch == '\'') {
920 scanChar();
921 token = CHARLITERAL;
922 } else {
923 lexError(pos, "unclosed.char.lit");
924 }
925 }
926 return;
927 case '\"':
928 scanChar();
929 while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
930 scanLitChar();
931 if (ch == '\"') {
932 token = STRINGLITERAL;
933 scanChar();
934 } else {
935 lexError(pos, "unclosed.str.lit");
936 }
937 return;
938 default:
// Operators, non-ASCII identifiers, end of input, or an illegal character.
939 if (isSpecial(ch)) {
940 scanOperator();
941 } else {
942 boolean isJavaIdentifierStart;
943 if (ch < '\u0080') {
944 // all ASCII range chars already handled, above
945 isJavaIdentifierStart = false;
946 } else {
947 char high = scanSurrogates();
948 if (high != 0) {
949 if (sp == sbuf.length) {
950 putChar(high);
951 } else {
952 sbuf[sp++] = high;
953 }
955 isJavaIdentifierStart = Character.isJavaIdentifierStart(
956 Character.toCodePoint(high, ch));
957 } else {
958 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
959 }
960 }
961 if (isJavaIdentifierStart) {
962 scanIdent();
963 } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
964 token = EOF;
965 pos = bp = eofPos;
966 } else {
967 lexError("illegal.char", String.valueOf((int)ch));
968 scanChar();
969 }
970 }
971 return;
972 }
973 }
974 } finally {
// Runs on every exit path, so endPos always reflects the final bp.
975 endPos = bp;
976 if (scannerDebug)
977 System.out.println("nextToken(" + pos
978 + "," + endPos + ")=|" +
979 new String(getRawCharacters(pos, endPos))
980 + "|");
981 }
982 }
984 /** Return the current token, set by nextToken()
 *  or by token(Token).
 */
986 public Token token() {
987 return token;
988 }
990 /** Sets the current token. No other scanner state is changed.
 *  @param token the token to be returned by token()
 */
992 public void token(Token token) {
993 this.token = token;
994 }
996 /** Return the current token's position: a 0-based
 *  offset from beginning of the raw input stream
 *  (before unicode translation).
 */
1000 public int pos() {
1001 return pos;
1002 }
1004 /** Return the end position of the current token: the character
 *  position just after its last character.
 */
1006 public int endPos() {
1007 return endPos;
1008 }
1010 /** Return the end position of the previous token, i.e. the value
 *  endPos() had before the most recent call to nextToken().
 */
1012 public int prevEndPos() {
1013 return prevEndPos;
1014 }
1016 /** Return the position where a lexical error occurred.
 *  The field starts out as Position.NOPOS.
 */
1018 public int errPos() {
1019 return errPos;
1020 }
1022 /** Set the position where a lexical error occurred.
 *  @param pos the new error position
 */
1024 public void errPos(int pos) {
1025 errPos = pos;
1026 }
1028 /** Return the name of an identifier or token for the current token.
 *  The value is only updated by the identifier and operator
 *  scanners and for '/' and '/='.
 */
1030 public Name name() {
1031 return name;
1032 }
1034 /** Return the radix of a numeric literal token, as recorded by
 *  the number scanning methods.
 */
1036 public int radix() {
1037 return radix;
1038 }
1040 /** Has a @deprecated been encountered in last doc comment?
 *  This needs to be reset by client with resetDeprecatedFlag().
 */
1043 public boolean deprecatedFlag() {
1044 return deprecatedFlag;
1045 }
/** Clear the flag reported by deprecatedFlag(). */
1047 public void resetDeprecatedFlag() {
1048 deprecatedFlag = false;
1049 }
1051 /**
 * Returns the documentation string of the current token.
 * This implementation does not record doc comments and always
 * returns null.
 */
1054 public String docComment() {
1055 return null;
1056 }
1058 /**
1059 * Returns a copy of the input buffer, up to its inputLength.
1060 * Unicode escape sequences are not translated.
1061 */
1062 public char[] getRawCharacters() {
1063 char[] chars = new char[buflen];
1064 System.arraycopy(buf, 0, chars, 0, buflen);
1065 return chars;
1066 }
1068 /**
1069 * Returns a copy of a character array subset of the input buffer.
1070 * The returned array begins at the <code>beginIndex</code> and
1071 * extends to the character at index <code>endIndex - 1</code>.
1072 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
1073 * This behavior is like
1074 * <code>String.substring(beginIndex, endIndex)</code>.
1075 * Unicode escape sequences are not translated.
1076 *
1077 * @param beginIndex the beginning index, inclusive.
1078 * @param endIndex the ending index, exclusive.
1079 * @throws IndexOutOfBounds if either offset is outside of the
1080 * array bounds
1081 */
1082 public char[] getRawCharacters(int beginIndex, int endIndex) {
1083 int length = endIndex - beginIndex;
1084 char[] chars = new char[length];
1085 System.arraycopy(buf, beginIndex, chars, 0, length);
1086 return chars;
1087 }
/** The kind of comment passed to processComment(): LINE for a
 *  "//" comment, JAVADOC for a comment opened by a slash and two
 *  stars, BLOCK for any other comment opened by a slash and a star.
 */
1089 public enum CommentStyle {
1090 LINE,
1091 BLOCK,
1092 JAVADOC,
1093 }
1095 /**
1096 * Called when a complete comment has been scanned. pos and endPos
1097 * will mark the comment boundary.
1098 */
1099 protected void processComment(CommentStyle style) {
1100 if (scannerDebug)
1101 System.out.println("processComment(" + pos
1102 + "," + endPos + "," + style + ")=|"
1103 + new String(getRawCharacters(pos, endPos))
1104 + "|");
1105 }
1107 /**
1108 * Called when a complete whitespace run has been scanned. pos and endPos
1109 * will mark the whitespace boundary.
1110 */
1111 protected void processWhiteSpace() {
1112 if (scannerDebug)
1113 System.out.println("processWhitespace(" + pos
1114 + "," + endPos + ")=|" +
1115 new String(getRawCharacters(pos, endPos))
1116 + "|");
1117 }
1119 /**
1120 * Called when a line terminator has been processed.
1121 */
1122 protected void processLineTerminator() {
1123 if (scannerDebug)
1124 System.out.println("processTerminator(" + pos
1125 + "," + endPos + ")=|" +
1126 new String(getRawCharacters(pos, endPos))
1127 + "|");
1128 }
1130 /** Build a map for translating between line numbers and
 *  positions in the input, by handing the input buffer and its
 *  length to Position.makeLineMap (with false as the third
 *  argument; see that method for its meaning).
 *
 * @return a LineMap */
1134 public Position.LineMap getLineMap() {
1135 return Position.makeLineMap(buf, buflen, false);
1136 }
1138 }