src/share/classes/com/sun/tools/javac/parser/Scanner.java

changeset 1113
d346ab55031b
parent 1074
04f983e3e825
child 1144
9448fe783fd2
equal deleted inserted replaced
1112:b4021c520e40 1113:d346ab55031b
25 25
26 package com.sun.tools.javac.parser; 26 package com.sun.tools.javac.parser;
27 27
28 import java.nio.*; 28 import java.nio.*;
29 29
30 import com.sun.tools.javac.code.Source;
31 import com.sun.tools.javac.file.JavacFileManager;
32 import com.sun.tools.javac.util.*; 30 import com.sun.tools.javac.util.*;
31 import com.sun.tools.javac.util.Position.LineMap;
32 import com.sun.tools.javac.parser.JavaTokenizer.*;
33 33
34 34 import static com.sun.tools.javac.parser.Tokens.*;
35 import static com.sun.tools.javac.parser.Token.*;
36 import static com.sun.tools.javac.util.LayoutCharacters.*;
37 35
38 /** The lexical analyzer maps an input stream consisting of 36 /** The lexical analyzer maps an input stream consisting of
39 * ASCII characters and Unicode escapes into a token sequence. 37 * ASCII characters and Unicode escapes into a token sequence.
40 * 38 *
41 * <p><b>This is NOT part of any supported API. 39 * <p><b>This is NOT part of any supported API.
43 * This code and its internal interfaces are subject to change or 41 * This code and its internal interfaces are subject to change or
44 * deletion without notice.</b> 42 * deletion without notice.</b>
45 */ 43 */
46 public class Scanner implements Lexer { 44 public class Scanner implements Lexer {
47 45
48 private static boolean scannerDebug = false; 46 private Tokens tokens;
49
50 /* Output variables; set by nextToken():
51 */
52 47
53 /** The token, set by nextToken(). 48 /** The token, set by nextToken().
54 */ 49 */
55 private Token token; 50 private Token token;
56 51
57 /** Allow hex floating-point literals. 52 /** The previous token, set by nextToken().
58 */ 53 */
59 private boolean allowHexFloats; 54 private Token prevToken;
60 55
61 /** Allow binary literals. 56 private JavaTokenizer tokenizer;
62 */
63 private boolean allowBinaryLiterals;
64
65 /** Allow underscores in literals.
66 */
67 private boolean allowUnderscoresInLiterals;
68
69 /** The source language setting.
70 */
71 private Source source;
72
73 /** The token's position, 0-based offset from beginning of text.
74 */
75 private int pos;
76
77 /** Character position just after the last character of the token.
78 */
79 private int endPos;
80
81 /** The last character position of the previous token.
82 */
83 private int prevEndPos;
84
85 /** The position where a lexical error occurred;
86 */
87 private int errPos = Position.NOPOS;
88
89 /** The name of an identifier or token:
90 */
91 private Name name;
92
93 /** The radix of a numeric literal token.
94 */
95 private int radix;
96
97 /** Has a @deprecated been encountered in last doc comment?
98 * this needs to be reset by client.
99 */
100 protected boolean deprecatedFlag = false;
101
102 /** A character buffer for literals.
103 */
104 private char[] sbuf = new char[128];
105 private int sp;
106
107 /** The input buffer, index of next character to be read,
108 * index of one past last character in buffer.
109 */
110 private char[] buf;
111 private int bp;
112 private int buflen;
113 private int eofPos;
114
115 /** The current character.
116 */
117 private char ch;
118
119 /** The buffer index of the last converted unicode character
120 */
121 private int unicodeConversionBp = -1;
122
123 /** The log to be used for error reporting.
124 */
125 private final Log log;
126
127 /** The name table. */
128 private final Names names;
129
130 /** The keyword table. */
131 private final Keywords keywords;
132
133 /** Common code for constructors. */
134 private Scanner(ScannerFactory fac) {
135 log = fac.log;
136 names = fac.names;
137 keywords = fac.keywords;
138 source = fac.source;
139 allowBinaryLiterals = source.allowBinaryLiterals();
140 allowHexFloats = source.allowHexFloats();
141 allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
142 }
143
144 private static final boolean hexFloatsWork = hexFloatsWork();
145 private static boolean hexFloatsWork() {
146 try {
147 Float.valueOf("0x1.0p1");
148 return true;
149 } catch (NumberFormatException ex) {
150 return false;
151 }
152 }
153
154 /** Create a scanner from the input buffer. buffer must implement
155 * array() and compact(), and remaining() must be less than limit().
156 */
157 protected Scanner(ScannerFactory fac, CharBuffer buffer) {
158 this(fac, JavacFileManager.toArray(buffer), buffer.limit());
159 }
160
161 /** 57 /**
162 * Create a scanner from the input array. This method might 58 * Create a scanner from the input array. This method might
163 * modify the array. To avoid copying the input array, ensure 59 * modify the array. To avoid copying the input array, ensure
164 * that {@code inputLength < input.length} or 60 * that {@code inputLength < input.length} or
165 * {@code input[input.length -1]} is a white space character. 61 * {@code input[input.length -1]} is a white space character.
167 * @param fac the factory which created this Scanner 63 * @param fac the factory which created this Scanner
168 * @param input the input, might be modified 64 * @param input the input, might be modified
169 * @param inputLength the size of the input. 65 * @param inputLength the size of the input.
170 * Must be positive and less than or equal to input.length. 66 * Must be positive and less than or equal to input.length.
171 */ 67 */
172 protected Scanner(ScannerFactory fac, char[] input, int inputLength) { 68 protected Scanner(ScannerFactory fac, CharBuffer buf) {
173 this(fac); 69 this(fac, new JavaTokenizer(fac, buf));
174 eofPos = inputLength;
175 if (inputLength == input.length) {
176 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
177 inputLength--;
178 } else {
179 char[] newInput = new char[inputLength + 1];
180 System.arraycopy(input, 0, newInput, 0, input.length);
181 input = newInput;
182 }
183 }
184 buf = input;
185 buflen = inputLength;
186 buf[buflen] = EOI;
187 bp = -1;
188 scanChar();
189 } 70 }
190 71
191 /** Report an error at the given position using the provided arguments. 72 protected Scanner(ScannerFactory fac, char[] buf, int inputLength) {
192 */ 73 this(fac, new JavaTokenizer(fac, buf, inputLength));
193 private void lexError(int pos, String key, Object... args) {
194 log.error(pos, key, args);
195 token = ERROR;
196 errPos = pos;
197 } 74 }
198 75
199 /** Report an error at the current token position using the provided 76 protected Scanner(ScannerFactory fac, JavaTokenizer tokenizer) {
200 * arguments. 77 this.tokenizer = tokenizer;
201 */ 78 tokens = fac.tokens;
202 private void lexError(String key, Object... args) { 79 token = prevToken = DUMMY;
203 lexError(pos, key, args);
204 } 80 }
205 81
206 /** Convert an ASCII digit from its base (2, 8, 10, or 16)
207 * to its value.
208 */
209 private int digit(int base) {
210 char c = ch;
211 int result = Character.digit(c, base);
212 if (result >= 0 && c > 0x7f) {
213 lexError(pos+1, "illegal.nonascii.digit");
214 ch = "0123456789abcdef".charAt(result);
215 }
216 return result;
217 }
218
219 /** Convert unicode escape; bp points to initial '\' character
220 * (Spec 3.3).
221 */
222 private void convertUnicode() {
223 if (ch == '\\' && unicodeConversionBp != bp) {
224 bp++; ch = buf[bp];
225 if (ch == 'u') {
226 do {
227 bp++; ch = buf[bp];
228 } while (ch == 'u');
229 int limit = bp + 3;
230 if (limit < buflen) {
231 int d = digit(16);
232 int code = d;
233 while (bp < limit && d >= 0) {
234 bp++; ch = buf[bp];
235 d = digit(16);
236 code = (code << 4) + d;
237 }
238 if (d >= 0) {
239 ch = (char)code;
240 unicodeConversionBp = bp;
241 return;
242 }
243 }
244 lexError(bp, "illegal.unicode.esc");
245 } else {
246 bp--;
247 ch = '\\';
248 }
249 }
250 }
251
252 /** Read next character.
253 */
254 private void scanChar() {
255 ch = buf[++bp];
256 if (ch == '\\') {
257 convertUnicode();
258 }
259 }
260
261 /** Read next character in comment, skipping over double '\' characters.
262 */
263 private void scanCommentChar() {
264 scanChar();
265 if (ch == '\\') {
266 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
267 bp++;
268 } else {
269 convertUnicode();
270 }
271 }
272 }
273
274 /** Append a character to sbuf.
275 */
276 private void putChar(char ch) {
277 if (sp == sbuf.length) {
278 char[] newsbuf = new char[sbuf.length * 2];
279 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
280 sbuf = newsbuf;
281 }
282 sbuf[sp++] = ch;
283 }
284
285 /** Read next character in character or string literal and copy into sbuf.
286 */
287 private void scanLitChar() {
288 if (ch == '\\') {
289 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
290 bp++;
291 putChar('\\');
292 scanChar();
293 } else {
294 scanChar();
295 switch (ch) {
296 case '0': case '1': case '2': case '3':
297 case '4': case '5': case '6': case '7':
298 char leadch = ch;
299 int oct = digit(8);
300 scanChar();
301 if ('0' <= ch && ch <= '7') {
302 oct = oct * 8 + digit(8);
303 scanChar();
304 if (leadch <= '3' && '0' <= ch && ch <= '7') {
305 oct = oct * 8 + digit(8);
306 scanChar();
307 }
308 }
309 putChar((char)oct);
310 break;
311 case 'b':
312 putChar('\b'); scanChar(); break;
313 case 't':
314 putChar('\t'); scanChar(); break;
315 case 'n':
316 putChar('\n'); scanChar(); break;
317 case 'f':
318 putChar('\f'); scanChar(); break;
319 case 'r':
320 putChar('\r'); scanChar(); break;
321 case '\'':
322 putChar('\''); scanChar(); break;
323 case '\"':
324 putChar('\"'); scanChar(); break;
325 case '\\':
326 putChar('\\'); scanChar(); break;
327 default:
328 lexError(bp, "illegal.esc.char");
329 }
330 }
331 } else if (bp != buflen) {
332 putChar(ch); scanChar();
333 }
334 }
335
336 private void scanDigits(int digitRadix) {
337 char saveCh;
338 int savePos;
339 do {
340 if (ch != '_') {
341 putChar(ch);
342 } else {
343 if (!allowUnderscoresInLiterals) {
344 lexError("unsupported.underscore.lit", source.name);
345 allowUnderscoresInLiterals = true;
346 }
347 }
348 saveCh = ch;
349 savePos = bp;
350 scanChar();
351 } while (digit(digitRadix) >= 0 || ch == '_');
352 if (saveCh == '_')
353 lexError(savePos, "illegal.underscore");
354 }
355
356 /** Read exponent and 'd' or 'f' suffix of hexadecimal floating point number.
357 */
358 private void scanHexExponentAndSuffix() {
359 if (ch == 'p' || ch == 'P') {
360 putChar(ch);
361 scanChar();
362 skipIllegalUnderscores();
363 if (ch == '+' || ch == '-') {
364 putChar(ch);
365 scanChar();
366 }
367 skipIllegalUnderscores();
368 if ('0' <= ch && ch <= '9') {
369 scanDigits(10);
370 if (!allowHexFloats) {
371 lexError("unsupported.fp.lit", source.name);
372 allowHexFloats = true;
373 }
374 else if (!hexFloatsWork)
375 lexError("unsupported.cross.fp.lit");
376 } else
377 lexError("malformed.fp.lit");
378 } else {
379 lexError("malformed.fp.lit");
380 }
381 if (ch == 'f' || ch == 'F') {
382 putChar(ch);
383 scanChar();
384 token = FLOATLITERAL;
385 } else {
386 if (ch == 'd' || ch == 'D') {
387 putChar(ch);
388 scanChar();
389 }
390 token = DOUBLELITERAL;
391 }
392 }
393
394 /** Read fractional part of floating point number.
395 */
396 private void scanFraction() {
397 skipIllegalUnderscores();
398 if ('0' <= ch && ch <= '9') {
399 scanDigits(10);
400 }
401 int sp1 = sp;
402 if (ch == 'e' || ch == 'E') {
403 putChar(ch);
404 scanChar();
405 skipIllegalUnderscores();
406 if (ch == '+' || ch == '-') {
407 putChar(ch);
408 scanChar();
409 }
410 skipIllegalUnderscores();
411 if ('0' <= ch && ch <= '9') {
412 scanDigits(10);
413 return;
414 }
415 lexError("malformed.fp.lit");
416 sp = sp1;
417 }
418 }
419
420 /** Read fractional part and 'd' or 'f' suffix of floating point number.
421 */
422 private void scanFractionAndSuffix() {
423 this.radix = 10;
424 scanFraction();
425 if (ch == 'f' || ch == 'F') {
426 putChar(ch);
427 scanChar();
428 token = FLOATLITERAL;
429 } else {
430 if (ch == 'd' || ch == 'D') {
431 putChar(ch);
432 scanChar();
433 }
434 token = DOUBLELITERAL;
435 }
436 }
437
438 /** Read fractional part, exponent and 'd' or 'f' suffix of hexadecimal floating point number.
439 */
440 private void scanHexFractionAndSuffix(boolean seendigit) {
441 this.radix = 16;
442 Assert.check(ch == '.');
443 putChar(ch);
444 scanChar();
445 skipIllegalUnderscores();
446 if (digit(16) >= 0) {
447 seendigit = true;
448 scanDigits(16);
449 }
450 if (!seendigit)
451 lexError("invalid.hex.number");
452 else
453 scanHexExponentAndSuffix();
454 }
455
456 private void skipIllegalUnderscores() {
457 if (ch == '_') {
458 lexError(bp, "illegal.underscore");
459 while (ch == '_')
460 scanChar();
461 }
462 }
463
464 /** Read a number.
465 * @param radix The radix of the number; one of 2, 8, 10, 16.
466 */
467 private void scanNumber(int radix) {
468 this.radix = radix;
469 // for octal, allow base-10 digit in case it's a float literal
470 int digitRadix = (radix == 8 ? 10 : radix);
471 boolean seendigit = false;
472 if (digit(digitRadix) >= 0) {
473 seendigit = true;
474 scanDigits(digitRadix);
475 }
476 if (radix == 16 && ch == '.') {
477 scanHexFractionAndSuffix(seendigit);
478 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
479 scanHexExponentAndSuffix();
480 } else if (digitRadix == 10 && ch == '.') {
481 putChar(ch);
482 scanChar();
483 scanFractionAndSuffix();
484 } else if (digitRadix == 10 &&
485 (ch == 'e' || ch == 'E' ||
486 ch == 'f' || ch == 'F' ||
487 ch == 'd' || ch == 'D')) {
488 scanFractionAndSuffix();
489 } else {
490 if (ch == 'l' || ch == 'L') {
491 scanChar();
492 token = LONGLITERAL;
493 } else {
494 token = INTLITERAL;
495 }
496 }
497 }
498
499 /** Read an identifier.
500 */
501 private void scanIdent() {
502 boolean isJavaIdentifierPart;
503 char high;
504 do {
505 if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
506 // optimization, was: putChar(ch);
507
508 scanChar();
509 switch (ch) {
510 case 'A': case 'B': case 'C': case 'D': case 'E':
511 case 'F': case 'G': case 'H': case 'I': case 'J':
512 case 'K': case 'L': case 'M': case 'N': case 'O':
513 case 'P': case 'Q': case 'R': case 'S': case 'T':
514 case 'U': case 'V': case 'W': case 'X': case 'Y':
515 case 'Z':
516 case 'a': case 'b': case 'c': case 'd': case 'e':
517 case 'f': case 'g': case 'h': case 'i': case 'j':
518 case 'k': case 'l': case 'm': case 'n': case 'o':
519 case 'p': case 'q': case 'r': case 's': case 't':
520 case 'u': case 'v': case 'w': case 'x': case 'y':
521 case 'z':
522 case '$': case '_':
523 case '0': case '1': case '2': case '3': case '4':
524 case '5': case '6': case '7': case '8': case '9':
525 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
526 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
527 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
528 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
529 case '\u0015': case '\u0016': case '\u0017':
530 case '\u0018': case '\u0019': case '\u001B':
531 case '\u007F':
532 break;
533 case '\u001A': // EOI is also a legal identifier part
534 if (bp >= buflen) {
535 name = names.fromChars(sbuf, 0, sp);
536 token = keywords.key(name);
537 return;
538 }
539 break;
540 default:
541 if (ch < '\u0080') {
542 // all ASCII range chars already handled, above
543 isJavaIdentifierPart = false;
544 } else {
545 high = scanSurrogates();
546 if (high != 0) {
547 if (sp == sbuf.length) {
548 putChar(high);
549 } else {
550 sbuf[sp++] = high;
551 }
552 isJavaIdentifierPart = Character.isJavaIdentifierPart(
553 Character.toCodePoint(high, ch));
554 } else {
555 isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
556 }
557 }
558 if (!isJavaIdentifierPart) {
559 name = names.fromChars(sbuf, 0, sp);
560 token = keywords.key(name);
561 return;
562 }
563 }
564 } while (true);
565 }
566
567 /** Are surrogates supported?
568 */
569 final static boolean surrogatesSupported = surrogatesSupported();
570 private static boolean surrogatesSupported() {
571 try {
572 Character.isHighSurrogate('a');
573 return true;
574 } catch (NoSuchMethodError ex) {
575 return false;
576 }
577 }
578
579 /** Scan surrogate pairs. If 'ch' is a high surrogate and
580 * the next character is a low surrogate, then put the low
581 * surrogate in 'ch', and return the high surrogate.
582 * otherwise, just return 0.
583 */
584 private char scanSurrogates() {
585 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
586 char high = ch;
587
588 scanChar();
589
590 if (Character.isLowSurrogate(ch)) {
591 return high;
592 }
593
594 ch = high;
595 }
596
597 return 0;
598 }
599
600 /** Return true if ch can be part of an operator.
601 */
602 private boolean isSpecial(char ch) {
603 switch (ch) {
604 case '!': case '%': case '&': case '*': case '?':
605 case '+': case '-': case ':': case '<': case '=':
606 case '>': case '^': case '|': case '~':
607 case '@':
608 return true;
609 default:
610 return false;
611 }
612 }
613
614 /** Read longest possible sequence of special characters and convert
615 * to token.
616 */
617 private void scanOperator() {
618 while (true) {
619 putChar(ch);
620 Name newname = names.fromChars(sbuf, 0, sp);
621 if (keywords.key(newname) == IDENTIFIER) {
622 sp--;
623 break;
624 }
625 name = newname;
626 token = keywords.key(newname);
627 scanChar();
628 if (!isSpecial(ch)) break;
629 }
630 }
631
632 /**
633 * Scan a documentation comment; determine if a deprecated tag is present.
634 * Called once the initial /, * have been skipped, positioned at the second *
635 * (which is treated as the beginning of the first line).
636 * Stops positioned at the closing '/'.
637 */
638 @SuppressWarnings("fallthrough")
639 private void scanDocComment() {
640 boolean deprecatedPrefix = false;
641
642 forEachLine:
643 while (bp < buflen) {
644
645 // Skip optional WhiteSpace at beginning of line
646 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
647 scanCommentChar();
648 }
649
650 // Skip optional consecutive Stars
651 while (bp < buflen && ch == '*') {
652 scanCommentChar();
653 if (ch == '/') {
654 return;
655 }
656 }
657
658 // Skip optional WhiteSpace after Stars
659 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
660 scanCommentChar();
661 }
662
663 deprecatedPrefix = false;
664 // At beginning of line in the JavaDoc sense.
665 if (bp < buflen && ch == '@' && !deprecatedFlag) {
666 scanCommentChar();
667 if (bp < buflen && ch == 'd') {
668 scanCommentChar();
669 if (bp < buflen && ch == 'e') {
670 scanCommentChar();
671 if (bp < buflen && ch == 'p') {
672 scanCommentChar();
673 if (bp < buflen && ch == 'r') {
674 scanCommentChar();
675 if (bp < buflen && ch == 'e') {
676 scanCommentChar();
677 if (bp < buflen && ch == 'c') {
678 scanCommentChar();
679 if (bp < buflen && ch == 'a') {
680 scanCommentChar();
681 if (bp < buflen && ch == 't') {
682 scanCommentChar();
683 if (bp < buflen && ch == 'e') {
684 scanCommentChar();
685 if (bp < buflen && ch == 'd') {
686 deprecatedPrefix = true;
687 scanCommentChar();
688 }}}}}}}}}}}
689 if (deprecatedPrefix && bp < buflen) {
690 if (Character.isWhitespace(ch)) {
691 deprecatedFlag = true;
692 } else if (ch == '*') {
693 scanCommentChar();
694 if (ch == '/') {
695 deprecatedFlag = true;
696 return;
697 }
698 }
699 }
700
701 // Skip rest of line
702 while (bp < buflen) {
703 switch (ch) {
704 case '*':
705 scanCommentChar();
706 if (ch == '/') {
707 return;
708 }
709 break;
710 case CR: // (Spec 3.4)
711 scanCommentChar();
712 if (ch != LF) {
713 continue forEachLine;
714 }
715 /* fall through to LF case */
716 case LF: // (Spec 3.4)
717 scanCommentChar();
718 continue forEachLine;
719 default:
720 scanCommentChar();
721 }
722 } // rest of line
723 } // forEachLine
724 return;
725 }
726
727 /** The value of a literal token, recorded as a string.
728 * For integers, leading 0x and 'l' suffixes are suppressed.
729 */
730 public String stringVal() {
731 return new String(sbuf, 0, sp);
732 }
733
734 /** Read token.
735 */
736 public void nextToken() {
737
738 try {
739 prevEndPos = endPos;
740 sp = 0;
741
742 while (true) {
743 pos = bp;
744 switch (ch) {
745 case ' ': // (Spec 3.6)
746 case '\t': // (Spec 3.6)
747 case FF: // (Spec 3.6)
748 do {
749 scanChar();
750 } while (ch == ' ' || ch == '\t' || ch == FF);
751 endPos = bp;
752 processWhiteSpace();
753 break;
754 case LF: // (Spec 3.4)
755 scanChar();
756 endPos = bp;
757 processLineTerminator();
758 break;
759 case CR: // (Spec 3.4)
760 scanChar();
761 if (ch == LF) {
762 scanChar();
763 }
764 endPos = bp;
765 processLineTerminator();
766 break;
767 case 'A': case 'B': case 'C': case 'D': case 'E':
768 case 'F': case 'G': case 'H': case 'I': case 'J':
769 case 'K': case 'L': case 'M': case 'N': case 'O':
770 case 'P': case 'Q': case 'R': case 'S': case 'T':
771 case 'U': case 'V': case 'W': case 'X': case 'Y':
772 case 'Z':
773 case 'a': case 'b': case 'c': case 'd': case 'e':
774 case 'f': case 'g': case 'h': case 'i': case 'j':
775 case 'k': case 'l': case 'm': case 'n': case 'o':
776 case 'p': case 'q': case 'r': case 's': case 't':
777 case 'u': case 'v': case 'w': case 'x': case 'y':
778 case 'z':
779 case '$': case '_':
780 scanIdent();
781 return;
782 case '0':
783 scanChar();
784 if (ch == 'x' || ch == 'X') {
785 scanChar();
786 skipIllegalUnderscores();
787 if (ch == '.') {
788 scanHexFractionAndSuffix(false);
789 } else if (digit(16) < 0) {
790 lexError("invalid.hex.number");
791 } else {
792 scanNumber(16);
793 }
794 } else if (ch == 'b' || ch == 'B') {
795 if (!allowBinaryLiterals) {
796 lexError("unsupported.binary.lit", source.name);
797 allowBinaryLiterals = true;
798 }
799 scanChar();
800 skipIllegalUnderscores();
801 if (digit(2) < 0) {
802 lexError("invalid.binary.number");
803 } else {
804 scanNumber(2);
805 }
806 } else {
807 putChar('0');
808 if (ch == '_') {
809 int savePos = bp;
810 do {
811 scanChar();
812 } while (ch == '_');
813 if (digit(10) < 0) {
814 lexError(savePos, "illegal.underscore");
815 }
816 }
817 scanNumber(8);
818 }
819 return;
820 case '1': case '2': case '3': case '4':
821 case '5': case '6': case '7': case '8': case '9':
822 scanNumber(10);
823 return;
824 case '.':
825 scanChar();
826 if ('0' <= ch && ch <= '9') {
827 putChar('.');
828 scanFractionAndSuffix();
829 } else if (ch == '.') {
830 putChar('.'); putChar('.');
831 scanChar();
832 if (ch == '.') {
833 scanChar();
834 putChar('.');
835 token = ELLIPSIS;
836 } else {
837 lexError("malformed.fp.lit");
838 }
839 } else {
840 token = DOT;
841 }
842 return;
843 case ',':
844 scanChar(); token = COMMA; return;
845 case ';':
846 scanChar(); token = SEMI; return;
847 case '(':
848 scanChar(); token = LPAREN; return;
849 case ')':
850 scanChar(); token = RPAREN; return;
851 case '[':
852 scanChar(); token = LBRACKET; return;
853 case ']':
854 scanChar(); token = RBRACKET; return;
855 case '{':
856 scanChar(); token = LBRACE; return;
857 case '}':
858 scanChar(); token = RBRACE; return;
859 case '/':
860 scanChar();
861 if (ch == '/') {
862 do {
863 scanCommentChar();
864 } while (ch != CR && ch != LF && bp < buflen);
865 if (bp < buflen) {
866 endPos = bp;
867 processComment(CommentStyle.LINE);
868 }
869 break;
870 } else if (ch == '*') {
871 scanChar();
872 CommentStyle style;
873 if (ch == '*') {
874 style = CommentStyle.JAVADOC;
875 scanDocComment();
876 } else {
877 style = CommentStyle.BLOCK;
878 while (bp < buflen) {
879 if (ch == '*') {
880 scanChar();
881 if (ch == '/') break;
882 } else {
883 scanCommentChar();
884 }
885 }
886 }
887 if (ch == '/') {
888 scanChar();
889 endPos = bp;
890 processComment(style);
891 break;
892 } else {
893 lexError("unclosed.comment");
894 return;
895 }
896 } else if (ch == '=') {
897 name = names.slashequals;
898 token = SLASHEQ;
899 scanChar();
900 } else {
901 name = names.slash;
902 token = SLASH;
903 }
904 return;
905 case '\'':
906 scanChar();
907 if (ch == '\'') {
908 lexError("empty.char.lit");
909 } else {
910 if (ch == CR || ch == LF)
911 lexError(pos, "illegal.line.end.in.char.lit");
912 scanLitChar();
913 if (ch == '\'') {
914 scanChar();
915 token = CHARLITERAL;
916 } else {
917 lexError(pos, "unclosed.char.lit");
918 }
919 }
920 return;
921 case '\"':
922 scanChar();
923 while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
924 scanLitChar();
925 if (ch == '\"') {
926 token = STRINGLITERAL;
927 scanChar();
928 } else {
929 lexError(pos, "unclosed.str.lit");
930 }
931 return;
932 default:
933 if (isSpecial(ch)) {
934 scanOperator();
935 } else {
936 boolean isJavaIdentifierStart;
937 if (ch < '\u0080') {
938 // all ASCII range chars already handled, above
939 isJavaIdentifierStart = false;
940 } else {
941 char high = scanSurrogates();
942 if (high != 0) {
943 if (sp == sbuf.length) {
944 putChar(high);
945 } else {
946 sbuf[sp++] = high;
947 }
948
949 isJavaIdentifierStart = Character.isJavaIdentifierStart(
950 Character.toCodePoint(high, ch));
951 } else {
952 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
953 }
954 }
955 if (isJavaIdentifierStart) {
956 scanIdent();
957 } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
958 token = EOF;
959 pos = bp = eofPos;
960 } else {
961 lexError("illegal.char", String.valueOf((int)ch));
962 scanChar();
963 }
964 }
965 return;
966 }
967 }
968 } finally {
969 endPos = bp;
970 if (scannerDebug)
971 System.out.println("nextToken(" + pos
972 + "," + endPos + ")=|" +
973 new String(getRawCharacters(pos, endPos))
974 + "|");
975 }
976 }
977
978 /** Return the current token, set by nextToken().
979 */
980 public Token token() { 82 public Token token() {
981 return token; 83 return token;
982 } 84 }
983 85
984 /** Sets the current token. 86 public Token prevToken() {
985 * This method is primarily used to update the token stream when the 87 return prevToken;
986 * parser is handling the end of nested type arguments such as
987 * {@code List<List<String>>} and needs to disambiguate between
988 * repeated use of ">" and relation operators such as ">>" and ">>>". Noting
989 * that this does not handle arbitrary tokens containing Unicode escape
990 * sequences.
991 */
992 public void token(Token token) {
993 pos += this.token.name.length() - token.name.length();
994 prevEndPos = pos;
995 this.token = token;
996 } 88 }
997 89
998 /** Return the current token's position: a 0-based 90 public void nextToken() {
999 * offset from beginning of the raw input stream 91 prevToken = token;
1000 * (before unicode translation) 92 token = tokenizer.readToken();
1001 */
1002 public int pos() {
1003 return pos;
1004 } 93 }
1005 94
1006 /** Return the last character position of the current token. 95 public Token split() {
1007 */ 96 Token[] splitTokens = token.split(tokens);
1008 public int endPos() { 97 prevToken = splitTokens[0];
1009 return endPos; 98 token = splitTokens[1];
99 return token;
1010 } 100 }
1011 101
1012 /** Return the last character position of the previous token. 102 public LineMap getLineMap() {
1013 */ 103 return tokenizer.getLineMap();
1014 public int prevEndPos() {
1015 return prevEndPos;
1016 } 104 }
1017 105
1018 /** Return the position where a lexical error occurred;
1019 */
1020 public int errPos() { 106 public int errPos() {
1021 return errPos; 107 return tokenizer.errPos();
1022 } 108 }
1023 109
1024 /** Set the position where a lexical error occurred;
1025 */
1026 public void errPos(int pos) { 110 public void errPos(int pos) {
1027 errPos = pos; 111 tokenizer.errPos(pos);
1028 } 112 }
1029
1030 /** Return the name of an identifier or token for the current token.
1031 */
1032 public Name name() {
1033 return name;
1034 }
1035
1036 /** Return the radix of a numeric literal token.
1037 */
1038 public int radix() {
1039 return radix;
1040 }
1041
1042 /** Has a @deprecated been encountered in last doc comment?
1043 * This needs to be reset by client with resetDeprecatedFlag.
1044 */
1045 public boolean deprecatedFlag() {
1046 return deprecatedFlag;
1047 }
1048
1049 public void resetDeprecatedFlag() {
1050 deprecatedFlag = false;
1051 }
1052
1053 /**
1054 * Returns the documentation string of the current token.
1055 */
1056 public String docComment() {
1057 return null;
1058 }
1059
1060 /**
1061 * Returns a copy of the input buffer, up to its inputLength.
1062 * Unicode escape sequences are not translated.
1063 */
1064 public char[] getRawCharacters() {
1065 char[] chars = new char[buflen];
1066 System.arraycopy(buf, 0, chars, 0, buflen);
1067 return chars;
1068 }
1069
1070 /**
1071 * Returns a copy of a character array subset of the input buffer.
1072 * The returned array begins at the <code>beginIndex</code> and
1073 * extends to the character at index <code>endIndex - 1</code>.
1074 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
1075 * This behavior is like
1076 * <code>String.substring(beginIndex, endIndex)</code>.
1077 * Unicode escape sequences are not translated.
1078 *
1079 * @param beginIndex the beginning index, inclusive.
1080 * @param endIndex the ending index, exclusive.
1081 * @throws IndexOutOfBoundsException if either offset is outside of the
1082 * array bounds
1083 */
1084 public char[] getRawCharacters(int beginIndex, int endIndex) {
1085 int length = endIndex - beginIndex;
1086 char[] chars = new char[length];
1087 System.arraycopy(buf, beginIndex, chars, 0, length);
1088 return chars;
1089 }
1090
1091 public enum CommentStyle {
1092 LINE,
1093 BLOCK,
1094 JAVADOC,
1095 }
1096
1097 /**
1098 * Called when a complete comment has been scanned. pos and endPos
1099 * will mark the comment boundary.
1100 */
1101 protected void processComment(CommentStyle style) {
1102 if (scannerDebug)
1103 System.out.println("processComment(" + pos
1104 + "," + endPos + "," + style + ")=|"
1105 + new String(getRawCharacters(pos, endPos))
1106 + "|");
1107 }
1108
1109 /**
1110 * Called when a complete whitespace run has been scanned. pos and endPos
1111 * will mark the whitespace boundary.
1112 */
1113 protected void processWhiteSpace() {
1114 if (scannerDebug)
1115 System.out.println("processWhitespace(" + pos
1116 + "," + endPos + ")=|" +
1117 new String(getRawCharacters(pos, endPos))
1118 + "|");
1119 }
1120
1121 /**
1122 * Called when a line terminator has been processed.
1123 */
1124 protected void processLineTerminator() {
1125 if (scannerDebug)
1126 System.out.println("processTerminator(" + pos
1127 + "," + endPos + ")=|" +
1128 new String(getRawCharacters(pos, endPos))
1129 + "|");
1130 }
1131
1132 /** Build a map for translating between line numbers and
1133 * positions in the input.
1134 *
1135 * @return a LineMap */
1136 public Position.LineMap getLineMap() {
1137 return Position.makeLineMap(buf, buflen, false);
1138 }
1139
1140 } 113 }

mercurial