src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java

changeset 1125
56830d5cb5bb
parent 1113
d346ab55031b
child 1145
3343b22e2761
equal deleted inserted replaced
1124:9e2eb4bc49eb 1125:56830d5cb5bb
23 * questions. 23 * questions.
24 */ 24 */
25 25
26 package com.sun.tools.javac.parser; 26 package com.sun.tools.javac.parser;
27 27
28 import com.sun.tools.javac.code.Source;
29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
30 import com.sun.tools.javac.util.*;
31
28 import java.nio.CharBuffer; 32 import java.nio.CharBuffer;
29 import com.sun.tools.javac.code.Source;
30 import com.sun.tools.javac.util.*;
31
32 33
33 import static com.sun.tools.javac.parser.Tokens.*; 34 import static com.sun.tools.javac.parser.Tokens.*;
34 import static com.sun.tools.javac.util.LayoutCharacters.*; 35 import static com.sun.tools.javac.util.LayoutCharacters.*;
35 36
36 /** The lexical analyzer maps an input stream consisting of 37 /** The lexical analyzer maps an input stream consisting of
63 64
64 /** The log to be used for error reporting. 65 /** The log to be used for error reporting.
65 */ 66 */
66 private final Log log; 67 private final Log log;
67 68
68 /** The name table. */
69 private final Names names;
70
71 /** The token factory. */ 69 /** The token factory. */
72 private final Tokens tokens; 70 private final Tokens tokens;
73 71
74 /** The token kind, set by nextToken(). 72 /** The token kind, set by nextToken().
75 */ 73 */
85 83
86 /** The position where a lexical error occurred; 84 /** The position where a lexical error occurred;
87 */ 85 */
88 protected int errPos = Position.NOPOS; 86 protected int errPos = Position.NOPOS;
89 87
90 /** Has a @deprecated been encountered in last doc comment? 88 /** The Unicode reader (low-level stream reader).
91 * this needs to be reset by client. 89 */
92 */
93 protected boolean deprecatedFlag = false;
94
95 /** A character buffer for saved chars.
96 */
97 protected char[] sbuf = new char[128];
98 protected int sp;
99
100 protected UnicodeReader reader; 90 protected UnicodeReader reader;
91
92 protected ScannerFactory fac;
101 93
102 private static final boolean hexFloatsWork = hexFloatsWork(); 94 private static final boolean hexFloatsWork = hexFloatsWork();
103 private static boolean hexFloatsWork() { 95 private static boolean hexFloatsWork() {
104 try { 96 try {
105 Float.valueOf("0x1.0p1"); 97 Float.valueOf("0x1.0p1");
127 protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) { 119 protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
128 this(fac, new UnicodeReader(fac, buf, inputLength)); 120 this(fac, new UnicodeReader(fac, buf, inputLength));
129 } 121 }
130 122
131 protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) { 123 protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
132 log = fac.log; 124 this.fac = fac;
133 names = fac.names; 125 this.log = fac.log;
134 tokens = fac.tokens; 126 this.tokens = fac.tokens;
135 source = fac.source; 127 this.source = fac.source;
136 this.reader = reader; 128 this.reader = reader;
137 allowBinaryLiterals = source.allowBinaryLiterals(); 129 this.allowBinaryLiterals = source.allowBinaryLiterals();
138 allowHexFloats = source.allowHexFloats(); 130 this.allowHexFloats = source.allowHexFloats();
139 allowUnderscoresInLiterals = source.allowUnderscoresInLiterals(); 131 this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
140 } 132 }
141 133
142 /** Report an error at the given position using the provided arguments. 134 /** Report an error at the given position using the provided arguments.
143 */ 135 */
144 protected void lexError(int pos, String key, Object... args) { 136 protected void lexError(int pos, String key, Object... args) {
145 log.error(pos, key, args); 137 log.error(pos, key, args);
146 tk = TokenKind.ERROR; 138 tk = TokenKind.ERROR;
147 errPos = pos; 139 errPos = pos;
148 } 140 }
149 141
150 /** Read next character in comment, skipping over double '\' characters.
151 */
152 protected void scanCommentChar() {
153 reader.scanChar();
154 if (reader.ch == '\\') {
155 if (reader.peekChar() == '\\' && !reader.isUnicode()) {
156 reader.skipChar();
157 } else {
158 reader.convertUnicode();
159 }
160 }
161 }
162
163 /** Append a character to sbuf.
164 */
165 private void putChar(char ch) {
166 if (sp == sbuf.length) {
167 char[] newsbuf = new char[sbuf.length * 2];
168 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
169 sbuf = newsbuf;
170 }
171 sbuf[sp++] = ch;
172 }
173
174 /** Read next character in character or string literal and copy into sbuf. 142 /** Read next character in character or string literal and copy into sbuf.
175 */ 143 */
176 private void scanLitChar(int pos) { 144 private void scanLitChar(int pos) {
177 if (reader.ch == '\\') { 145 if (reader.ch == '\\') {
178 if (reader.peekChar() == '\\' && !reader.isUnicode()) { 146 if (reader.peekChar() == '\\' && !reader.isUnicode()) {
179 reader.skipChar(); 147 reader.skipChar();
180 putChar('\\'); 148 reader.putChar('\\', true);
181 reader.scanChar();
182 } else { 149 } else {
183 reader.scanChar(); 150 reader.scanChar();
184 switch (reader.ch) { 151 switch (reader.ch) {
185 case '0': case '1': case '2': case '3': 152 case '0': case '1': case '2': case '3':
186 case '4': case '5': case '6': case '7': 153 case '4': case '5': case '6': case '7':
193 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { 160 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
194 oct = oct * 8 + reader.digit(pos, 8); 161 oct = oct * 8 + reader.digit(pos, 8);
195 reader.scanChar(); 162 reader.scanChar();
196 } 163 }
197 } 164 }
198 putChar((char)oct); 165 reader.putChar((char)oct);
199 break; 166 break;
200 case 'b': 167 case 'b':
201 putChar('\b'); reader.scanChar(); break; 168 reader.putChar('\b', true); break;
202 case 't': 169 case 't':
203 putChar('\t'); reader.scanChar(); break; 170 reader.putChar('\t', true); break;
204 case 'n': 171 case 'n':
205 putChar('\n'); reader.scanChar(); break; 172 reader.putChar('\n', true); break;
206 case 'f': 173 case 'f':
207 putChar('\f'); reader.scanChar(); break; 174 reader.putChar('\f', true); break;
208 case 'r': 175 case 'r':
209 putChar('\r'); reader.scanChar(); break; 176 reader.putChar('\r', true); break;
210 case '\'': 177 case '\'':
211 putChar('\''); reader.scanChar(); break; 178 reader.putChar('\'', true); break;
212 case '\"': 179 case '\"':
213 putChar('\"'); reader.scanChar(); break; 180 reader.putChar('\"', true); break;
214 case '\\': 181 case '\\':
215 putChar('\\'); reader.scanChar(); break; 182 reader.putChar('\\', true); break;
216 default: 183 default:
217 lexError(reader.bp, "illegal.esc.char"); 184 lexError(reader.bp, "illegal.esc.char");
218 } 185 }
219 } 186 }
220 } else if (reader.bp != reader.buflen) { 187 } else if (reader.bp != reader.buflen) {
221 putChar(reader.ch); reader.scanChar(); 188 reader.putChar(true);
222 } 189 }
223 } 190 }
224 191
225 private void scanDigits(int pos, int digitRadix) { 192 private void scanDigits(int pos, int digitRadix) {
226 char saveCh; 193 char saveCh;
227 int savePos; 194 int savePos;
228 do { 195 do {
229 if (reader.ch != '_') { 196 if (reader.ch != '_') {
230 putChar(reader.ch); 197 reader.putChar(false);
231 } else { 198 } else {
232 if (!allowUnderscoresInLiterals) { 199 if (!allowUnderscoresInLiterals) {
233 lexError(pos, "unsupported.underscore.lit", source.name); 200 lexError(pos, "unsupported.underscore.lit", source.name);
234 allowUnderscoresInLiterals = true; 201 allowUnderscoresInLiterals = true;
235 } 202 }
244 211
245 /** Read fractional part of hexadecimal floating point number. 212 /** Read fractional part of hexadecimal floating point number.
246 */ 213 */
247 private void scanHexExponentAndSuffix(int pos) { 214 private void scanHexExponentAndSuffix(int pos) {
248 if (reader.ch == 'p' || reader.ch == 'P') { 215 if (reader.ch == 'p' || reader.ch == 'P') {
249 putChar(reader.ch); 216 reader.putChar(true);
250 reader.scanChar();
251 skipIllegalUnderscores(); 217 skipIllegalUnderscores();
252 if (reader.ch == '+' || reader.ch == '-') { 218 if (reader.ch == '+' || reader.ch == '-') {
253 putChar(reader.ch); 219 reader.putChar(true);
254 reader.scanChar();
255 } 220 }
256 skipIllegalUnderscores(); 221 skipIllegalUnderscores();
257 if ('0' <= reader.ch && reader.ch <= '9') { 222 if ('0' <= reader.ch && reader.ch <= '9') {
258 scanDigits(pos, 10); 223 scanDigits(pos, 10);
259 if (!allowHexFloats) { 224 if (!allowHexFloats) {
266 lexError(pos, "malformed.fp.lit"); 231 lexError(pos, "malformed.fp.lit");
267 } else { 232 } else {
268 lexError(pos, "malformed.fp.lit"); 233 lexError(pos, "malformed.fp.lit");
269 } 234 }
270 if (reader.ch == 'f' || reader.ch == 'F') { 235 if (reader.ch == 'f' || reader.ch == 'F') {
271 putChar(reader.ch); 236 reader.putChar(true);
272 reader.scanChar();
273 tk = TokenKind.FLOATLITERAL; 237 tk = TokenKind.FLOATLITERAL;
274 radix = 16; 238 radix = 16;
275 } else { 239 } else {
276 if (reader.ch == 'd' || reader.ch == 'D') { 240 if (reader.ch == 'd' || reader.ch == 'D') {
277 putChar(reader.ch); 241 reader.putChar(true);
278 reader.scanChar();
279 } 242 }
280 tk = TokenKind.DOUBLELITERAL; 243 tk = TokenKind.DOUBLELITERAL;
281 radix = 16; 244 radix = 16;
282 } 245 }
283 } 246 }
287 private void scanFraction(int pos) { 250 private void scanFraction(int pos) {
288 skipIllegalUnderscores(); 251 skipIllegalUnderscores();
289 if ('0' <= reader.ch && reader.ch <= '9') { 252 if ('0' <= reader.ch && reader.ch <= '9') {
290 scanDigits(pos, 10); 253 scanDigits(pos, 10);
291 } 254 }
292 int sp1 = sp; 255 int sp1 = reader.sp;
293 if (reader.ch == 'e' || reader.ch == 'E') { 256 if (reader.ch == 'e' || reader.ch == 'E') {
294 putChar(reader.ch); 257 reader.putChar(true);
295 reader.scanChar();
296 skipIllegalUnderscores(); 258 skipIllegalUnderscores();
297 if (reader.ch == '+' || reader.ch == '-') { 259 if (reader.ch == '+' || reader.ch == '-') {
298 putChar(reader.ch); 260 reader.putChar(true);
299 reader.scanChar();
300 } 261 }
301 skipIllegalUnderscores(); 262 skipIllegalUnderscores();
302 if ('0' <= reader.ch && reader.ch <= '9') { 263 if ('0' <= reader.ch && reader.ch <= '9') {
303 scanDigits(pos, 10); 264 scanDigits(pos, 10);
304 return; 265 return;
305 } 266 }
306 lexError(pos, "malformed.fp.lit"); 267 lexError(pos, "malformed.fp.lit");
307 sp = sp1; 268 reader.sp = sp1;
308 } 269 }
309 } 270 }
310 271
311 /** Read fractional part and 'd' or 'f' suffix of floating point number. 272 /** Read fractional part and 'd' or 'f' suffix of floating point number.
312 */ 273 */
313 private void scanFractionAndSuffix(int pos) { 274 private void scanFractionAndSuffix(int pos) {
314 radix = 10; 275 radix = 10;
315 scanFraction(pos); 276 scanFraction(pos);
316 if (reader.ch == 'f' || reader.ch == 'F') { 277 if (reader.ch == 'f' || reader.ch == 'F') {
317 putChar(reader.ch); 278 reader.putChar(true);
318 reader.scanChar();
319 tk = TokenKind.FLOATLITERAL; 279 tk = TokenKind.FLOATLITERAL;
320 } else { 280 } else {
321 if (reader.ch == 'd' || reader.ch == 'D') { 281 if (reader.ch == 'd' || reader.ch == 'D') {
322 putChar(reader.ch); 282 reader.putChar(true);
323 reader.scanChar();
324 } 283 }
325 tk = TokenKind.DOUBLELITERAL; 284 tk = TokenKind.DOUBLELITERAL;
326 } 285 }
327 } 286 }
328 287
329 /** Read fractional part and 'd' or 'f' suffix of floating point number. 288 /** Read fractional part and 'd' or 'f' suffix of floating point number.
330 */ 289 */
331 private void scanHexFractionAndSuffix(int pos, boolean seendigit) { 290 private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
332 radix = 16; 291 radix = 16;
333 Assert.check(reader.ch == '.'); 292 Assert.check(reader.ch == '.');
334 putChar(reader.ch); 293 reader.putChar(true);
335 reader.scanChar();
336 skipIllegalUnderscores(); 294 skipIllegalUnderscores();
337 if (reader.digit(pos, 16) >= 0) { 295 if (reader.digit(pos, 16) >= 0) {
338 seendigit = true; 296 seendigit = true;
339 scanDigits(pos, 16); 297 scanDigits(pos, 16);
340 } 298 }
367 if (radix == 16 && reader.ch == '.') { 325 if (radix == 16 && reader.ch == '.') {
368 scanHexFractionAndSuffix(pos, seendigit); 326 scanHexFractionAndSuffix(pos, seendigit);
369 } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) { 327 } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
370 scanHexExponentAndSuffix(pos); 328 scanHexExponentAndSuffix(pos);
371 } else if (digitRadix == 10 && reader.ch == '.') { 329 } else if (digitRadix == 10 && reader.ch == '.') {
372 putChar(reader.ch); 330 reader.putChar(true);
373 reader.scanChar();
374 scanFractionAndSuffix(pos); 331 scanFractionAndSuffix(pos);
375 } else if (digitRadix == 10 && 332 } else if (digitRadix == 10 &&
376 (reader.ch == 'e' || reader.ch == 'E' || 333 (reader.ch == 'e' || reader.ch == 'E' ||
377 reader.ch == 'f' || reader.ch == 'F' || 334 reader.ch == 'f' || reader.ch == 'F' ||
378 reader.ch == 'd' || reader.ch == 'D')) { 335 reader.ch == 'd' || reader.ch == 'D')) {
391 */ 348 */
392 private void scanIdent() { 349 private void scanIdent() {
393 boolean isJavaIdentifierPart; 350 boolean isJavaIdentifierPart;
394 char high; 351 char high;
395 do { 352 do {
396 if (sp == sbuf.length) putChar(reader.ch); else sbuf[sp++] = reader.ch; 353 reader.putChar(true);
397 // optimization, was: putChar(reader.ch);
398
399 reader.scanChar();
400 switch (reader.ch) { 354 switch (reader.ch) {
401 case 'A': case 'B': case 'C': case 'D': case 'E': 355 case 'A': case 'B': case 'C': case 'D': case 'E':
402 case 'F': case 'G': case 'H': case 'I': case 'J': 356 case 'F': case 'G': case 'H': case 'I': case 'J':
403 case 'K': case 'L': case 'M': case 'N': case 'O': 357 case 'K': case 'L': case 'M': case 'N': case 'O':
404 case 'P': case 'Q': case 'R': case 'S': case 'T': 358 case 'P': case 'Q': case 'R': case 'S': case 'T':
421 case '\u0018': case '\u0019': case '\u001B': 375 case '\u0018': case '\u0019': case '\u001B':
422 case '\u007F': 376 case '\u007F':
423 break; 377 break;
424 case '\u001A': // EOI is also a legal identifier part 378 case '\u001A': // EOI is also a legal identifier part
425 if (reader.bp >= reader.buflen) { 379 if (reader.bp >= reader.buflen) {
426 name = names.fromChars(sbuf, 0, sp); 380 name = reader.name();
427 tk = tokens.lookupKind(name); 381 tk = tokens.lookupKind(name);
428 return; 382 return;
429 } 383 }
430 break; 384 break;
431 default: 385 default:
433 // all ASCII range chars already handled, above 387 // all ASCII range chars already handled, above
434 isJavaIdentifierPart = false; 388 isJavaIdentifierPart = false;
435 } else { 389 } else {
436 high = reader.scanSurrogates(); 390 high = reader.scanSurrogates();
437 if (high != 0) { 391 if (high != 0) {
438 if (sp == sbuf.length) { 392 reader.putChar(high);
439 putChar(high);
440 } else {
441 sbuf[sp++] = high;
442 }
443 isJavaIdentifierPart = Character.isJavaIdentifierPart( 393 isJavaIdentifierPart = Character.isJavaIdentifierPart(
444 Character.toCodePoint(high, reader.ch)); 394 Character.toCodePoint(high, reader.ch));
445 } else { 395 } else {
446 isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch); 396 isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
447 } 397 }
448 } 398 }
449 if (!isJavaIdentifierPart) { 399 if (!isJavaIdentifierPart) {
450 name = names.fromChars(sbuf, 0, sp); 400 name = reader.name();
451 tk = tokens.lookupKind(name); 401 tk = tokens.lookupKind(name);
452 return; 402 return;
453 } 403 }
454 } 404 }
455 } while (true); 405 } while (true);
472 /** Read longest possible sequence of special characters and convert 422 /** Read longest possible sequence of special characters and convert
473 * to token. 423 * to token.
474 */ 424 */
475 private void scanOperator() { 425 private void scanOperator() {
476 while (true) { 426 while (true) {
477 putChar(reader.ch); 427 reader.putChar(false);
478 Name newname = names.fromChars(sbuf, 0, sp); 428 Name newname = reader.name();
479 TokenKind tk1 = tokens.lookupKind(newname); 429 TokenKind tk1 = tokens.lookupKind(newname);
480 if (tk1 == TokenKind.IDENTIFIER) { 430 if (tk1 == TokenKind.IDENTIFIER) {
481 sp--; 431 reader.sp--;
482 break; 432 break;
483 } 433 }
484 tk = tk1; 434 tk = tk1;
485 reader.scanChar(); 435 reader.scanChar();
486 if (!isSpecial(reader.ch)) break; 436 if (!isSpecial(reader.ch)) break;
487 } 437 }
488 } 438 }
489 439
490 /**
491 * Scan a documentation comment; determine if a deprecated tag is present.
492 * Called once the initial /, * have been skipped, positioned at the second *
493 * (which is treated as the beginning of the first line).
494 * Stops positioned at the closing '/'.
495 */
496 @SuppressWarnings("fallthrough")
497 private void scanDocComment() {
498 boolean deprecatedPrefix = false;
499
500 forEachLine:
501 while (reader.bp < reader.buflen) {
502
503 // Skip optional WhiteSpace at beginning of line
504 while (reader.bp < reader.buflen && (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF)) {
505 scanCommentChar();
506 }
507
508 // Skip optional consecutive Stars
509 while (reader.bp < reader.buflen && reader.ch == '*') {
510 scanCommentChar();
511 if (reader.ch == '/') {
512 return;
513 }
514 }
515
516 // Skip optional WhiteSpace after Stars
517 while (reader.bp < reader.buflen && (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF)) {
518 scanCommentChar();
519 }
520
521 deprecatedPrefix = false;
522 // At beginning of line in the JavaDoc sense.
523 if (reader.bp < reader.buflen && reader.ch == '@' && !deprecatedFlag) {
524 scanCommentChar();
525 if (reader.bp < reader.buflen && reader.ch == 'd') {
526 scanCommentChar();
527 if (reader.bp < reader.buflen && reader.ch == 'e') {
528 scanCommentChar();
529 if (reader.bp < reader.buflen && reader.ch == 'p') {
530 scanCommentChar();
531 if (reader.bp < reader.buflen && reader.ch == 'r') {
532 scanCommentChar();
533 if (reader.bp < reader.buflen && reader.ch == 'e') {
534 scanCommentChar();
535 if (reader.bp < reader.buflen && reader.ch == 'c') {
536 scanCommentChar();
537 if (reader.bp < reader.buflen && reader.ch == 'a') {
538 scanCommentChar();
539 if (reader.bp < reader.buflen && reader.ch == 't') {
540 scanCommentChar();
541 if (reader.bp < reader.buflen && reader.ch == 'e') {
542 scanCommentChar();
543 if (reader.bp < reader.buflen && reader.ch == 'd') {
544 deprecatedPrefix = true;
545 scanCommentChar();
546 }}}}}}}}}}}
547 if (deprecatedPrefix && reader.bp < reader.buflen) {
548 if (Character.isWhitespace(reader.ch)) {
549 deprecatedFlag = true;
550 } else if (reader.ch == '*') {
551 scanCommentChar();
552 if (reader.ch == '/') {
553 deprecatedFlag = true;
554 return;
555 }
556 }
557 }
558
559 // Skip rest of line
560 while (reader.bp < reader.buflen) {
561 switch (reader.ch) {
562 case '*':
563 scanCommentChar();
564 if (reader.ch == '/') {
565 return;
566 }
567 break;
568 case CR: // (Spec 3.4)
569 scanCommentChar();
570 if (reader.ch != LF) {
571 continue forEachLine;
572 }
573 /* fall through to LF case */
574 case LF: // (Spec 3.4)
575 scanCommentChar();
576 continue forEachLine;
577 default:
578 scanCommentChar();
579 }
580 } // rest of line
581 } // forEachLine
582 return;
583 }
584
585 /** Read token. 440 /** Read token.
586 */ 441 */
587 public Token readToken() { 442 public Token readToken() {
588 443
589 sp = 0; 444 reader.sp = 0;
590 name = null; 445 name = null;
591 deprecatedFlag = false;
592 radix = 0; 446 radix = 0;
447
593 int pos = 0; 448 int pos = 0;
594 int endPos = 0; 449 int endPos = 0;
450 List<Comment> comments = null;
595 451
596 try { 452 try {
597 loop: while (true) { 453 loop: while (true) {
598 pos = reader.bp; 454 pos = reader.bp;
599 switch (reader.ch) { 455 switch (reader.ch) {
654 lexError(pos, "invalid.binary.number"); 510 lexError(pos, "invalid.binary.number");
655 } else { 511 } else {
656 scanNumber(pos, 2); 512 scanNumber(pos, 2);
657 } 513 }
658 } else { 514 } else {
659 putChar('0'); 515 reader.putChar('0');
660 if (reader.ch == '_') { 516 if (reader.ch == '_') {
661 int savePos = reader.bp; 517 int savePos = reader.bp;
662 do { 518 do {
663 reader.scanChar(); 519 reader.scanChar();
664 } while (reader.ch == '_'); 520 } while (reader.ch == '_');
674 scanNumber(pos, 10); 530 scanNumber(pos, 10);
675 break loop; 531 break loop;
676 case '.': 532 case '.':
677 reader.scanChar(); 533 reader.scanChar();
678 if ('0' <= reader.ch && reader.ch <= '9') { 534 if ('0' <= reader.ch && reader.ch <= '9') {
679 putChar('.'); 535 reader.putChar('.');
680 scanFractionAndSuffix(pos); 536 scanFractionAndSuffix(pos);
681 } else if (reader.ch == '.') { 537 } else if (reader.ch == '.') {
682 putChar('.'); putChar('.'); 538 reader.putChar('.'); reader.putChar('.', true);
683 reader.scanChar();
684 if (reader.ch == '.') { 539 if (reader.ch == '.') {
685 reader.scanChar(); 540 reader.scanChar();
686 putChar('.'); 541 reader.putChar('.');
687 tk = TokenKind.ELLIPSIS; 542 tk = TokenKind.ELLIPSIS;
688 } else { 543 } else {
689 lexError(pos, "malformed.fp.lit"); 544 lexError(pos, "malformed.fp.lit");
690 } 545 }
691 } else { 546 } else {
710 reader.scanChar(); tk = TokenKind.RBRACE; break loop; 565 reader.scanChar(); tk = TokenKind.RBRACE; break loop;
711 case '/': 566 case '/':
712 reader.scanChar(); 567 reader.scanChar();
713 if (reader.ch == '/') { 568 if (reader.ch == '/') {
714 do { 569 do {
715 scanCommentChar(); 570 reader.scanCommentChar();
716 } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen); 571 } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
717 if (reader.bp < reader.buflen) { 572 if (reader.bp < reader.buflen) {
718 processComment(pos, reader.bp, CommentStyle.LINE); 573 comments = addDocReader(comments, processComment(pos, reader.bp, CommentStyle.LINE));
719 } 574 }
720 break; 575 break;
721 } else if (reader.ch == '*') { 576 } else if (reader.ch == '*') {
577 boolean isEmpty = false;
722 reader.scanChar(); 578 reader.scanChar();
723 CommentStyle style; 579 CommentStyle style;
724 if (reader.ch == '*') { 580 if (reader.ch == '*') {
725 style = CommentStyle.JAVADOC; 581 style = CommentStyle.JAVADOC;
726 scanDocComment(); 582 reader.scanCommentChar();
583 if (reader.ch == '/') {
584 isEmpty = true;
585 }
727 } else { 586 } else {
728 style = CommentStyle.BLOCK; 587 style = CommentStyle.BLOCK;
729 while (reader.bp < reader.buflen) { 588 }
730 if (reader.ch == '*') { 589 while (!isEmpty && reader.bp < reader.buflen) {
731 reader.scanChar(); 590 if (reader.ch == '*') {
732 if (reader.ch == '/') break; 591 reader.scanChar();
733 } else { 592 if (reader.ch == '/') break;
734 scanCommentChar(); 593 } else {
735 } 594 reader.scanCommentChar();
736 } 595 }
737 } 596 }
738 if (reader.ch == '/') { 597 if (reader.ch == '/') {
739 reader.scanChar(); 598 reader.scanChar();
740 processComment(pos, reader.bp, style); 599 comments = addDocReader(comments, processComment(pos, reader.bp, style));
741 break; 600 break;
742 } else { 601 } else {
743 lexError(pos, "unclosed.comment"); 602 lexError(pos, "unclosed.comment");
744 break loop; 603 break loop;
745 } 604 }
787 // all ASCII range chars already handled, above 646 // all ASCII range chars already handled, above
788 isJavaIdentifierStart = false; 647 isJavaIdentifierStart = false;
789 } else { 648 } else {
790 char high = reader.scanSurrogates(); 649 char high = reader.scanSurrogates();
791 if (high != 0) { 650 if (high != 0) {
792 if (sp == sbuf.length) { 651 reader.putChar(high);
793 putChar(high);
794 } else {
795 sbuf[sp++] = high;
796 }
797 652
798 isJavaIdentifierStart = Character.isJavaIdentifierStart( 653 isJavaIdentifierStart = Character.isJavaIdentifierStart(
799 Character.toCodePoint(high, reader.ch)); 654 Character.toCodePoint(high, reader.ch));
800 } else { 655 } else {
801 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch); 656 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
814 break loop; 669 break loop;
815 } 670 }
816 } 671 }
817 endPos = reader.bp; 672 endPos = reader.bp;
818 switch (tk.tag) { 673 switch (tk.tag) {
819 case DEFAULT: return new Token(tk, pos, endPos, deprecatedFlag); 674 case DEFAULT: return new Token(tk, pos, endPos, comments);
820 case NAMED: return new NamedToken(tk, pos, endPos, name, deprecatedFlag); 675 case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
821 case STRING: return new StringToken(tk, pos, endPos, new String(sbuf, 0, sp), deprecatedFlag); 676 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
822 case NUMERIC: return new NumericToken(tk, pos, endPos, new String(sbuf, 0, sp), radix, deprecatedFlag); 677 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
823 default: throw new AssertionError(); 678 default: throw new AssertionError();
824 } 679 }
825 } 680 }
826 finally { 681 finally {
827 if (scannerDebug) { 682 if (scannerDebug) {
830 new String(reader.getRawCharacters(pos, endPos)) 685 new String(reader.getRawCharacters(pos, endPos))
831 + "|"); 686 + "|");
832 } 687 }
833 } 688 }
834 } 689 }
690 //where
691 List<Comment> addDocReader(List<Comment> docReaders, Comment docReader) {
692 return docReaders == null ?
693 List.of(docReader) :
694 docReaders.prepend(docReader);
695 }
835 696
836 /** Return the position where a lexical error occurred; 697 /** Return the position where a lexical error occurred;
837 */ 698 */
838 public int errPos() { 699 public int errPos() {
839 return errPos; 700 return errPos;
841 702
842 /** Set the position where a lexical error occurred; 703 /** Set the position where a lexical error occurred;
843 */ 704 */
844 public void errPos(int pos) { 705 public void errPos(int pos) {
845 errPos = pos; 706 errPos = pos;
846 }
847
848 public enum CommentStyle {
849 LINE,
850 BLOCK,
851 JAVADOC,
852 } 707 }
853 708
854 /** 709 /**
855 * Called when a complete comment has been scanned. pos and endPos 710 * Called when a complete comment has been scanned. pos and endPos
856 * will mark the comment boundary. 711 * will mark the comment boundary.
857 */ 712 */
858 protected void processComment(int pos, int endPos, CommentStyle style) { 713 protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
859 if (scannerDebug) 714 if (scannerDebug)
860 System.out.println("processComment(" + pos 715 System.out.println("processComment(" + pos
861 + "," + endPos + "," + style + ")=|" 716 + "," + endPos + "," + style + ")=|"
862 + new String(reader.getRawCharacters(pos, endPos)) 717 + new String(reader.getRawCharacters(pos, endPos))
863 + "|"); 718 + "|");
719 char[] buf = reader.getRawCharacters(pos, endPos);
720 return new BasicComment<UnicodeReader>(new UnicodeReader(fac, buf, buf.length), style);
864 } 721 }
865 722
866 /** 723 /**
867 * Called when a complete whitespace run has been scanned. pos and endPos 724 * Called when a complete whitespace run has been scanned. pos and endPos
868 * will mark the whitespace boundary. 725 * will mark the whitespace boundary.
891 * 748 *
892 * @return a LineMap */ 749 * @return a LineMap */
893 public Position.LineMap getLineMap() { 750 public Position.LineMap getLineMap() {
894 return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false); 751 return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
895 } 752 }
753
754
755 /**
756 * Scan a documentation comment; determine if a deprecated tag is present.
757 * Called once the initial /, * have been skipped, positioned at the second *
758 * (which is treated as the beginning of the first line).
759 * Stops positioned at the closing '/'.
760 */
761 protected class BasicComment<U extends UnicodeReader> implements Comment {
762
763 CommentStyle cs;
764 U comment_reader;
765
766 protected boolean deprecatedFlag = false;
767 protected boolean scanned = false;
768
769 protected BasicComment(U comment_reader, CommentStyle cs) {
770 this.comment_reader = comment_reader;
771 this.cs = cs;
772 }
773
774 public String getText() {
775 return null;
776 }
777
778 public CommentStyle getStyle() {
779 return cs;
780 }
781
782 public boolean isDeprecated() {
783 if (!scanned && cs == CommentStyle.JAVADOC) {
784 scanDocComment();
785 }
786 return deprecatedFlag;
787 }
788
789 @SuppressWarnings("fallthrough")
790 protected void scanDocComment() {
791 try {
792 boolean deprecatedPrefix = false;
793
794 comment_reader.bp += 3; // '/**'
795 comment_reader.ch = comment_reader.buf[comment_reader.bp];
796
797 forEachLine:
798 while (comment_reader.bp < comment_reader.buflen) {
799
800 // Skip optional WhiteSpace at beginning of line
801 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
802 comment_reader.scanCommentChar();
803 }
804
805 // Skip optional consecutive Stars
806 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
807 comment_reader.scanCommentChar();
808 if (comment_reader.ch == '/') {
809 return;
810 }
811 }
812
813 // Skip optional WhiteSpace after Stars
814 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
815 comment_reader.scanCommentChar();
816 }
817
818 deprecatedPrefix = false;
819 // At beginning of line in the JavaDoc sense.
820 if (!deprecatedFlag) {
821 String deprecated = "@deprecated";
822 int i = 0;
823 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
824 comment_reader.scanCommentChar();
825 i++;
826 if (i == deprecated.length()) {
827 deprecatedPrefix = true;
828 break;
829 }
830 }
831 }
832
833 if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
834 if (Character.isWhitespace(comment_reader.ch)) {
835 deprecatedFlag = true;
836 } else if (comment_reader.ch == '*') {
837 comment_reader.scanCommentChar();
838 if (comment_reader.ch == '/') {
839 deprecatedFlag = true;
840 return;
841 }
842 }
843 }
844
845 // Skip rest of line
846 while (comment_reader.bp < comment_reader.buflen) {
847 switch (comment_reader.ch) {
848 case '*':
849 comment_reader.scanCommentChar();
850 if (comment_reader.ch == '/') {
851 return;
852 }
853 break;
854 case CR: // (Spec 3.4)
855 comment_reader.scanCommentChar();
856 if (comment_reader.ch != LF) {
857 continue forEachLine;
858 }
859 /* fall through to LF case */
860 case LF: // (Spec 3.4)
861 comment_reader.scanCommentChar();
862 continue forEachLine;
863 default:
864 comment_reader.scanCommentChar();
865 }
866 } // rest of line
867 } // forEachLine
868 return;
869 } finally {
870 scanned = true;
871 }
872 }
873 }
896 } 874 }

mercurial