src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java

Tue, 03 Jan 2012 11:37:00 -0800

author
jjg
date
Tue, 03 Jan 2012 11:37:00 -0800
changeset 1171
7a836147b266
parent 1145
3343b22e2761
child 1280
5c0b3faeb0b0
permissions
-rw-r--r--

4881269: improve diagnostic for ill-formed tokens
Reviewed-by: mcimadamore

     1 /*
     2  * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import com.sun.tools.javac.code.Source;
    29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
    30 import com.sun.tools.javac.util.*;
    32 import java.nio.CharBuffer;
    34 import static com.sun.tools.javac.parser.Tokens.*;
    35 import static com.sun.tools.javac.util.LayoutCharacters.*;
    37 /** The lexical analyzer maps an input stream consisting of
    38  *  ASCII characters and Unicode escapes into a token sequence.
    39  *
    40  *  <p><b>This is NOT part of any supported API.
    41  *  If you write code that depends on this, you do so at your own risk.
    42  *  This code and its internal interfaces are subject to change or
    43  *  deletion without notice.</b>
    44  */
    45 public class JavaTokenizer {
    47     private static boolean scannerDebug = false;
    49     /** Allow hex floating-point literals.
    50      */
    51     private boolean allowHexFloats;
    53     /** Allow binary literals.
    54      */
    55     private boolean allowBinaryLiterals;
    57     /** Allow underscores in literals.
    58      */
    59     private boolean allowUnderscoresInLiterals;
    61     /** The source language setting.
    62      */
    63     private Source source;
    65     /** The log to be used for error reporting.
    66      */
    67     private final Log log;
    69     /** The token factory. */
    70     private final Tokens tokens;
    72     /** The token kind, set by nextToken().
    73      */
    74     protected TokenKind tk;
    76     /** The token's radix, set by nextToken().
    77      */
    78     protected int radix;
    80     /** The token's name, set by nextToken().
    81      */
    82     protected Name name;
    84     /** The position where a lexical error occurred;
    85      */
    86     protected int errPos = Position.NOPOS;
    88     /** The Unicode reader (low-level stream reader).
    89      */
    90     protected UnicodeReader reader;
    92     protected ScannerFactory fac;
    94     private static final boolean hexFloatsWork = hexFloatsWork();
    95     private static boolean hexFloatsWork() {
    96         try {
    97             Float.valueOf("0x1.0p1");
    98             return true;
    99         } catch (NumberFormatException ex) {
   100             return false;
   101         }
   102     }
   /**
    * Create a tokenizer that reads its input from a character buffer.
    * The buffer is handed to a newly created {@code UnicodeReader}.
    *
    * @param fac the factory which created this tokenizer
    * @param buf the buffer holding the input
    */
   protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
       this(fac, new UnicodeReader(fac, buf));
   }

   /**
    * Create a tokenizer from the input array.  This constructor might
    * modify the array.  To avoid copying the input array, ensure
    * that {@code inputLength < buf.length} or
    * {@code buf[buf.length - 1]} is a white space character.
    *
    * @param fac the factory which created this tokenizer
    * @param buf the input, might be modified
    * @param inputLength the size of the input.
    * Must be positive and less than or equal to buf.length.
    */
   protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
       this(fac, new UnicodeReader(fac, buf, inputLength));
   }
   /**
    * Create a tokenizer on top of an existing reader.  The log, the token
    * table and the source level are taken from the factory.
    *
    * @param fac the factory which created this tokenizer
    * @param reader the low-level reader that supplies the characters
    */
   protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
       this.reader = reader;
       this.fac = fac;
       this.tokens = fac.tokens;
       this.log = fac.log;
       this.source = fac.source;
       // the language-feature switches are all derived from the source level
       this.allowBinaryLiterals = this.source.allowBinaryLiterals();
       this.allowHexFloats = this.source.allowHexFloats();
       this.allowUnderscoresInLiterals = this.source.allowUnderscoresInLiterals();
   }
   134     /** Report an error at the given position using the provided arguments.
   135      */
   136     protected void lexError(int pos, String key, Object... args) {
   137         log.error(pos, key, args);
   138         tk = TokenKind.ERROR;
   139         errPos = pos;
   140     }
   142     /** Read next character in character or string literal and copy into sbuf.
   143      */
   144     private void scanLitChar(int pos) {
   145         if (reader.ch == '\\') {
   146             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
   147                 reader.skipChar();
   148                 reader.putChar('\\', true);
   149             } else {
   150                 reader.scanChar();
   151                 switch (reader.ch) {
   152                 case '0': case '1': case '2': case '3':
   153                 case '4': case '5': case '6': case '7':
   154                     char leadch = reader.ch;
   155                     int oct = reader.digit(pos, 8);
   156                     reader.scanChar();
   157                     if ('0' <= reader.ch && reader.ch <= '7') {
   158                         oct = oct * 8 + reader.digit(pos, 8);
   159                         reader.scanChar();
   160                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
   161                             oct = oct * 8 + reader.digit(pos, 8);
   162                             reader.scanChar();
   163                         }
   164                     }
   165                     reader.putChar((char)oct);
   166                     break;
   167                 case 'b':
   168                     reader.putChar('\b', true); break;
   169                 case 't':
   170                     reader.putChar('\t', true); break;
   171                 case 'n':
   172                     reader.putChar('\n', true); break;
   173                 case 'f':
   174                     reader.putChar('\f', true); break;
   175                 case 'r':
   176                     reader.putChar('\r', true); break;
   177                 case '\'':
   178                     reader.putChar('\'', true); break;
   179                 case '\"':
   180                     reader.putChar('\"', true); break;
   181                 case '\\':
   182                     reader.putChar('\\', true); break;
   183                 default:
   184                     lexError(reader.bp, "illegal.esc.char");
   185                 }
   186             }
   187         } else if (reader.bp != reader.buflen) {
   188             reader.putChar(true);
   189         }
   190     }
   192     private void scanDigits(int pos, int digitRadix) {
   193         char saveCh;
   194         int savePos;
   195         do {
   196             if (reader.ch != '_') {
   197                 reader.putChar(false);
   198             } else {
   199                 if (!allowUnderscoresInLiterals) {
   200                     lexError(pos, "unsupported.underscore.lit", source.name);
   201                     allowUnderscoresInLiterals = true;
   202                 }
   203             }
   204             saveCh = reader.ch;
   205             savePos = reader.bp;
   206             reader.scanChar();
   207         } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
   208         if (saveCh == '_')
   209             lexError(savePos, "illegal.underscore");
   210     }
   212     /** Read fractional part of hexadecimal floating point number.
   213      */
   214     private void scanHexExponentAndSuffix(int pos) {
   215         if (reader.ch == 'p' || reader.ch == 'P') {
   216             reader.putChar(true);
   217             skipIllegalUnderscores();
   218             if (reader.ch == '+' || reader.ch == '-') {
   219                 reader.putChar(true);
   220             }
   221             skipIllegalUnderscores();
   222             if ('0' <= reader.ch && reader.ch <= '9') {
   223                 scanDigits(pos, 10);
   224                 if (!allowHexFloats) {
   225                     lexError(pos, "unsupported.fp.lit", source.name);
   226                     allowHexFloats = true;
   227                 }
   228                 else if (!hexFloatsWork)
   229                     lexError(pos, "unsupported.cross.fp.lit");
   230             } else
   231                 lexError(pos, "malformed.fp.lit");
   232         } else {
   233             lexError(pos, "malformed.fp.lit");
   234         }
   235         if (reader.ch == 'f' || reader.ch == 'F') {
   236             reader.putChar(true);
   237             tk = TokenKind.FLOATLITERAL;
   238             radix = 16;
   239         } else {
   240             if (reader.ch == 'd' || reader.ch == 'D') {
   241                 reader.putChar(true);
   242             }
   243             tk = TokenKind.DOUBLELITERAL;
   244             radix = 16;
   245         }
   246     }
   248     /** Read fractional part of floating point number.
   249      */
   250     private void scanFraction(int pos) {
   251         skipIllegalUnderscores();
   252         if ('0' <= reader.ch && reader.ch <= '9') {
   253             scanDigits(pos, 10);
   254         }
   255         int sp1 = reader.sp;
   256         if (reader.ch == 'e' || reader.ch == 'E') {
   257             reader.putChar(true);
   258             skipIllegalUnderscores();
   259             if (reader.ch == '+' || reader.ch == '-') {
   260                 reader.putChar(true);
   261             }
   262             skipIllegalUnderscores();
   263             if ('0' <= reader.ch && reader.ch <= '9') {
   264                 scanDigits(pos, 10);
   265                 return;
   266             }
   267             lexError(pos, "malformed.fp.lit");
   268             reader.sp = sp1;
   269         }
   270     }
   272     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   273      */
   274     private void scanFractionAndSuffix(int pos) {
   275         radix = 10;
   276         scanFraction(pos);
   277         if (reader.ch == 'f' || reader.ch == 'F') {
   278             reader.putChar(true);
   279             tk = TokenKind.FLOATLITERAL;
   280         } else {
   281             if (reader.ch == 'd' || reader.ch == 'D') {
   282                 reader.putChar(true);
   283             }
   284             tk = TokenKind.DOUBLELITERAL;
   285         }
   286     }
   288     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   289      */
   290     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
   291         radix = 16;
   292         Assert.check(reader.ch == '.');
   293         reader.putChar(true);
   294         skipIllegalUnderscores();
   295         if (reader.digit(pos, 16) >= 0) {
   296             seendigit = true;
   297             scanDigits(pos, 16);
   298         }
   299         if (!seendigit)
   300             lexError(pos, "invalid.hex.number");
   301         else
   302             scanHexExponentAndSuffix(pos);
   303     }
   305     private void skipIllegalUnderscores() {
   306         if (reader.ch == '_') {
   307             lexError(reader.bp, "illegal.underscore");
   308             while (reader.ch == '_')
   309                 reader.scanChar();
   310         }
   311     }
   313     /** Read a number.
   314      *  @param radix  The radix of the number; one of 2, j8, 10, 16.
   315      */
   316     private void scanNumber(int pos, int radix) {
   317         // for octal, allow base-10 digit in case it's a float literal
   318         this.radix = radix;
   319         int digitRadix = (radix == 8 ? 10 : radix);
   320         boolean seendigit = false;
   321         if (reader.digit(pos, digitRadix) >= 0) {
   322             seendigit = true;
   323             scanDigits(pos, digitRadix);
   324         }
   325         if (radix == 16 && reader.ch == '.') {
   326             scanHexFractionAndSuffix(pos, seendigit);
   327         } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
   328             scanHexExponentAndSuffix(pos);
   329         } else if (digitRadix == 10 && reader.ch == '.') {
   330             reader.putChar(true);
   331             scanFractionAndSuffix(pos);
   332         } else if (digitRadix == 10 &&
   333                    (reader.ch == 'e' || reader.ch == 'E' ||
   334                     reader.ch == 'f' || reader.ch == 'F' ||
   335                     reader.ch == 'd' || reader.ch == 'D')) {
   336             scanFractionAndSuffix(pos);
   337         } else {
   338             if (reader.ch == 'l' || reader.ch == 'L') {
   339                 reader.scanChar();
   340                 tk = TokenKind.LONGLITERAL;
   341             } else {
   342                 tk = TokenKind.INTLITERAL;
   343             }
   344         }
   345     }
   347     /** Read an identifier.
   348      */
   349     private void scanIdent() {
   350         boolean isJavaIdentifierPart;
   351         char high;
   352         do {
   353             reader.putChar(true);
   354             switch (reader.ch) {
   355             case 'A': case 'B': case 'C': case 'D': case 'E':
   356             case 'F': case 'G': case 'H': case 'I': case 'J':
   357             case 'K': case 'L': case 'M': case 'N': case 'O':
   358             case 'P': case 'Q': case 'R': case 'S': case 'T':
   359             case 'U': case 'V': case 'W': case 'X': case 'Y':
   360             case 'Z':
   361             case 'a': case 'b': case 'c': case 'd': case 'e':
   362             case 'f': case 'g': case 'h': case 'i': case 'j':
   363             case 'k': case 'l': case 'm': case 'n': case 'o':
   364             case 'p': case 'q': case 'r': case 's': case 't':
   365             case 'u': case 'v': case 'w': case 'x': case 'y':
   366             case 'z':
   367             case '$': case '_':
   368             case '0': case '1': case '2': case '3': case '4':
   369             case '5': case '6': case '7': case '8': case '9':
   370             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
   371             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
   372             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
   373             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
   374             case '\u0015': case '\u0016': case '\u0017':
   375             case '\u0018': case '\u0019': case '\u001B':
   376             case '\u007F':
   377                 break;
   378             case '\u001A': // EOI is also a legal identifier part
   379                 if (reader.bp >= reader.buflen) {
   380                     name = reader.name();
   381                     tk = tokens.lookupKind(name);
   382                     return;
   383                 }
   384                 break;
   385             default:
   386                 if (reader.ch < '\u0080') {
   387                     // all ASCII range chars already handled, above
   388                     isJavaIdentifierPart = false;
   389                 } else {
   390                     high = reader.scanSurrogates();
   391                     if (high != 0) {
   392                         reader.putChar(high);
   393                         isJavaIdentifierPart = Character.isJavaIdentifierPart(
   394                             Character.toCodePoint(high, reader.ch));
   395                     } else {
   396                         isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
   397                     }
   398                 }
   399                 if (!isJavaIdentifierPart) {
   400                     name = reader.name();
   401                     tk = tokens.lookupKind(name);
   402                     return;
   403                 }
   404             }
   405         } while (true);
   406     }
   408     /** Return true if reader.ch can be part of an operator.
   409      */
   410     private boolean isSpecial(char ch) {
   411         switch (ch) {
   412         case '!': case '%': case '&': case '*': case '?':
   413         case '+': case '-': case ':': case '<': case '=':
   414         case '>': case '^': case '|': case '~':
   415         case '@':
   416             return true;
   417         default:
   418             return false;
   419         }
   420     }
   422     /** Read longest possible sequence of special characters and convert
   423      *  to token.
   424      */
   425     private void scanOperator() {
   426         while (true) {
   427             reader.putChar(false);
   428             Name newname = reader.name();
   429             TokenKind tk1 = tokens.lookupKind(newname);
   430             if (tk1 == TokenKind.IDENTIFIER) {
   431                 reader.sp--;
   432                 break;
   433             }
   434             tk = tk1;
   435             reader.scanChar();
   436             if (!isSpecial(reader.ch)) break;
   437         }
   438     }
   440     /** Read token.
   441      */
   442     public Token readToken() {
   444         reader.sp = 0;
   445         name = null;
   446         radix = 0;
   448         int pos = 0;
   449         int endPos = 0;
   450         List<Comment> comments = null;
   452         try {
   453             loop: while (true) {
   454                 pos = reader.bp;
   455                 switch (reader.ch) {
   456                 case ' ': // (Spec 3.6)
   457                 case '\t': // (Spec 3.6)
   458                 case FF: // (Spec 3.6)
   459                     do {
   460                         reader.scanChar();
   461                     } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
   462                     processWhiteSpace(pos, reader.bp);
   463                     break;
   464                 case LF: // (Spec 3.4)
   465                     reader.scanChar();
   466                     processLineTerminator(pos, reader.bp);
   467                     break;
   468                 case CR: // (Spec 3.4)
   469                     reader.scanChar();
   470                     if (reader.ch == LF) {
   471                         reader.scanChar();
   472                     }
   473                     processLineTerminator(pos, reader.bp);
   474                     break;
   475                 case 'A': case 'B': case 'C': case 'D': case 'E':
   476                 case 'F': case 'G': case 'H': case 'I': case 'J':
   477                 case 'K': case 'L': case 'M': case 'N': case 'O':
   478                 case 'P': case 'Q': case 'R': case 'S': case 'T':
   479                 case 'U': case 'V': case 'W': case 'X': case 'Y':
   480                 case 'Z':
   481                 case 'a': case 'b': case 'c': case 'd': case 'e':
   482                 case 'f': case 'g': case 'h': case 'i': case 'j':
   483                 case 'k': case 'l': case 'm': case 'n': case 'o':
   484                 case 'p': case 'q': case 'r': case 's': case 't':
   485                 case 'u': case 'v': case 'w': case 'x': case 'y':
   486                 case 'z':
   487                 case '$': case '_':
   488                     scanIdent();
   489                     break loop;
   490                 case '0':
   491                     reader.scanChar();
   492                     if (reader.ch == 'x' || reader.ch == 'X') {
   493                         reader.scanChar();
   494                         skipIllegalUnderscores();
   495                         if (reader.ch == '.') {
   496                             scanHexFractionAndSuffix(pos, false);
   497                         } else if (reader.digit(pos, 16) < 0) {
   498                             lexError(pos, "invalid.hex.number");
   499                         } else {
   500                             scanNumber(pos, 16);
   501                         }
   502                     } else if (reader.ch == 'b' || reader.ch == 'B') {
   503                         if (!allowBinaryLiterals) {
   504                             lexError(pos, "unsupported.binary.lit", source.name);
   505                             allowBinaryLiterals = true;
   506                         }
   507                         reader.scanChar();
   508                         skipIllegalUnderscores();
   509                         if (reader.digit(pos, 2) < 0) {
   510                             lexError(pos, "invalid.binary.number");
   511                         } else {
   512                             scanNumber(pos, 2);
   513                         }
   514                     } else {
   515                         reader.putChar('0');
   516                         if (reader.ch == '_') {
   517                             int savePos = reader.bp;
   518                             do {
   519                                 reader.scanChar();
   520                             } while (reader.ch == '_');
   521                             if (reader.digit(pos, 10) < 0) {
   522                                 lexError(savePos, "illegal.underscore");
   523                             }
   524                         }
   525                         scanNumber(pos, 8);
   526                     }
   527                     break loop;
   528                 case '1': case '2': case '3': case '4':
   529                 case '5': case '6': case '7': case '8': case '9':
   530                     scanNumber(pos, 10);
   531                     break loop;
   532                 case '.':
   533                     reader.scanChar();
   534                     if ('0' <= reader.ch && reader.ch <= '9') {
   535                         reader.putChar('.');
   536                         scanFractionAndSuffix(pos);
   537                     } else if (reader.ch == '.') {
   538                         int savePos = reader.bp;
   539                         reader.putChar('.'); reader.putChar('.', true);
   540                         if (reader.ch == '.') {
   541                             reader.scanChar();
   542                             reader.putChar('.');
   543                             tk = TokenKind.ELLIPSIS;
   544                         } else {
   545                             lexError(savePos, "illegal.dot");
   546                         }
   547                     } else {
   548                         tk = TokenKind.DOT;
   549                     }
   550                     break loop;
   551                 case ',':
   552                     reader.scanChar(); tk = TokenKind.COMMA; break loop;
   553                 case ';':
   554                     reader.scanChar(); tk = TokenKind.SEMI; break loop;
   555                 case '(':
   556                     reader.scanChar(); tk = TokenKind.LPAREN; break loop;
   557                 case ')':
   558                     reader.scanChar(); tk = TokenKind.RPAREN; break loop;
   559                 case '[':
   560                     reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
   561                 case ']':
   562                     reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
   563                 case '{':
   564                     reader.scanChar(); tk = TokenKind.LBRACE; break loop;
   565                 case '}':
   566                     reader.scanChar(); tk = TokenKind.RBRACE; break loop;
   567                 case '/':
   568                     reader.scanChar();
   569                     if (reader.ch == '/') {
   570                         do {
   571                             reader.scanCommentChar();
   572                         } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
   573                         if (reader.bp < reader.buflen) {
   574                             comments = addDocReader(comments, processComment(pos, reader.bp, CommentStyle.LINE));
   575                         }
   576                         break;
   577                     } else if (reader.ch == '*') {
   578                         boolean isEmpty = false;
   579                         reader.scanChar();
   580                         CommentStyle style;
   581                         if (reader.ch == '*') {
   582                             style = CommentStyle.JAVADOC;
   583                             reader.scanCommentChar();
   584                             if (reader.ch == '/') {
   585                                 isEmpty = true;
   586                             }
   587                         } else {
   588                             style = CommentStyle.BLOCK;
   589                         }
   590                         while (!isEmpty && reader.bp < reader.buflen) {
   591                             if (reader.ch == '*') {
   592                                 reader.scanChar();
   593                                 if (reader.ch == '/') break;
   594                             } else {
   595                                 reader.scanCommentChar();
   596                             }
   597                         }
   598                         if (reader.ch == '/') {
   599                             reader.scanChar();
   600                             comments = addDocReader(comments, processComment(pos, reader.bp, style));
   601                             break;
   602                         } else {
   603                             lexError(pos, "unclosed.comment");
   604                             break loop;
   605                         }
   606                     } else if (reader.ch == '=') {
   607                         tk = TokenKind.SLASHEQ;
   608                         reader.scanChar();
   609                     } else {
   610                         tk = TokenKind.SLASH;
   611                     }
   612                     break loop;
   613                 case '\'':
   614                     reader.scanChar();
   615                     if (reader.ch == '\'') {
   616                         lexError(pos, "empty.char.lit");
   617                     } else {
   618                         if (reader.ch == CR || reader.ch == LF)
   619                             lexError(pos, "illegal.line.end.in.char.lit");
   620                         scanLitChar(pos);
   621                         char ch2 = reader.ch;
   622                         if (reader.ch == '\'') {
   623                             reader.scanChar();
   624                             tk = TokenKind.CHARLITERAL;
   625                         } else {
   626                             lexError(pos, "unclosed.char.lit");
   627                         }
   628                     }
   629                     break loop;
   630                 case '\"':
   631                     reader.scanChar();
   632                     while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
   633                         scanLitChar(pos);
   634                     if (reader.ch == '\"') {
   635                         tk = TokenKind.STRINGLITERAL;
   636                         reader.scanChar();
   637                     } else {
   638                         lexError(pos, "unclosed.str.lit");
   639                     }
   640                     break loop;
   641                 case '#':
   642                     reader.scanChar();
   643                     tk = TokenKind.HASH;
   644                     break loop;
   645                 default:
   646                     if (isSpecial(reader.ch)) {
   647                         scanOperator();
   648                     } else {
   649                         boolean isJavaIdentifierStart;
   650                         if (reader.ch < '\u0080') {
   651                             // all ASCII range chars already handled, above
   652                             isJavaIdentifierStart = false;
   653                         } else {
   654                             char high = reader.scanSurrogates();
   655                             if (high != 0) {
   656                                 reader.putChar(high);
   658                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(
   659                                     Character.toCodePoint(high, reader.ch));
   660                             } else {
   661                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
   662                             }
   663                         }
   664                         if (isJavaIdentifierStart) {
   665                             scanIdent();
   666                         } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
   667                             tk = TokenKind.EOF;
   668                             pos = reader.buflen;
   669                         } else {
   670                             lexError(pos, "illegal.char", String.valueOf((int)reader.ch));
   671                             reader.scanChar();
   672                         }
   673                     }
   674                     break loop;
   675                 }
   676             }
   677             endPos = reader.bp;
   678             switch (tk.tag) {
   679                 case DEFAULT: return new Token(tk, pos, endPos, comments);
   680                 case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
   681                 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
   682                 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
   683                 default: throw new AssertionError();
   684             }
   685         }
   686         finally {
   687             if (scannerDebug) {
   688                     System.out.println("nextToken(" + pos
   689                                        + "," + endPos + ")=|" +
   690                                        new String(reader.getRawCharacters(pos, endPos))
   691                                        + "|");
   692             }
   693         }
   694     }
   695     //where
   696         List<Comment> addDocReader(List<Comment> docReaders, Comment docReader) {
   697             if (docReaders == null) {
   698                 return List.of(docReader);  // no list yet: start one holding only docReader
   699             }
   700             return docReaders.prepend(docReader);  // otherwise prepend docReader to the existing list
   701         }
   702     /** Return the position where a lexical error occurred;
   703      */
   704     public int errPos() {
   705         return errPos;
   706     }
   708     /** Set the position where a lexical error occurred;
   709      */
   710     public void errPos(int pos) {
   711         errPos = pos;
   712     }
   714     /**
   715      * Hook invoked once a whole comment has been scanned; pos and endPos delimit it.
   716      * Wraps the raw characters of that range in a BasicComment of the given style,
   717      * echoing them to System.out first when scannerDebug is set.
   718      */
   719     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
   720         if (scannerDebug) {
   721             String raw = new String(reader.getRawCharacters(pos, endPos));
   722             System.out.println("processComment(" + pos + "," + endPos + "," + style + ")=|" + raw + "|");
   723         }
   724         char[] commentChars = reader.getRawCharacters(pos, endPos);
   725         UnicodeReader commentReader = new UnicodeReader(fac, commentChars, commentChars.length);
   726         return new BasicComment<UnicodeReader>(commentReader, style);
   727     }
   728     /**
   729      * Hook invoked once a run of whitespace has been scanned; pos and endPos delimit
   730      * the run. This implementation only echoes it to System.out when scannerDebug is set.
   731      */
   732     protected void processWhiteSpace(int pos, int endPos) {
   733         if (!scannerDebug) {
   734             return;
   735         }
   736         String raw = new String(reader.getRawCharacters(pos, endPos));
   737         System.out.println("processWhitespace(" + pos + "," + endPos + ")=|" + raw + "|");
   738     }
   740     /**
   741      * Hook invoked once a line terminator has been handled; pos and endPos delimit it.
   742      * This implementation only echoes it to System.out when scannerDebug is set.
   743      */
   744     protected void processLineTerminator(int pos, int endPos) {
   745         if (!scannerDebug) {
   746             return;
   747         }
   748         String raw = new String(reader.getRawCharacters(pos, endPos));
   749         System.out.println("processTerminator(" + pos + "," + endPos + ")=|" + raw + "|");
   750     }
   751     /** Creates a map for converting between input positions and line numbers,
   752      * built from the reader's raw characters.
   753      *
   754      * @return a LineMap */
   755     public Position.LineMap getLineMap() {
   756         char[] rawInput = reader.getRawCharacters();
   757         int length = reader.buflen;
   758         return Position.makeLineMap(rawInput, length, false);
   759     }
   760     /**
   761     * A Comment implementation that pairs a comment's style with a reader over
   762     * its text. For a JAVADOC-style comment, isDeprecated() runs scanDocComment()
   763     * if no scan has happened yet, looking for a deprecated tag at the start of a
   764     * line; the outcome is kept in deprecatedFlag. getText() always returns null here.
   765     */
   766     protected class BasicComment<U extends UnicodeReader> implements Comment {
   768         CommentStyle cs; // style of this comment, as returned by getStyle()
   769         U comment_reader; // reader whose bp/ch are advanced while the comment is scanned
   771         protected boolean deprecatedFlag = false; // set by scanDocComment() when the tag is found
   772         protected boolean scanned = false; // set when scanDocComment() finishes, on every exit path
   774         protected BasicComment(U comment_reader, CommentStyle cs) {
   775             this.comment_reader = comment_reader;
   776             this.cs = cs;
   777         }
   779         public String getText() {
   780             return null; // this implementation does not expose the comment text
   781         }
   783         public CommentStyle getStyle() {
   784             return cs;
   785         }
   787         public boolean isDeprecated() {
   788             if (!scanned && cs == CommentStyle.JAVADOC) { // scan lazily, and only doc comments
   789                 scanDocComment();
   790             }
   791             return deprecatedFlag;
   792         }
               /**
                * Scans the comment text line by line for a deprecated tag.
                * First skips the three-character comment opener. Then, for each line,
                * skips leading whitespace, any run of stars and further whitespace
                * before trying to match the literal tag text. The tag counts only when
                * it is followed by whitespace or directly by the end of the comment.
                * Returns once the closing '/' is reached or the buffer is exhausted;
                * scanned is set to true on every exit path.
                */
   794         @SuppressWarnings("fallthrough")
   795         protected void scanDocComment() {
   796             try {
   797                 boolean deprecatedPrefix = false;
   799                 comment_reader.bp += 3; // '/**'
   800                 comment_reader.ch = comment_reader.buf[comment_reader.bp]; // reload ch after moving bp directly
   802                 forEachLine:
   803                 while (comment_reader.bp < comment_reader.buflen) {
   805                     // Skip optional WhiteSpace at beginning of line
   806                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
   807                         comment_reader.scanCommentChar();
   808                     }
   810                     // Skip optional consecutive Stars
   811                     while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
   812                         comment_reader.scanCommentChar();
   813                         if (comment_reader.ch == '/') {
   814                             return; // a star followed by '/' closes the comment
   815                         }
   816                     }
   818                     // Skip optional WhiteSpace after Stars
   819                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
   820                         comment_reader.scanCommentChar();
   821                     }
   823                     deprecatedPrefix = false; // reset for this line
   824                     // At beginning of line in the JavaDoc sense.
   825                     if (!deprecatedFlag) { // no need to match again once the tag has been seen
   826                         String deprecated = "@deprecated";
   827                         int i = 0;
   828                         while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
   829                             comment_reader.scanCommentChar();
   830                             i++;
   831                             if (i == deprecated.length()) {
   832                                 deprecatedPrefix = true; // every character of the tag text matched
   833                                 break;
   834                             }
   835                         }
   836                     }
   838                     if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
   839                         if (Character.isWhitespace(comment_reader.ch)) { // tag text followed by whitespace
   840                             deprecatedFlag = true;
   841                         } else if (comment_reader.ch == '*') {
   842                             comment_reader.scanCommentChar();
   843                             if (comment_reader.ch == '/') { // tag text directly followed by the end of the comment
   844                                 deprecatedFlag = true;
   845                                 return;
   846                             }
   847                         }
   848                     }
   850                     // Skip rest of line
   851                     while (comment_reader.bp < comment_reader.buflen) {
   852                         switch (comment_reader.ch) {
   853                             case '*':
   854                                 comment_reader.scanCommentChar();
   855                                 if (comment_reader.ch == '/') {
   856                                     return; // end of the comment reached
   857                                 }
   858                                 break;
   859                             case CR: // (Spec 3.4)
   860                                 comment_reader.scanCommentChar();
   861                                 if (comment_reader.ch != LF) {
   862                                     continue forEachLine; // a lone CR ends the line
   863                                 }
   864                             /* fall through to LF case */
   865                             case LF: // (Spec 3.4)
   866                                 comment_reader.scanCommentChar();
   867                                 continue forEachLine;
   868                             default:
   869                                 comment_reader.scanCommentChar();
   870                         }
   871                     } // rest of line
   872                 } // forEachLine
   873                 return;
   874             } finally {
   875                 scanned = true;
   876             }
   877         }
   878     }
   879 }

mercurial