src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java

Mon, 10 Dec 2012 16:21:26 +0000

author
vromero
date
Mon, 10 Dec 2012 16:21:26 +0000
changeset 1442
fcf89720ae71
parent 1431
1f41a5758cf7
child 1679
b402b93cbe38
permissions
-rw-r--r--

8003967: detect and remove all mutable implicit static enum fields in langtools
Reviewed-by: jjg

     1 /*
     2  * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import com.sun.tools.javac.code.Source;
    29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
    30 import com.sun.tools.javac.util.*;
    32 import java.nio.CharBuffer;
    34 import static com.sun.tools.javac.parser.Tokens.*;
    35 import static com.sun.tools.javac.util.LayoutCharacters.*;
    37 /** The lexical analyzer maps an input stream consisting of
    38  *  ASCII characters and Unicode escapes into a token sequence.
    39  *
    40  *  <p><b>This is NOT part of any supported API.
    41  *  If you write code that depends on this, you do so at your own risk.
    42  *  This code and its internal interfaces are subject to change or
    43  *  deletion without notice.</b>
    44  */
    45 public class JavaTokenizer {
    /** Compile-time debugging switch. When true, readToken(),
     *  processComment() and processWhiteSpace() print what they scanned
     *  to System.out.
     */
    private static final boolean scannerDebug = false;

    /** Allow hex floating-point literals.
     *  Initialized from the source level; set to true after the first
     *  "unsupported.fp.lit" error has been reported.
     */
    private boolean allowHexFloats;

    /** Allow binary literals.
     *  Initialized from the source level; set to true after the first
     *  "unsupported.binary.lit" error has been reported.
     */
    private boolean allowBinaryLiterals;

    /** Allow underscores in literals.
     *  Initialized from the source level; set to true after the first
     *  "unsupported.underscore.lit" error has been reported.
     */
    private boolean allowUnderscoresInLiterals;

    /** The source language setting.
     */
    private Source source;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The token factory. */
    private final Tokens tokens;

    /** The token kind, set while scanning in readToken()
     *  (and set to ERROR by lexError()).
     */
    protected TokenKind tk;

    /** The token's radix, set while scanning in readToken().
     */
    protected int radix;

    /** The token's name, set while scanning in readToken().
     */
    protected Name name;

    /** The position where a lexical error occurred.
     */
    protected int errPos = Position.NOPOS;

    /** The Unicode reader (low-level stream reader).
     */
    protected UnicodeReader reader;

    /** The factory this tokenizer was constructed with; also handed to the
     *  UnicodeReader instances created by this class.
     */
    protected ScannerFactory fac;
    94     private static final boolean hexFloatsWork = hexFloatsWork();
    95     private static boolean hexFloatsWork() {
    96         try {
    97             Float.valueOf("0x1.0p1");
    98             return true;
    99         } catch (NumberFormatException ex) {
   100             return false;
   101         }
   102     }
   104     /**
   105      * Create a scanner from the input array.  This method might
   106      * modify the array.  To avoid copying the input array, ensure
   107      * that {@code inputLength < input.length} or
   108      * {@code input[input.length -1]} is a white space character.
   109      *
   110      * @param fac the factory which created this Scanner
   111      * @param buf the input, might be modified
   112      * Must be positive and less than or equal to input.length.
   113      */
   114     protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
   115         this(fac, new UnicodeReader(fac, buf));
   116     }
   118     protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
   119         this(fac, new UnicodeReader(fac, buf, inputLength));
   120     }
   122     protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
   123         this.fac = fac;
   124         this.log = fac.log;
   125         this.tokens = fac.tokens;
   126         this.source = fac.source;
   127         this.reader = reader;
   128         this.allowBinaryLiterals = source.allowBinaryLiterals();
   129         this.allowHexFloats = source.allowHexFloats();
   130         this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
   131     }
   133     /** Report an error at the given position using the provided arguments.
   134      */
   135     protected void lexError(int pos, String key, Object... args) {
   136         log.error(pos, key, args);
   137         tk = TokenKind.ERROR;
   138         errPos = pos;
   139     }
    /** Read next character in character or string literal and copy into sbuf.
     *  A backslash escape (octal, or one of b t n f r ' " \) is replaced by
     *  the character it denotes; any other escape is reported as
     *  "illegal.esc.char".
     *
     *  @param pos position handed to reader.digit() while decoding octal
     *             escapes
     */
    private void scanLitChar(int pos) {
        if (reader.ch == '\\') {
            // Backslash immediately followed by another backslash: emit a
            // single backslash. NOTE(review): the !isUnicode() guard
            // presumably excludes a backslash that came from a unicode
            // escape -- confirm against UnicodeReader.
            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
                reader.skipChar();
                reader.putChar('\\', true);
            } else {
                reader.scanChar();
                switch (reader.ch) {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    // Octal escape: one to three octal digits. A third digit
                    // is only consumed when the first digit is 0..3.
                    char leadch = reader.ch;
                    int oct = reader.digit(pos, 8);
                    reader.scanChar();
                    if ('0' <= reader.ch && reader.ch <= '7') {
                        oct = oct * 8 + reader.digit(pos, 8);
                        reader.scanChar();
                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
                            oct = oct * 8 + reader.digit(pos, 8);
                            reader.scanChar();
                        }
                    }
                    // The digits have already been scanned past, so only the
                    // decoded character is stored here.
                    reader.putChar((char)oct);
                    break;
                case 'b':
                    reader.putChar('\b', true); break;
                case 't':
                    reader.putChar('\t', true); break;
                case 'n':
                    reader.putChar('\n', true); break;
                case 'f':
                    reader.putChar('\f', true); break;
                case 'r':
                    reader.putChar('\r', true); break;
                case '\'':
                    reader.putChar('\'', true); break;
                case '\"':
                    reader.putChar('\"', true); break;
                case '\\':
                    reader.putChar('\\', true); break;
                default:
                    lexError(reader.bp, "illegal.esc.char");
                }
            }
        } else if (reader.bp != reader.buflen) {
            // Ordinary character; nothing is stored once the buffer position
            // has reached the end of the input.
            reader.putChar(true);
        }
    }
    /** Scan a run of digits of the given radix, allowing embedded
     *  underscores. The current character is stored without being checked;
     *  callers test that it is a digit before calling. Underscores are
     *  skipped and not stored. An underscore met while underscores are not
     *  allowed is reported once as "unsupported.underscore.lit"; a run that
     *  ends in an underscore is reported as "illegal.underscore".
     *
     *  @param pos        position used for error reporting and handed to
     *                    reader.digit()
     *  @param digitRadix radix deciding whether the next character still
     *                    belongs to the run
     */
    private void scanDigits(int pos, int digitRadix) {
        char saveCh;
        int savePos;
        do {
            if (reader.ch != '_') {
                reader.putChar(false);
            } else {
                if (!allowUnderscoresInLiterals) {
                    lexError(pos, "unsupported.underscore.lit", source.name);
                    // flip the switch so the error is not repeated
                    allowUnderscoresInLiterals = true;
                }
            }
            // remember the last character consumed and where it was
            saveCh = reader.ch;
            savePos = reader.bp;
            reader.scanChar();
        } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
        // a trailing underscore is not permitted
        if (saveCh == '_')
            lexError(savePos, "illegal.underscore");
    }
   211     /** Read fractional part of hexadecimal floating point number.
   212      */
   213     private void scanHexExponentAndSuffix(int pos) {
   214         if (reader.ch == 'p' || reader.ch == 'P') {
   215             reader.putChar(true);
   216             skipIllegalUnderscores();
   217             if (reader.ch == '+' || reader.ch == '-') {
   218                 reader.putChar(true);
   219             }
   220             skipIllegalUnderscores();
   221             if ('0' <= reader.ch && reader.ch <= '9') {
   222                 scanDigits(pos, 10);
   223                 if (!allowHexFloats) {
   224                     lexError(pos, "unsupported.fp.lit", source.name);
   225                     allowHexFloats = true;
   226                 }
   227                 else if (!hexFloatsWork)
   228                     lexError(pos, "unsupported.cross.fp.lit");
   229             } else
   230                 lexError(pos, "malformed.fp.lit");
   231         } else {
   232             lexError(pos, "malformed.fp.lit");
   233         }
   234         if (reader.ch == 'f' || reader.ch == 'F') {
   235             reader.putChar(true);
   236             tk = TokenKind.FLOATLITERAL;
   237             radix = 16;
   238         } else {
   239             if (reader.ch == 'd' || reader.ch == 'D') {
   240                 reader.putChar(true);
   241             }
   242             tk = TokenKind.DOUBLELITERAL;
   243             radix = 16;
   244         }
   245     }
   247     /** Read fractional part of floating point number.
   248      */
   249     private void scanFraction(int pos) {
   250         skipIllegalUnderscores();
   251         if ('0' <= reader.ch && reader.ch <= '9') {
   252             scanDigits(pos, 10);
   253         }
   254         int sp1 = reader.sp;
   255         if (reader.ch == 'e' || reader.ch == 'E') {
   256             reader.putChar(true);
   257             skipIllegalUnderscores();
   258             if (reader.ch == '+' || reader.ch == '-') {
   259                 reader.putChar(true);
   260             }
   261             skipIllegalUnderscores();
   262             if ('0' <= reader.ch && reader.ch <= '9') {
   263                 scanDigits(pos, 10);
   264                 return;
   265             }
   266             lexError(pos, "malformed.fp.lit");
   267             reader.sp = sp1;
   268         }
   269     }
   271     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   272      */
   273     private void scanFractionAndSuffix(int pos) {
   274         radix = 10;
   275         scanFraction(pos);
   276         if (reader.ch == 'f' || reader.ch == 'F') {
   277             reader.putChar(true);
   278             tk = TokenKind.FLOATLITERAL;
   279         } else {
   280             if (reader.ch == 'd' || reader.ch == 'D') {
   281                 reader.putChar(true);
   282             }
   283             tk = TokenKind.DOUBLELITERAL;
   284         }
   285     }
   287     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   288      */
   289     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
   290         radix = 16;
   291         Assert.check(reader.ch == '.');
   292         reader.putChar(true);
   293         skipIllegalUnderscores();
   294         if (reader.digit(pos, 16) >= 0) {
   295             seendigit = true;
   296             scanDigits(pos, 16);
   297         }
   298         if (!seendigit)
   299             lexError(pos, "invalid.hex.number");
   300         else
   301             scanHexExponentAndSuffix(pos);
   302     }
   304     private void skipIllegalUnderscores() {
   305         if (reader.ch == '_') {
   306             lexError(reader.bp, "illegal.underscore");
   307             while (reader.ch == '_')
   308                 reader.scanChar();
   309         }
   310     }
   312     /** Read a number.
   313      *  @param radix  The radix of the number; one of 2, j8, 10, 16.
   314      */
   315     private void scanNumber(int pos, int radix) {
   316         // for octal, allow base-10 digit in case it's a float literal
   317         this.radix = radix;
   318         int digitRadix = (radix == 8 ? 10 : radix);
   319         boolean seendigit = false;
   320         if (reader.digit(pos, digitRadix) >= 0) {
   321             seendigit = true;
   322             scanDigits(pos, digitRadix);
   323         }
   324         if (radix == 16 && reader.ch == '.') {
   325             scanHexFractionAndSuffix(pos, seendigit);
   326         } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
   327             scanHexExponentAndSuffix(pos);
   328         } else if (digitRadix == 10 && reader.ch == '.') {
   329             reader.putChar(true);
   330             scanFractionAndSuffix(pos);
   331         } else if (digitRadix == 10 &&
   332                    (reader.ch == 'e' || reader.ch == 'E' ||
   333                     reader.ch == 'f' || reader.ch == 'F' ||
   334                     reader.ch == 'd' || reader.ch == 'D')) {
   335             scanFractionAndSuffix(pos);
   336         } else {
   337             if (reader.ch == 'l' || reader.ch == 'L') {
   338                 reader.scanChar();
   339                 tk = TokenKind.LONGLITERAL;
   340             } else {
   341                 tk = TokenKind.INTLITERAL;
   342             }
   343         }
   344     }
    /** Read an identifier. The current character is stored unconditionally
     *  (callers have already decided it starts an identifier); following
     *  characters are stored for as long as they are identifier parts.
     *  On exit, name holds the scanned name and tk the kind that
     *  tokens.lookupKind() returns for it.
     */
    private void scanIdent() {
        boolean isJavaIdentifierPart;
        char high;
        reader.putChar(true);
        do {
            switch (reader.ch) {
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '$': case '_':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                // ASCII identifier part: fall out of the switch and store it
                break;
            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
            case '\u0015': case '\u0016': case '\u0017':
            case '\u0018': case '\u0019': case '\u001B':
            case '\u007F':
                // these control characters are skipped without being stored
                reader.scanChar();
                continue;
            case '\u001A': // EOI is also a legal identifier part
                // at the end of the input it terminates the identifier;
                // elsewhere it is skipped like the control characters above
                if (reader.bp >= reader.buflen) {
                    name = reader.name();
                    tk = tokens.lookupKind(name);
                    return;
                }
                reader.scanChar();
                continue;
            default:
                if (reader.ch < '\u0080') {
                    // all ASCII range chars already handled, above
                    isJavaIdentifierPart = false;
                } else {
                    if (Character.isIdentifierIgnorable(reader.ch)) {
                        // ignorable characters are skipped without being stored
                        reader.scanChar();
                        continue;
                    } else {
                        // a non-zero result is the high half of a surrogate
                        // pair; it is stored here and tested together with
                        // the current character as one code point
                        high = reader.scanSurrogates();
                        if (high != 0) {
                            reader.putChar(high);
                            isJavaIdentifierPart = Character.isJavaIdentifierPart(
                                Character.toCodePoint(high, reader.ch));
                        } else {
                            isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
                        }
                    }
                }
                if (!isJavaIdentifierPart) {
                    // first character that is not part of the identifier:
                    // finish the token without consuming it
                    name = reader.name();
                    tk = tokens.lookupKind(name);
                    return;
                }
            }
            reader.putChar(true);
        } while (true);
    }
   416     /** Return true if reader.ch can be part of an operator.
   417      */
   418     private boolean isSpecial(char ch) {
   419         switch (ch) {
   420         case '!': case '%': case '&': case '*': case '?':
   421         case '+': case '-': case ':': case '<': case '=':
   422         case '>': case '^': case '|': case '~':
   423         case '@':
   424             return true;
   425         default:
   426             return false;
   427         }
   428     }
   430     /** Read longest possible sequence of special characters and convert
   431      *  to token.
   432      */
   433     private void scanOperator() {
   434         while (true) {
   435             reader.putChar(false);
   436             Name newname = reader.name();
   437             TokenKind tk1 = tokens.lookupKind(newname);
   438             if (tk1 == TokenKind.IDENTIFIER) {
   439                 reader.sp--;
   440                 break;
   441             }
   442             tk = tk1;
   443             reader.scanChar();
   444             if (!isSpecial(reader.ch)) break;
   445         }
   446     }
   448     /** Read token.
   449      */
   450     public Token readToken() {
   452         reader.sp = 0;
   453         name = null;
   454         radix = 0;
   456         int pos = 0;
   457         int endPos = 0;
   458         List<Comment> comments = null;
   460         try {
   461             loop: while (true) {
   462                 pos = reader.bp;
   463                 switch (reader.ch) {
   464                 case ' ': // (Spec 3.6)
   465                 case '\t': // (Spec 3.6)
   466                 case FF: // (Spec 3.6)
   467                     do {
   468                         reader.scanChar();
   469                     } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
   470                     processWhiteSpace(pos, reader.bp);
   471                     break;
   472                 case LF: // (Spec 3.4)
   473                     reader.scanChar();
   474                     processLineTerminator(pos, reader.bp);
   475                     break;
   476                 case CR: // (Spec 3.4)
   477                     reader.scanChar();
   478                     if (reader.ch == LF) {
   479                         reader.scanChar();
   480                     }
   481                     processLineTerminator(pos, reader.bp);
   482                     break;
   483                 case 'A': case 'B': case 'C': case 'D': case 'E':
   484                 case 'F': case 'G': case 'H': case 'I': case 'J':
   485                 case 'K': case 'L': case 'M': case 'N': case 'O':
   486                 case 'P': case 'Q': case 'R': case 'S': case 'T':
   487                 case 'U': case 'V': case 'W': case 'X': case 'Y':
   488                 case 'Z':
   489                 case 'a': case 'b': case 'c': case 'd': case 'e':
   490                 case 'f': case 'g': case 'h': case 'i': case 'j':
   491                 case 'k': case 'l': case 'm': case 'n': case 'o':
   492                 case 'p': case 'q': case 'r': case 's': case 't':
   493                 case 'u': case 'v': case 'w': case 'x': case 'y':
   494                 case 'z':
   495                 case '$': case '_':
   496                     scanIdent();
   497                     break loop;
   498                 case '0':
   499                     reader.scanChar();
   500                     if (reader.ch == 'x' || reader.ch == 'X') {
   501                         reader.scanChar();
   502                         skipIllegalUnderscores();
   503                         if (reader.ch == '.') {
   504                             scanHexFractionAndSuffix(pos, false);
   505                         } else if (reader.digit(pos, 16) < 0) {
   506                             lexError(pos, "invalid.hex.number");
   507                         } else {
   508                             scanNumber(pos, 16);
   509                         }
   510                     } else if (reader.ch == 'b' || reader.ch == 'B') {
   511                         if (!allowBinaryLiterals) {
   512                             lexError(pos, "unsupported.binary.lit", source.name);
   513                             allowBinaryLiterals = true;
   514                         }
   515                         reader.scanChar();
   516                         skipIllegalUnderscores();
   517                         if (reader.digit(pos, 2) < 0) {
   518                             lexError(pos, "invalid.binary.number");
   519                         } else {
   520                             scanNumber(pos, 2);
   521                         }
   522                     } else {
   523                         reader.putChar('0');
   524                         if (reader.ch == '_') {
   525                             int savePos = reader.bp;
   526                             do {
   527                                 reader.scanChar();
   528                             } while (reader.ch == '_');
   529                             if (reader.digit(pos, 10) < 0) {
   530                                 lexError(savePos, "illegal.underscore");
   531                             }
   532                         }
   533                         scanNumber(pos, 8);
   534                     }
   535                     break loop;
   536                 case '1': case '2': case '3': case '4':
   537                 case '5': case '6': case '7': case '8': case '9':
   538                     scanNumber(pos, 10);
   539                     break loop;
   540                 case '.':
   541                     reader.scanChar();
   542                     if ('0' <= reader.ch && reader.ch <= '9') {
   543                         reader.putChar('.');
   544                         scanFractionAndSuffix(pos);
   545                     } else if (reader.ch == '.') {
   546                         int savePos = reader.bp;
   547                         reader.putChar('.'); reader.putChar('.', true);
   548                         if (reader.ch == '.') {
   549                             reader.scanChar();
   550                             reader.putChar('.');
   551                             tk = TokenKind.ELLIPSIS;
   552                         } else {
   553                             lexError(savePos, "illegal.dot");
   554                         }
   555                     } else {
   556                         tk = TokenKind.DOT;
   557                     }
   558                     break loop;
   559                 case ',':
   560                     reader.scanChar(); tk = TokenKind.COMMA; break loop;
   561                 case ';':
   562                     reader.scanChar(); tk = TokenKind.SEMI; break loop;
   563                 case '(':
   564                     reader.scanChar(); tk = TokenKind.LPAREN; break loop;
   565                 case ')':
   566                     reader.scanChar(); tk = TokenKind.RPAREN; break loop;
   567                 case '[':
   568                     reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
   569                 case ']':
   570                     reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
   571                 case '{':
   572                     reader.scanChar(); tk = TokenKind.LBRACE; break loop;
   573                 case '}':
   574                     reader.scanChar(); tk = TokenKind.RBRACE; break loop;
   575                 case '/':
   576                     reader.scanChar();
   577                     if (reader.ch == '/') {
   578                         do {
   579                             reader.scanCommentChar();
   580                         } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
   581                         if (reader.bp < reader.buflen) {
   582                             comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
   583                         }
   584                         break;
   585                     } else if (reader.ch == '*') {
   586                         boolean isEmpty = false;
   587                         reader.scanChar();
   588                         CommentStyle style;
   589                         if (reader.ch == '*') {
   590                             style = CommentStyle.JAVADOC;
   591                             reader.scanCommentChar();
   592                             if (reader.ch == '/') {
   593                                 isEmpty = true;
   594                             }
   595                         } else {
   596                             style = CommentStyle.BLOCK;
   597                         }
   598                         while (!isEmpty && reader.bp < reader.buflen) {
   599                             if (reader.ch == '*') {
   600                                 reader.scanChar();
   601                                 if (reader.ch == '/') break;
   602                             } else {
   603                                 reader.scanCommentChar();
   604                             }
   605                         }
   606                         if (reader.ch == '/') {
   607                             reader.scanChar();
   608                             comments = addComment(comments, processComment(pos, reader.bp, style));
   609                             break;
   610                         } else {
   611                             lexError(pos, "unclosed.comment");
   612                             break loop;
   613                         }
   614                     } else if (reader.ch == '=') {
   615                         tk = TokenKind.SLASHEQ;
   616                         reader.scanChar();
   617                     } else {
   618                         tk = TokenKind.SLASH;
   619                     }
   620                     break loop;
   621                 case '\'':
   622                     reader.scanChar();
   623                     if (reader.ch == '\'') {
   624                         lexError(pos, "empty.char.lit");
   625                     } else {
   626                         if (reader.ch == CR || reader.ch == LF)
   627                             lexError(pos, "illegal.line.end.in.char.lit");
   628                         scanLitChar(pos);
   629                         char ch2 = reader.ch;
   630                         if (reader.ch == '\'') {
   631                             reader.scanChar();
   632                             tk = TokenKind.CHARLITERAL;
   633                         } else {
   634                             lexError(pos, "unclosed.char.lit");
   635                         }
   636                     }
   637                     break loop;
   638                 case '\"':
   639                     reader.scanChar();
   640                     while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
   641                         scanLitChar(pos);
   642                     if (reader.ch == '\"') {
   643                         tk = TokenKind.STRINGLITERAL;
   644                         reader.scanChar();
   645                     } else {
   646                         lexError(pos, "unclosed.str.lit");
   647                     }
   648                     break loop;
   649                 default:
   650                     if (isSpecial(reader.ch)) {
   651                         scanOperator();
   652                     } else {
   653                         boolean isJavaIdentifierStart;
   654                         if (reader.ch < '\u0080') {
   655                             // all ASCII range chars already handled, above
   656                             isJavaIdentifierStart = false;
   657                         } else {
   658                             char high = reader.scanSurrogates();
   659                             if (high != 0) {
   660                                 reader.putChar(high);
   662                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(
   663                                     Character.toCodePoint(high, reader.ch));
   664                             } else {
   665                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
   666                             }
   667                         }
   668                         if (isJavaIdentifierStart) {
   669                             scanIdent();
   670                         } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
   671                             tk = TokenKind.EOF;
   672                             pos = reader.buflen;
   673                         } else {
   674                             String arg = (32 < reader.ch && reader.ch < 127) ?
   675                                             String.format("%s", reader.ch) :
   676                                             String.format("\\u%04x", (int)reader.ch);
   677                             lexError(pos, "illegal.char", arg);
   678                             reader.scanChar();
   679                         }
   680                     }
   681                     break loop;
   682                 }
   683             }
   684             endPos = reader.bp;
   685             switch (tk.tag) {
   686                 case DEFAULT: return new Token(tk, pos, endPos, comments);
   687                 case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
   688                 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
   689                 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
   690                 default: throw new AssertionError();
   691             }
   692         }
   693         finally {
   694             if (scannerDebug) {
   695                     System.out.println("nextToken(" + pos
   696                                        + "," + endPos + ")=|" +
   697                                        new String(reader.getRawCharacters(pos, endPos))
   698                                        + "|");
   699             }
   700         }
   701     }
   702     //where
   703         List<Comment> addComment(List<Comment> comments, Comment comment) {
   704             return comments == null ?
   705                     List.of(comment) :
   706                     comments.prepend(comment);
   707         }
   709     /** Return the position where a lexical error occurred;
   710      */
   711     public int errPos() {
   712         return errPos;
   713     }
   715     /** Set the position where a lexical error occurred;
   716      */
   717     public void errPos(int pos) {
   718         errPos = pos;
   719     }
   721     /**
   722      * Called when a complete comment has been scanned. pos and endPos
   723      * will mark the comment boundary.
   724      */
   725     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
   726         if (scannerDebug)
   727             System.out.println("processComment(" + pos
   728                                + "," + endPos + "," + style + ")=|"
   729                                + new String(reader.getRawCharacters(pos, endPos))
   730                                + "|");
   731         char[] buf = reader.getRawCharacters(pos, endPos);
   732         return new BasicComment<UnicodeReader>(new UnicodeReader(fac, buf, buf.length), style);
   733     }
   735     /**
   736      * Called when a complete whitespace run has been scanned. pos and endPos
   737      * will mark the whitespace boundary.
   738      */
   739     protected void processWhiteSpace(int pos, int endPos) {
   740         if (scannerDebug)
   741             System.out.println("processWhitespace(" + pos
   742                                + "," + endPos + ")=|" +
   743                                new String(reader.getRawCharacters(pos, endPos))
   744                                + "|");
   745     }
   747     /**
   748      * Called when a line terminator has been processed.
   749      */
   750     protected void processLineTerminator(int pos, int endPos) {
   751         if (scannerDebug)
   752             System.out.println("processTerminator(" + pos
   753                                + "," + endPos + ")=|" +
   754                                new String(reader.getRawCharacters(pos, endPos))
   755                                + "|");
   756     }
   758     /** Build a map for translating between line numbers and
   759      * positions in the input.
   760      *
   761      * @return a LineMap */
   762     public Position.LineMap getLineMap() {
   763         return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
   764     }
   767     /**
   768     * Scan a documentation comment; determine if a deprecated tag is present.
   769     * Called once the initial /, * have been skipped, positioned at the second *
   770     * (which is treated as the beginning of the first line).
   771     * Stops positioned at the closing '/'.
   772     */
   773     protected class BasicComment<U extends UnicodeReader> implements Comment {
   775         CommentStyle cs;
   776         U comment_reader;
   778         protected boolean deprecatedFlag = false;
   779         protected boolean scanned = false;
   781         protected BasicComment(U comment_reader, CommentStyle cs) {
   782             this.comment_reader = comment_reader;
   783             this.cs = cs;
   784         }
   786         public String getText() {
   787             return null;
   788         }
   790         public int getSourcePos(int pos) {
   791             return -1;
   792         }
   794         public CommentStyle getStyle() {
   795             return cs;
   796         }
   798         public boolean isDeprecated() {
   799             if (!scanned && cs == CommentStyle.JAVADOC) {
   800                 scanDocComment();
   801             }
   802             return deprecatedFlag;
   803         }
   805         @SuppressWarnings("fallthrough")
   806         protected void scanDocComment() {
   807             try {
   808                 boolean deprecatedPrefix = false;
   810                 comment_reader.bp += 3; // '/**'
   811                 comment_reader.ch = comment_reader.buf[comment_reader.bp];
   813                 forEachLine:
   814                 while (comment_reader.bp < comment_reader.buflen) {
   816                     // Skip optional WhiteSpace at beginning of line
   817                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
   818                         comment_reader.scanCommentChar();
   819                     }
   821                     // Skip optional consecutive Stars
   822                     while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
   823                         comment_reader.scanCommentChar();
   824                         if (comment_reader.ch == '/') {
   825                             return;
   826                         }
   827                     }
   829                     // Skip optional WhiteSpace after Stars
   830                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
   831                         comment_reader.scanCommentChar();
   832                     }
   834                     deprecatedPrefix = false;
   835                     // At beginning of line in the JavaDoc sense.
   836                     if (!deprecatedFlag) {
   837                         String deprecated = "@deprecated";
   838                         int i = 0;
   839                         while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
   840                             comment_reader.scanCommentChar();
   841                             i++;
   842                             if (i == deprecated.length()) {
   843                                 deprecatedPrefix = true;
   844                                 break;
   845                             }
   846                         }
   847                     }
   849                     if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
   850                         if (Character.isWhitespace(comment_reader.ch)) {
   851                             deprecatedFlag = true;
   852                         } else if (comment_reader.ch == '*') {
   853                             comment_reader.scanCommentChar();
   854                             if (comment_reader.ch == '/') {
   855                                 deprecatedFlag = true;
   856                                 return;
   857                             }
   858                         }
   859                     }
   861                     // Skip rest of line
   862                     while (comment_reader.bp < comment_reader.buflen) {
   863                         switch (comment_reader.ch) {
   864                             case '*':
   865                                 comment_reader.scanCommentChar();
   866                                 if (comment_reader.ch == '/') {
   867                                     return;
   868                                 }
   869                                 break;
   870                             case CR: // (Spec 3.4)
   871                                 comment_reader.scanCommentChar();
   872                                 if (comment_reader.ch != LF) {
   873                                     continue forEachLine;
   874                                 }
   875                             /* fall through to LF case */
   876                             case LF: // (Spec 3.4)
   877                                 comment_reader.scanCommentChar();
   878                                 continue forEachLine;
   879                             default:
   880                                 comment_reader.scanCommentChar();
   881                         }
   882                     } // rest of line
   883                 } // forEachLine
   884                 return;
   885             } finally {
   886                 scanned = true;
   887             }
   888         }
   889     }
   890 }

mercurial