jdk8-mips64-public/langtools: src/share/classes/com/sun/tools/javac/parser/Scanner.java@8cc5b440fdde

7033809: Rename "disjunctive" to "union" in javax.lang.model
Reviewed-by: mcimadamore, jjg

     1 /*

     2  * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package com.sun.tools.javac.parser;

    28 import java.nio.*;

    30 import com.sun.tools.javac.code.Source;

    31 import com.sun.tools.javac.file.JavacFileManager;

    32 import com.sun.tools.javac.util.*;

    35 import static com.sun.tools.javac.parser.Token.*;

    36 import static com.sun.tools.javac.util.LayoutCharacters.*;

    38 /** The lexical analyzer maps an input stream consisting of

    39  *  ASCII characters and Unicode escapes into a token sequence.

    40  *

    41  *  <p><b>This is NOT part of any supported API.

    42  *  If you write code that depends on this, you do so at your own risk.

    43  *  This code and its internal interfaces are subject to change or

    44  *  deletion without notice.</b>

    45  */

    46 public class Scanner implements Lexer {

    48     private static boolean scannerDebug = false;

    50     /* Output variables; set by nextToken():

    51      */

    53     /** The token, set by nextToken().

    54      */

    55     private Token token;

    57     /** Allow hex floating-point literals.

    58      */

    59     private boolean allowHexFloats;

    61     /** Allow binary literals.

    62      */

    63     private boolean allowBinaryLiterals;

    65     /** Allow underscores in literals.

    66      */

    67     private boolean allowUnderscoresInLiterals;

    69     /** The source language setting.

    70      */

    71     private Source source;

    73     /** The token's position, 0-based offset from beginning of text.

    74      */

    75     private int pos;

    77     /** Character position just after the last character of the token.

    78      */

    79     private int endPos;

    81     /** The last character position of the previous token.

    82      */

    83     private int prevEndPos;

    85     /** The position where a lexical error occurred;

    86      */

    87     private int errPos = Position.NOPOS;

    89     /** The name of an identifier or token:

    90      */

    91     private Name name;

    93     /** The radix of a numeric literal token.

    94      */

    95     private int radix;

    97     /** Has a @deprecated been encountered in last doc comment?

    98      *  this needs to be reset by client.

    99      */

   100     protected boolean deprecatedFlag = false;

   102     /** A character buffer for literals.

   103      */

   104     private char[] sbuf = new char[128];

   105     private int sp;

   107     /** The input buffer, index of next character to be read,

   108      *  index of one past last character in buffer.

   109      */

   110     private char[] buf;

   111     private int bp;

   112     private int buflen;

   113     private int eofPos;

   115     /** The current character.

   116      */

   117     private char ch;

   119     /** The buffer index of the last converted unicode character

   120      */

   121     private int unicodeConversionBp = -1;

   123     /** The log to be used for error reporting.

   124      */

   125     private final Log log;

   127     /** The name table. */

   128     private final Names names;

   130     /** The keyword table. */

   131     private final Keywords keywords;

   133     /** Common code for constructors. */

   134     private Scanner(ScannerFactory fac) {

   135         log = fac.log;

   136         names = fac.names;

   137         keywords = fac.keywords;

   138         source = fac.source;

   139         allowBinaryLiterals = source.allowBinaryLiterals();

   140         allowHexFloats = source.allowHexFloats();

   141         allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();

   142     }

   144     private static final boolean hexFloatsWork = hexFloatsWork();

   145     private static boolean hexFloatsWork() {

   146         try {

   147             Float.valueOf("0x1.0p1");

   148             return true;

   149         } catch (NumberFormatException ex) {

   150             return false;

   151         }

   152     }

   154     /** Create a scanner from the input buffer.  buffer must implement

   155      *  array() and compact(), and remaining() must be less than limit().

   156      */

   157     protected Scanner(ScannerFactory fac, CharBuffer buffer) {

   158         this(fac, JavacFileManager.toArray(buffer), buffer.limit());

   159     }

   161     /**

   162      * Create a scanner from the input array.  This method might

   163      * modify the array.  To avoid copying the input array, ensure

   164      * that {@code inputLength < input.length} or

   165      * {@code input[input.length -1]} is a white space character.

   166      *

   167      * @param fac the factory which created this Scanner

   168      * @param input the input, might be modified

   169      * @param inputLength the size of the input.

   170      * Must be positive and less than or equal to input.length.

   171      */

   172     protected Scanner(ScannerFactory fac, char[] input, int inputLength) {

   173         this(fac);

   174         eofPos = inputLength;

   175         if (inputLength == input.length) {

   176             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {

   177                 inputLength--;

   178             } else {

   179                 char[] newInput = new char[inputLength + 1];

   180                 System.arraycopy(input, 0, newInput, 0, input.length);

   181                 input = newInput;

   182             }

   183         }

   184         buf = input;

   185         buflen = inputLength;

   186         buf[buflen] = EOI;

   187         bp = -1;

   188         scanChar();

   189     }

   191     /** Report an error at the given position using the provided arguments.

   192      */

   193     private void lexError(int pos, String key, Object... args) {

   194         log.error(pos, key, args);

   195         token = ERROR;

   196         errPos = pos;

   197     }

   199     /** Report an error at the current token position using the provided

   200      *  arguments.

   201      */

   202     private void lexError(String key, Object... args) {

   203         lexError(pos, key, args);

   204     }

   206     /** Convert an ASCII digit from its base (8, 10, or 16)

   207      *  to its value.

   208      */

   209     private int digit(int base) {

   210         char c = ch;

   211         int result = Character.digit(c, base);

   212         if (result >= 0 && c > 0x7f) {

   213             lexError(pos+1, "illegal.nonascii.digit");

   214             ch = "0123456789abcdef".charAt(result);

   215         }

   216         return result;

   217     }

   219     /** Convert unicode escape; bp points to initial '\' character

   220      *  (Spec 3.3).

   221      */

   222     private void convertUnicode() {

   223         if (ch == '\\' && unicodeConversionBp != bp) {

   224             bp++; ch = buf[bp];

   225             if (ch == 'u') {

   226                 do {

   227                     bp++; ch = buf[bp];

   228                 } while (ch == 'u');

   229                 int limit = bp + 3;

   230                 if (limit < buflen) {

   231                     int d = digit(16);

   232                     int code = d;

   233                     while (bp < limit && d >= 0) {

   234                         bp++; ch = buf[bp];

   235                         d = digit(16);

   236                         code = (code << 4) + d;

   237                     }

   238                     if (d >= 0) {

   239                         ch = (char)code;

   240                         unicodeConversionBp = bp;

   241                         return;

   242                     }

   243                 }

   244                 lexError(bp, "illegal.unicode.esc");

   245             } else {

   246                 bp--;

   247                 ch = '\\';

   248             }

   249         }

   250     }

   252     /** Read next character.

   253      */

   254     private void scanChar() {

   255         ch = buf[++bp];

   256         if (ch == '\\') {

   257             convertUnicode();

   258         }

   259     }

   261     /** Read next character in comment, skipping over double '\' characters.

   262      */

   263     private void scanCommentChar() {

   264         scanChar();

   265         if (ch == '\\') {

   266             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {

   267                 bp++;

   268             } else {

   269                 convertUnicode();

   270             }

   271         }

   272     }

   274     /** Append a character to sbuf.

   275      */

   276     private void putChar(char ch) {

   277         if (sp == sbuf.length) {

   278             char[] newsbuf = new char[sbuf.length * 2];

   279             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);

   280             sbuf = newsbuf;

   281         }

   282         sbuf[sp++] = ch;

   283     }

   285     /** Read next character in character or string literal and copy into sbuf.

   286      */

   287     private void scanLitChar() {

   288         if (ch == '\\') {

   289             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {

   290                 bp++;

   291                 putChar('\\');

   292                 scanChar();

   293             } else {

   294                 scanChar();

   295                 switch (ch) {

   296                 case '0': case '1': case '2': case '3':

   297                 case '4': case '5': case '6': case '7':

   298                     char leadch = ch;

   299                     int oct = digit(8);

   300                     scanChar();

   301                     if ('0' <= ch && ch <= '7') {

   302                         oct = oct * 8 + digit(8);

   303                         scanChar();

   304                         if (leadch <= '3' && '0' <= ch && ch <= '7') {

   305                             oct = oct * 8 + digit(8);

   306                             scanChar();

   307                         }

   308                     }

   309                     putChar((char)oct);

   310                     break;

   311                 case 'b':

   312                     putChar('\b'); scanChar(); break;

   313                 case 't':

   314                     putChar('\t'); scanChar(); break;

   315                 case 'n':

   316                     putChar('\n'); scanChar(); break;

   317                 case 'f':

   318                     putChar('\f'); scanChar(); break;

   319                 case 'r':

   320                     putChar('\r'); scanChar(); break;

   321                 case '\'':

   322                     putChar('\''); scanChar(); break;

   323                 case '\"':

   324                     putChar('\"'); scanChar(); break;

   325                 case '\\':

   326                     putChar('\\'); scanChar(); break;

   327                 default:

   328                     lexError(bp, "illegal.esc.char");

   329                 }

   330             }

   331         } else if (bp != buflen) {

   332             putChar(ch); scanChar();

   333         }

   334     }

   336     private void scanDigits(int digitRadix) {

   337         char saveCh;

   338         int savePos;

   339         do {

   340             if (ch != '_') {

   341                 putChar(ch);

   342             } else {

   343                 if (!allowUnderscoresInLiterals) {

   344                     lexError("unsupported.underscore.lit", source.name);

   345                     allowUnderscoresInLiterals = true;

   346                 }

   347             }

   348             saveCh = ch;

   349             savePos = bp;

   350             scanChar();

   351         } while (digit(digitRadix) >= 0 || ch == '_');

   352         if (saveCh == '_')

   353             lexError(savePos, "illegal.underscore");

   354     }

   356     /** Read fractional part of hexadecimal floating point number.

   357      */

   358     private void scanHexExponentAndSuffix() {

   359         if (ch == 'p' || ch == 'P') {

   360             putChar(ch);

   361             scanChar();

   362             skipIllegalUnderscores();

   363             if (ch == '+' || ch == '-') {

   364                 putChar(ch);

   365                 scanChar();

   366             }

   367             skipIllegalUnderscores();

   368             if ('0' <= ch && ch <= '9') {

   369                 scanDigits(10);

   370                 if (!allowHexFloats) {

   371                     lexError("unsupported.fp.lit", source.name);

   372                     allowHexFloats = true;

   373                 }

   374                 else if (!hexFloatsWork)

   375                     lexError("unsupported.cross.fp.lit");

   376             } else

   377                 lexError("malformed.fp.lit");

   378         } else {

   379             lexError("malformed.fp.lit");

   380         }

   381         if (ch == 'f' || ch == 'F') {

   382             putChar(ch);

   383             scanChar();

   384             token = FLOATLITERAL;

   385         } else {

   386             if (ch == 'd' || ch == 'D') {

   387                 putChar(ch);

   388                 scanChar();

   389             }

   390             token = DOUBLELITERAL;

   391         }

   392     }

   394     /** Read fractional part of floating point number.

   395      */

   396     private void scanFraction() {

   397         skipIllegalUnderscores();

   398         if ('0' <= ch && ch <= '9') {

   399             scanDigits(10);

   400         }

   401         int sp1 = sp;

   402         if (ch == 'e' || ch == 'E') {

   403             putChar(ch);

   404             scanChar();

   405             skipIllegalUnderscores();

   406             if (ch == '+' || ch == '-') {

   407                 putChar(ch);

   408                 scanChar();

   409             }

   410             skipIllegalUnderscores();

   411             if ('0' <= ch && ch <= '9') {

   412                 scanDigits(10);

   413                 return;

   414             }

   415             lexError("malformed.fp.lit");

   416             sp = sp1;

   417         }

   418     }

   420     /** Read fractional part and 'd' or 'f' suffix of floating point number.

   421      */

   422     private void scanFractionAndSuffix() {

   423         this.radix = 10;

   424         scanFraction();

   425         if (ch == 'f' || ch == 'F') {

   426             putChar(ch);

   427             scanChar();

   428             token = FLOATLITERAL;

   429         } else {

   430             if (ch == 'd' || ch == 'D') {

   431                 putChar(ch);

   432                 scanChar();

   433             }

   434             token = DOUBLELITERAL;

   435         }

   436     }

   438     /** Read fractional part and 'd' or 'f' suffix of floating point number.

   439      */

   440     private void scanHexFractionAndSuffix(boolean seendigit) {

   441         this.radix = 16;

   442         Assert.check(ch == '.');

   443         putChar(ch);

   444         scanChar();

   445         skipIllegalUnderscores();

   446         if (digit(16) >= 0) {

   447             seendigit = true;

   448             scanDigits(16);

   449         }

   450         if (!seendigit)

   451             lexError("invalid.hex.number");

   452         else

   453             scanHexExponentAndSuffix();

   454     }

   456     private void skipIllegalUnderscores() {

   457         if (ch == '_') {

   458             lexError(bp, "illegal.underscore");

   459             while (ch == '_')

   460                 scanChar();

   461         }

   462     }

   464     /** Read a number.

   465      *  @param radix  The radix of the number; one of 2, j8, 10, 16.

   466      */

   467     private void scanNumber(int radix) {

   468         this.radix = radix;

   469         // for octal, allow base-10 digit in case it's a float literal

   470         int digitRadix = (radix == 8 ? 10 : radix);

   471         boolean seendigit = false;

   472         if (digit(digitRadix) >= 0) {

   473             seendigit = true;

   474             scanDigits(digitRadix);

   475         }

   476         if (radix == 16 && ch == '.') {

   477             scanHexFractionAndSuffix(seendigit);

   478         } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {

   479             scanHexExponentAndSuffix();

   480         } else if (digitRadix == 10 && ch == '.') {

   481             putChar(ch);

   482             scanChar();

   483             scanFractionAndSuffix();

   484         } else if (digitRadix == 10 &&

   485                    (ch == 'e' || ch == 'E' ||

   486                     ch == 'f' || ch == 'F' ||

   487                     ch == 'd' || ch == 'D')) {

   488             scanFractionAndSuffix();

   489         } else {

   490             if (ch == 'l' || ch == 'L') {

   491                 scanChar();

   492                 token = LONGLITERAL;

   493             } else {

   494                 token = INTLITERAL;

   495             }

   496         }

   497     }

   499     /** Read an identifier.

   500      */

   501     private void scanIdent() {

   502         boolean isJavaIdentifierPart;

   503         char high;

   504         do {

   505             if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;

   506             // optimization, was: putChar(ch);

   508             scanChar();

   509             switch (ch) {

   510             case 'A': case 'B': case 'C': case 'D': case 'E':

   511             case 'F': case 'G': case 'H': case 'I': case 'J':

   512             case 'K': case 'L': case 'M': case 'N': case 'O':

   513             case 'P': case 'Q': case 'R': case 'S': case 'T':

   514             case 'U': case 'V': case 'W': case 'X': case 'Y':

   515             case 'Z':

   516             case 'a': case 'b': case 'c': case 'd': case 'e':

   517             case 'f': case 'g': case 'h': case 'i': case 'j':

   518             case 'k': case 'l': case 'm': case 'n': case 'o':

   519             case 'p': case 'q': case 'r': case 's': case 't':

   520             case 'u': case 'v': case 'w': case 'x': case 'y':

   521             case 'z':

   522             case '$': case '_':

   523             case '0': case '1': case '2': case '3': case '4':

   524             case '5': case '6': case '7': case '8': case '9':

   525             case '\u0000': case '\u0001': case '\u0002': case '\u0003':

   526             case '\u0004': case '\u0005': case '\u0006': case '\u0007':

   527             case '\u0008': case '\u000E': case '\u000F': case '\u0010':

   528             case '\u0011': case '\u0012': case '\u0013': case '\u0014':

   529             case '\u0015': case '\u0016': case '\u0017':

   530             case '\u0018': case '\u0019': case '\u001B':

   531             case '\u007F':

   532                 break;

   533             case '\u001A': // EOI is also a legal identifier part

   534                 if (bp >= buflen) {

   535                     name = names.fromChars(sbuf, 0, sp);

   536                     token = keywords.key(name);

   537                     return;

   538                 }

   539                 break;

   540             default:

   541                 if (ch < '\u0080') {

   542                     // all ASCII range chars already handled, above

   543                     isJavaIdentifierPart = false;

   544                 } else {

   545                     high = scanSurrogates();

   546                     if (high != 0) {

   547                         if (sp == sbuf.length) {

   548                             putChar(high);

   549                         } else {

   550                             sbuf[sp++] = high;

   551                         }

   552                         isJavaIdentifierPart = Character.isJavaIdentifierPart(

   553                             Character.toCodePoint(high, ch));

   554                     } else {

   555                         isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);

   556                     }

   557                 }

   558                 if (!isJavaIdentifierPart) {

   559                     name = names.fromChars(sbuf, 0, sp);

   560                     token = keywords.key(name);

   561                     return;

   562                 }

   563             }

   564         } while (true);

   565     }

   567     /** Are surrogates supported?

   568      */

   569     final static boolean surrogatesSupported = surrogatesSupported();

   570     private static boolean surrogatesSupported() {

   571         try {

   572             Character.isHighSurrogate('a');

   573             return true;

   574         } catch (NoSuchMethodError ex) {

   575             return false;

   576         }

   577     }

   579     /** Scan surrogate pairs.  If 'ch' is a high surrogate and

   580      *  the next character is a low surrogate, then put the low

   581      *  surrogate in 'ch', and return the high surrogate.

   582      *  otherwise, just return 0.

   583      */

   584     private char scanSurrogates() {

   585         if (surrogatesSupported && Character.isHighSurrogate(ch)) {

   586             char high = ch;

   588             scanChar();

   590             if (Character.isLowSurrogate(ch)) {

   591                 return high;

   592             }

   594             ch = high;

   595         }

   597         return 0;

   598     }

   600     /** Return true if ch can be part of an operator.

   601      */

   602     private boolean isSpecial(char ch) {

   603         switch (ch) {

   604         case '!': case '%': case '&': case '*': case '?':

   605         case '+': case '-': case ':': case '<': case '=':

   606         case '>': case '^': case '|': case '~':

   607         case '@':

   608             return true;

   609         default:

   610             return false;

   611         }

   612     }

   614     /** Read longest possible sequence of special characters and convert

   615      *  to token.

   616      */

   617     private void scanOperator() {

   618         while (true) {

   619             putChar(ch);

   620             Name newname = names.fromChars(sbuf, 0, sp);

   621             if (keywords.key(newname) == IDENTIFIER) {

   622                 sp--;

   623                 break;

   624             }

   625             name = newname;

   626             token = keywords.key(newname);

   627             scanChar();

   628             if (!isSpecial(ch)) break;

   629         }

   630     }

   632     /**

   633      * Scan a documention comment; determine if a deprecated tag is present.

   634      * Called once the initial /, * have been skipped, positioned at the second *

   635      * (which is treated as the beginning of the first line).

   636      * Stops positioned at the closing '/'.

   637      */

   638     @SuppressWarnings("fallthrough")

   639     private void scanDocComment() {

   640         boolean deprecatedPrefix = false;

   642         forEachLine:

   643         while (bp < buflen) {

   645             // Skip optional WhiteSpace at beginning of line

   646             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {

   647                 scanCommentChar();

   648             }

   650             // Skip optional consecutive Stars

   651             while (bp < buflen && ch == '*') {

   652                 scanCommentChar();

   653                 if (ch == '/') {

   654                     return;

   655                 }

   656             }

   658             // Skip optional WhiteSpace after Stars

   659             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {

   660                 scanCommentChar();

   661             }

   663             deprecatedPrefix = false;

   664             // At beginning of line in the JavaDoc sense.

   665             if (bp < buflen && ch == '@' && !deprecatedFlag) {

   666                 scanCommentChar();

   667                 if (bp < buflen && ch == 'd') {

   668                     scanCommentChar();

   669                     if (bp < buflen && ch == 'e') {

   670                         scanCommentChar();

   671                         if (bp < buflen && ch == 'p') {

   672                             scanCommentChar();

   673                             if (bp < buflen && ch == 'r') {

   674                                 scanCommentChar();

   675                                 if (bp < buflen && ch == 'e') {

   676                                     scanCommentChar();

   677                                     if (bp < buflen && ch == 'c') {

   678                                         scanCommentChar();

   679                                         if (bp < buflen && ch == 'a') {

   680                                             scanCommentChar();

   681                                             if (bp < buflen && ch == 't') {

   682                                                 scanCommentChar();

   683                                                 if (bp < buflen && ch == 'e') {

   684                                                     scanCommentChar();

   685                                                     if (bp < buflen && ch == 'd') {

   686                                                         deprecatedPrefix = true;

   687                                                         scanCommentChar();

   688                                                     }}}}}}}}}}}

   689             if (deprecatedPrefix && bp < buflen) {

   690                 if (Character.isWhitespace(ch)) {

   691                     deprecatedFlag = true;

   692                 } else if (ch == '*') {

   693                     scanCommentChar();

   694                     if (ch == '/') {

   695                         deprecatedFlag = true;

   696                         return;

   697                     }

   698                 }

   699             }

   701             // Skip rest of line

   702             while (bp < buflen) {

   703                 switch (ch) {

   704                 case '*':

   705                     scanCommentChar();

   706                     if (ch == '/') {

   707                         return;

   708                     }

   709                     break;

   710                 case CR: // (Spec 3.4)

   711                     scanCommentChar();

   712                     if (ch != LF) {

   713                         continue forEachLine;

   714                     }

   715                     /* fall through to LF case */

   716                 case LF: // (Spec 3.4)

   717                     scanCommentChar();

   718                     continue forEachLine;

   719                 default:

   720                     scanCommentChar();

   721                 }

   722             } // rest of line

   723         } // forEachLine

   724         return;

   725     }

   727     /** The value of a literal token, recorded as a string.

   728      *  For integers, leading 0x and 'l' suffixes are suppressed.

   729      */

   730     public String stringVal() {

   731         return new String(sbuf, 0, sp);

   732     }

   734     /** Read token.

   735      */

   736     public void nextToken() {

   738         try {

   739             prevEndPos = endPos;

   740             sp = 0;

   742             while (true) {

   743                 pos = bp;

   744                 switch (ch) {

   745                 case ' ': // (Spec 3.6)

   746                 case '\t': // (Spec 3.6)

   747                 case FF: // (Spec 3.6)

   748                     do {

   749                         scanChar();

   750                     } while (ch == ' ' || ch == '\t' || ch == FF);

   751                     endPos = bp;

   752                     processWhiteSpace();

   753                     break;

   754                 case LF: // (Spec 3.4)

   755                     scanChar();

   756                     endPos = bp;

   757                     processLineTerminator();

   758                     break;

   759                 case CR: // (Spec 3.4)

   760                     scanChar();

   761                     if (ch == LF) {

   762                         scanChar();

   763                     }

   764                     endPos = bp;

   765                     processLineTerminator();

   766                     break;

   767                 case 'A': case 'B': case 'C': case 'D': case 'E':

   768                 case 'F': case 'G': case 'H': case 'I': case 'J':

   769                 case 'K': case 'L': case 'M': case 'N': case 'O':

   770                 case 'P': case 'Q': case 'R': case 'S': case 'T':

   771                 case 'U': case 'V': case 'W': case 'X': case 'Y':

   772                 case 'Z':

   773                 case 'a': case 'b': case 'c': case 'd': case 'e':

   774                 case 'f': case 'g': case 'h': case 'i': case 'j':

   775                 case 'k': case 'l': case 'm': case 'n': case 'o':

   776                 case 'p': case 'q': case 'r': case 's': case 't':

   777                 case 'u': case 'v': case 'w': case 'x': case 'y':

   778                 case 'z':

   779                 case '$': case '_':

   780                     scanIdent();

   781                     return;

   782                 case '0':

   783                     scanChar();

   784                     if (ch == 'x' || ch == 'X') {

   785                         scanChar();

   786                         skipIllegalUnderscores();

   787                         if (ch == '.') {

   788                             scanHexFractionAndSuffix(false);

   789                         } else if (digit(16) < 0) {

   790                             lexError("invalid.hex.number");

   791                         } else {

   792                             scanNumber(16);

   793                         }

   794                     } else if (ch == 'b' || ch == 'B') {

   795                         if (!allowBinaryLiterals) {

   796                             lexError("unsupported.binary.lit", source.name);

   797                             allowBinaryLiterals = true;

   798                         }

   799                         scanChar();

   800                         skipIllegalUnderscores();

   801                         if (digit(2) < 0) {

   802                             lexError("invalid.binary.number");

   803                         } else {

   804                             scanNumber(2);

   805                         }

   806                     } else {

   807                         putChar('0');

   808                         if (ch == '_') {

   809                             int savePos = bp;

   810                             do {

   811                                 scanChar();

   812                             } while (ch == '_');

   813                             if (digit(10) < 0) {

   814                                 lexError(savePos, "illegal.underscore");

   815                             }

   816                         }

   817                         scanNumber(8);

   818                     }

   819                     return;

   820                 case '1': case '2': case '3': case '4':

   821                 case '5': case '6': case '7': case '8': case '9':

   822                     scanNumber(10);

   823                     return;

   824                 case '.':

   825                     scanChar();

   826                     if ('0' <= ch && ch <= '9') {

   827                         putChar('.');

   828                         scanFractionAndSuffix();

   829                     } else if (ch == '.') {

   830                         putChar('.'); putChar('.');

   831                         scanChar();

   832                         if (ch == '.') {

   833                             scanChar();

   834                             putChar('.');

   835                             token = ELLIPSIS;

   836                         } else {

   837                             lexError("malformed.fp.lit");

   838                         }

   839                     } else {

   840                         token = DOT;

   841                     }

   842                     return;

   843                 case ',':

   844                     scanChar(); token = COMMA; return;

   845                 case ';':

   846                     scanChar(); token = SEMI; return;

   847                 case '(':

   848                     scanChar(); token = LPAREN; return;

   849                 case ')':

   850                     scanChar(); token = RPAREN; return;

   851                 case '[':

   852                     scanChar(); token = LBRACKET; return;

   853                 case ']':

   854                     scanChar(); token = RBRACKET; return;

   855                 case '{':

   856                     scanChar(); token = LBRACE; return;

   857                 case '}':

   858                     scanChar(); token = RBRACE; return;

   859                 case '/':

   860                     scanChar();

   861                     if (ch == '/') {

   862                         do {

   863                             scanCommentChar();

   864                         } while (ch != CR && ch != LF && bp < buflen);

   865                         if (bp < buflen) {

   866                             endPos = bp;

   867                             processComment(CommentStyle.LINE);

   868                         }

   869                         break;

   870                     } else if (ch == '*') {

   871                         scanChar();

   872                         CommentStyle style;

   873                         if (ch == '*') {

   874                             style = CommentStyle.JAVADOC;

   875                             scanDocComment();

   876                         } else {

   877                             style = CommentStyle.BLOCK;

   878                             while (bp < buflen) {

   879                                 if (ch == '*') {

   880                                     scanChar();

   881                                     if (ch == '/') break;

   882                                 } else {

   883                                     scanCommentChar();

   884                                 }

   885                             }

   886                         }

   887                         if (ch == '/') {

   888                             scanChar();

   889                             endPos = bp;

   890                             processComment(style);

   891                             break;

   892                         } else {

   893                             lexError("unclosed.comment");

   894                             return;

   895                         }

   896                     } else if (ch == '=') {

   897                         name = names.slashequals;

   898                         token = SLASHEQ;

   899                         scanChar();

   900                     } else {

   901                         name = names.slash;

   902                         token = SLASH;

   903                     }

   904                     return;

   905                 case '\'':

   906                     scanChar();

   907                     if (ch == '\'') {

   908                         lexError("empty.char.lit");

   909                     } else {

   910                         if (ch == CR || ch == LF)

   911                             lexError(pos, "illegal.line.end.in.char.lit");

   912                         scanLitChar();

   913                         if (ch == '\'') {

   914                             scanChar();

   915                             token = CHARLITERAL;

   916                         } else {

   917                             lexError(pos, "unclosed.char.lit");

   918                         }

   919                     }

   920                     return;

   921                 case '\"':

   922                     scanChar();

   923                     while (ch != '\"' && ch != CR && ch != LF && bp < buflen)

   924                         scanLitChar();

   925                     if (ch == '\"') {

   926                         token = STRINGLITERAL;

   927                         scanChar();

   928                     } else {

   929                         lexError(pos, "unclosed.str.lit");

   930                     }

   931                     return;

   932                 default:

   933                     if (isSpecial(ch)) {

   934                         scanOperator();

   935                     } else {

   936                         boolean isJavaIdentifierStart;

   937                         if (ch < '\u0080') {

   938                             // all ASCII range chars already handled, above

   939                             isJavaIdentifierStart = false;

   940                         } else {

   941                             char high = scanSurrogates();

   942                             if (high != 0) {

   943                                 if (sp == sbuf.length) {

   944                                     putChar(high);

   945                                 } else {

   946                                     sbuf[sp++] = high;

   947                                 }

   949                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(

   950                                     Character.toCodePoint(high, ch));

   951                             } else {

   952                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);

   953                             }

   954                         }

   955                         if (isJavaIdentifierStart) {

   956                             scanIdent();

   957                         } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5

   958                             token = EOF;

   959                             pos = bp = eofPos;

   960                         } else {

   961                             lexError("illegal.char", String.valueOf((int)ch));

   962                             scanChar();

   963                         }

   964                     }

   965                     return;

   966                 }

   967             }

   968         } finally {

   969             endPos = bp;

   970             if (scannerDebug)

   971                 System.out.println("nextToken(" + pos

   972                                    + "," + endPos + ")=|" +

   973                                    new String(getRawCharacters(pos, endPos))

   974                                    + "|");

   975         }

   976     }

   978     /** Return the current token, set by nextToken().

   979      */

   980     public Token token() {

   981         return token;

   982     }

   984     /** Sets the current token.

   985      */

   986     public void token(Token token) {

   987         this.token = token;

   988     }

   990     /** Return the current token's position: a 0-based

   991      *  offset from beginning of the raw input stream

   992      *  (before unicode translation)

   993      */

   994     public int pos() {

   995         return pos;

   996     }

   998     /** Return the last character position of the current token.

   999      */

  1000     public int endPos() {

  1001         return endPos;

  1002     }

  1004     /** Return the last character position of the previous token.

  1005      */

  1006     public int prevEndPos() {

  1007         return prevEndPos;

  1008     }

  1010     /** Return the position where a lexical error occurred;

  1011      */

  1012     public int errPos() {

  1013         return errPos;

  1014     }

  1016     /** Set the position where a lexical error occurred;

  1017      */

  1018     public void errPos(int pos) {

  1019         errPos = pos;

  1020     }

  1022     /** Return the name of an identifier or token for the current token.

  1023      */

  1024     public Name name() {

  1025         return name;

  1026     }

  1028     /** Return the radix of a numeric literal token.

  1029      */

  1030     public int radix() {

  1031         return radix;

  1032     }

  1034     /** Has a @deprecated been encountered in last doc comment?

  1035      *  This needs to be reset by client with resetDeprecatedFlag.

  1036      */

  1037     public boolean deprecatedFlag() {

  1038         return deprecatedFlag;

  1039     }

  1041     public void resetDeprecatedFlag() {

  1042         deprecatedFlag = false;

  1043     }

  1045     /**

  1046      * Returns the documentation string of the current token.

  1047      */

  1048     public String docComment() {

  1049         return null;

  1050     }

  1052     /**

  1053      * Returns a copy of the input buffer, up to its inputLength.

  1054      * Unicode escape sequences are not translated.

  1055      */

  1056     public char[] getRawCharacters() {

  1057         char[] chars = new char[buflen];

  1058         System.arraycopy(buf, 0, chars, 0, buflen);

  1059         return chars;

  1060     }

  1062     /**

  1063      * Returns a copy of a character array subset of the input buffer.

  1064      * The returned array begins at the <code>beginIndex</code> and

  1065      * extends to the character at index <code>endIndex - 1</code>.

  1066      * Thus the length of the substring is <code>endIndex-beginIndex</code>.

  1067      * This behavior is like

  1068      * <code>String.substring(beginIndex, endIndex)</code>.

  1069      * Unicode escape sequences are not translated.

  1070      *

  1071      * @param beginIndex the beginning index, inclusive.

  1072      * @param endIndex the ending index, exclusive.

  1073      * @throws IndexOutOfBounds if either offset is outside of the

  1074      *         array bounds

  1075      */

  1076     public char[] getRawCharacters(int beginIndex, int endIndex) {

  1077         int length = endIndex - beginIndex;

  1078         char[] chars = new char[length];

  1079         System.arraycopy(buf, beginIndex, chars, 0, length);

  1080         return chars;

  1081     }

  1083     public enum CommentStyle {

  1084         LINE,

  1085         BLOCK,

  1086         JAVADOC,

  1087     }

  1089     /**

  1090      * Called when a complete comment has been scanned. pos and endPos

  1091      * will mark the comment boundary.

  1092      */

  1093     protected void processComment(CommentStyle style) {

  1094         if (scannerDebug)

  1095             System.out.println("processComment(" + pos

  1096                                + "," + endPos + "," + style + ")=|"

  1097                                + new String(getRawCharacters(pos, endPos))

  1098                                + "|");

  1099     }

  1101     /**

  1102      * Called when a complete whitespace run has been scanned. pos and endPos

  1103      * will mark the whitespace boundary.

  1104      */

  1105     protected void processWhiteSpace() {

  1106         if (scannerDebug)

  1107             System.out.println("processWhitespace(" + pos

  1108                                + "," + endPos + ")=|" +

  1109                                new String(getRawCharacters(pos, endPos))

  1110                                + "|");

  1111     }

  1113     /**

  1114      * Called when a line terminator has been processed.

  1115      */

  1116     protected void processLineTerminator() {

  1117         if (scannerDebug)

  1118             System.out.println("processTerminator(" + pos

  1119                                + "," + endPos + ")=|" +

  1120                                new String(getRawCharacters(pos, endPos))

  1121                                + "|");

  1122     }

  1124     /** Build a map for translating between line numbers and

  1125      * positions in the input.

  1126      *

  1127      * @return a LineMap */

  1128     public Position.LineMap getLineMap() {

  1129         return Position.makeLineMap(buf, buflen, false);

  1130     }

  1132 }

Mercurial > jdk8-mips64-public > langtools / file revision

src/share/classes/com/sun/tools/javac/parser/Scanner.java@8cc5b440fdde

src/share/classes/com/sun/tools/javac/parser/Scanner.java