jdk8-mips64-public/langtools: src/share/classes/com/sun/tools/javac/parser/Scanner.java@e2722bd43f3a

6829189: Java programming with JSR 292 needs language support
Summary: Language changes documented in http://wikis.sun.com/display/mlvm/ProjectCoinProposal
Reviewed-by: jjg, darcy, mcimadamore

     1 /*

     2  * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Sun designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Sun in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,

    22  * CA 95054 USA or visit www.sun.com if you need additional information or

    23  * have any questions.

    24  */

    26 package com.sun.tools.javac.parser;

    28 import java.nio.*;

    30 import com.sun.tools.javac.code.Source;

    31 import com.sun.tools.javac.file.JavacFileManager;

    32 import com.sun.tools.javac.util.*;

    35 import static com.sun.tools.javac.parser.Token.*;

    36 import static com.sun.tools.javac.util.LayoutCharacters.*;

    38 /** The lexical analyzer maps an input stream consisting of

    39  *  ASCII characters and Unicode escapes into a token sequence.

    40  *

    41  *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If

    42  *  you write code that depends on this, you do so at your own risk.

    43  *  This code and its internal interfaces are subject to change or

    44  *  deletion without notice.</b>

    45  */

    46 public class Scanner implements Lexer {

    48     private static boolean scannerDebug = false;

    50     /** A factory for creating scanners. */

    51     public static class Factory {

    52         /** The context key for the scanner factory. */

    53         public static final Context.Key<Scanner.Factory> scannerFactoryKey =

    54             new Context.Key<Scanner.Factory>();

    56         /** Get the Factory instance for this context. */

    57         public static Factory instance(Context context) {

    58             Factory instance = context.get(scannerFactoryKey);

    59             if (instance == null)

    60                 instance = new Factory(context);

    61             return instance;

    62         }

    64         final Log log;

    65         final Names names;

    66         final Source source;

    67         final Keywords keywords;

    69         /** Create a new scanner factory. */

    70         protected Factory(Context context) {

    71             context.put(scannerFactoryKey, this);

    72             this.log = Log.instance(context);

    73             this.names = Names.instance(context);

    74             this.source = Source.instance(context);

    75             this.keywords = Keywords.instance(context);

    76         }

    78         public Scanner newScanner(CharSequence input) {

    79             if (input instanceof CharBuffer) {

    80                 return new Scanner(this, (CharBuffer)input);

    81             } else {

    82                 char[] array = input.toString().toCharArray();

    83                 return newScanner(array, array.length);

    84             }

    85         }

    87         public Scanner newScanner(char[] input, int inputLength) {

    88             return new Scanner(this, input, inputLength);

    89         }

    90     }

    92     /* Output variables; set by nextToken():

    93      */

    95     /** The token, set by nextToken().

    96      */

    97     private Token token;

    99     /** Allow hex floating-point literals.

   100      */

   101     private boolean allowHexFloats;

   103     /** The token's position, 0-based offset from beginning of text.

   104      */

   105     private int pos;

   107     /** Character position just after the last character of the token.

   108      */

   109     private int endPos;

   111     /** The last character position of the previous token.

   112      */

   113     private int prevEndPos;

   115     /** The position where a lexical error occurred;

   116      */

   117     private int errPos = Position.NOPOS;

   119     /** The name of an identifier or token:

   120      */

   121     private Name name;

   123     /** The radix of a numeric literal token.

   124      */

   125     private int radix;

   127     /** Has a @deprecated been encountered in last doc comment?

   128      *  this needs to be reset by client.

   129      */

   130     protected boolean deprecatedFlag = false;

   132     /** A character buffer for literals.

   133      */

   134     private char[] sbuf = new char[128];

   135     private int sp;

   137     /** The input buffer, index of next character to be read,

   138      *  index of one past last character in buffer.

   139      */

   140     private char[] buf;

   141     private int bp;

   142     private int buflen;

   143     private int eofPos;

   145     /** The current character.

   146      */

   147     private char ch;

   149     /** The buffer index of the last converted unicode character

   150      */

   151     private int unicodeConversionBp = -1;

   153     /** The log to be used for error reporting.

   154      */

   155     private final Log log;

   157     /** The name table. */

   158     private final Names names;

   160     /** The keyword table. */

   161     private final Keywords keywords;

   163     /** Common code for constructors. */

   164     private Scanner(Factory fac) {

   165         this.log = fac.log;

   166         this.names = fac.names;

   167         this.keywords = fac.keywords;

   168         this.allowHexFloats = fac.source.allowHexFloats();

   169     }

   171     private static final boolean hexFloatsWork = hexFloatsWork();

   172     private static boolean hexFloatsWork() {

   173         try {

   174             Float.valueOf("0x1.0p1");

   175             return true;

   176         } catch (NumberFormatException ex) {

   177             return false;

   178         }

   179     }

   181     /** Create a scanner from the input buffer.  buffer must implement

   182      *  array() and compact(), and remaining() must be less than limit().

   183      */

   184     protected Scanner(Factory fac, CharBuffer buffer) {

   185         this(fac, JavacFileManager.toArray(buffer), buffer.limit());

   186     }

   188     /**

   189      * Create a scanner from the input array.  This method might

   190      * modify the array.  To avoid copying the input array, ensure

   191      * that {@code inputLength < input.length} or

   192      * {@code input[input.length -1]} is a white space character.

   193      *

   194      * @param fac the factory which created this Scanner

   195      * @param input the input, might be modified

   196      * @param inputLength the size of the input.

   197      * Must be positive and less than or equal to input.length.

   198      */

   199     protected Scanner(Factory fac, char[] input, int inputLength) {

   200         this(fac);

   201         eofPos = inputLength;

   202         if (inputLength == input.length) {

   203             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {

   204                 inputLength--;

   205             } else {

   206                 char[] newInput = new char[inputLength + 1];

   207                 System.arraycopy(input, 0, newInput, 0, input.length);

   208                 input = newInput;

   209             }

   210         }

   211         buf = input;

   212         buflen = inputLength;

   213         buf[buflen] = EOI;

   214         bp = -1;

   215         scanChar();

   216     }

   218     /** Report an error at the given position using the provided arguments.

   219      */

   220     private void lexError(int pos, String key, Object... args) {

   221         log.error(pos, key, args);

   222         token = ERROR;

   223         errPos = pos;

   224     }

   226     /** Report an error at the current token position using the provided

   227      *  arguments.

   228      */

   229     private void lexError(String key, Object... args) {

   230         lexError(pos, key, args);

   231     }

   233     /** Convert an ASCII digit from its base (8, 10, or 16)

   234      *  to its value.

   235      */

   236     private int digit(int base) {

   237         char c = ch;

   238         int result = Character.digit(c, base);

   239         if (result >= 0 && c > 0x7f) {

   240             lexError(pos+1, "illegal.nonascii.digit");

   241             ch = "0123456789abcdef".charAt(result);

   242         }

   243         return result;

   244     }

   246     /** Convert unicode escape; bp points to initial '\' character

   247      *  (Spec 3.3).

   248      */

   249     private void convertUnicode() {

   250         if (ch == '\\' && unicodeConversionBp != bp) {

   251             bp++; ch = buf[bp];

   252             if (ch == 'u') {

   253                 do {

   254                     bp++; ch = buf[bp];

   255                 } while (ch == 'u');

   256                 int limit = bp + 3;

   257                 if (limit < buflen) {

   258                     int d = digit(16);

   259                     int code = d;

   260                     while (bp < limit && d >= 0) {

   261                         bp++; ch = buf[bp];

   262                         d = digit(16);

   263                         code = (code << 4) + d;

   264                     }

   265                     if (d >= 0) {

   266                         ch = (char)code;

   267                         unicodeConversionBp = bp;

   268                         return;

   269                     }

   270                 }

   271                 lexError(bp, "illegal.unicode.esc");

   272             } else {

   273                 bp--;

   274                 ch = '\\';

   275             }

   276         }

   277     }

   279     /** Read next character.

   280      */

   281     private void scanChar() {

   282         ch = buf[++bp];

   283         if (ch == '\\') {

   284             convertUnicode();

   285         }

   286     }

   288     /** Read next character in comment, skipping over double '\' characters.

   289      */

   290     private void scanCommentChar() {

   291         scanChar();

   292         if (ch == '\\') {

   293             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {

   294                 bp++;

   295             } else {

   296                 convertUnicode();

   297             }

   298         }

   299     }

   301     /** Append a character to sbuf.

   302      */

   303     private void putChar(char ch) {

   304         if (sp == sbuf.length) {

   305             char[] newsbuf = new char[sbuf.length * 2];

   306             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);

   307             sbuf = newsbuf;

   308         }

   309         sbuf[sp++] = ch;

   310     }

   312     /** For debugging purposes: print character.

   313      */

   314     private void dch() {

   315         System.err.print(ch); System.out.flush();

   316     }

   318     /** Read next character in character or string literal and copy into sbuf.

   319      */

   320     private void scanLitChar(boolean forBytecodeName) {

   321         if (ch == '\\') {

   322             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {

   323                 bp++;

   324                 putChar('\\');

   325                 scanChar();

   326             } else {

   327                 scanChar();

   328                 switch (ch) {

   329                 case '0': case '1': case '2': case '3':

   330                 case '4': case '5': case '6': case '7':

   331                     char leadch = ch;

   332                     int oct = digit(8);

   333                     scanChar();

   334                     if ('0' <= ch && ch <= '7') {

   335                         oct = oct * 8 + digit(8);

   336                         scanChar();

   337                         if (leadch <= '3' && '0' <= ch && ch <= '7') {

   338                             oct = oct * 8 + digit(8);

   339                             scanChar();

   340                         }

   341                     }

   342                     putChar((char)oct);

   343                     break;

   344                 case 'b':

   345                     putChar('\b'); scanChar(); break;

   346                 case 't':

   347                     putChar('\t'); scanChar(); break;

   348                 case 'n':

   349                     putChar('\n'); scanChar(); break;

   350                 case 'f':

   351                     putChar('\f'); scanChar(); break;

   352                 case 'r':

   353                     putChar('\r'); scanChar(); break;

   354                 case '\'':

   355                     putChar('\''); scanChar(); break;

   356                 case '\"':

   357                     putChar('\"'); scanChar(); break;

   358                 case '\\':

   359                     putChar('\\'); scanChar(); break;

   360                 case '|': case ',': case '?': case '%':

   361                 case '^': case '_': case '{': case '}':

   362                 case '!': case '-': case '=':

   363                     if (forBytecodeName) {

   364                         // Accept escape sequences for dangerous bytecode chars.

   365                         // This is illegal in normal Java string or character literals.

   366                         // Note that the escape sequence itself is passed through.

   367                         putChar('\\'); putChar(ch); scanChar();

   368                     } else {

   369                         lexError(bp, "illegal.esc.char");

   370                     }

   371                     break;

   372                 default:

   373                     lexError(bp, "illegal.esc.char");

   374                 }

   375             }

   376         } else if (bp != buflen) {

   377             putChar(ch); scanChar();

   378         }

   379     }

   380     private void scanLitChar() {

   381         scanLitChar(false);

   382     }

   384     /** Read next character in an exotic name #"foo"

   385      */

   386     private void scanBytecodeNameChar() {

   387         switch (ch) {

   388         // reject any "dangerous" char which is illegal somewhere in the JVM spec

   389         // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm

   390         case '/': case '.': case ';':  // illegal everywhere

   391         case '<': case '>':  // illegal in methods, dangerous in classes

   392         case '[':  // illegal in classes

   393             lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch));

   394             break;

   395         }

   396         scanLitChar(true);

   397     }

   399     /** Read fractional part of hexadecimal floating point number.

   400      */

   401     private void scanHexExponentAndSuffix() {

   402         if (ch == 'p' || ch == 'P') {

   403             putChar(ch);

   404             scanChar();

   405             if (ch == '+' || ch == '-') {

   406                 putChar(ch);

   407                 scanChar();

   408             }

   409             if ('0' <= ch && ch <= '9') {

   410                 do {

   411                     putChar(ch);

   412                     scanChar();

   413                 } while ('0' <= ch && ch <= '9');

   414                 if (!allowHexFloats) {

   415                     lexError("unsupported.fp.lit");

   416                     allowHexFloats = true;

   417                 }

   418                 else if (!hexFloatsWork)

   419                     lexError("unsupported.cross.fp.lit");

   420             } else

   421                 lexError("malformed.fp.lit");

   422         } else {

   423             lexError("malformed.fp.lit");

   424         }

   425         if (ch == 'f' || ch == 'F') {

   426             putChar(ch);

   427             scanChar();

   428             token = FLOATLITERAL;

   429         } else {

   430             if (ch == 'd' || ch == 'D') {

   431                 putChar(ch);

   432                 scanChar();

   433             }

   434             token = DOUBLELITERAL;

   435         }

   436     }

   438     /** Read fractional part of floating point number.

   439      */

   440     private void scanFraction() {

   441         while (digit(10) >= 0) {

   442             putChar(ch);

   443             scanChar();

   444         }

   445         int sp1 = sp;

   446         if (ch == 'e' || ch == 'E') {

   447             putChar(ch);

   448             scanChar();

   449             if (ch == '+' || ch == '-') {

   450                 putChar(ch);

   451                 scanChar();

   452             }

   453             if ('0' <= ch && ch <= '9') {

   454                 do {

   455                     putChar(ch);

   456                     scanChar();

   457                 } while ('0' <= ch && ch <= '9');

   458                 return;

   459             }

   460             lexError("malformed.fp.lit");

   461             sp = sp1;

   462         }

   463     }

   465     /** Read fractional part and 'd' or 'f' suffix of floating point number.

   466      */

   467     private void scanFractionAndSuffix() {

   468         this.radix = 10;

   469         scanFraction();

   470         if (ch == 'f' || ch == 'F') {

   471             putChar(ch);

   472             scanChar();

   473             token = FLOATLITERAL;

   474         } else {

   475             if (ch == 'd' || ch == 'D') {

   476                 putChar(ch);

   477                 scanChar();

   478             }

   479             token = DOUBLELITERAL;

   480         }

   481     }

   483     /** Read fractional part and 'd' or 'f' suffix of floating point number.

   484      */

   485     private void scanHexFractionAndSuffix(boolean seendigit) {

   486         this.radix = 16;

   487         assert ch == '.';

   488         putChar(ch);

   489         scanChar();

   490         while (digit(16) >= 0) {

   491             seendigit = true;

   492             putChar(ch);

   493             scanChar();

   494         }

   495         if (!seendigit)

   496             lexError("invalid.hex.number");

   497         else

   498             scanHexExponentAndSuffix();

   499     }

   501     /** Read a number.

   502      *  @param radix  The radix of the number; one of 8, 10, 16.

   503      */

   504     private void scanNumber(int radix) {

   505         this.radix = radix;

   506         // for octal, allow base-10 digit in case it's a float literal

   507         int digitRadix = (radix <= 10) ? 10 : 16;

   508         boolean seendigit = false;

   509         while (digit(digitRadix) >= 0) {

   510             seendigit = true;

   511             putChar(ch);

   512             scanChar();

   513         }

   514         if (radix == 16 && ch == '.') {

   515             scanHexFractionAndSuffix(seendigit);

   516         } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {

   517             scanHexExponentAndSuffix();

   518         } else if (radix <= 10 && ch == '.') {

   519             putChar(ch);

   520             scanChar();

   521             scanFractionAndSuffix();

   522         } else if (radix <= 10 &&

   523                    (ch == 'e' || ch == 'E' ||

   524                     ch == 'f' || ch == 'F' ||

   525                     ch == 'd' || ch == 'D')) {

   526             scanFractionAndSuffix();

   527         } else {

   528             if (ch == 'l' || ch == 'L') {

   529                 scanChar();

   530                 token = LONGLITERAL;

   531             } else {

   532                 token = INTLITERAL;

   533             }

   534         }

   535     }

   537     /** Read an identifier.

   538      */

   539     private void scanIdent() {

   540         boolean isJavaIdentifierPart;

   541         char high;

   542         do {

   543             if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;

   544             // optimization, was: putChar(ch);

   546             scanChar();

   547             switch (ch) {

   548             case 'A': case 'B': case 'C': case 'D': case 'E':

   549             case 'F': case 'G': case 'H': case 'I': case 'J':

   550             case 'K': case 'L': case 'M': case 'N': case 'O':

   551             case 'P': case 'Q': case 'R': case 'S': case 'T':

   552             case 'U': case 'V': case 'W': case 'X': case 'Y':

   553             case 'Z':

   554             case 'a': case 'b': case 'c': case 'd': case 'e':

   555             case 'f': case 'g': case 'h': case 'i': case 'j':

   556             case 'k': case 'l': case 'm': case 'n': case 'o':

   557             case 'p': case 'q': case 'r': case 's': case 't':

   558             case 'u': case 'v': case 'w': case 'x': case 'y':

   559             case 'z':

   560             case '$': case '_':

   561             case '0': case '1': case '2': case '3': case '4':

   562             case '5': case '6': case '7': case '8': case '9':

   563             case '\u0000': case '\u0001': case '\u0002': case '\u0003':

   564             case '\u0004': case '\u0005': case '\u0006': case '\u0007':

   565             case '\u0008': case '\u000E': case '\u000F': case '\u0010':

   566             case '\u0011': case '\u0012': case '\u0013': case '\u0014':

   567             case '\u0015': case '\u0016': case '\u0017':

   568             case '\u0018': case '\u0019': case '\u001B':

   569             case '\u007F':

   570                 break;

   571             case '\u001A': // EOI is also a legal identifier part

   572                 if (bp >= buflen) {

   573                     name = names.fromChars(sbuf, 0, sp);

   574                     token = keywords.key(name);

   575                     return;

   576                 }

   577                 break;

   578             default:

   579                 if (ch < '\u0080') {

   580                     // all ASCII range chars already handled, above

   581                     isJavaIdentifierPart = false;

   582                 } else {

   583                     high = scanSurrogates();

   584                     if (high != 0) {

   585                         if (sp == sbuf.length) {

   586                             putChar(high);

   587                         } else {

   588                             sbuf[sp++] = high;

   589                         }

   590                         isJavaIdentifierPart = Character.isJavaIdentifierPart(

   591                             Character.toCodePoint(high, ch));

   592                     } else {

   593                         isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);

   594                     }

   595                 }

   596                 if (!isJavaIdentifierPart) {

   597                     name = names.fromChars(sbuf, 0, sp);

   598                     token = keywords.key(name);

   599                     return;

   600                 }

   601             }

   602         } while (true);

   603     }

   605     /** Are surrogates supported?

   606      */

   607     final static boolean surrogatesSupported = surrogatesSupported();

   608     private static boolean surrogatesSupported() {

   609         try {

   610             Character.isHighSurrogate('a');

   611             return true;

   612         } catch (NoSuchMethodError ex) {

   613             return false;

   614         }

   615     }

   617     /** Scan surrogate pairs.  If 'ch' is a high surrogate and

   618      *  the next character is a low surrogate, then put the low

   619      *  surrogate in 'ch', and return the high surrogate.

   620      *  otherwise, just return 0.

   621      */

   622     private char scanSurrogates() {

   623         if (surrogatesSupported && Character.isHighSurrogate(ch)) {

   624             char high = ch;

   626             scanChar();

   628             if (Character.isLowSurrogate(ch)) {

   629                 return high;

   630             }

   632             ch = high;

   633         }

   635         return 0;

   636     }

   638     /** Return true if ch can be part of an operator.

   639      */

   640     private boolean isSpecial(char ch) {

   641         switch (ch) {

   642         case '!': case '%': case '&': case '*': case '?':

   643         case '+': case '-': case ':': case '<': case '=':

   644         case '>': case '^': case '|': case '~':

   645         case '@':

   646             return true;

   647         default:

   648             return false;

   649         }

   650     }

   652     /** Read longest possible sequence of special characters and convert

   653      *  to token.

   654      */

   655     private void scanOperator() {

   656         while (true) {

   657             putChar(ch);

   658             Name newname = names.fromChars(sbuf, 0, sp);

   659             if (keywords.key(newname) == IDENTIFIER) {

   660                 sp--;

   661                 break;

   662             }

   663             name = newname;

   664             token = keywords.key(newname);

   665             scanChar();

   666             if (!isSpecial(ch)) break;

   667         }

   668     }

   670     /**

   671      * Scan a documention comment; determine if a deprecated tag is present.

   672      * Called once the initial /, * have been skipped, positioned at the second *

   673      * (which is treated as the beginning of the first line).

   674      * Stops positioned at the closing '/'.

   675      */

   676     @SuppressWarnings("fallthrough")

   677     private void scanDocComment() {

   678         boolean deprecatedPrefix = false;

   680         forEachLine:

   681         while (bp < buflen) {

   683             // Skip optional WhiteSpace at beginning of line

   684             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {

   685                 scanCommentChar();

   686             }

   688             // Skip optional consecutive Stars

   689             while (bp < buflen && ch == '*') {

   690                 scanCommentChar();

   691                 if (ch == '/') {

   692                     return;

   693                 }

   694             }

   696             // Skip optional WhiteSpace after Stars

   697             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {

   698                 scanCommentChar();

   699             }

   701             deprecatedPrefix = false;

   702             // At beginning of line in the JavaDoc sense.

   703             if (bp < buflen && ch == '@' && !deprecatedFlag) {

   704                 scanCommentChar();

   705                 if (bp < buflen && ch == 'd') {

   706                     scanCommentChar();

   707                     if (bp < buflen && ch == 'e') {

   708                         scanCommentChar();

   709                         if (bp < buflen && ch == 'p') {

   710                             scanCommentChar();

   711                             if (bp < buflen && ch == 'r') {

   712                                 scanCommentChar();

   713                                 if (bp < buflen && ch == 'e') {

   714                                     scanCommentChar();

   715                                     if (bp < buflen && ch == 'c') {

   716                                         scanCommentChar();

   717                                         if (bp < buflen && ch == 'a') {

   718                                             scanCommentChar();

   719                                             if (bp < buflen && ch == 't') {

   720                                                 scanCommentChar();

   721                                                 if (bp < buflen && ch == 'e') {

   722                                                     scanCommentChar();

   723                                                     if (bp < buflen && ch == 'd') {

   724                                                         deprecatedPrefix = true;

   725                                                         scanCommentChar();

   726                                                     }}}}}}}}}}}

   727             if (deprecatedPrefix && bp < buflen) {

   728                 if (Character.isWhitespace(ch)) {

   729                     deprecatedFlag = true;

   730                 } else if (ch == '*') {

   731                     scanCommentChar();

   732                     if (ch == '/') {

   733                         deprecatedFlag = true;

   734                         return;

   735                     }

   736                 }

   737             }

   739             // Skip rest of line

   740             while (bp < buflen) {

   741                 switch (ch) {

   742                 case '*':

   743                     scanCommentChar();

   744                     if (ch == '/') {

   745                         return;

   746                     }

   747                     break;

   748                 case CR: // (Spec 3.4)

   749                     scanCommentChar();

   750                     if (ch != LF) {

   751                         continue forEachLine;

   752                     }

   753                     /* fall through to LF case */

   754                 case LF: // (Spec 3.4)

   755                     scanCommentChar();

   756                     continue forEachLine;

   757                 default:

   758                     scanCommentChar();

   759                 }

   760             } // rest of line

   761         } // forEachLine

   762         return;

   763     }

   765     /** The value of a literal token, recorded as a string.

   766      *  For integers, leading 0x and 'l' suffixes are suppressed.

   767      */

   768     public String stringVal() {

   769         return new String(sbuf, 0, sp);

   770     }

   772     /** Read token.
            *  Skips whitespace, line terminators and comments, reporting each to
            *  processWhiteSpace(), processLineTerminator() or processComment(),
            *  then scans exactly one token starting at the current character
            *  <code>ch</code> and returns.
            *  <p>
            *  On return <code>pos</code> is the buffer index at which the token
            *  started, <code>endPos</code> is the buffer index reached after
            *  scanning it, and <code>prevEndPos</code> is the value
            *  <code>endPos</code> had when this method was entered.
            *  Lexical problems are reported through lexError().

   773      */

   774     public void nextToken() {

   776         try {

                   // Remember where the previous token ended before endPos is overwritten.
   777             prevEndPos = endPos;

                   // Restart writing into sbuf from index 0 for the new token.
   778             sp = 0;

                   // Loop until a token is produced: the whitespace, line-terminator
                   // and comment branches 'break' out of the switch and come back
                   // here; every token branch 'return's.
   780             while (true) {

                       // Candidate start position of the next token.
   781                 pos = bp;

   782                 switch (ch) {

   783                 case ' ': // (Spec 3.6)

   784                 case '\t': // (Spec 3.6)

   785                 case FF: // (Spec 3.6)

                           // Consume the whole run of blanks as one whitespace span.
   786                     do {

   787                         scanChar();

   788                     } while (ch == ' ' || ch == '\t' || ch == FF);

   789                     endPos = bp;

   790                     processWhiteSpace();

   791                     break;

   792                 case LF: // (Spec 3.4)

   793                     scanChar();

   794                     endPos = bp;

   795                     processLineTerminator();

   796                     break;

   797                 case CR: // (Spec 3.4)

   798                     scanChar();

                           // CR immediately followed by LF is reported as a single terminator.
   799                     if (ch == LF) {

   800                         scanChar();

   801                     }

   802                     endPos = bp;

   803                     processLineTerminator();

   804                     break;

   805                 case 'A': case 'B': case 'C': case 'D': case 'E':

   806                 case 'F': case 'G': case 'H': case 'I': case 'J':

   807                 case 'K': case 'L': case 'M': case 'N': case 'O':

   808                 case 'P': case 'Q': case 'R': case 'S': case 'T':

   809                 case 'U': case 'V': case 'W': case 'X': case 'Y':

   810                 case 'Z':

   811                 case 'a': case 'b': case 'c': case 'd': case 'e':

   812                 case 'f': case 'g': case 'h': case 'i': case 'j':

   813                 case 'k': case 'l': case 'm': case 'n': case 'o':

   814                 case 'p': case 'q': case 'r': case 's': case 't':

   815                 case 'u': case 'v': case 'w': case 'x': case 'y':

   816                 case 'z':

   817                 case '$': case '_':

                           // ASCII identifier start characters are handled here without
                           // calling Character.isJavaIdentifierStart.
   818                     scanIdent();

   819                     return;

   820                 case '0':

                           // A leading zero: "0x"/"0X" introduces a hex literal, anything
                           // else is scanned with radix 8.
   821                     scanChar();

   822                     if (ch == 'x' || ch == 'X') {

   823                         scanChar();

   824                         if (ch == '.') {

                                   // "0x." : hex literal that starts with its fraction part.
   825                             scanHexFractionAndSuffix(false);

   826                         } else if (digit(16) < 0) {

                                   // "0x" not followed by a hex digit.
   827                             lexError("invalid.hex.number");

   828                         } else {

   829                             scanNumber(16);

   830                         }

   831                     } else {

                               // The '0' was already consumed above, so it is recorded
                               // explicitly before the rest of the number is scanned.
   832                         putChar('0');

   833                         scanNumber(8);

   834                     }

   835                     return;

   836                 case '1': case '2': case '3': case '4':

   837                 case '5': case '6': case '7': case '8': case '9':

   838                     scanNumber(10);

   839                     return;

   840                 case '.':

                           // '.' can start a fraction (".5"), an ellipsis ("...") or be
                           // a plain DOT token.
   841                     scanChar();

   842                     if ('0' <= ch && ch <= '9') {

   843                         putChar('.');

   844                         scanFractionAndSuffix();

   845                     } else if (ch == '.') {

   846                         putChar('.'); putChar('.');

   847                         scanChar();

   848                         if (ch == '.') {

   849                             scanChar();

   850                             putChar('.');

   851                             token = ELLIPSIS;

   852                         } else {

                                   // Exactly two dots: reported with the malformed
                                   // floating-point literal message.
   853                             lexError("malformed.fp.lit");

   854                         }

   855                     } else {

   856                         token = DOT;

   857                     }

   858                     return;

   859                 case ',':

   860                     scanChar(); token = COMMA; return;

   861                 case ';':

   862                     scanChar(); token = SEMI; return;

   863                 case '(':

   864                     scanChar(); token = LPAREN; return;

   865                 case ')':

   866                     scanChar(); token = RPAREN; return;

   867                 case '[':

   868                     scanChar(); token = LBRACKET; return;

   869                 case ']':

   870                     scanChar(); token = RBRACKET; return;

   871                 case '{':

   872                     scanChar(); token = LBRACE; return;

   873                 case '}':

   874                     scanChar(); token = RBRACE; return;

   875                 case '/':

                           // '/' starts a line comment, a block or doc comment, the
                           // "/=" operator, or is the "/" operator on its own.
   876                     scanChar();

   877                     if (ch == '/') {

                               // Line comment: advance up to, but not past, the line
                               // terminator or the end of the buffer.
   878                         do {

   879                             scanCommentChar();

   880                         } while (ch != CR && ch != LF && bp < buflen);

                               // NOTE: a line comment that runs to the end of the buffer
                               // is skipped without a processComment() call.
   881                         if (bp < buflen) {

   882                             endPos = bp;

   883                             processComment(CommentStyle.LINE);

   884                         }

                               // Comment consumed; go round the loop for the real token.
   885                         break;

   886                     } else if (ch == '*') {

   887                         scanChar();

   888                         CommentStyle style;

   889                         if (ch == '*') {

                                   // A second '*' right after "/*": treat as a doc
                                   // comment and let scanDocComment() consume it.
   890                             style = CommentStyle.JAVADOC;

   891                             scanDocComment();

   892                         } else {

   893                             style = CommentStyle.BLOCK;

                                   // Plain block comment: advance until a '*' directly
                                   // followed by '/' is seen, or the buffer ends.
   894                             while (bp < buflen) {

   895                                 if (ch == '*') {

   896                                     scanChar();

   897                                     if (ch == '/') break;

   898                                 } else {

   899                                     scanCommentChar();

   900                                 }

   901                             }

   902                         }

                               // ch == '/' here is taken to mean the comment was closed;
                               // otherwise the comment is reported as unclosed.
   903                         if (ch == '/') {

   904                             scanChar();

   905                             endPos = bp;

   906                             processComment(style);

   907                             break;

   908                         } else {

   909                             lexError("unclosed.comment");

   910                             return;

   911                         }

   912                     } else if (ch == '=') {

   913                         name = names.slashequals;

   914                         token = SLASHEQ;

   915                         scanChar();

   916                     } else {

   917                         name = names.slash;

   918                         token = SLASH;

   919                     }

   920                     return;

   921                 case '\'':

                           // Character literal.
   922                     scanChar();

   923                     if (ch == '\'') {

   924                         lexError("empty.char.lit");

   925                     } else {

                               // A line end inside the quotes is reported, but scanning
                               // of the literal still continues afterwards.
   926                         if (ch == CR || ch == LF)

   927                             lexError(pos, "illegal.line.end.in.char.lit");

   928                         scanLitChar();

   929                         if (ch == '\'') {

   930                             scanChar();

   931                             token = CHARLITERAL;

   932                         } else {

   933                             lexError(pos, "unclosed.char.lit");

   934                         }

   935                     }

   936                     return;

   937                 case '\"':

                           // String literal: scan characters until the closing quote,
                           // a line terminator, or the end of the buffer.
   938                     scanChar();

   939                     while (ch != '\"' && ch != CR && ch != LF && bp < buflen)

   940                         scanLitChar();

   941                     if (ch == '\"') {

   942                         token = STRINGLITERAL;

   943                         scanChar();

   944                     } else {

   945                         lexError(pos, "unclosed.str.lit");

   946                     }

   947                     return;

   948                 case '#':

                           // '#' is only accepted when directly followed by a double
                           // quote: #"..." yields an IDENTIFIER whose name is the
                           // quoted text.
   949                     scanChar();

   950                     if (ch == '\"') {

   951                         scanChar();

                               // #"" is reported as an error; scanning then continues
                               // with the code below (the loop body is skipped because
                               // ch is already the closing quote).
   952                         if (ch == '\"')

   953                             lexError(pos, "empty.bytecode.ident");

   954                         while (ch != '\"' && ch != CR && ch != LF && bp < buflen) {

   955                             scanBytecodeNameChar();

   956                         }

   957                         if (ch == '\"') {

   958                             name = names.fromChars(sbuf, 0, sp);

   959                             token = IDENTIFIER;  // even if #"int" or #"do"

   960                             scanChar();

   961                         } else {

   962                             lexError(pos, "unclosed.bytecode.ident");

   963                         }

   964                     } else {

   965                         lexError("illegal.char", String.valueOf((int)'#'));

   966                     }

   967                     return;

   968                 default:

                           // Everything not matched above: operator characters,
                           // non-ASCII identifier starts, end of input, or an
                           // illegal character.
   969                     if (isSpecial(ch)) {

   970                         scanOperator();

   971                     } else {

   972                         boolean isJavaIdentifierStart;

   973                         if (ch < '\u0080') {

   974                             // all ASCII range chars already handled, above

   975                             isJavaIdentifierStart = false;

   976                         } else {

                                   // A non-zero result is used as the high half of a
                                   // surrogate pair, with ch supplying the low half.
   977                             char high = scanSurrogates();

   978                             if (high != 0) {

                                       // Store the high surrogate in sbuf; when sbuf is
                                       // full putChar() is used instead of a direct
                                       // store (presumably it grows the buffer - confirm).
   979                                 if (sp == sbuf.length) {

   980                                     putChar(high);

   981                                 } else {

   982                                     sbuf[sp++] = high;

   983                                 }

   985                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(

   986                                     Character.toCodePoint(high, ch));

   987                             } else {

   988                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);

   989                             }

   990                         }

   991                         if (isJavaIdentifierStart) {

   992                             scanIdent();

                               // End of input: the buffer is exhausted, or ch is EOI and
                               // sits in the last buffer position.
   993                         } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5

   994                             token = EOF;

   995                             pos = bp = eofPos;

   996                         } else {

                                   // Report the character by its numeric value, then
                                   // step over it.
   997                             lexError("illegal.char", String.valueOf((int)ch));

   998                             scanChar();

   999                         }

  1000                     }

  1001                     return;

  1002                 }

  1003             }

  1004         } finally {

                   // Runs on every exit path, so endPos always ends up at the current
                   // buffer position.
  1005             endPos = bp;

                   // Optional trace of the raw input text between pos and endPos.
  1006             if (scannerDebug)

  1007                 System.out.println("nextToken(" + pos

  1008                                    + "," + endPos + ")=|" +

  1009                                    new String(getRawCharacters(pos, endPos))

  1010                                    + "|");

  1011         }

  1012     }

  1014     /** Return the current token, set by nextToken().

  1015      */

  1016     public Token token() {

  1017         return token;

  1018     }

  1020     /** Sets the current token.

  1021      */

  1022     public void token(Token token) {

  1023         this.token = token;

  1024     }

  1026     /** Return the current token's position: a 0-based

  1027      *  offset from beginning of the raw input stream

  1028      *  (before unicode translation)

  1029      */

  1030     public int pos() {

  1031         return pos;

  1032     }

  1034     /** Return the last character position of the current token.

  1035      */

  1036     public int endPos() {

  1037         return endPos;

  1038     }

  1040     /** Return the last character position of the previous token.

  1041      */

  1042     public int prevEndPos() {

  1043         return prevEndPos;

  1044     }

  1046     /** Return the position where a lexical error occurred;

  1047      */

  1048     public int errPos() {

  1049         return errPos;

  1050     }

  1052     /** Set the position where a lexical error occurred;

  1053      */

  1054     public void errPos(int pos) {

  1055         errPos = pos;

  1056     }

  1058     /** Return the name of an identifier or token for the current token.

  1059      */

  1060     public Name name() {

  1061         return name;

  1062     }

  1064     /** Return the radix of a numeric literal token.

  1065      */

  1066     public int radix() {

  1067         return radix;

  1068     }

  1070     /** Has a @deprecated been encountered in last doc comment?

  1071      *  This needs to be reset by client with resetDeprecatedFlag.

  1072      */

  1073     public boolean deprecatedFlag() {

  1074         return deprecatedFlag;

  1075     }

  1077     public void resetDeprecatedFlag() {

  1078         deprecatedFlag = false;

  1079     }

  1081     /**

  1082      * Returns the documentation string of the current token.

  1083      */

  1084     public String docComment() {

  1085         return null;

  1086     }

  1088     /**

  1089      * Returns a copy of the input buffer, up to its inputLength.

  1090      * Unicode escape sequences are not translated.

  1091      */

  1092     public char[] getRawCharacters() {

  1093         char[] chars = new char[buflen];

  1094         System.arraycopy(buf, 0, chars, 0, buflen);

  1095         return chars;

  1096     }

  1098     /**

  1099      * Returns a copy of a character array subset of the input buffer.

  1100      * The returned array begins at the <code>beginIndex</code> and

  1101      * extends to the character at index <code>endIndex - 1</code>.

  1102      * Thus the length of the substring is <code>endIndex-beginIndex</code>.

  1103      * This behavior is like

  1104      * <code>String.substring(beginIndex, endIndex)</code>.

  1105      * Unicode escape sequences are not translated.

  1106      *

  1107      * @param beginIndex the beginning index, inclusive.

  1108      * @param endIndex the ending index, exclusive.

  1109      * @throws IndexOutOfBounds if either offset is outside of the

  1110      *         array bounds

  1111      */

  1112     public char[] getRawCharacters(int beginIndex, int endIndex) {

  1113         int length = endIndex - beginIndex;

  1114         char[] chars = new char[length];

  1115         System.arraycopy(buf, beginIndex, chars, 0, length);

  1116         return chars;

  1117     }

  1119     public enum CommentStyle {

  1120         LINE,

  1121         BLOCK,

  1122         JAVADOC,

  1123     }

  1125     /**

  1126      * Called when a complete comment has been scanned. pos and endPos

  1127      * will mark the comment boundary.

  1128      */

  1129     protected void processComment(CommentStyle style) {

  1130         if (scannerDebug)

  1131             System.out.println("processComment(" + pos

  1132                                + "," + endPos + "," + style + ")=|"

  1133                                + new String(getRawCharacters(pos, endPos))

  1134                                + "|");

  1135     }

  1137     /**

  1138      * Called when a complete whitespace run has been scanned. pos and endPos

  1139      * will mark the whitespace boundary.

  1140      */

  1141     protected void processWhiteSpace() {

  1142         if (scannerDebug)

  1143             System.out.println("processWhitespace(" + pos

  1144                                + "," + endPos + ")=|" +

  1145                                new String(getRawCharacters(pos, endPos))

  1146                                + "|");

  1147     }

  1149     /**

  1150      * Called when a line terminator has been processed.

  1151      */

  1152     protected void processLineTerminator() {

  1153         if (scannerDebug)

  1154             System.out.println("processTerminator(" + pos

  1155                                + "," + endPos + ")=|" +

  1156                                new String(getRawCharacters(pos, endPos))

  1157                                + "|");

  1158     }

  1160     /** Build a map for translating between line numbers and

  1161      * positions in the input.

  1162      *

  1163      * @return a LineMap */

  1164     public Position.LineMap getLineMap() {

  1165         return Position.makeLineMap(buf, buflen, false);

  1166     }

  1168 }

Mercurial > jdk8-mips64-public > langtools / file revision

src/share/classes/com/sun/tools/javac/parser/Scanner.java@e2722bd43f3a

src/share/classes/com/sun/tools/javac/parser/Scanner.java