src/share/classes/com/sun/tools/javac/parser/Scanner.java

Mon, 04 May 2009 21:04:04 -0700

author
jrose
date
Mon, 04 May 2009 21:04:04 -0700
changeset 267
e2722bd43f3a
parent 113
eff38cc97183
child 409
69eaccd3ea85
permissions
-rw-r--r--

6829189: Java programming with JSR 292 needs language support
Summary: Language changes documented in http://wikis.sun.com/display/mlvm/ProjectCoinProposal
Reviewed-by: jjg, darcy, mcimadamore

     1 /*
     2  * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Sun designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Sun in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
    23  * have any questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import java.nio.*;
    30 import com.sun.tools.javac.code.Source;
    31 import com.sun.tools.javac.file.JavacFileManager;
    32 import com.sun.tools.javac.util.*;
    35 import static com.sun.tools.javac.parser.Token.*;
    36 import static com.sun.tools.javac.util.LayoutCharacters.*;
    38 /** The lexical analyzer maps an input stream consisting of
    39  *  ASCII characters and Unicode escapes into a token sequence.
    40  *
    41  *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
    42  *  you write code that depends on this, you do so at your own risk.
    43  *  This code and its internal interfaces are subject to change or
    44  *  deletion without notice.</b>
    45  */
    46 public class Scanner implements Lexer {
    48     private static boolean scannerDebug = false;
    50     /** A factory for creating scanners. */
    51     public static class Factory {
    52         /** The context key for the scanner factory. */
    53         public static final Context.Key<Scanner.Factory> scannerFactoryKey =
    54             new Context.Key<Scanner.Factory>();
    56         /** Get the Factory instance for this context. */
    57         public static Factory instance(Context context) {
    58             Factory instance = context.get(scannerFactoryKey);
    59             if (instance == null)
    60                 instance = new Factory(context);
    61             return instance;
    62         }
    64         final Log log;
    65         final Names names;
    66         final Source source;
    67         final Keywords keywords;
    69         /** Create a new scanner factory. */
    70         protected Factory(Context context) {
    71             context.put(scannerFactoryKey, this);
    72             this.log = Log.instance(context);
    73             this.names = Names.instance(context);
    74             this.source = Source.instance(context);
    75             this.keywords = Keywords.instance(context);
    76         }
    78         public Scanner newScanner(CharSequence input) {
    79             if (input instanceof CharBuffer) {
    80                 return new Scanner(this, (CharBuffer)input);
    81             } else {
    82                 char[] array = input.toString().toCharArray();
    83                 return newScanner(array, array.length);
    84             }
    85         }
    87         public Scanner newScanner(char[] input, int inputLength) {
    88             return new Scanner(this, input, inputLength);
    89         }
    90     }
    92     /* Output variables; set by nextToken():
    93      */
    95     /** The token, set by nextToken().
    96      */
    97     private Token token;
    99     /** Allow hex floating-point literals.
   100      */
   101     private boolean allowHexFloats;
   103     /** The token's position, 0-based offset from beginning of text.
   104      */
   105     private int pos;
   107     /** Character position just after the last character of the token.
   108      */
   109     private int endPos;
   111     /** The last character position of the previous token.
   112      */
   113     private int prevEndPos;
   115     /** The position where a lexical error occurred;
   116      */
   117     private int errPos = Position.NOPOS;
   119     /** The name of an identifier or token:
   120      */
   121     private Name name;
   123     /** The radix of a numeric literal token.
   124      */
   125     private int radix;
   127     /** Has a @deprecated been encountered in last doc comment?
   128      *  this needs to be reset by client.
   129      */
   130     protected boolean deprecatedFlag = false;
   132     /** A character buffer for literals.
   133      */
   134     private char[] sbuf = new char[128];
   135     private int sp;
    137     /** The input buffer, index of next character to be read,
   138      *  index of one past last character in buffer.
   139      */
   140     private char[] buf;
   141     private int bp;
   142     private int buflen;
   143     private int eofPos;
   145     /** The current character.
   146      */
   147     private char ch;
   149     /** The buffer index of the last converted unicode character
   150      */
   151     private int unicodeConversionBp = -1;
   153     /** The log to be used for error reporting.
   154      */
   155     private final Log log;
   157     /** The name table. */
   158     private final Names names;
   160     /** The keyword table. */
   161     private final Keywords keywords;
   163     /** Common code for constructors. */
   164     private Scanner(Factory fac) {
   165         this.log = fac.log;
   166         this.names = fac.names;
   167         this.keywords = fac.keywords;
   168         this.allowHexFloats = fac.source.allowHexFloats();
   169     }
   171     private static final boolean hexFloatsWork = hexFloatsWork();
   172     private static boolean hexFloatsWork() {
   173         try {
   174             Float.valueOf("0x1.0p1");
   175             return true;
   176         } catch (NumberFormatException ex) {
   177             return false;
   178         }
   179     }
   181     /** Create a scanner from the input buffer.  buffer must implement
   182      *  array() and compact(), and remaining() must be less than limit().
   183      */
   184     protected Scanner(Factory fac, CharBuffer buffer) {
   185         this(fac, JavacFileManager.toArray(buffer), buffer.limit());
   186     }
   188     /**
   189      * Create a scanner from the input array.  This method might
   190      * modify the array.  To avoid copying the input array, ensure
   191      * that {@code inputLength < input.length} or
   192      * {@code input[input.length -1]} is a white space character.
   193      *
   194      * @param fac the factory which created this Scanner
   195      * @param input the input, might be modified
   196      * @param inputLength the size of the input.
   197      * Must be positive and less than or equal to input.length.
   198      */
   199     protected Scanner(Factory fac, char[] input, int inputLength) {
   200         this(fac);
   201         eofPos = inputLength;
   202         if (inputLength == input.length) {
   203             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
   204                 inputLength--;
   205             } else {
   206                 char[] newInput = new char[inputLength + 1];
   207                 System.arraycopy(input, 0, newInput, 0, input.length);
   208                 input = newInput;
   209             }
   210         }
   211         buf = input;
   212         buflen = inputLength;
   213         buf[buflen] = EOI;
   214         bp = -1;
   215         scanChar();
   216     }
   218     /** Report an error at the given position using the provided arguments.
   219      */
   220     private void lexError(int pos, String key, Object... args) {
   221         log.error(pos, key, args);
   222         token = ERROR;
   223         errPos = pos;
   224     }
   226     /** Report an error at the current token position using the provided
   227      *  arguments.
   228      */
   229     private void lexError(String key, Object... args) {
   230         lexError(pos, key, args);
   231     }
   233     /** Convert an ASCII digit from its base (8, 10, or 16)
   234      *  to its value.
   235      */
   236     private int digit(int base) {
   237         char c = ch;
   238         int result = Character.digit(c, base);
   239         if (result >= 0 && c > 0x7f) {
   240             lexError(pos+1, "illegal.nonascii.digit");
   241             ch = "0123456789abcdef".charAt(result);
   242         }
   243         return result;
   244     }
   246     /** Convert unicode escape; bp points to initial '\' character
   247      *  (Spec 3.3).
   248      */
   249     private void convertUnicode() {
   250         if (ch == '\\' && unicodeConversionBp != bp) {
   251             bp++; ch = buf[bp];
   252             if (ch == 'u') {
   253                 do {
   254                     bp++; ch = buf[bp];
   255                 } while (ch == 'u');
   256                 int limit = bp + 3;
   257                 if (limit < buflen) {
   258                     int d = digit(16);
   259                     int code = d;
   260                     while (bp < limit && d >= 0) {
   261                         bp++; ch = buf[bp];
   262                         d = digit(16);
   263                         code = (code << 4) + d;
   264                     }
   265                     if (d >= 0) {
   266                         ch = (char)code;
   267                         unicodeConversionBp = bp;
   268                         return;
   269                     }
   270                 }
   271                 lexError(bp, "illegal.unicode.esc");
   272             } else {
   273                 bp--;
   274                 ch = '\\';
   275             }
   276         }
   277     }
   279     /** Read next character.
   280      */
   281     private void scanChar() {
   282         ch = buf[++bp];
   283         if (ch == '\\') {
   284             convertUnicode();
   285         }
   286     }
   288     /** Read next character in comment, skipping over double '\' characters.
   289      */
   290     private void scanCommentChar() {
   291         scanChar();
   292         if (ch == '\\') {
   293             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   294                 bp++;
   295             } else {
   296                 convertUnicode();
   297             }
   298         }
   299     }
   301     /** Append a character to sbuf.
   302      */
   303     private void putChar(char ch) {
   304         if (sp == sbuf.length) {
   305             char[] newsbuf = new char[sbuf.length * 2];
   306             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
   307             sbuf = newsbuf;
   308         }
   309         sbuf[sp++] = ch;
   310     }
   312     /** For debugging purposes: print character.
   313      */
   314     private void dch() {
   315         System.err.print(ch); System.out.flush();
   316     }
   318     /** Read next character in character or string literal and copy into sbuf.
   319      */
   320     private void scanLitChar(boolean forBytecodeName) {
   321         if (ch == '\\') {
   322             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   323                 bp++;
   324                 putChar('\\');
   325                 scanChar();
   326             } else {
   327                 scanChar();
   328                 switch (ch) {
   329                 case '0': case '1': case '2': case '3':
   330                 case '4': case '5': case '6': case '7':
   331                     char leadch = ch;
   332                     int oct = digit(8);
   333                     scanChar();
   334                     if ('0' <= ch && ch <= '7') {
   335                         oct = oct * 8 + digit(8);
   336                         scanChar();
   337                         if (leadch <= '3' && '0' <= ch && ch <= '7') {
   338                             oct = oct * 8 + digit(8);
   339                             scanChar();
   340                         }
   341                     }
   342                     putChar((char)oct);
   343                     break;
   344                 case 'b':
   345                     putChar('\b'); scanChar(); break;
   346                 case 't':
   347                     putChar('\t'); scanChar(); break;
   348                 case 'n':
   349                     putChar('\n'); scanChar(); break;
   350                 case 'f':
   351                     putChar('\f'); scanChar(); break;
   352                 case 'r':
   353                     putChar('\r'); scanChar(); break;
   354                 case '\'':
   355                     putChar('\''); scanChar(); break;
   356                 case '\"':
   357                     putChar('\"'); scanChar(); break;
   358                 case '\\':
   359                     putChar('\\'); scanChar(); break;
   360                 case '|': case ',': case '?': case '%':
   361                 case '^': case '_': case '{': case '}':
   362                 case '!': case '-': case '=':
   363                     if (forBytecodeName) {
   364                         // Accept escape sequences for dangerous bytecode chars.
   365                         // This is illegal in normal Java string or character literals.
   366                         // Note that the escape sequence itself is passed through.
   367                         putChar('\\'); putChar(ch); scanChar();
   368                     } else {
   369                         lexError(bp, "illegal.esc.char");
   370                     }
   371                     break;
   372                 default:
   373                     lexError(bp, "illegal.esc.char");
   374                 }
   375             }
   376         } else if (bp != buflen) {
   377             putChar(ch); scanChar();
   378         }
   379     }
   380     private void scanLitChar() {
   381         scanLitChar(false);
   382     }
   384     /** Read next character in an exotic name #"foo"
   385      */
   386     private void scanBytecodeNameChar() {
   387         switch (ch) {
   388         // reject any "dangerous" char which is illegal somewhere in the JVM spec
   389         // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm
   390         case '/': case '.': case ';':  // illegal everywhere
   391         case '<': case '>':  // illegal in methods, dangerous in classes
   392         case '[':  // illegal in classes
   393             lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch));
   394             break;
   395         }
   396         scanLitChar(true);
   397     }
   399     /** Read fractional part of hexadecimal floating point number.
   400      */
   401     private void scanHexExponentAndSuffix() {
   402         if (ch == 'p' || ch == 'P') {
   403             putChar(ch);
   404             scanChar();
   405             if (ch == '+' || ch == '-') {
   406                 putChar(ch);
   407                 scanChar();
   408             }
   409             if ('0' <= ch && ch <= '9') {
   410                 do {
   411                     putChar(ch);
   412                     scanChar();
   413                 } while ('0' <= ch && ch <= '9');
   414                 if (!allowHexFloats) {
   415                     lexError("unsupported.fp.lit");
   416                     allowHexFloats = true;
   417                 }
   418                 else if (!hexFloatsWork)
   419                     lexError("unsupported.cross.fp.lit");
   420             } else
   421                 lexError("malformed.fp.lit");
   422         } else {
   423             lexError("malformed.fp.lit");
   424         }
   425         if (ch == 'f' || ch == 'F') {
   426             putChar(ch);
   427             scanChar();
   428             token = FLOATLITERAL;
   429         } else {
   430             if (ch == 'd' || ch == 'D') {
   431                 putChar(ch);
   432                 scanChar();
   433             }
   434             token = DOUBLELITERAL;
   435         }
   436     }
   438     /** Read fractional part of floating point number.
   439      */
   440     private void scanFraction() {
   441         while (digit(10) >= 0) {
   442             putChar(ch);
   443             scanChar();
   444         }
   445         int sp1 = sp;
   446         if (ch == 'e' || ch == 'E') {
   447             putChar(ch);
   448             scanChar();
   449             if (ch == '+' || ch == '-') {
   450                 putChar(ch);
   451                 scanChar();
   452             }
   453             if ('0' <= ch && ch <= '9') {
   454                 do {
   455                     putChar(ch);
   456                     scanChar();
   457                 } while ('0' <= ch && ch <= '9');
   458                 return;
   459             }
   460             lexError("malformed.fp.lit");
   461             sp = sp1;
   462         }
   463     }
   465     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   466      */
   467     private void scanFractionAndSuffix() {
   468         this.radix = 10;
   469         scanFraction();
   470         if (ch == 'f' || ch == 'F') {
   471             putChar(ch);
   472             scanChar();
   473             token = FLOATLITERAL;
   474         } else {
   475             if (ch == 'd' || ch == 'D') {
   476                 putChar(ch);
   477                 scanChar();
   478             }
   479             token = DOUBLELITERAL;
   480         }
   481     }
   483     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   484      */
   485     private void scanHexFractionAndSuffix(boolean seendigit) {
   486         this.radix = 16;
   487         assert ch == '.';
   488         putChar(ch);
   489         scanChar();
   490         while (digit(16) >= 0) {
   491             seendigit = true;
   492             putChar(ch);
   493             scanChar();
   494         }
   495         if (!seendigit)
   496             lexError("invalid.hex.number");
   497         else
   498             scanHexExponentAndSuffix();
   499     }
   501     /** Read a number.
   502      *  @param radix  The radix of the number; one of 8, 10, 16.
   503      */
   504     private void scanNumber(int radix) {
   505         this.radix = radix;
   506         // for octal, allow base-10 digit in case it's a float literal
   507         int digitRadix = (radix <= 10) ? 10 : 16;
   508         boolean seendigit = false;
   509         while (digit(digitRadix) >= 0) {
   510             seendigit = true;
   511             putChar(ch);
   512             scanChar();
   513         }
   514         if (radix == 16 && ch == '.') {
   515             scanHexFractionAndSuffix(seendigit);
   516         } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
   517             scanHexExponentAndSuffix();
   518         } else if (radix <= 10 && ch == '.') {
   519             putChar(ch);
   520             scanChar();
   521             scanFractionAndSuffix();
   522         } else if (radix <= 10 &&
   523                    (ch == 'e' || ch == 'E' ||
   524                     ch == 'f' || ch == 'F' ||
   525                     ch == 'd' || ch == 'D')) {
   526             scanFractionAndSuffix();
   527         } else {
   528             if (ch == 'l' || ch == 'L') {
   529                 scanChar();
   530                 token = LONGLITERAL;
   531             } else {
   532                 token = INTLITERAL;
   533             }
   534         }
   535     }
   537     /** Read an identifier.
   538      */
   539     private void scanIdent() {
   540         boolean isJavaIdentifierPart;
   541         char high;
   542         do {
   543             if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
   544             // optimization, was: putChar(ch);
   546             scanChar();
   547             switch (ch) {
   548             case 'A': case 'B': case 'C': case 'D': case 'E':
   549             case 'F': case 'G': case 'H': case 'I': case 'J':
   550             case 'K': case 'L': case 'M': case 'N': case 'O':
   551             case 'P': case 'Q': case 'R': case 'S': case 'T':
   552             case 'U': case 'V': case 'W': case 'X': case 'Y':
   553             case 'Z':
   554             case 'a': case 'b': case 'c': case 'd': case 'e':
   555             case 'f': case 'g': case 'h': case 'i': case 'j':
   556             case 'k': case 'l': case 'm': case 'n': case 'o':
   557             case 'p': case 'q': case 'r': case 's': case 't':
   558             case 'u': case 'v': case 'w': case 'x': case 'y':
   559             case 'z':
   560             case '$': case '_':
   561             case '0': case '1': case '2': case '3': case '4':
   562             case '5': case '6': case '7': case '8': case '9':
   563             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
   564             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
   565             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
   566             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
   567             case '\u0015': case '\u0016': case '\u0017':
   568             case '\u0018': case '\u0019': case '\u001B':
   569             case '\u007F':
   570                 break;
   571             case '\u001A': // EOI is also a legal identifier part
   572                 if (bp >= buflen) {
   573                     name = names.fromChars(sbuf, 0, sp);
   574                     token = keywords.key(name);
   575                     return;
   576                 }
   577                 break;
   578             default:
   579                 if (ch < '\u0080') {
   580                     // all ASCII range chars already handled, above
   581                     isJavaIdentifierPart = false;
   582                 } else {
   583                     high = scanSurrogates();
   584                     if (high != 0) {
   585                         if (sp == sbuf.length) {
   586                             putChar(high);
   587                         } else {
   588                             sbuf[sp++] = high;
   589                         }
   590                         isJavaIdentifierPart = Character.isJavaIdentifierPart(
   591                             Character.toCodePoint(high, ch));
   592                     } else {
   593                         isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
   594                     }
   595                 }
   596                 if (!isJavaIdentifierPart) {
   597                     name = names.fromChars(sbuf, 0, sp);
   598                     token = keywords.key(name);
   599                     return;
   600                 }
   601             }
   602         } while (true);
   603     }
   605     /** Are surrogates supported?
   606      */
   607     final static boolean surrogatesSupported = surrogatesSupported();
   608     private static boolean surrogatesSupported() {
   609         try {
   610             Character.isHighSurrogate('a');
   611             return true;
   612         } catch (NoSuchMethodError ex) {
   613             return false;
   614         }
   615     }
   617     /** Scan surrogate pairs.  If 'ch' is a high surrogate and
   618      *  the next character is a low surrogate, then put the low
   619      *  surrogate in 'ch', and return the high surrogate.
   620      *  otherwise, just return 0.
   621      */
   622     private char scanSurrogates() {
   623         if (surrogatesSupported && Character.isHighSurrogate(ch)) {
   624             char high = ch;
   626             scanChar();
   628             if (Character.isLowSurrogate(ch)) {
   629                 return high;
   630             }
   632             ch = high;
   633         }
   635         return 0;
   636     }
   638     /** Return true if ch can be part of an operator.
   639      */
   640     private boolean isSpecial(char ch) {
   641         switch (ch) {
   642         case '!': case '%': case '&': case '*': case '?':
   643         case '+': case '-': case ':': case '<': case '=':
   644         case '>': case '^': case '|': case '~':
   645         case '@':
   646             return true;
   647         default:
   648             return false;
   649         }
   650     }
   652     /** Read longest possible sequence of special characters and convert
   653      *  to token.
   654      */
   655     private void scanOperator() {
   656         while (true) {
   657             putChar(ch);
   658             Name newname = names.fromChars(sbuf, 0, sp);
   659             if (keywords.key(newname) == IDENTIFIER) {
   660                 sp--;
   661                 break;
   662             }
   663             name = newname;
   664             token = keywords.key(newname);
   665             scanChar();
   666             if (!isSpecial(ch)) break;
   667         }
   668     }
   670     /**
   671      * Scan a documention comment; determine if a deprecated tag is present.
   672      * Called once the initial /, * have been skipped, positioned at the second *
   673      * (which is treated as the beginning of the first line).
   674      * Stops positioned at the closing '/'.
   675      */
   676     @SuppressWarnings("fallthrough")
   677     private void scanDocComment() {
   678         boolean deprecatedPrefix = false;
   680         forEachLine:
   681         while (bp < buflen) {
   683             // Skip optional WhiteSpace at beginning of line
   684             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   685                 scanCommentChar();
   686             }
   688             // Skip optional consecutive Stars
   689             while (bp < buflen && ch == '*') {
   690                 scanCommentChar();
   691                 if (ch == '/') {
   692                     return;
   693                 }
   694             }
   696             // Skip optional WhiteSpace after Stars
   697             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   698                 scanCommentChar();
   699             }
   701             deprecatedPrefix = false;
   702             // At beginning of line in the JavaDoc sense.
   703             if (bp < buflen && ch == '@' && !deprecatedFlag) {
   704                 scanCommentChar();
   705                 if (bp < buflen && ch == 'd') {
   706                     scanCommentChar();
   707                     if (bp < buflen && ch == 'e') {
   708                         scanCommentChar();
   709                         if (bp < buflen && ch == 'p') {
   710                             scanCommentChar();
   711                             if (bp < buflen && ch == 'r') {
   712                                 scanCommentChar();
   713                                 if (bp < buflen && ch == 'e') {
   714                                     scanCommentChar();
   715                                     if (bp < buflen && ch == 'c') {
   716                                         scanCommentChar();
   717                                         if (bp < buflen && ch == 'a') {
   718                                             scanCommentChar();
   719                                             if (bp < buflen && ch == 't') {
   720                                                 scanCommentChar();
   721                                                 if (bp < buflen && ch == 'e') {
   722                                                     scanCommentChar();
   723                                                     if (bp < buflen && ch == 'd') {
   724                                                         deprecatedPrefix = true;
   725                                                         scanCommentChar();
   726                                                     }}}}}}}}}}}
   727             if (deprecatedPrefix && bp < buflen) {
   728                 if (Character.isWhitespace(ch)) {
   729                     deprecatedFlag = true;
   730                 } else if (ch == '*') {
   731                     scanCommentChar();
   732                     if (ch == '/') {
   733                         deprecatedFlag = true;
   734                         return;
   735                     }
   736                 }
   737             }
   739             // Skip rest of line
   740             while (bp < buflen) {
   741                 switch (ch) {
   742                 case '*':
   743                     scanCommentChar();
   744                     if (ch == '/') {
   745                         return;
   746                     }
   747                     break;
   748                 case CR: // (Spec 3.4)
   749                     scanCommentChar();
   750                     if (ch != LF) {
   751                         continue forEachLine;
   752                     }
   753                     /* fall through to LF case */
   754                 case LF: // (Spec 3.4)
   755                     scanCommentChar();
   756                     continue forEachLine;
   757                 default:
   758                     scanCommentChar();
   759                 }
   760             } // rest of line
   761         } // forEachLine
   762         return;
   763     }
   765     /** The value of a literal token, recorded as a string.
   766      *  For integers, leading 0x and 'l' suffixes are suppressed.
   767      */
   768     public String stringVal() {
   769         return new String(sbuf, 0, sp);
   770     }
   772     /** Read token.
   773      */
   774     public void nextToken() {
   776         try {
   777             prevEndPos = endPos;
   778             sp = 0;
   780             while (true) {
   781                 pos = bp;
   782                 switch (ch) {
   783                 case ' ': // (Spec 3.6)
   784                 case '\t': // (Spec 3.6)
   785                 case FF: // (Spec 3.6)
   786                     do {
   787                         scanChar();
   788                     } while (ch == ' ' || ch == '\t' || ch == FF);
   789                     endPos = bp;
   790                     processWhiteSpace();
   791                     break;
   792                 case LF: // (Spec 3.4)
   793                     scanChar();
   794                     endPos = bp;
   795                     processLineTerminator();
   796                     break;
   797                 case CR: // (Spec 3.4)
   798                     scanChar();
   799                     if (ch == LF) {
   800                         scanChar();
   801                     }
   802                     endPos = bp;
   803                     processLineTerminator();
   804                     break;
   805                 case 'A': case 'B': case 'C': case 'D': case 'E':
   806                 case 'F': case 'G': case 'H': case 'I': case 'J':
   807                 case 'K': case 'L': case 'M': case 'N': case 'O':
   808                 case 'P': case 'Q': case 'R': case 'S': case 'T':
   809                 case 'U': case 'V': case 'W': case 'X': case 'Y':
   810                 case 'Z':
   811                 case 'a': case 'b': case 'c': case 'd': case 'e':
   812                 case 'f': case 'g': case 'h': case 'i': case 'j':
   813                 case 'k': case 'l': case 'm': case 'n': case 'o':
   814                 case 'p': case 'q': case 'r': case 's': case 't':
   815                 case 'u': case 'v': case 'w': case 'x': case 'y':
   816                 case 'z':
   817                 case '$': case '_':
   818                     scanIdent();
   819                     return;
   820                 case '0':
   821                     scanChar();
   822                     if (ch == 'x' || ch == 'X') {
   823                         scanChar();
   824                         if (ch == '.') {
   825                             scanHexFractionAndSuffix(false);
   826                         } else if (digit(16) < 0) {
   827                             lexError("invalid.hex.number");
   828                         } else {
   829                             scanNumber(16);
   830                         }
   831                     } else {
   832                         putChar('0');
   833                         scanNumber(8);
   834                     }
   835                     return;
   836                 case '1': case '2': case '3': case '4':
   837                 case '5': case '6': case '7': case '8': case '9':
   838                     scanNumber(10);
   839                     return;
   840                 case '.':
   841                     scanChar();
   842                     if ('0' <= ch && ch <= '9') {
   843                         putChar('.');
   844                         scanFractionAndSuffix();
   845                     } else if (ch == '.') {
   846                         putChar('.'); putChar('.');
   847                         scanChar();
   848                         if (ch == '.') {
   849                             scanChar();
   850                             putChar('.');
   851                             token = ELLIPSIS;
   852                         } else {
   853                             lexError("malformed.fp.lit");
   854                         }
   855                     } else {
   856                         token = DOT;
   857                     }
   858                     return;
   859                 case ',':
   860                     scanChar(); token = COMMA; return;
   861                 case ';':
   862                     scanChar(); token = SEMI; return;
   863                 case '(':
   864                     scanChar(); token = LPAREN; return;
   865                 case ')':
   866                     scanChar(); token = RPAREN; return;
   867                 case '[':
   868                     scanChar(); token = LBRACKET; return;
   869                 case ']':
   870                     scanChar(); token = RBRACKET; return;
   871                 case '{':
   872                     scanChar(); token = LBRACE; return;
   873                 case '}':
   874                     scanChar(); token = RBRACE; return;
   875                 case '/':
   876                     scanChar();
   877                     if (ch == '/') {
   878                         do {
   879                             scanCommentChar();
   880                         } while (ch != CR && ch != LF && bp < buflen);
   881                         if (bp < buflen) {
   882                             endPos = bp;
   883                             processComment(CommentStyle.LINE);
   884                         }
   885                         break;
   886                     } else if (ch == '*') {
   887                         scanChar();
   888                         CommentStyle style;
   889                         if (ch == '*') {
   890                             style = CommentStyle.JAVADOC;
   891                             scanDocComment();
   892                         } else {
   893                             style = CommentStyle.BLOCK;
   894                             while (bp < buflen) {
   895                                 if (ch == '*') {
   896                                     scanChar();
   897                                     if (ch == '/') break;
   898                                 } else {
   899                                     scanCommentChar();
   900                                 }
   901                             }
   902                         }
   903                         if (ch == '/') {
   904                             scanChar();
   905                             endPos = bp;
   906                             processComment(style);
   907                             break;
   908                         } else {
   909                             lexError("unclosed.comment");
   910                             return;
   911                         }
   912                     } else if (ch == '=') {
   913                         name = names.slashequals;
   914                         token = SLASHEQ;
   915                         scanChar();
   916                     } else {
   917                         name = names.slash;
   918                         token = SLASH;
   919                     }
   920                     return;
   921                 case '\'':
   922                     scanChar();
   923                     if (ch == '\'') {
   924                         lexError("empty.char.lit");
   925                     } else {
   926                         if (ch == CR || ch == LF)
   927                             lexError(pos, "illegal.line.end.in.char.lit");
   928                         scanLitChar();
   929                         if (ch == '\'') {
   930                             scanChar();
   931                             token = CHARLITERAL;
   932                         } else {
   933                             lexError(pos, "unclosed.char.lit");
   934                         }
   935                     }
   936                     return;
   937                 case '\"':
   938                     scanChar();
   939                     while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
   940                         scanLitChar();
   941                     if (ch == '\"') {
   942                         token = STRINGLITERAL;
   943                         scanChar();
   944                     } else {
   945                         lexError(pos, "unclosed.str.lit");
   946                     }
   947                     return;
   948                 case '#':
   949                     scanChar();
   950                     if (ch == '\"') {
   951                         scanChar();
   952                         if (ch == '\"')
   953                             lexError(pos, "empty.bytecode.ident");
   954                         while (ch != '\"' && ch != CR && ch != LF && bp < buflen) {
   955                             scanBytecodeNameChar();
   956                         }
   957                         if (ch == '\"') {
   958                             name = names.fromChars(sbuf, 0, sp);
   959                             token = IDENTIFIER;  // even if #"int" or #"do"
   960                             scanChar();
   961                         } else {
   962                             lexError(pos, "unclosed.bytecode.ident");
   963                         }
   964                     } else {
   965                         lexError("illegal.char", String.valueOf((int)'#'));
   966                     }
   967                     return;
   968                 default:
   969                     if (isSpecial(ch)) {
   970                         scanOperator();
   971                     } else {
   972                         boolean isJavaIdentifierStart;
   973                         if (ch < '\u0080') {
   974                             // all ASCII range chars already handled, above
   975                             isJavaIdentifierStart = false;
   976                         } else {
   977                             char high = scanSurrogates();
   978                             if (high != 0) {
   979                                 if (sp == sbuf.length) {
   980                                     putChar(high);
   981                                 } else {
   982                                     sbuf[sp++] = high;
   983                                 }
   985                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(
   986                                     Character.toCodePoint(high, ch));
   987                             } else {
   988                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
   989                             }
   990                         }
   991                         if (isJavaIdentifierStart) {
   992                             scanIdent();
   993                         } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
   994                             token = EOF;
   995                             pos = bp = eofPos;
   996                         } else {
   997                             lexError("illegal.char", String.valueOf((int)ch));
   998                             scanChar();
   999                         }
  1001                     return;
  1004         } finally {
  1005             endPos = bp;
  1006             if (scannerDebug)
  1007                 System.out.println("nextToken(" + pos
  1008                                    + "," + endPos + ")=|" +
  1009                                    new String(getRawCharacters(pos, endPos))
  1010                                    + "|");
  1014     /** Return the current token, set by nextToken().
  1015      */
  1016     public Token token() {
  1017         return token;
  1020     /** Sets the current token.
  1021      */
  1022     public void token(Token token) {
  1023         this.token = token;
  1026     /** Return the current token's position: a 0-based
  1027      *  offset from beginning of the raw input stream
  1028      *  (before unicode translation)
  1029      */
  1030     public int pos() {
  1031         return pos;
  1034     /** Return the last character position of the current token.
  1035      */
  1036     public int endPos() {
  1037         return endPos;
  1040     /** Return the last character position of the previous token.
  1041      */
  1042     public int prevEndPos() {
  1043         return prevEndPos;
  1046     /** Return the position where a lexical error occurred;
  1047      */
  1048     public int errPos() {
  1049         return errPos;
  1052     /** Set the position where a lexical error occurred;
  1053      */
  1054     public void errPos(int pos) {
  1055         errPos = pos;
  1058     /** Return the name of an identifier or token for the current token.
  1059      */
  1060     public Name name() {
  1061         return name;
  1064     /** Return the radix of a numeric literal token.
  1065      */
  1066     public int radix() {
  1067         return radix;
  1070     /** Has a @deprecated been encountered in last doc comment?
  1071      *  This needs to be reset by client with resetDeprecatedFlag.
  1072      */
  1073     public boolean deprecatedFlag() {
  1074         return deprecatedFlag;
  1077     public void resetDeprecatedFlag() {
  1078         deprecatedFlag = false;
  1081     /**
  1082      * Returns the documentation string of the current token.
  1083      */
  1084     public String docComment() {
  1085         return null;
  1088     /**
  1089      * Returns a copy of the input buffer, up to its inputLength.
  1090      * Unicode escape sequences are not translated.
  1091      */
  1092     public char[] getRawCharacters() {
  1093         char[] chars = new char[buflen];
  1094         System.arraycopy(buf, 0, chars, 0, buflen);
  1095         return chars;
  1098     /**
  1099      * Returns a copy of a character array subset of the input buffer.
  1100      * The returned array begins at the <code>beginIndex</code> and
  1101      * extends to the character at index <code>endIndex - 1</code>.
  1102      * Thus the length of the substring is <code>endIndex-beginIndex</code>.
  1103      * This behavior is like
  1104      * <code>String.substring(beginIndex, endIndex)</code>.
  1105      * Unicode escape sequences are not translated.
  1107      * @param beginIndex the beginning index, inclusive.
  1108      * @param endIndex the ending index, exclusive.
  1109      * @throws IndexOutOfBounds if either offset is outside of the
  1110      *         array bounds
  1111      */
  1112     public char[] getRawCharacters(int beginIndex, int endIndex) {
  1113         int length = endIndex - beginIndex;
  1114         char[] chars = new char[length];
  1115         System.arraycopy(buf, beginIndex, chars, 0, length);
  1116         return chars;
  1119     public enum CommentStyle {
  1120         LINE,
  1121         BLOCK,
  1122         JAVADOC,
  1125     /**
  1126      * Called when a complete comment has been scanned. pos and endPos
  1127      * will mark the comment boundary.
  1128      */
  1129     protected void processComment(CommentStyle style) {
  1130         if (scannerDebug)
  1131             System.out.println("processComment(" + pos
  1132                                + "," + endPos + "," + style + ")=|"
  1133                                + new String(getRawCharacters(pos, endPos))
  1134                                + "|");
  1137     /**
  1138      * Called when a complete whitespace run has been scanned. pos and endPos
  1139      * will mark the whitespace boundary.
  1140      */
  1141     protected void processWhiteSpace() {
  1142         if (scannerDebug)
  1143             System.out.println("processWhitespace(" + pos
  1144                                + "," + endPos + ")=|" +
  1145                                new String(getRawCharacters(pos, endPos))
  1146                                + "|");
  1149     /**
  1150      * Called when a line terminator has been processed.
  1151      */
  1152     protected void processLineTerminator() {
  1153         if (scannerDebug)
  1154             System.out.println("processTerminator(" + pos
  1155                                + "," + endPos + ")=|" +
  1156                                new String(getRawCharacters(pos, endPos))
  1157                                + "|");
  1160     /** Build a map for translating between line numbers and
  1161      * positions in the input.
  1163      * @return a LineMap */
  1164     public Position.LineMap getLineMap() {
  1165         return Position.makeLineMap(buf, buflen, false);

mercurial