src/share/classes/com/sun/tools/javac/parser/Scanner.java

Wed, 21 Apr 2010 12:24:56 +0100

author
mcimadamore
date
Wed, 21 Apr 2010 12:24:56 +0100
changeset 547
04cf82179fa7
parent 423
8a4543b30586
child 554
9d9f26857129
permissions
-rw-r--r--

6730476: invalid "unchecked generic array" warning
Summary: Reifiable-ness of varargs element type should be checked after JLS3 15.12.2.8
Reviewed-by: jjg

     1 /*
     2  * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Sun designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Sun in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
    23  * have any questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import java.nio.*;
    30 import com.sun.tools.javac.code.Source;
    31 import com.sun.tools.javac.file.JavacFileManager;
    32 import com.sun.tools.javac.util.*;
    35 import static com.sun.tools.javac.parser.Token.*;
    36 import static com.sun.tools.javac.util.LayoutCharacters.*;
    38 /** The lexical analyzer maps an input stream consisting of
    39  *  ASCII characters and Unicode escapes into a token sequence.
    40  *
    41  *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
    42  *  you write code that depends on this, you do so at your own risk.
    43  *  This code and its internal interfaces are subject to change or
    44  *  deletion without notice.</b>
    45  */
    46 public class Scanner implements Lexer {
    48     private static boolean scannerDebug = false;
    50     /** A factory for creating scanners. */
    51     public static class Factory {
    52         /** The context key for the scanner factory. */
    53         public static final Context.Key<Scanner.Factory> scannerFactoryKey =
    54             new Context.Key<Scanner.Factory>();
    56         /** Get the Factory instance for this context. */
    57         public static Factory instance(Context context) {
    58             Factory instance = context.get(scannerFactoryKey);
    59             if (instance == null)
    60                 instance = new Factory(context);
    61             return instance;
    62         }
    64         final Log log;
    65         final Names names;
    66         final Source source;
    67         final Keywords keywords;
    69         /** Create a new scanner factory. */
    70         protected Factory(Context context) {
    71             context.put(scannerFactoryKey, this);
    72             this.log = Log.instance(context);
    73             this.names = Names.instance(context);
    74             this.source = Source.instance(context);
    75             this.keywords = Keywords.instance(context);
    76         }
    78         public Scanner newScanner(CharSequence input) {
    79             if (input instanceof CharBuffer) {
    80                 return new Scanner(this, (CharBuffer)input);
    81             } else {
    82                 char[] array = input.toString().toCharArray();
    83                 return newScanner(array, array.length);
    84             }
    85         }
    87         public Scanner newScanner(char[] input, int inputLength) {
    88             return new Scanner(this, input, inputLength);
    89         }
    90     }
    92     /* Output variables; set by nextToken():
    93      */
    95     /** The token, set by nextToken().
    96      */
    97     private Token token;
    99     /** Allow hex floating-point literals.
   100      */
   101     private boolean allowHexFloats;
   103     /** Allow binary literals.
   104      */
   105     private boolean allowBinaryLiterals;
   107     /** Allow underscores in literals.
   108      */
   109     private boolean allowUnderscoresInLiterals;
   111     /** The source language setting.
   112      */
   113     private Source source;
   115     /** The token's position, 0-based offset from beginning of text.
   116      */
   117     private int pos;
   119     /** Character position just after the last character of the token.
   120      */
   121     private int endPos;
   123     /** The last character position of the previous token.
   124      */
   125     private int prevEndPos;
   127     /** The position where a lexical error occurred;
   128      */
   129     private int errPos = Position.NOPOS;
   131     /** The name of an identifier or token:
   132      */
   133     private Name name;
   135     /** The radix of a numeric literal token.
   136      */
   137     private int radix;
   139     /** Has a @deprecated been encountered in last doc comment?
   140      *  this needs to be reset by client.
   141      */
   142     protected boolean deprecatedFlag = false;
   144     /** A character buffer for literals.
   145      */
   146     private char[] sbuf = new char[128];
   147     private int sp;
   149     /** The input buffer, index of next character to be read,
   150      *  index of one past last character in buffer.
   151      */
   152     private char[] buf;
   153     private int bp;
   154     private int buflen;
   155     private int eofPos;
   157     /** The current character.
   158      */
   159     private char ch;
   161     /** The buffer index of the last converted unicode character
   162      */
   163     private int unicodeConversionBp = -1;
   165     /** The log to be used for error reporting.
   166      */
   167     private final Log log;
   169     /** The name table. */
   170     private final Names names;
   172     /** The keyword table. */
   173     private final Keywords keywords;
   175     /** Common code for constructors. */
   176     private Scanner(Factory fac) {
   177         log = fac.log;
   178         names = fac.names;
   179         keywords = fac.keywords;
   180         source = fac.source;
   181         allowBinaryLiterals = source.allowBinaryLiterals();
   182         allowHexFloats = source.allowHexFloats();
   183         allowUnderscoresInLiterals = source.allowBinaryLiterals();
   184     }
   186     private static final boolean hexFloatsWork = hexFloatsWork();
   187     private static boolean hexFloatsWork() {
   188         try {
   189             Float.valueOf("0x1.0p1");
   190             return true;
   191         } catch (NumberFormatException ex) {
   192             return false;
   193         }
   194     }
   196     /** Create a scanner from the input buffer.  buffer must implement
   197      *  array() and compact(), and remaining() must be less than limit().
   198      */
   199     protected Scanner(Factory fac, CharBuffer buffer) {
   200         this(fac, JavacFileManager.toArray(buffer), buffer.limit());
   201     }
   203     /**
   204      * Create a scanner from the input array.  This method might
   205      * modify the array.  To avoid copying the input array, ensure
   206      * that {@code inputLength < input.length} or
   207      * {@code input[input.length -1]} is a white space character.
   208      *
   209      * @param fac the factory which created this Scanner
   210      * @param input the input, might be modified
   211      * @param inputLength the size of the input.
   212      * Must be positive and less than or equal to input.length.
   213      */
   214     protected Scanner(Factory fac, char[] input, int inputLength) {
   215         this(fac);
   216         eofPos = inputLength;
   217         if (inputLength == input.length) {
   218             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
   219                 inputLength--;
   220             } else {
   221                 char[] newInput = new char[inputLength + 1];
   222                 System.arraycopy(input, 0, newInput, 0, input.length);
   223                 input = newInput;
   224             }
   225         }
   226         buf = input;
   227         buflen = inputLength;
   228         buf[buflen] = EOI;
   229         bp = -1;
   230         scanChar();
   231     }
   233     /** Report an error at the given position using the provided arguments.
   234      */
   235     private void lexError(int pos, String key, Object... args) {
   236         log.error(pos, key, args);
   237         token = ERROR;
   238         errPos = pos;
   239     }
   241     /** Report an error at the current token position using the provided
   242      *  arguments.
   243      */
   244     private void lexError(String key, Object... args) {
   245         lexError(pos, key, args);
   246     }
   248     /** Convert an ASCII digit from its base (8, 10, or 16)
   249      *  to its value.
   250      */
   251     private int digit(int base) {
   252         char c = ch;
   253         int result = Character.digit(c, base);
   254         if (result >= 0 && c > 0x7f) {
   255             lexError(pos+1, "illegal.nonascii.digit");
   256             ch = "0123456789abcdef".charAt(result);
   257         }
   258         return result;
   259     }
   261     /** Convert unicode escape; bp points to initial '\' character
   262      *  (Spec 3.3).
   263      */
   264     private void convertUnicode() {
   265         if (ch == '\\' && unicodeConversionBp != bp) {
   266             bp++; ch = buf[bp];
   267             if (ch == 'u') {
   268                 do {
   269                     bp++; ch = buf[bp];
   270                 } while (ch == 'u');
   271                 int limit = bp + 3;
   272                 if (limit < buflen) {
   273                     int d = digit(16);
   274                     int code = d;
   275                     while (bp < limit && d >= 0) {
   276                         bp++; ch = buf[bp];
   277                         d = digit(16);
   278                         code = (code << 4) + d;
   279                     }
   280                     if (d >= 0) {
   281                         ch = (char)code;
   282                         unicodeConversionBp = bp;
   283                         return;
   284                     }
   285                 }
   286                 lexError(bp, "illegal.unicode.esc");
   287             } else {
   288                 bp--;
   289                 ch = '\\';
   290             }
   291         }
   292     }
   294     /** Read next character.
   295      */
   296     private void scanChar() {
   297         ch = buf[++bp];
   298         if (ch == '\\') {
   299             convertUnicode();
   300         }
   301     }
   303     /** Read next character in comment, skipping over double '\' characters.
   304      */
   305     private void scanCommentChar() {
   306         scanChar();
   307         if (ch == '\\') {
   308             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   309                 bp++;
   310             } else {
   311                 convertUnicode();
   312             }
   313         }
   314     }
   316     /** Append a character to sbuf.
   317      */
   318     private void putChar(char ch) {
   319         if (sp == sbuf.length) {
   320             char[] newsbuf = new char[sbuf.length * 2];
   321             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
   322             sbuf = newsbuf;
   323         }
   324         sbuf[sp++] = ch;
   325     }
   327     /** For debugging purposes: print character.
   328      */
   329     private void dch() {
   330         System.err.print(ch); System.out.flush();
   331     }
   333     /** Read next character in character or string literal and copy into sbuf.
   334      */
   335     private void scanLitChar(boolean forBytecodeName) {
   336         if (ch == '\\') {
   337             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   338                 bp++;
   339                 putChar('\\');
   340                 scanChar();
   341             } else {
   342                 scanChar();
   343                 switch (ch) {
   344                 case '0': case '1': case '2': case '3':
   345                 case '4': case '5': case '6': case '7':
   346                     char leadch = ch;
   347                     int oct = digit(8);
   348                     scanChar();
   349                     if ('0' <= ch && ch <= '7') {
   350                         oct = oct * 8 + digit(8);
   351                         scanChar();
   352                         if (leadch <= '3' && '0' <= ch && ch <= '7') {
   353                             oct = oct * 8 + digit(8);
   354                             scanChar();
   355                         }
   356                     }
   357                     putChar((char)oct);
   358                     break;
   359                 case 'b':
   360                     putChar('\b'); scanChar(); break;
   361                 case 't':
   362                     putChar('\t'); scanChar(); break;
   363                 case 'n':
   364                     putChar('\n'); scanChar(); break;
   365                 case 'f':
   366                     putChar('\f'); scanChar(); break;
   367                 case 'r':
   368                     putChar('\r'); scanChar(); break;
   369                 case '\'':
   370                     putChar('\''); scanChar(); break;
   371                 case '\"':
   372                     putChar('\"'); scanChar(); break;
   373                 case '\\':
   374                     putChar('\\'); scanChar(); break;
   375                 case '|': case ',': case '?': case '%':
   376                 case '^': case '_': case '{': case '}':
   377                 case '!': case '-': case '=':
   378                     if (forBytecodeName) {
   379                         // Accept escape sequences for dangerous bytecode chars.
   380                         // This is illegal in normal Java string or character literals.
   381                         // Note that the escape sequence itself is passed through.
   382                         putChar('\\'); putChar(ch); scanChar();
   383                     } else {
   384                         lexError(bp, "illegal.esc.char");
   385                     }
   386                     break;
   387                 default:
   388                     lexError(bp, "illegal.esc.char");
   389                 }
   390             }
   391         } else if (bp != buflen) {
   392             putChar(ch); scanChar();
   393         }
   394     }
   395     private void scanLitChar() {
   396         scanLitChar(false);
   397     }
   399     /** Read next character in an exotic name #"foo"
   400      */
   401     private void scanBytecodeNameChar() {
   402         switch (ch) {
   403         // reject any "dangerous" char which is illegal somewhere in the JVM spec
   404         // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm
   405         case '/': case '.': case ';':  // illegal everywhere
   406         case '<': case '>':  // illegal in methods, dangerous in classes
   407         case '[':  // illegal in classes
   408             lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch));
   409             break;
   410         }
   411         scanLitChar(true);
   412     }
   414     private void scanDigits(int digitRadix) {
   415         char saveCh;
   416         int savePos;
   417         do {
   418             if (ch != '_') {
   419                 putChar(ch);
   420             } else {
   421                 if (!allowUnderscoresInLiterals) {
   422                     lexError("unsupported.underscore", source.name);
   423                     allowUnderscoresInLiterals = true;
   424                 }
   425             }
   426             saveCh = ch;
   427             savePos = bp;
   428             scanChar();
   429         } while (digit(digitRadix) >= 0 || ch == '_');
   430         if (saveCh == '_')
   431             lexError(savePos, "illegal.underscore");
   432     }
   434     /** Read fractional part of hexadecimal floating point number.
   435      */
   436     private void scanHexExponentAndSuffix() {
   437         if (ch == 'p' || ch == 'P') {
   438             putChar(ch);
   439             scanChar();
   440             skipIllegalUnderscores();
   441             if (ch == '+' || ch == '-') {
   442                 putChar(ch);
   443                 scanChar();
   444             }
   445             skipIllegalUnderscores();
   446             if ('0' <= ch && ch <= '9') {
   447                 scanDigits(10);
   448                 if (!allowHexFloats) {
   449                     lexError("unsupported.fp.lit", source.name);
   450                     allowHexFloats = true;
   451                 }
   452                 else if (!hexFloatsWork)
   453                     lexError("unsupported.cross.fp.lit");
   454             } else
   455                 lexError("malformed.fp.lit");
   456         } else {
   457             lexError("malformed.fp.lit");
   458         }
   459         if (ch == 'f' || ch == 'F') {
   460             putChar(ch);
   461             scanChar();
   462             token = FLOATLITERAL;
   463         } else {
   464             if (ch == 'd' || ch == 'D') {
   465                 putChar(ch);
   466                 scanChar();
   467             }
   468             token = DOUBLELITERAL;
   469         }
   470     }
   472     /** Read fractional part of floating point number.
   473      */
   474     private void scanFraction() {
   475         skipIllegalUnderscores();
   476         if ('0' <= ch && ch <= '9') {
   477             scanDigits(10);
   478         }
   479         int sp1 = sp;
   480         if (ch == 'e' || ch == 'E') {
   481             putChar(ch);
   482             scanChar();
   483             skipIllegalUnderscores();
   484             if (ch == '+' || ch == '-') {
   485                 putChar(ch);
   486                 scanChar();
   487             }
   488             skipIllegalUnderscores();
   489             if ('0' <= ch && ch <= '9') {
   490                 scanDigits(10);
   491                 return;
   492             }
   493             lexError("malformed.fp.lit");
   494             sp = sp1;
   495         }
   496     }
   498     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   499      */
   500     private void scanFractionAndSuffix() {
   501         this.radix = 10;
   502         scanFraction();
   503         if (ch == 'f' || ch == 'F') {
   504             putChar(ch);
   505             scanChar();
   506             token = FLOATLITERAL;
   507         } else {
   508             if (ch == 'd' || ch == 'D') {
   509                 putChar(ch);
   510                 scanChar();
   511             }
   512             token = DOUBLELITERAL;
   513         }
   514     }
   516     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   517      */
   518     private void scanHexFractionAndSuffix(boolean seendigit) {
   519         this.radix = 16;
   520         assert ch == '.';
   521         putChar(ch);
   522         scanChar();
   523         skipIllegalUnderscores();
   524         if (digit(16) >= 0) {
   525             seendigit = true;
   526             scanDigits(16);
   527         }
   528         if (!seendigit)
   529             lexError("invalid.hex.number");
   530         else
   531             scanHexExponentAndSuffix();
   532     }
   534     private void skipIllegalUnderscores() {
   535         if (ch == '_') {
   536             lexError(bp, "illegal.underscore");
   537             while (ch == '_')
   538                 scanChar();
   539         }
   540     }
   542     /** Read a number.
   543      *  @param radix  The radix of the number; one of 2, j8, 10, 16.
   544      */
   545     private void scanNumber(int radix) {
   546         this.radix = radix;
   547         // for octal, allow base-10 digit in case it's a float literal
   548         int digitRadix = (radix == 8 ? 10 : radix);
   549         boolean seendigit = false;
   550         if (digit(digitRadix) >= 0) {
   551             seendigit = true;
   552             scanDigits(digitRadix);
   553         }
   554         if (radix == 16 && ch == '.') {
   555             scanHexFractionAndSuffix(seendigit);
   556         } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
   557             scanHexExponentAndSuffix();
   558         } else if (digitRadix == 10 && ch == '.') {
   559             putChar(ch);
   560             scanChar();
   561             scanFractionAndSuffix();
   562         } else if (digitRadix == 10 &&
   563                    (ch == 'e' || ch == 'E' ||
   564                     ch == 'f' || ch == 'F' ||
   565                     ch == 'd' || ch == 'D')) {
   566             scanFractionAndSuffix();
   567         } else {
   568             if (ch == 'l' || ch == 'L') {
   569                 scanChar();
   570                 token = LONGLITERAL;
   571             } else {
   572                 token = INTLITERAL;
   573             }
   574         }
   575     }
   577     /** Read an identifier.
   578      */
   579     private void scanIdent() {
   580         boolean isJavaIdentifierPart;
   581         char high;
   582         do {
   583             if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
   584             // optimization, was: putChar(ch);
   586             scanChar();
   587             switch (ch) {
   588             case 'A': case 'B': case 'C': case 'D': case 'E':
   589             case 'F': case 'G': case 'H': case 'I': case 'J':
   590             case 'K': case 'L': case 'M': case 'N': case 'O':
   591             case 'P': case 'Q': case 'R': case 'S': case 'T':
   592             case 'U': case 'V': case 'W': case 'X': case 'Y':
   593             case 'Z':
   594             case 'a': case 'b': case 'c': case 'd': case 'e':
   595             case 'f': case 'g': case 'h': case 'i': case 'j':
   596             case 'k': case 'l': case 'm': case 'n': case 'o':
   597             case 'p': case 'q': case 'r': case 's': case 't':
   598             case 'u': case 'v': case 'w': case 'x': case 'y':
   599             case 'z':
   600             case '$': case '_':
   601             case '0': case '1': case '2': case '3': case '4':
   602             case '5': case '6': case '7': case '8': case '9':
   603             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
   604             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
   605             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
   606             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
   607             case '\u0015': case '\u0016': case '\u0017':
   608             case '\u0018': case '\u0019': case '\u001B':
   609             case '\u007F':
   610                 break;
   611             case '\u001A': // EOI is also a legal identifier part
   612                 if (bp >= buflen) {
   613                     name = names.fromChars(sbuf, 0, sp);
   614                     token = keywords.key(name);
   615                     return;
   616                 }
   617                 break;
   618             default:
   619                 if (ch < '\u0080') {
   620                     // all ASCII range chars already handled, above
   621                     isJavaIdentifierPart = false;
   622                 } else {
   623                     high = scanSurrogates();
   624                     if (high != 0) {
   625                         if (sp == sbuf.length) {
   626                             putChar(high);
   627                         } else {
   628                             sbuf[sp++] = high;
   629                         }
   630                         isJavaIdentifierPart = Character.isJavaIdentifierPart(
   631                             Character.toCodePoint(high, ch));
   632                     } else {
   633                         isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
   634                     }
   635                 }
   636                 if (!isJavaIdentifierPart) {
   637                     name = names.fromChars(sbuf, 0, sp);
   638                     token = keywords.key(name);
   639                     return;
   640                 }
   641             }
   642         } while (true);
   643     }
   645     /** Are surrogates supported?
   646      */
   647     final static boolean surrogatesSupported = surrogatesSupported();
   648     private static boolean surrogatesSupported() {
   649         try {
   650             Character.isHighSurrogate('a');
   651             return true;
   652         } catch (NoSuchMethodError ex) {
   653             return false;
   654         }
   655     }
   657     /** Scan surrogate pairs.  If 'ch' is a high surrogate and
   658      *  the next character is a low surrogate, then put the low
   659      *  surrogate in 'ch', and return the high surrogate.
   660      *  otherwise, just return 0.
   661      */
   662     private char scanSurrogates() {
   663         if (surrogatesSupported && Character.isHighSurrogate(ch)) {
   664             char high = ch;
   666             scanChar();
   668             if (Character.isLowSurrogate(ch)) {
   669                 return high;
   670             }
   672             ch = high;
   673         }
   675         return 0;
   676     }
   678     /** Return true if ch can be part of an operator.
   679      */
   680     private boolean isSpecial(char ch) {
   681         switch (ch) {
   682         case '!': case '%': case '&': case '*': case '?':
   683         case '+': case '-': case ':': case '<': case '=':
   684         case '>': case '^': case '|': case '~':
   685         case '@':
   686             return true;
   687         default:
   688             return false;
   689         }
   690     }
   692     /** Read longest possible sequence of special characters and convert
   693      *  to token.
   694      */
   695     private void scanOperator() {
   696         while (true) {
   697             putChar(ch);
   698             Name newname = names.fromChars(sbuf, 0, sp);
   699             if (keywords.key(newname) == IDENTIFIER) {
   700                 sp--;
   701                 break;
   702             }
   703             name = newname;
   704             token = keywords.key(newname);
   705             scanChar();
   706             if (!isSpecial(ch)) break;
   707         }
   708     }
   710     /**
   711      * Scan a documention comment; determine if a deprecated tag is present.
   712      * Called once the initial /, * have been skipped, positioned at the second *
   713      * (which is treated as the beginning of the first line).
   714      * Stops positioned at the closing '/'.
   715      */
   716     @SuppressWarnings("fallthrough")
   717     private void scanDocComment() {
   718         boolean deprecatedPrefix = false;
   720         forEachLine:
   721         while (bp < buflen) {
   723             // Skip optional WhiteSpace at beginning of line
   724             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   725                 scanCommentChar();
   726             }
   728             // Skip optional consecutive Stars
   729             while (bp < buflen && ch == '*') {
   730                 scanCommentChar();
   731                 if (ch == '/') {
   732                     return;
   733                 }
   734             }
   736             // Skip optional WhiteSpace after Stars
   737             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   738                 scanCommentChar();
   739             }
   741             deprecatedPrefix = false;
   742             // At beginning of line in the JavaDoc sense.
   743             if (bp < buflen && ch == '@' && !deprecatedFlag) {
   744                 scanCommentChar();
   745                 if (bp < buflen && ch == 'd') {
   746                     scanCommentChar();
   747                     if (bp < buflen && ch == 'e') {
   748                         scanCommentChar();
   749                         if (bp < buflen && ch == 'p') {
   750                             scanCommentChar();
   751                             if (bp < buflen && ch == 'r') {
   752                                 scanCommentChar();
   753                                 if (bp < buflen && ch == 'e') {
   754                                     scanCommentChar();
   755                                     if (bp < buflen && ch == 'c') {
   756                                         scanCommentChar();
   757                                         if (bp < buflen && ch == 'a') {
   758                                             scanCommentChar();
   759                                             if (bp < buflen && ch == 't') {
   760                                                 scanCommentChar();
   761                                                 if (bp < buflen && ch == 'e') {
   762                                                     scanCommentChar();
   763                                                     if (bp < buflen && ch == 'd') {
   764                                                         deprecatedPrefix = true;
   765                                                         scanCommentChar();
   766                                                     }}}}}}}}}}}
   767             if (deprecatedPrefix && bp < buflen) {
   768                 if (Character.isWhitespace(ch)) {
   769                     deprecatedFlag = true;
   770                 } else if (ch == '*') {
   771                     scanCommentChar();
   772                     if (ch == '/') {
   773                         deprecatedFlag = true;
   774                         return;
   775                     }
   776                 }
   777             }
   779             // Skip rest of line
   780             while (bp < buflen) {
   781                 switch (ch) {
   782                 case '*':
   783                     scanCommentChar();
   784                     if (ch == '/') {
   785                         return;
   786                     }
   787                     break;
   788                 case CR: // (Spec 3.4)
   789                     scanCommentChar();
   790                     if (ch != LF) {
   791                         continue forEachLine;
   792                     }
   793                     /* fall through to LF case */
   794                 case LF: // (Spec 3.4)
   795                     scanCommentChar();
   796                     continue forEachLine;
   797                 default:
   798                     scanCommentChar();
   799                 }
   800             } // rest of line
   801         } // forEachLine
   802         return;
   803     }
   805     /** The value of a literal token, recorded as a string.
   806      *  For integers, leading 0x and 'l' suffixes are suppressed.
   807      */
   808     public String stringVal() {
   809         return new String(sbuf, 0, sp);
   810     }
   812     /** Read token.
   813      */
   814     public void nextToken() {
   816         try {
   817             prevEndPos = endPos;
   818             sp = 0;
   820             while (true) {
   821                 pos = bp;
   822                 switch (ch) {
   823                 case ' ': // (Spec 3.6)
   824                 case '\t': // (Spec 3.6)
   825                 case FF: // (Spec 3.6)
   826                     do {
   827                         scanChar();
   828                     } while (ch == ' ' || ch == '\t' || ch == FF);
   829                     endPos = bp;
   830                     processWhiteSpace();
   831                     break;
   832                 case LF: // (Spec 3.4)
   833                     scanChar();
   834                     endPos = bp;
   835                     processLineTerminator();
   836                     break;
   837                 case CR: // (Spec 3.4)
   838                     scanChar();
   839                     if (ch == LF) {
   840                         scanChar();
   841                     }
   842                     endPos = bp;
   843                     processLineTerminator();
   844                     break;
   845                 case 'A': case 'B': case 'C': case 'D': case 'E':
   846                 case 'F': case 'G': case 'H': case 'I': case 'J':
   847                 case 'K': case 'L': case 'M': case 'N': case 'O':
   848                 case 'P': case 'Q': case 'R': case 'S': case 'T':
   849                 case 'U': case 'V': case 'W': case 'X': case 'Y':
   850                 case 'Z':
   851                 case 'a': case 'b': case 'c': case 'd': case 'e':
   852                 case 'f': case 'g': case 'h': case 'i': case 'j':
   853                 case 'k': case 'l': case 'm': case 'n': case 'o':
   854                 case 'p': case 'q': case 'r': case 's': case 't':
   855                 case 'u': case 'v': case 'w': case 'x': case 'y':
   856                 case 'z':
   857                 case '$': case '_':
   858                     scanIdent();
   859                     return;
   860                 case '0':
   861                     scanChar();
   862                     if (ch == 'x' || ch == 'X') {
   863                         scanChar();
   864                         skipIllegalUnderscores();
   865                         if (ch == '.') {
   866                             scanHexFractionAndSuffix(false);
   867                         } else if (digit(16) < 0) {
   868                             lexError("invalid.hex.number");
   869                         } else {
   870                             scanNumber(16);
   871                         }
   872                     } else if (ch == 'b' || ch == 'B') {
   873                         if (!allowBinaryLiterals) {
   874                             lexError("unsupported.binary.lit", source.name);
   875                             allowBinaryLiterals = true;
   876                         }
   877                         scanChar();
   878                         skipIllegalUnderscores();
   879                         if (digit(2) < 0) {
   880                             lexError("invalid.binary.number");
   881                         } else {
   882                             scanNumber(2);
   883                         }
   884                     } else {
   885                         putChar('0');
   886                         if (ch == '_') {
   887                             int savePos = bp;
   888                             do {
   889                                 scanChar();
   890                             } while (ch == '_');
   891                             if (digit(10) < 0) {
   892                                 lexError(savePos, "illegal.underscore");
   893                             }
   894                         }
   895                         scanNumber(8);
   896                     }
   897                     return;
   898                 case '1': case '2': case '3': case '4':
   899                 case '5': case '6': case '7': case '8': case '9':
   900                     scanNumber(10);
   901                     return;
   902                 case '.':
   903                     scanChar();
   904                     if ('0' <= ch && ch <= '9') {
   905                         putChar('.');
   906                         scanFractionAndSuffix();
   907                     } else if (ch == '.') {
   908                         putChar('.'); putChar('.');
   909                         scanChar();
   910                         if (ch == '.') {
   911                             scanChar();
   912                             putChar('.');
   913                             token = ELLIPSIS;
   914                         } else {
   915                             lexError("malformed.fp.lit");
   916                         }
   917                     } else {
   918                         token = DOT;
   919                     }
   920                     return;
   921                 case ',':
   922                     scanChar(); token = COMMA; return;
   923                 case ';':
   924                     scanChar(); token = SEMI; return;
   925                 case '(':
   926                     scanChar(); token = LPAREN; return;
   927                 case ')':
   928                     scanChar(); token = RPAREN; return;
   929                 case '[':
   930                     scanChar(); token = LBRACKET; return;
   931                 case ']':
   932                     scanChar(); token = RBRACKET; return;
   933                 case '{':
   934                     scanChar(); token = LBRACE; return;
   935                 case '}':
   936                     scanChar(); token = RBRACE; return;
   937                 case '/':
   938                     scanChar();
   939                     if (ch == '/') {
   940                         do {
   941                             scanCommentChar();
   942                         } while (ch != CR && ch != LF && bp < buflen);
   943                         if (bp < buflen) {
   944                             endPos = bp;
   945                             processComment(CommentStyle.LINE);
   946                         }
   947                         break;
   948                     } else if (ch == '*') {
   949                         scanChar();
   950                         CommentStyle style;
   951                         if (ch == '*') {
   952                             style = CommentStyle.JAVADOC;
   953                             scanDocComment();
   954                         } else {
   955                             style = CommentStyle.BLOCK;
   956                             while (bp < buflen) {
   957                                 if (ch == '*') {
   958                                     scanChar();
   959                                     if (ch == '/') break;
   960                                 } else {
   961                                     scanCommentChar();
   962                                 }
   963                             }
   964                         }
   965                         if (ch == '/') {
   966                             scanChar();
   967                             endPos = bp;
   968                             processComment(style);
   969                             break;
   970                         } else {
   971                             lexError("unclosed.comment");
   972                             return;
   973                         }
   974                     } else if (ch == '=') {
   975                         name = names.slashequals;
   976                         token = SLASHEQ;
   977                         scanChar();
   978                     } else {
   979                         name = names.slash;
   980                         token = SLASH;
   981                     }
   982                     return;
   983                 case '\'':
   984                     scanChar();
   985                     if (ch == '\'') {
   986                         lexError("empty.char.lit");
   987                     } else {
   988                         if (ch == CR || ch == LF)
   989                             lexError(pos, "illegal.line.end.in.char.lit");
   990                         scanLitChar();
   991                         if (ch == '\'') {
   992                             scanChar();
   993                             token = CHARLITERAL;
   994                         } else {
   995                             lexError(pos, "unclosed.char.lit");
   996                         }
   997                     }
   998                     return;
   999                 case '\"':
  1000                     scanChar();
  1001                     while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
  1002                         scanLitChar();
  1003                     if (ch == '\"') {
  1004                         token = STRINGLITERAL;
  1005                         scanChar();
  1006                     } else {
  1007                         lexError(pos, "unclosed.str.lit");
  1009                     return;
  1010                 case '#':
  1011                     scanChar();
  1012                     if (ch == '\"') {
  1013                         scanChar();
  1014                         if (ch == '\"')
  1015                             lexError(pos, "empty.bytecode.ident");
  1016                         while (ch != '\"' && ch != CR && ch != LF && bp < buflen) {
  1017                             scanBytecodeNameChar();
  1019                         if (ch == '\"') {
  1020                             name = names.fromChars(sbuf, 0, sp);
  1021                             token = IDENTIFIER;  // even if #"int" or #"do"
  1022                             scanChar();
  1023                         } else {
  1024                             lexError(pos, "unclosed.bytecode.ident");
  1026                     } else {
  1027                         lexError("illegal.char", String.valueOf((int)'#'));
  1029                     return;
  1030                 default:
  1031                     if (isSpecial(ch)) {
  1032                         scanOperator();
  1033                     } else {
  1034                         boolean isJavaIdentifierStart;
  1035                         if (ch < '\u0080') {
  1036                             // all ASCII range chars already handled, above
  1037                             isJavaIdentifierStart = false;
  1038                         } else {
  1039                             char high = scanSurrogates();
  1040                             if (high != 0) {
  1041                                 if (sp == sbuf.length) {
  1042                                     putChar(high);
  1043                                 } else {
  1044                                     sbuf[sp++] = high;
  1047                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(
  1048                                     Character.toCodePoint(high, ch));
  1049                             } else {
  1050                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
  1053                         if (isJavaIdentifierStart) {
  1054                             scanIdent();
  1055                         } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
  1056                             token = EOF;
  1057                             pos = bp = eofPos;
  1058                         } else {
  1059                             lexError("illegal.char", String.valueOf((int)ch));
  1060                             scanChar();
  1063                     return;
  1066         } finally {
  1067             endPos = bp;
  1068             if (scannerDebug)
  1069                 System.out.println("nextToken(" + pos
  1070                                    + "," + endPos + ")=|" +
  1071                                    new String(getRawCharacters(pos, endPos))
  1072                                    + "|");
  1076     /** Return the current token, set by nextToken().
  1077      */
  1078     public Token token() {
  1079         return token;
  1082     /** Sets the current token.
  1083      */
  1084     public void token(Token token) {
  1085         this.token = token;
  1088     /** Return the current token's position: a 0-based
  1089      *  offset from beginning of the raw input stream
  1090      *  (before unicode translation)
  1091      */
  1092     public int pos() {
  1093         return pos;
  1096     /** Return the last character position of the current token.
  1097      */
  1098     public int endPos() {
  1099         return endPos;
  1102     /** Return the last character position of the previous token.
  1103      */
  1104     public int prevEndPos() {
  1105         return prevEndPos;
  1108     /** Return the position where a lexical error occurred;
  1109      */
  1110     public int errPos() {
  1111         return errPos;
  1114     /** Set the position where a lexical error occurred;
  1115      */
  1116     public void errPos(int pos) {
  1117         errPos = pos;
  1120     /** Return the name of an identifier or token for the current token.
  1121      */
  1122     public Name name() {
  1123         return name;
  1126     /** Return the radix of a numeric literal token.
  1127      */
  1128     public int radix() {
  1129         return radix;
  1132     /** Has a @deprecated been encountered in last doc comment?
  1133      *  This needs to be reset by client with resetDeprecatedFlag.
  1134      */
  1135     public boolean deprecatedFlag() {
  1136         return deprecatedFlag;
  1139     public void resetDeprecatedFlag() {
  1140         deprecatedFlag = false;
  1143     /**
  1144      * Returns the documentation string of the current token.
  1145      */
  1146     public String docComment() {
  1147         return null;
  1150     /**
  1151      * Returns a copy of the input buffer, up to its inputLength.
  1152      * Unicode escape sequences are not translated.
  1153      */
  1154     public char[] getRawCharacters() {
  1155         char[] chars = new char[buflen];
  1156         System.arraycopy(buf, 0, chars, 0, buflen);
  1157         return chars;
  1160     /**
  1161      * Returns a copy of a character array subset of the input buffer.
  1162      * The returned array begins at the <code>beginIndex</code> and
  1163      * extends to the character at index <code>endIndex - 1</code>.
  1164      * Thus the length of the substring is <code>endIndex-beginIndex</code>.
  1165      * This behavior is like
  1166      * <code>String.substring(beginIndex, endIndex)</code>.
  1167      * Unicode escape sequences are not translated.
  1169      * @param beginIndex the beginning index, inclusive.
  1170      * @param endIndex the ending index, exclusive.
  1171      * @throws IndexOutOfBounds if either offset is outside of the
  1172      *         array bounds
  1173      */
  1174     public char[] getRawCharacters(int beginIndex, int endIndex) {
  1175         int length = endIndex - beginIndex;
  1176         char[] chars = new char[length];
  1177         System.arraycopy(buf, beginIndex, chars, 0, length);
  1178         return chars;
  1181     public enum CommentStyle {
  1182         LINE,
  1183         BLOCK,
  1184         JAVADOC,
  1187     /**
  1188      * Called when a complete comment has been scanned. pos and endPos
  1189      * will mark the comment boundary.
  1190      */
  1191     protected void processComment(CommentStyle style) {
  1192         if (scannerDebug)
  1193             System.out.println("processComment(" + pos
  1194                                + "," + endPos + "," + style + ")=|"
  1195                                + new String(getRawCharacters(pos, endPos))
  1196                                + "|");
  1199     /**
  1200      * Called when a complete whitespace run has been scanned. pos and endPos
  1201      * will mark the whitespace boundary.
  1202      */
  1203     protected void processWhiteSpace() {
  1204         if (scannerDebug)
  1205             System.out.println("processWhitespace(" + pos
  1206                                + "," + endPos + ")=|" +
  1207                                new String(getRawCharacters(pos, endPos))
  1208                                + "|");
  1211     /**
  1212      * Called when a line terminator has been processed.
  1213      */
  1214     protected void processLineTerminator() {
  1215         if (scannerDebug)
  1216             System.out.println("processTerminator(" + pos
  1217                                + "," + endPos + ")=|" +
  1218                                new String(getRawCharacters(pos, endPos))
  1219                                + "|");
  1222     /** Build a map for translating between line numbers and
  1223      * positions in the input.
  1225      * @return a LineMap */
  1226     public Position.LineMap getLineMap() {
  1227         return Position.makeLineMap(buf, buflen, false);

mercurial