jdk8-mips64-public/langtools: src/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java@d346ab55031b

7096014: Javac tokens should retain state
Summary: Refactor javac tokens from enum constants to stateful instances (to keep track of position, comments, etc.)
Reviewed-by: jjg

     1 /*

     2  * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package com.sun.tools.javac.parser;

    28 import com.sun.tools.javac.file.JavacFileManager;

    29 import com.sun.tools.javac.parser.Tokens.Token;

    30 import com.sun.tools.javac.util.*;

    32 import java.nio.*;

    34 import static com.sun.tools.javac.util.LayoutCharacters.*;

    36 /** An extension to the base lexical analyzer that captures

    37  *  and processes the contents of doc comments.  It does so by

    38  *  translating Unicode escape sequences and by stripping the

    39  *  leading whitespace and stars from each line of the comment.

    40  *

    41  *  <p><b>This is NOT part of any supported API.

    42  *  If you write code that depends on this, you do so at your own risk.

    43  *  This code and its internal interfaces are subject to change or

    44  *  deletion without notice.</b>

    45  */

    46 public class JavadocTokenizer extends JavaTokenizer {

    /**
     * Creates a doc-comment-aware scanner over a character buffer by
     * delegating to the base tokenizer constructor.
     *
     * @param fac    the scanner factory handed on to the superclass
     * @param buffer the input; it must implement {@code array()} and
     *               {@code compact()}, and {@code remaining()} must be
     *               less than {@code limit()}
     */
    protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
        super(fac, buffer);
    }

    /**
     * Creates a doc-comment-aware scanner over a character array by
     * delegating to the base tokenizer constructor.
     *
     * @param fac         the scanner factory handed on to the superclass
     * @param input       the characters to scan; the array must have at
     *                    least a single character of extra space
     * @param inputLength the length value handed on to the superclass
     */
    protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
        super(fac, input, inputLength);
    }

    62     /** The comment input buffer, index of next chacter to be read,

    63      *  index of one past last character in buffer.

    64      */

    65     private char[] buf;

    66     private int bp;

    67     private int buflen;

    69     /** The current character.

    70      */

    71     private char ch;

    73     /** The column number position of the current character.

    74      */

    75     private int col;

    77     /** The buffer index of the last converted Unicode character

    78      */

    79     private int unicodeConversionBp = 0;

    81     /**

    82      * Buffer for doc comment.

    83      */

    84     private char[] docCommentBuffer = new char[1024];

    86     /**

    87      * Number of characters in doc comment buffer.

    88      */

    89     private int docCommentCount;

    91     /**

    92      * Translated and stripped contents of doc comment

    93      */

    94     private String docComment = null;

    97     /** Unconditionally expand the comment buffer.

    98      */

    99     private void expandCommentBuffer() {

   100         char[] newBuffer = new char[docCommentBuffer.length * 2];

   101         System.arraycopy(docCommentBuffer, 0, newBuffer,

   102                          0, docCommentBuffer.length);

   103         docCommentBuffer = newBuffer;

   104     }

   106     /** Convert an ASCII digit from its base (8, 10, or 16)

   107      *  to its value.

   108      */

   109     private int digit(int base) {

   110         char c = ch;

   111         int result = Character.digit(c, base);

   112         if (result >= 0 && c > 0x7f) {

   113             ch = "0123456789abcdef".charAt(result);

   114         }

   115         return result;

   116     }

   118     /** Convert Unicode escape; bp points to initial '\' character

   119      *  (Spec 3.3).

   120      */

   121     private void convertUnicode() {

   122         if (ch == '\\' && unicodeConversionBp != bp) {

   123             bp++; ch = buf[bp]; col++;

   124             if (ch == 'u') {

   125                 do {

   126                     bp++; ch = buf[bp]; col++;

   127                 } while (ch == 'u');

   128                 int limit = bp + 3;

   129                 if (limit < buflen) {

   130                     int d = digit(16);

   131                     int code = d;

   132                     while (bp < limit && d >= 0) {

   133                         bp++; ch = buf[bp]; col++;

   134                         d = digit(16);

   135                         code = (code << 4) + d;

   136                     }

   137                     if (d >= 0) {

   138                         ch = (char)code;

   139                         unicodeConversionBp = bp;

   140                         return;

   141                     }

   142                 }

   143                 // "illegal.Unicode.esc", reported by base scanner

   144             } else {

   145                 bp--;

   146                 ch = '\\';

   147                 col--;

   148             }

   149         }

   150     }

   153     /** Read next character.

   154      */

   155     private void scanChar() {

   156         bp++;

   157         ch = buf[bp];

   158         switch (ch) {

   159         case '\r': // return

   160             col = 0;

   161             break;

   162         case '\n': // newline

   163             if (bp == 0 || buf[bp-1] != '\r') {

   164                 col = 0;

   165             }

   166             break;

   167         case '\t': // tab

   168             col = (col / TabInc * TabInc) + TabInc;

   169             break;

   170         case '\\': // possible Unicode

   171             col++;

   172             convertUnicode();

   173             break;

   174         default:

   175             col++;

   176             break;

   177         }

   178     }

   180     @Override

   181     public Token readToken() {

   182         docComment = null;

   183         Token tk = super.readToken();

   184         tk.docComment = docComment;

   185         return tk;

   186     }

   188     /**

   189      * Read next character in doc comment, skipping over double '\' characters.

   190      * If a double '\' is skipped, put in the buffer and update buffer count.

   191      */

   192     private void scanDocCommentChar() {

   193         scanChar();

   194         if (ch == '\\') {

   195             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {

   196                 if (docCommentCount == docCommentBuffer.length)

   197                     expandCommentBuffer();

   198                 docCommentBuffer[docCommentCount++] = ch;

   199                 bp++; col++;

   200             } else {

   201                 convertUnicode();

   202             }

   203         }

   204     }

   206     /**

   207      * Process a doc comment and make the string content available.

   208      * Strips leading whitespace and stars.

   209      */

   210     @SuppressWarnings("fallthrough")

   211     protected void processComment(int pos, int endPos, CommentStyle style) {

   212         if (style != CommentStyle.JAVADOC) {

   213             return;

   214         }

   216         buf = reader.getRawCharacters(pos, endPos);

   217         buflen = buf.length;

   218         bp = 0;

   219         col = 0;

   221         docCommentCount = 0;

   223         boolean firstLine = true;

   225         // Skip over first slash

   226         scanDocCommentChar();

   227         // Skip over first star

   228         scanDocCommentChar();

   230         // consume any number of stars

   231         while (bp < buflen && ch == '*') {

   232             scanDocCommentChar();

   233         }

   234         // is the comment in the form /**/, /***/, /****/, etc. ?

   235         if (bp < buflen && ch == '/') {

   236             docComment = "";

   237             return;

   238         }

   240         // skip a newline on the first line of the comment.

   241         if (bp < buflen) {

   242             if (ch == LF) {

   243                 scanDocCommentChar();

   244                 firstLine = false;

   245             } else if (ch == CR) {

   246                 scanDocCommentChar();

   247                 if (ch == LF) {

   248                     scanDocCommentChar();

   249                     firstLine = false;

   250                 }

   251             }

   252         }

   254     outerLoop:

   256         // The outerLoop processes the doc comment, looping once

   257         // for each line.  For each line, it first strips off

   258         // whitespace, then it consumes any stars, then it

   259         // puts the rest of the line into our buffer.

   260         while (bp < buflen) {

   262             // The wsLoop consumes whitespace from the beginning

   263             // of each line.

   264         wsLoop:

   266             while (bp < buflen) {

   267                 switch(ch) {

   268                 case ' ':

   269                     scanDocCommentChar();

   270                     break;

   271                 case '\t':

   272                     col = ((col - 1) / TabInc * TabInc) + TabInc;

   273                     scanDocCommentChar();

   274                     break;

   275                 case FF:

   276                     col = 0;

   277                     scanDocCommentChar();

   278                     break;

   279 // Treat newline at beginning of line (blank line, no star)

   280 // as comment text.  Old Javadoc compatibility requires this.

   281 /*---------------------------------*

   282                 case CR: // (Spec 3.4)

   283                     scanDocCommentChar();

   284                     if (ch == LF) {

   285                         col = 0;

   286                         scanDocCommentChar();

   287                     }

   288                     break;

   289                 case LF: // (Spec 3.4)

   290                     scanDocCommentChar();

   291                     break;

   292 *---------------------------------*/

   293                 default:

   294                     // we've seen something that isn't whitespace;

   295                     // jump out.

   296                     break wsLoop;

   297                 }

   298             }

   300             // Are there stars here?  If so, consume them all

   301             // and check for the end of comment.

   302             if (ch == '*') {

   303                 // skip all of the stars

   304                 do {

   305                     scanDocCommentChar();

   306                 } while (ch == '*');

   308                 // check for the closing slash.

   309                 if (ch == '/') {

   310                     // We're done with the doc comment

   311                     // scanChar() and breakout.

   312                     break outerLoop;

   313                 }

   314             } else if (! firstLine) {

   315                 //The current line does not begin with a '*' so we will indent it.

   316                 for (int i = 1; i < col; i++) {

   317                     if (docCommentCount == docCommentBuffer.length)

   318                         expandCommentBuffer();

   319                     docCommentBuffer[docCommentCount++] = ' ';

   320                 }

   321             }

   323             // The textLoop processes the rest of the characters

   324             // on the line, adding them to our buffer.

   325         textLoop:

   326             while (bp < buflen) {

   327                 switch (ch) {

   328                 case '*':

   329                     // Is this just a star?  Or is this the

   330                     // end of a comment?

   331                     scanDocCommentChar();

   332                     if (ch == '/') {

   333                         // This is the end of the comment,

   334                         // set ch and return our buffer.

   335                         break outerLoop;

   336                     }

   337                     // This is just an ordinary star.  Add it to

   338                     // the buffer.

   339                     if (docCommentCount == docCommentBuffer.length)

   340                         expandCommentBuffer();

   341                     docCommentBuffer[docCommentCount++] = '*';

   342                     break;

   343                 case ' ':

   344                 case '\t':

   345                     if (docCommentCount == docCommentBuffer.length)

   346                         expandCommentBuffer();

   347                     docCommentBuffer[docCommentCount++] = ch;

   348                     scanDocCommentChar();

   349                     break;

   350                 case FF:

   351                     scanDocCommentChar();

   352                     break textLoop; // treat as end of line

   353                 case CR: // (Spec 3.4)

   354                     scanDocCommentChar();

   355                     if (ch != LF) {

   356                         // Canonicalize CR-only line terminator to LF

   357                         if (docCommentCount == docCommentBuffer.length)

   358                             expandCommentBuffer();

   359                         docCommentBuffer[docCommentCount++] = (char)LF;

   360                         break textLoop;

   361                     }

   362                     /* fall through to LF case */

   363                 case LF: // (Spec 3.4)

   364                     // We've seen a newline.  Add it to our

   365                     // buffer and break out of this loop,

   366                     // starting fresh on a new line.

   367                     if (docCommentCount == docCommentBuffer.length)

   368                         expandCommentBuffer();

   369                     docCommentBuffer[docCommentCount++] = ch;

   370                     scanDocCommentChar();

   371                     break textLoop;

   372                 default:

   373                     // Add the character to our buffer.

   374                     if (docCommentCount == docCommentBuffer.length)

   375                         expandCommentBuffer();

   376                     docCommentBuffer[docCommentCount++] = ch;

   377                     scanDocCommentChar();

   378                 }

   379             } // end textLoop

   380             firstLine = false;

   381         } // end outerLoop

   383         if (docCommentCount > 0) {

   384             int i = docCommentCount - 1;

   385         trailLoop:

   386             while (i > -1) {

   387                 switch (docCommentBuffer[i]) {

   388                 case '*':

   389                     i--;

   390                     break;

   391                 default:

   392                     break trailLoop;

   393                 }

   394             }

   395             docCommentCount = i + 1;

   397             // Store the text of the doc comment

   398             docComment = new String(docCommentBuffer, 0 , docCommentCount);

   399         } else {

   400             docComment = "";

   401         }

   402     }

   404     /** Build a map for translating between line numbers and

   405      * positions in the input.

   406      *

   407      * @return a LineMap */

   408     public Position.LineMap getLineMap() {

   409         char[] buf = reader.getRawCharacters();

   410         return Position.makeLineMap(buf, buf.length, true);

   411     }

   412 }

Mercurial > jdk8-mips64-public > langtools / file revision

src/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java@d346ab55031b

src/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java