src/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java

Mon, 24 Oct 2011 13:00:20 +0100

author
mcimadamore
date
Mon, 24 Oct 2011 13:00:20 +0100
changeset 1113
d346ab55031b
child 1125
56830d5cb5bb
permissions
-rw-r--r--

7096014: Javac tokens should retain state
Summary: Refactor javac tokens from enum constants to stateful instances (to keep track of position, comments, etc.)
Reviewed-by: jjg

     1 /*
     2  * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import com.sun.tools.javac.file.JavacFileManager;
    29 import com.sun.tools.javac.parser.Tokens.Token;
    30 import com.sun.tools.javac.util.*;
    32 import java.nio.*;
    34 import static com.sun.tools.javac.util.LayoutCharacters.*;
    36 /** An extension to the base lexical analyzer that captures
    37  *  and processes the contents of doc comments.  It does so by
    38  *  translating Unicode escape sequences and by stripping the
    39  *  leading whitespace and stars from each line of the comment.
    40  *
    41  *  <p><b>This is NOT part of any supported API.
    42  *  If you write code that depends on this, you do so at your own risk.
    43  *  This code and its internal interfaces are subject to change or
    44  *  deletion without notice.</b>
    45  */
    46 public class JavadocTokenizer extends JavaTokenizer {
    /**
     * Creates a doc-comment-aware tokenizer over a character buffer by
     * delegating to the superclass constructor.
     *
     * <p>The buffer must implement array() and compact(), and remaining()
     * must be less than limit().
     *
     * @param fac    the scanner factory, passed through to the superclass
     * @param buffer the input characters, passed through to the superclass
     */
    protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
        super(fac, buffer);
    }
    /**
     * Creates a doc-comment-aware tokenizer over a character array by
     * delegating to the superclass constructor.
     *
     * <p>The array must have at least a single character of extra space.
     *
     * @param fac         the scanner factory, passed through to the superclass
     * @param input       the input characters
     * @param inputLength the input length, passed through to the superclass
     */
    protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
        super(fac, input, inputLength);
    }
    62     /** The comment input buffer, index of next chacter to be read,
    63      *  index of one past last character in buffer.
    64      */
    65     private char[] buf;
    66     private int bp;
    67     private int buflen;
    69     /** The current character.
    70      */
    71     private char ch;
    73     /** The column number position of the current character.
    74      */
    75     private int col;
    77     /** The buffer index of the last converted Unicode character
    78      */
    79     private int unicodeConversionBp = 0;
    81     /**
    82      * Buffer for doc comment.
    83      */
    84     private char[] docCommentBuffer = new char[1024];
    86     /**
    87      * Number of characters in doc comment buffer.
    88      */
    89     private int docCommentCount;
    91     /**
    92      * Translated and stripped contents of doc comment
    93      */
    94     private String docComment = null;
    97     /** Unconditionally expand the comment buffer.
    98      */
    99     private void expandCommentBuffer() {
   100         char[] newBuffer = new char[docCommentBuffer.length * 2];
   101         System.arraycopy(docCommentBuffer, 0, newBuffer,
   102                          0, docCommentBuffer.length);
   103         docCommentBuffer = newBuffer;
   104     }
   106     /** Convert an ASCII digit from its base (8, 10, or 16)
   107      *  to its value.
   108      */
   109     private int digit(int base) {
   110         char c = ch;
   111         int result = Character.digit(c, base);
   112         if (result >= 0 && c > 0x7f) {
   113             ch = "0123456789abcdef".charAt(result);
   114         }
   115         return result;
   116     }
   118     /** Convert Unicode escape; bp points to initial '\' character
   119      *  (Spec 3.3).
   120      */
   121     private void convertUnicode() {
   122         if (ch == '\\' && unicodeConversionBp != bp) {
   123             bp++; ch = buf[bp]; col++;
   124             if (ch == 'u') {
   125                 do {
   126                     bp++; ch = buf[bp]; col++;
   127                 } while (ch == 'u');
   128                 int limit = bp + 3;
   129                 if (limit < buflen) {
   130                     int d = digit(16);
   131                     int code = d;
   132                     while (bp < limit && d >= 0) {
   133                         bp++; ch = buf[bp]; col++;
   134                         d = digit(16);
   135                         code = (code << 4) + d;
   136                     }
   137                     if (d >= 0) {
   138                         ch = (char)code;
   139                         unicodeConversionBp = bp;
   140                         return;
   141                     }
   142                 }
   143                 // "illegal.Unicode.esc", reported by base scanner
   144             } else {
   145                 bp--;
   146                 ch = '\\';
   147                 col--;
   148             }
   149         }
   150     }
   153     /** Read next character.
   154      */
   155     private void scanChar() {
   156         bp++;
   157         ch = buf[bp];
   158         switch (ch) {
   159         case '\r': // return
   160             col = 0;
   161             break;
   162         case '\n': // newline
   163             if (bp == 0 || buf[bp-1] != '\r') {
   164                 col = 0;
   165             }
   166             break;
   167         case '\t': // tab
   168             col = (col / TabInc * TabInc) + TabInc;
   169             break;
   170         case '\\': // possible Unicode
   171             col++;
   172             convertUnicode();
   173             break;
   174         default:
   175             col++;
   176             break;
   177         }
   178     }
   180     @Override
   181     public Token readToken() {
   182         docComment = null;
   183         Token tk = super.readToken();
   184         tk.docComment = docComment;
   185         return tk;
   186     }
   188     /**
   189      * Read next character in doc comment, skipping over double '\' characters.
   190      * If a double '\' is skipped, put in the buffer and update buffer count.
   191      */
   192     private void scanDocCommentChar() {
   193         scanChar();
   194         if (ch == '\\') {
   195             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   196                 if (docCommentCount == docCommentBuffer.length)
   197                     expandCommentBuffer();
   198                 docCommentBuffer[docCommentCount++] = ch;
   199                 bp++; col++;
   200             } else {
   201                 convertUnicode();
   202             }
   203         }
   204     }
   206     /**
   207      * Process a doc comment and make the string content available.
   208      * Strips leading whitespace and stars.
   209      */
   210     @SuppressWarnings("fallthrough")
   211     protected void processComment(int pos, int endPos, CommentStyle style) {
   212         if (style != CommentStyle.JAVADOC) {
   213             return;
   214         }
   216         buf = reader.getRawCharacters(pos, endPos);
   217         buflen = buf.length;
   218         bp = 0;
   219         col = 0;
   221         docCommentCount = 0;
   223         boolean firstLine = true;
   225         // Skip over first slash
   226         scanDocCommentChar();
   227         // Skip over first star
   228         scanDocCommentChar();
   230         // consume any number of stars
   231         while (bp < buflen && ch == '*') {
   232             scanDocCommentChar();
   233         }
   234         // is the comment in the form /**/, /***/, /****/, etc. ?
   235         if (bp < buflen && ch == '/') {
   236             docComment = "";
   237             return;
   238         }
   240         // skip a newline on the first line of the comment.
   241         if (bp < buflen) {
   242             if (ch == LF) {
   243                 scanDocCommentChar();
   244                 firstLine = false;
   245             } else if (ch == CR) {
   246                 scanDocCommentChar();
   247                 if (ch == LF) {
   248                     scanDocCommentChar();
   249                     firstLine = false;
   250                 }
   251             }
   252         }
   254     outerLoop:
   256         // The outerLoop processes the doc comment, looping once
   257         // for each line.  For each line, it first strips off
   258         // whitespace, then it consumes any stars, then it
   259         // puts the rest of the line into our buffer.
   260         while (bp < buflen) {
   262             // The wsLoop consumes whitespace from the beginning
   263             // of each line.
   264         wsLoop:
   266             while (bp < buflen) {
   267                 switch(ch) {
   268                 case ' ':
   269                     scanDocCommentChar();
   270                     break;
   271                 case '\t':
   272                     col = ((col - 1) / TabInc * TabInc) + TabInc;
   273                     scanDocCommentChar();
   274                     break;
   275                 case FF:
   276                     col = 0;
   277                     scanDocCommentChar();
   278                     break;
   279 // Treat newline at beginning of line (blank line, no star)
   280 // as comment text.  Old Javadoc compatibility requires this.
   281 /*---------------------------------*
   282                 case CR: // (Spec 3.4)
   283                     scanDocCommentChar();
   284                     if (ch == LF) {
   285                         col = 0;
   286                         scanDocCommentChar();
   287                     }
   288                     break;
   289                 case LF: // (Spec 3.4)
   290                     scanDocCommentChar();
   291                     break;
   292 *---------------------------------*/
   293                 default:
   294                     // we've seen something that isn't whitespace;
   295                     // jump out.
   296                     break wsLoop;
   297                 }
   298             }
   300             // Are there stars here?  If so, consume them all
   301             // and check for the end of comment.
   302             if (ch == '*') {
   303                 // skip all of the stars
   304                 do {
   305                     scanDocCommentChar();
   306                 } while (ch == '*');
   308                 // check for the closing slash.
   309                 if (ch == '/') {
   310                     // We're done with the doc comment
   311                     // scanChar() and breakout.
   312                     break outerLoop;
   313                 }
   314             } else if (! firstLine) {
   315                 //The current line does not begin with a '*' so we will indent it.
   316                 for (int i = 1; i < col; i++) {
   317                     if (docCommentCount == docCommentBuffer.length)
   318                         expandCommentBuffer();
   319                     docCommentBuffer[docCommentCount++] = ' ';
   320                 }
   321             }
   323             // The textLoop processes the rest of the characters
   324             // on the line, adding them to our buffer.
   325         textLoop:
   326             while (bp < buflen) {
   327                 switch (ch) {
   328                 case '*':
   329                     // Is this just a star?  Or is this the
   330                     // end of a comment?
   331                     scanDocCommentChar();
   332                     if (ch == '/') {
   333                         // This is the end of the comment,
   334                         // set ch and return our buffer.
   335                         break outerLoop;
   336                     }
   337                     // This is just an ordinary star.  Add it to
   338                     // the buffer.
   339                     if (docCommentCount == docCommentBuffer.length)
   340                         expandCommentBuffer();
   341                     docCommentBuffer[docCommentCount++] = '*';
   342                     break;
   343                 case ' ':
   344                 case '\t':
   345                     if (docCommentCount == docCommentBuffer.length)
   346                         expandCommentBuffer();
   347                     docCommentBuffer[docCommentCount++] = ch;
   348                     scanDocCommentChar();
   349                     break;
   350                 case FF:
   351                     scanDocCommentChar();
   352                     break textLoop; // treat as end of line
   353                 case CR: // (Spec 3.4)
   354                     scanDocCommentChar();
   355                     if (ch != LF) {
   356                         // Canonicalize CR-only line terminator to LF
   357                         if (docCommentCount == docCommentBuffer.length)
   358                             expandCommentBuffer();
   359                         docCommentBuffer[docCommentCount++] = (char)LF;
   360                         break textLoop;
   361                     }
   362                     /* fall through to LF case */
   363                 case LF: // (Spec 3.4)
   364                     // We've seen a newline.  Add it to our
   365                     // buffer and break out of this loop,
   366                     // starting fresh on a new line.
   367                     if (docCommentCount == docCommentBuffer.length)
   368                         expandCommentBuffer();
   369                     docCommentBuffer[docCommentCount++] = ch;
   370                     scanDocCommentChar();
   371                     break textLoop;
   372                 default:
   373                     // Add the character to our buffer.
   374                     if (docCommentCount == docCommentBuffer.length)
   375                         expandCommentBuffer();
   376                     docCommentBuffer[docCommentCount++] = ch;
   377                     scanDocCommentChar();
   378                 }
   379             } // end textLoop
   380             firstLine = false;
   381         } // end outerLoop
   383         if (docCommentCount > 0) {
   384             int i = docCommentCount - 1;
   385         trailLoop:
   386             while (i > -1) {
   387                 switch (docCommentBuffer[i]) {
   388                 case '*':
   389                     i--;
   390                     break;
   391                 default:
   392                     break trailLoop;
   393                 }
   394             }
   395             docCommentCount = i + 1;
   397             // Store the text of the doc comment
   398             docComment = new String(docCommentBuffer, 0 , docCommentCount);
   399         } else {
   400             docComment = "";
   401         }
   402     }
   404     /** Build a map for translating between line numbers and
   405      * positions in the input.
   406      *
   407      * @return a LineMap */
   408     public Position.LineMap getLineMap() {
   409         char[] buf = reader.getRawCharacters();
   410         return Position.makeLineMap(buf, buf.length, true);
   411     }
   412 }

mercurial