src/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java

Fri, 04 Nov 2011 12:36:40 +0000

author
mcimadamore
date
Fri, 04 Nov 2011 12:36:40 +0000
changeset 1125
56830d5cb5bb
parent 1113
d346ab55031b
child 1281
067f51db3402
permissions
-rw-r--r--

7104201: Refactor DocCommentScanner
Summary: Add new Comment helper class to parse contents of comments in source code
Reviewed-by: jjg

     1 /*
     2  * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import com.sun.tools.javac.parser.Tokens.Comment;
    29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
    30 import com.sun.tools.javac.util.*;
    32 import java.nio.*;
    34 import static com.sun.tools.javac.util.LayoutCharacters.*;
    36 /** An extension to the base lexical analyzer that captures
    37  *  and processes the contents of doc comments.  It does so by
    38  *  translating Unicode escape sequences and by stripping the
    39  *  leading whitespace and starts from each line of the comment.
    40  *
    41  *  <p><b>This is NOT part of any supported API.
    42  *  If you write code that depends on this, you do so at your own risk.
    43  *  This code and its internal interfaces are subject to change or
    44  *  deletion without notice.</b>
    45  */
    46 public class JavadocTokenizer extends JavaTokenizer {
    48     /** Create a scanner from the input buffer.  buffer must implement
    49      *  array() and compact(), and remaining() must be less than limit().
    50      */
    51     protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
    52         super(fac, buffer);
    53     }
    55     /** Create a scanner from the input array.  The array must have at
    56      *  least a single character of extra space.
    57      */
    58     protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
    59         super(fac, input, inputLength);
    60     }
    62     @Override
    63     protected Comment processComment(int pos, int endPos, CommentStyle style) {
    64         char[] buf = reader.getRawCharacters(pos, endPos);
    65         return new JavadocComment(new ColReader(fac, buf, buf.length), style);
    66     }
    68     /**
    69      * This is a specialized version of UnicodeReader that keeps track of the
    70      * column position within a given character stream (used for Javadoc processing).
    71      */
    72     static class ColReader extends UnicodeReader {
    74          int col;
    76          ColReader(ScannerFactory fac, char[] input, int inputLength) {
    77              super(fac, input, inputLength);
    78          }
    80          @Override
    81          protected void convertUnicode() {
    82              if (ch == '\\' && unicodeConversionBp != bp) {
    83                  bp++; ch = buf[bp]; col++;
    84                  if (ch == 'u') {
    85                      do {
    86                          bp++; ch = buf[bp]; col++;
    87                      } while (ch == 'u');
    88                      int limit = bp + 3;
    89                      if (limit < buflen) {
    90                          int d = digit(bp, 16);
    91                          int code = d;
    92                          while (bp < limit && d >= 0) {
    93                              bp++; ch = buf[bp]; col++;
    94                              d = digit(bp, 16);
    95                              code = (code << 4) + d;
    96                          }
    97                          if (d >= 0) {
    98                              ch = (char)code;
    99                              unicodeConversionBp = bp;
   100                              return;
   101                          }
   102                      }
   103                      // "illegal.Unicode.esc", reported by base scanner
   104                  } else {
   105                      bp--;
   106                      ch = '\\';
   107                      col--;
   108                  }
   109              }
   110          }
   112          @Override
   113          protected void scanCommentChar() {
   114              scanChar();
   115              if (ch == '\\') {
   116                  if (peekChar() == '\\' && !isUnicode()) {
   117                      putChar(ch, false);
   118                      bp++; col++;
   119                  } else {
   120                      convertUnicode();
   121                  }
   122              }
   123          }
   125          @Override
   126          protected void scanChar() {
   127              bp++;
   128              ch = buf[bp];
   129              switch (ch) {
   130              case '\r': // return
   131                  col = 0;
   132                  break;
   133              case '\n': // newline
   134                  if (bp == 0 || buf[bp-1] != '\r') {
   135                      col = 0;
   136                  }
   137                  break;
   138              case '\t': // tab
   139                  col = (col / TabInc * TabInc) + TabInc;
   140                  break;
   141              case '\\': // possible Unicode
   142                  col++;
   143                  convertUnicode();
   144                  break;
   145              default:
   146                  col++;
   147                  break;
   148              }
   149          }
   150      }
   152      protected class JavadocComment extends JavaTokenizer.BasicComment<ColReader> {
   154         /**
   155         * Translated and stripped contents of doc comment
   156         */
   157         private String docComment = null;
   159         JavadocComment(ColReader comment_reader, CommentStyle cs) {
   160             super(comment_reader, cs);
   161         }
   163         public String getText() {
   164             if (!scanned && cs == CommentStyle.JAVADOC) {
   165                 scanDocComment();
   166             }
   167             return docComment;
   168         }
   170         @Override
   171         @SuppressWarnings("fallthrough")
   172         protected void scanDocComment() {
   173              try {
   174                  boolean firstLine = true;
   176                  // Skip over first slash
   177                  comment_reader.scanCommentChar();
   178                  // Skip over first star
   179                  comment_reader.scanCommentChar();
   181                  // consume any number of stars
   182                  while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
   183                      comment_reader.scanCommentChar();
   184                  }
   185                  // is the comment in the form /**/, /***/, /****/, etc. ?
   186                  if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') {
   187                      docComment = "";
   188                      return;
   189                  }
   191                  // skip a newline on the first line of the comment.
   192                  if (comment_reader.bp < comment_reader.buflen) {
   193                      if (comment_reader.ch == LF) {
   194                          comment_reader.scanCommentChar();
   195                          firstLine = false;
   196                      } else if (comment_reader.ch == CR) {
   197                          comment_reader.scanCommentChar();
   198                          if (comment_reader.ch == LF) {
   199                              comment_reader.scanCommentChar();
   200                              firstLine = false;
   201                          }
   202                      }
   203                  }
   205              outerLoop:
   207                  // The outerLoop processes the doc comment, looping once
   208                  // for each line.  For each line, it first strips off
   209                  // whitespace, then it consumes any stars, then it
   210                  // puts the rest of the line into our buffer.
   211                  while (comment_reader.bp < comment_reader.buflen) {
   213                      // The wsLoop consumes whitespace from the beginning
   214                      // of each line.
   215                  wsLoop:
   217                      while (comment_reader.bp < comment_reader.buflen) {
   218                          switch(comment_reader.ch) {
   219                          case ' ':
   220                              comment_reader.scanCommentChar();
   221                              break;
   222                          case '\t':
   223                              comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc;
   224                              comment_reader.scanCommentChar();
   225                              break;
   226                          case FF:
   227                              comment_reader.col = 0;
   228                              comment_reader.scanCommentChar();
   229                              break;
   230          // Treat newline at beginning of line (blank line, no star)
   231          // as comment text.  Old Javadoc compatibility requires this.
   232          /*---------------------------------*
   233                          case CR: // (Spec 3.4)
   234                              doc_reader.scanCommentChar();
   235                              if (ch == LF) {
   236                                  col = 0;
   237                                  doc_reader.scanCommentChar();
   238                              }
   239                              break;
   240                          case LF: // (Spec 3.4)
   241                              doc_reader.scanCommentChar();
   242                              break;
   243          *---------------------------------*/
   244                          default:
   245                              // we've seen something that isn't whitespace;
   246                              // jump out.
   247                              break wsLoop;
   248                          }
   249                      }
   251                      // Are there stars here?  If so, consume them all
   252                      // and check for the end of comment.
   253                      if (comment_reader.ch == '*') {
   254                          // skip all of the stars
   255                          do {
   256                              comment_reader.scanCommentChar();
   257                          } while (comment_reader.ch == '*');
   259                          // check for the closing slash.
   260                          if (comment_reader.ch == '/') {
   261                              // We're done with the doc comment
   262                              // scanChar() and breakout.
   263                              break outerLoop;
   264                          }
   265                      } else if (! firstLine) {
   266                          //The current line does not begin with a '*' so we will indent it.
   267                          for (int i = 1; i < comment_reader.col; i++) {
   268                              comment_reader.putChar(' ', false);
   269                          }
   270                      }
   271                      // The textLoop processes the rest of the characters
   272                      // on the line, adding them to our buffer.
   273                  textLoop:
   274                      while (comment_reader.bp < comment_reader.buflen) {
   275                          switch (comment_reader.ch) {
   276                          case '*':
   277                              // Is this just a star?  Or is this the
   278                              // end of a comment?
   279                              comment_reader.scanCommentChar();
   280                              if (comment_reader.ch == '/') {
   281                                  // This is the end of the comment,
   282                                  // set ch and return our buffer.
   283                                  break outerLoop;
   284                              }
   285                              // This is just an ordinary star.  Add it to
   286                              // the buffer.
   287                              comment_reader.putChar('*', false);
   288                              break;
   289                          case ' ':
   290                          case '\t':
   291                              comment_reader.putChar(comment_reader.ch, false);
   292                              comment_reader.scanCommentChar();
   293                              break;
   294                          case FF:
   295                              comment_reader.scanCommentChar();
   296                              break textLoop; // treat as end of line
   297                          case CR: // (Spec 3.4)
   298                              comment_reader.scanCommentChar();
   299                              if (comment_reader.ch != LF) {
   300                                  // Canonicalize CR-only line terminator to LF
   301                                  comment_reader.putChar((char)LF, false);
   302                                  break textLoop;
   303                              }
   304                              /* fall through to LF case */
   305                          case LF: // (Spec 3.4)
   306                              // We've seen a newline.  Add it to our
   307                              // buffer and break out of this loop,
   308                              // starting fresh on a new line.
   309                              comment_reader.putChar(comment_reader.ch, false);
   310                              comment_reader.scanCommentChar();
   311                              break textLoop;
   312                          default:
   313                              // Add the character to our buffer.
   314                              comment_reader.putChar(comment_reader.ch, false);
   315                              comment_reader.scanCommentChar();
   316                          }
   317                      } // end textLoop
   318                      firstLine = false;
   319                  } // end outerLoop
   321                  if (comment_reader.sp > 0) {
   322                      int i = comment_reader.sp - 1;
   323                  trailLoop:
   324                      while (i > -1) {
   325                          switch (comment_reader.sbuf[i]) {
   326                          case '*':
   327                              i--;
   328                              break;
   329                          default:
   330                              break trailLoop;
   331                          }
   332                      }
   333                      comment_reader.sp = i + 1;
   335                      // Store the text of the doc comment
   336                     docComment = comment_reader.chars();
   337                 } else {
   338                     docComment = "";
   339                 }
   340             } finally {
   341                 scanned = true;
   342                 if (docComment != null &&
   343                         docComment.matches("(?sm).*^\\s*@deprecated( |$).*")) {
   344                     deprecatedFlag = true;
   345                 }
   346             }
   347         }
   348     }
   350     @Override
   351     public Position.LineMap getLineMap() {
   352         char[] buf = reader.getRawCharacters();
   353         return Position.makeLineMap(buf, buf.length, true);
   354     }
   355 }

mercurial