src/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java

Sun, 17 Feb 2013 16:44:55 -0500

author
dholmes
date
Sun, 17 Feb 2013 16:44:55 -0500
changeset 1571
af8417e590f4
parent 1529
950d8195a5a4
child 1679
b402b93cbe38
permissions
-rw-r--r--

Merge

     1 /*
     2  * Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import com.sun.tools.javac.parser.Tokens.Comment;
    29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
    30 import com.sun.tools.javac.util.*;
    32 import java.nio.*;
    34 import static com.sun.tools.javac.util.LayoutCharacters.*;
    36 /** An extension to the base lexical analyzer that captures
    37  *  and processes the contents of doc comments.  It does so by
    38  *  translating Unicode escape sequences and by stripping the
    39  *  leading whitespace and stars from each line of the comment.
    40  *
    41  *  <p><b>This is NOT part of any supported API.
    42  *  If you write code that depends on this, you do so at your own risk.
    43  *  This code and its internal interfaces are subject to change or
    44  *  deletion without notice.</b>
    45  */
    46 public class JavadocTokenizer extends JavaTokenizer {
    48     /** Create a scanner from the input buffer.  buffer must implement
    49      *  array() and compact(), and remaining() must be less than limit().
            *
            *  @param fac    the scanner factory, handed unchanged to the superclass constructor
            *  @param buffer the input buffer, handed unchanged to the superclass constructor
    50      */
    51     protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
               // This class sets up no state of its own; construction is delegated to JavaTokenizer.
    52         super(fac, buffer);
    53     }
    55     /** Create a scanner from the input array.  The array must have at
    56      *  least a single character of extra space.
            *
            *  @param fac         the scanner factory, handed unchanged to the superclass constructor
            *  @param input       the character array to scan
            *  @param inputLength handed unchanged to the superclass constructor; presumably the
            *                     number of valid characters in input (confirm against JavaTokenizer)
    57      */
    58     protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
               // This class sets up no state of its own; construction is delegated to JavaTokenizer.
    59         super(fac, input, inputLength);
    60     }
    62     @Override
    63     protected Comment processComment(int pos, int endPos, CommentStyle style) {
    64         char[] buf = reader.getRawCharacters(pos, endPos);
    65         return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style);
    66     }
    68     /**
    69      * This is a specialized version of UnicodeReader that keeps track of the
    70      * column position within a given character stream (used for Javadoc processing),
    71      * and which builds a table for mapping positions in the comment string to
    72      * positions in the source file.
    73      */
    74     static class DocReader extends UnicodeReader {
                /**
                 * Column counter kept in step with the scan position; it is updated by
                 * scanChar(), scanCommentChar() and convertUnicode() in this class.
                 */
    76          int col;
                /**
                 * Set from the constructor argument of the same name. putChar() adds it to bp
                 * to turn an index into this reader's buffer into a source position.
                 */
    77          int startPos;
    79          /**
    80           * A buffer for building a table for mapping positions in {@link #sbuf}
    81           * to positions in the source buffer.
    82           *
    83           * The array is organized as a series of pairs of integers: the first
    84           * number in each pair specifies a position in the comment text,
    85           * the second number in each pair specifies the corresponding position
    86           * in the source buffer. The pairs are sorted in ascending order.
    87           *
    88           * Since the mapping function is generally continuous, with successive
    89           * positions in the string corresponding to successive positions in the
    90           * source buffer, the table only needs to record discontinuities in
    91           * the mapping. The values of intermediate positions can be inferred.
    92           *
    93           * Discontinuities may occur in a number of places: when a newline
    94           * is followed by whitespace and asterisks (which are ignored),
    95           * when a tab is expanded into spaces, and when unicode escapes
    96           * are used in the source buffer.
    97           *
    98           * Thus, to find the source position of any position, p, in the comment
    99           * string, find the index, i, of the pair whose string offset
   100           * ({@code pbuf[i] }) is closest to but not greater than p. Then,
   101           * {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }.
   102           */
   103          int[] pbuf = new int[128];
   105          /**
   106           * The index of the next empty slot in the pbuf buffer.
   107           */
   108          int pp = 0;
                /**
                 * Creates a reader over the given characters and remembers where they start.
                 *
                 * @param fac         handed unchanged to the UnicodeReader constructor
                 * @param input       handed unchanged to the UnicodeReader constructor
                 * @param inputLength handed unchanged to the UnicodeReader constructor
                 * @param startPos    stored in the startPos field, which putChar() adds to bp
                 *                    when it records source positions in pbuf
                 */
   110          DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) {
   111              super(fac, input, inputLength);
   112              this.startPos = startPos;
   113          }
   115          @Override
   116          protected void convertUnicode() {
   117              if (ch == '\\' && unicodeConversionBp != bp) {
   118                  bp++; ch = buf[bp]; col++;
   119                  if (ch == 'u') {
   120                      do {
   121                          bp++; ch = buf[bp]; col++;
   122                      } while (ch == 'u');
   123                      int limit = bp + 3;
   124                      if (limit < buflen) {
   125                          int d = digit(bp, 16);
   126                          int code = d;
   127                          while (bp < limit && d >= 0) {
   128                              bp++; ch = buf[bp]; col++;
   129                              d = digit(bp, 16);
   130                              code = (code << 4) + d;
   131                          }
   132                          if (d >= 0) {
   133                              ch = (char)code;
   134                              unicodeConversionBp = bp;
   135                              return;
   136                          }
   137                      }
   138                      // "illegal.Unicode.esc", reported by base scanner
   139                  } else {
   140                      bp--;
   141                      ch = '\\';
   142                      col--;
   143                  }
   144              }
   145          }
   147          @Override
   148          protected void scanCommentChar() {
   149              scanChar();
   150              if (ch == '\\') {
   151                  if (peekChar() == '\\' && !isUnicode()) {
   152                      putChar(ch, false);
   153                      bp++; col++;
   154                  } else {
   155                      convertUnicode();
   156                  }
   157              }
   158          }
   160          @Override
   161          protected void scanChar() {
   162              bp++;
   163              ch = buf[bp];
   164              switch (ch) {
   165              case '\r': // return
   166                  col = 0;
   167                  break;
   168              case '\n': // newline
   169                  if (bp == 0 || buf[bp-1] != '\r') {
   170                      col = 0;
   171                  }
   172                  break;
   173              case '\t': // tab
   174                  col = (col / TabInc * TabInc) + TabInc;
   175                  break;
   176              case '\\': // possible Unicode
   177                  col++;
   178                  convertUnicode();
   179                  break;
   180              default:
   181                  col++;
   182                  break;
   183              }
   184          }
   186          @Override
   187          public void putChar(char ch, boolean scan) {
   188              // At this point, bp is the position of the current character in buf,
   189              // and sp is the position in sbuf where this character will be put.
   190              // Record a new entry in pbuf if pbuf is empty or if sp and its
   191              // corresponding source position are not equidistant from the
   192              // corresponding values in the latest entry in the pbuf array.
   193              // (i.e. there is a discontinuity in the map function.)
   194              if ((pp == 0)
   195                      || (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) {
   196                  if (pp + 1 >= pbuf.length) {
   197                      int[] new_pbuf = new int[pbuf.length * 2];
   198                      System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length);
   199                      pbuf = new_pbuf;
   200                  }
   201                  pbuf[pp] = sp;
   202                  pbuf[pp + 1] = startPos + bp;
   203                  pp += 2;
   204              }
   205              super.putChar(ch, scan);
   206          }
   207      }
   209      protected class JavadocComment extends JavaTokenizer.BasicComment<DocReader> {
   211         /**
   212         * Translated and stripped contents of doc comment
               * (assigned by scanDocComment(); null until then)
   213         */
   214         private String docComment = null;
               /**
                * Copy of the used part of the reader's pbuf table: pairs of
                * (offset in docComment, source position). scanDocComment() fills it in
                * only when the scanned text is non-empty; otherwise it stays null.
                * getSourcePos() searches it.
                */
   215         private int[] docPosns = null;
               /**
                * Creates a comment whose text will be read from the given reader.
                *
                * @param reader handed unchanged to the BasicComment constructor
                * @param cs     handed unchanged to the BasicComment constructor
                */
   217         JavadocComment(DocReader reader, CommentStyle cs) {
   218             super(reader, cs);
   219         }
   221         @Override
   222         public String getText() {
   223             if (!scanned && cs == CommentStyle.JAVADOC) {
   224                 scanDocComment();
   225             }
   226             return docComment;
   227         }
   229         @Override
   230         public int getSourcePos(int pos) {
   231             // Binary search to find the entry for which the string index is
   232             // less than pos. Since docPosns is a list of pairs of integers
   233             // we must make sure the index is always even.
   234             // If we find an exact match for pos, the other item in the pair
   235             // gives the source pos; otherwise, compute the source position
   236             // relative to the best match found in the array.
   237             if (pos == Position.NOPOS)
   238                 return Position.NOPOS;
   239             if (pos < 0 || pos > docComment.length())
   240                 throw new StringIndexOutOfBoundsException(String.valueOf(pos));
   241             if (docPosns == null)
   242                 return Position.NOPOS;
   243             int start = 0;
   244             int end = docPosns.length;
   245             while (start < end - 2) {
   246                 // find an even index midway between start and end
   247                 int index = ((start  + end) / 4) * 2;
   248                 if (docPosns[index] < pos)
   249                     start = index;
   250                 else if (docPosns[index] == pos)
   251                     return docPosns[index + 1];
   252                 else
   253                     end = index;
   254             }
   255             return docPosns[start + 1] + (pos - docPosns[start]);
   256         }
   258         @Override
   259         @SuppressWarnings("fallthrough")
   260         protected void scanDocComment() {
   261              try {
   262                  boolean firstLine = true;
   264                  // Skip over first slash
   265                  comment_reader.scanCommentChar();
   266                  // Skip over first star
   267                  comment_reader.scanCommentChar();
   269                  // consume any number of stars
   270                  while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
   271                      comment_reader.scanCommentChar();
   272                  }
   273                  // is the comment in the form /**/, /***/, /****/, etc. ?
   274                  if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') {
   275                      docComment = "";
   276                      return;
   277                  }
   279                  // skip a newline on the first line of the comment.
   280                  if (comment_reader.bp < comment_reader.buflen) {
   281                      if (comment_reader.ch == LF) {
   282                          comment_reader.scanCommentChar();
   283                          firstLine = false;
   284                      } else if (comment_reader.ch == CR) {
   285                          comment_reader.scanCommentChar();
   286                          if (comment_reader.ch == LF) {
   287                              comment_reader.scanCommentChar();
   288                              firstLine = false;
   289                          }
   290                      }
   291                  }
   293              outerLoop:
   295                  // The outerLoop processes the doc comment, looping once
   296                  // for each line.  For each line, it first strips off
   297                  // whitespace, then it consumes any stars, then it
   298                  // puts the rest of the line into our buffer.
   299                  while (comment_reader.bp < comment_reader.buflen) {
   300                      int begin_bp = comment_reader.bp;
   301                      char begin_ch = comment_reader.ch;
   302                      // The wsLoop consumes whitespace from the beginning
   303                      // of each line.
   304                  wsLoop:
   306                      while (comment_reader.bp < comment_reader.buflen) {
   307                          switch(comment_reader.ch) {
   308                          case ' ':
   309                              comment_reader.scanCommentChar();
   310                              break;
   311                          case '\t':
   312                              comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc;
   313                              comment_reader.scanCommentChar();
   314                              break;
   315                          case FF:
   316                              comment_reader.col = 0;
   317                              comment_reader.scanCommentChar();
   318                              break;
   319          // Treat newline at beginning of line (blank line, no star)
   320          // as comment text.  Old Javadoc compatibility requires this.
   321          /*---------------------------------*
   322                          case CR: // (Spec 3.4)
   323                              doc_reader.scanCommentChar();
   324                              if (ch == LF) {
   325                                  col = 0;
   326                                  doc_reader.scanCommentChar();
   327                              }
   328                              break;
   329                          case LF: // (Spec 3.4)
   330                              doc_reader.scanCommentChar();
   331                              break;
   332          *---------------------------------*/
   333                          default:
   334                              // we've seen something that isn't whitespace;
   335                              // jump out.
   336                              break wsLoop;
   337                          }
   338                      }
   340                      // Are there stars here?  If so, consume them all
   341                      // and check for the end of comment.
   342                      if (comment_reader.ch == '*') {
   343                          // skip all of the stars
   344                          do {
   345                              comment_reader.scanCommentChar();
   346                          } while (comment_reader.ch == '*');
   348                          // check for the closing slash.
   349                          if (comment_reader.ch == '/') {
   350                              // We're done with the doc comment
   351                              // scanChar() and breakout.
   352                              break outerLoop;
   353                          }
   354                      } else if (! firstLine) {
   355                          // The current line does not begin with a '*' so we will
   356                          // treat it as comment
   357                          comment_reader.bp = begin_bp;
   358                          comment_reader.ch = begin_ch;
   359                      }
   360                      // The textLoop processes the rest of the characters
   361                      // on the line, adding them to our buffer.
   362                  textLoop:
   363                      while (comment_reader.bp < comment_reader.buflen) {
   364                          switch (comment_reader.ch) {
   365                          case '*':
   366                              // Is this just a star?  Or is this the
   367                              // end of a comment?
   368                              comment_reader.scanCommentChar();
   369                              if (comment_reader.ch == '/') {
   370                                  // This is the end of the comment,
   371                                  // set ch and return our buffer.
   372                                  break outerLoop;
   373                              }
   374                              // This is just an ordinary star.  Add it to
   375                              // the buffer.
   376                              comment_reader.putChar('*', false);
   377                              break;
   378                          case ' ':
   379                          case '\t':
   380                              comment_reader.putChar(comment_reader.ch, false);
   381                              comment_reader.scanCommentChar();
   382                              break;
   383                          case FF:
   384                              comment_reader.scanCommentChar();
   385                              break textLoop; // treat as end of line
   386                          case CR: // (Spec 3.4)
   387                              comment_reader.scanCommentChar();
   388                              if (comment_reader.ch != LF) {
   389                                  // Canonicalize CR-only line terminator to LF
   390                                  comment_reader.putChar((char)LF, false);
   391                                  break textLoop;
   392                              }
   393                              /* fall through to LF case */
   394                          case LF: // (Spec 3.4)
   395                              // We've seen a newline.  Add it to our
   396                              // buffer and break out of this loop,
   397                              // starting fresh on a new line.
   398                              comment_reader.putChar(comment_reader.ch, false);
   399                              comment_reader.scanCommentChar();
   400                              break textLoop;
   401                          default:
   402                              // Add the character to our buffer.
   403                              comment_reader.putChar(comment_reader.ch, false);
   404                              comment_reader.scanCommentChar();
   405                          }
   406                      } // end textLoop
   407                      firstLine = false;
   408                  } // end outerLoop
   410                  if (comment_reader.sp > 0) {
   411                      int i = comment_reader.sp - 1;
   412                  trailLoop:
   413                      while (i > -1) {
   414                          switch (comment_reader.sbuf[i]) {
   415                          case '*':
   416                              i--;
   417                              break;
   418                          default:
   419                              break trailLoop;
   420                          }
   421                      }
   422                      comment_reader.sp = i + 1;
   424                      // Store the text of the doc comment
   425                     docComment = comment_reader.chars();
   426                     docPosns = new int[comment_reader.pp];
   427                     System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length);
   428                 } else {
   429                     docComment = "";
   430                 }
   431             } finally {
   432                 scanned = true;
   433                 comment_reader = null;
   434                 if (docComment != null &&
   435                         docComment.matches("(?sm).*^\\s*@deprecated( |$).*")) {
   436                     deprecatedFlag = true;
   437                 }
   438             }
   439         }
   440     }
   442     @Override
   443     public Position.LineMap getLineMap() {
   444         char[] buf = reader.getRawCharacters();
   445         return Position.makeLineMap(buf, buf.length, true);
   446     }
   447 }

mercurial