jdk8-mips64-public/langtools: src/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java@af8417e590f4

     1 /*

     2  * Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package com.sun.tools.javac.parser;

import com.sun.tools.javac.parser.Tokens.Comment;
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
import com.sun.tools.javac.util.*;

import java.nio.*;
import java.util.regex.Pattern;

import static com.sun.tools.javac.util.LayoutCharacters.*;

    36 /** An extension to the base lexical analyzer that captures

    37  *  and processes the contents of doc comments.  It does so by

    38  *  translating Unicode escape sequences and by stripping the

    39  *  leading whitespace and starts from each line of the comment.

    40  *

    41  *  <p><b>This is NOT part of any supported API.

    42  *  If you write code that depends on this, you do so at your own risk.

    43  *  This code and its internal interfaces are subject to change or

    44  *  deletion without notice.</b>

    45  */

    46 public class JavadocTokenizer extends JavaTokenizer {

    48     /** Create a scanner from the input buffer.  buffer must implement

    49      *  array() and compact(), and remaining() must be less than limit().

    50      */

    51     protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {

    52         super(fac, buffer);

    53     }

    55     /** Create a scanner from the input array.  The array must have at

    56      *  least a single character of extra space.

    57      */

    58     protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {

    59         super(fac, input, inputLength);

    60     }

    62     @Override

    63     protected Comment processComment(int pos, int endPos, CommentStyle style) {

    64         char[] buf = reader.getRawCharacters(pos, endPos);

    65         return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style);

    66     }

    68     /**

    69      * This is a specialized version of UnicodeReader that keeps track of the

    70      * column position within a given character stream (used for Javadoc processing),

    71      * and which builds a table for mapping positions in the comment string to

    72      * positions in the source file.

    73      */

    74     static class DocReader extends UnicodeReader {

    76          int col;

    77          int startPos;

    79          /**

    80           * A buffer for building a table for mapping positions in {@link #sbuf}

    81           * to positions in the source buffer.

    82           *

    83           * The array is organized as a series of pairs of integers: the first

    84           * number in each pair specifies a position in the comment text,

    85           * the second number in each pair specifies the corresponding position

    86           * in the source buffer. The pairs are sorted in ascending order.

    87           *

    88           * Since the mapping function is generally continuous, with successive

    89           * positions in the string corresponding to successive positions in the

    90           * source buffer, the table only needs to record discontinuities in

    91           * the mapping. The values of intermediate positions can be inferred.

    92           *

    93           * Discontinuities may occur in a number of places: when a newline

    94           * is followed by whitespace and asterisks (which are ignored),

    95           * when a tab is expanded into spaces, and when unicode escapes

    96           * are used in the source buffer.

    97           *

    98           * Thus, to find the source position of any position, p, in the comment

    99           * string, find the index, i, of the pair whose string offset

   100           * ({@code pbuf[i] }) is closest to but not greater than p. Then,

   101           * {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }.

   102           */

   103          int[] pbuf = new int[128];

   105          /**

   106           * The index of the next empty slot in the pbuf buffer.

   107           */

   108          int pp = 0;

   110          DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) {

   111              super(fac, input, inputLength);

   112              this.startPos = startPos;

   113          }

   115          @Override

   116          protected void convertUnicode() {

   117              if (ch == '\\' && unicodeConversionBp != bp) {

   118                  bp++; ch = buf[bp]; col++;

   119                  if (ch == 'u') {

   120                      do {

   121                          bp++; ch = buf[bp]; col++;

   122                      } while (ch == 'u');

   123                      int limit = bp + 3;

   124                      if (limit < buflen) {

   125                          int d = digit(bp, 16);

   126                          int code = d;

   127                          while (bp < limit && d >= 0) {

   128                              bp++; ch = buf[bp]; col++;

   129                              d = digit(bp, 16);

   130                              code = (code << 4) + d;

   131                          }

   132                          if (d >= 0) {

   133                              ch = (char)code;

   134                              unicodeConversionBp = bp;

   135                              return;

   136                          }

   137                      }

   138                      // "illegal.Unicode.esc", reported by base scanner

   139                  } else {

   140                      bp--;

   141                      ch = '\\';

   142                      col--;

   143                  }

   144              }

   145          }

   147          @Override

   148          protected void scanCommentChar() {

   149              scanChar();

   150              if (ch == '\\') {

   151                  if (peekChar() == '\\' && !isUnicode()) {

   152                      putChar(ch, false);

   153                      bp++; col++;

   154                  } else {

   155                      convertUnicode();

   156                  }

   157              }

   158          }

   160          @Override

   161          protected void scanChar() {

   162              bp++;

   163              ch = buf[bp];

   164              switch (ch) {

   165              case '\r': // return

   166                  col = 0;

   167                  break;

   168              case '\n': // newline

   169                  if (bp == 0 || buf[bp-1] != '\r') {

   170                      col = 0;

   171                  }

   172                  break;

   173              case '\t': // tab

   174                  col = (col / TabInc * TabInc) + TabInc;

   175                  break;

   176              case '\\': // possible Unicode

   177                  col++;

   178                  convertUnicode();

   179                  break;

   180              default:

   181                  col++;

   182                  break;

   183              }

   184          }

   186          @Override

   187          public void putChar(char ch, boolean scan) {

   188              // At this point, bp is the position of the current character in buf,

   189              // and sp is the position in sbuf where this character will be put.

   190              // Record a new entry in pbuf if pbuf is empty or if sp and its

   191              // corresponding source position are not equidistant from the

   192              // corresponding values in the latest entry in the pbuf array.

   193              // (i.e. there is a discontinuity in the map function.)

   194              if ((pp == 0)

   195                      || (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) {

   196                  if (pp + 1 >= pbuf.length) {

   197                      int[] new_pbuf = new int[pbuf.length * 2];

   198                      System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length);

   199                      pbuf = new_pbuf;

   200                  }

   201                  pbuf[pp] = sp;

   202                  pbuf[pp + 1] = startPos + bp;

   203                  pp += 2;

   204              }

   205              super.putChar(ch, scan);

   206          }

   207      }

   209      protected class JavadocComment extends JavaTokenizer.BasicComment<DocReader> {

   211         /**

   212         * Translated and stripped contents of doc comment

   213         */

   214         private String docComment = null;

   215         private int[] docPosns = null;

   217         JavadocComment(DocReader reader, CommentStyle cs) {

   218             super(reader, cs);

   219         }

   221         @Override

   222         public String getText() {

   223             if (!scanned && cs == CommentStyle.JAVADOC) {

   224                 scanDocComment();

   225             }

   226             return docComment;

   227         }

   229         @Override

   230         public int getSourcePos(int pos) {

   231             // Binary search to find the entry for which the string index is

   232             // less than pos. Since docPosns is a list of pairs of integers

   233             // we must make sure the index is always even.

   234             // If we find an exact match for pos, the other item in the pair

   235             // gives the source pos; otherwise, compute the source position

   236             // relative to the best match found in the array.

   237             if (pos == Position.NOPOS)

   238                 return Position.NOPOS;

   239             if (pos < 0 || pos > docComment.length())

   240                 throw new StringIndexOutOfBoundsException(String.valueOf(pos));

   241             if (docPosns == null)

   242                 return Position.NOPOS;

   243             int start = 0;

   244             int end = docPosns.length;

   245             while (start < end - 2) {

   246                 // find an even index midway between start and end

   247                 int index = ((start  + end) / 4) * 2;

   248                 if (docPosns[index] < pos)

   249                     start = index;

   250                 else if (docPosns[index] == pos)

   251                     return docPosns[index + 1];

   252                 else

   253                     end = index;

   254             }

   255             return docPosns[start + 1] + (pos - docPosns[start]);

   256         }

   258         @Override

   259         @SuppressWarnings("fallthrough")

   260         protected void scanDocComment() {

   261              try {

   262                  boolean firstLine = true;

   264                  // Skip over first slash

   265                  comment_reader.scanCommentChar();

   266                  // Skip over first star

   267                  comment_reader.scanCommentChar();

   269                  // consume any number of stars

   270                  while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {

   271                      comment_reader.scanCommentChar();

   272                  }

   273                  // is the comment in the form /**/, /***/, /****/, etc. ?

   274                  if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') {

   275                      docComment = "";

   276                      return;

   277                  }

   279                  // skip a newline on the first line of the comment.

   280                  if (comment_reader.bp < comment_reader.buflen) {

   281                      if (comment_reader.ch == LF) {

   282                          comment_reader.scanCommentChar();

   283                          firstLine = false;

   284                      } else if (comment_reader.ch == CR) {

   285                          comment_reader.scanCommentChar();

   286                          if (comment_reader.ch == LF) {

   287                              comment_reader.scanCommentChar();

   288                              firstLine = false;

   289                          }

   290                      }

   291                  }

   293              outerLoop:

   295                  // The outerLoop processes the doc comment, looping once

   296                  // for each line.  For each line, it first strips off

   297                  // whitespace, then it consumes any stars, then it

   298                  // puts the rest of the line into our buffer.

   299                  while (comment_reader.bp < comment_reader.buflen) {

   300                      int begin_bp = comment_reader.bp;

   301                      char begin_ch = comment_reader.ch;

   302                      // The wsLoop consumes whitespace from the beginning

   303                      // of each line.

   304                  wsLoop:

   306                      while (comment_reader.bp < comment_reader.buflen) {

   307                          switch(comment_reader.ch) {

   308                          case ' ':

   309                              comment_reader.scanCommentChar();

   310                              break;

   311                          case '\t':

   312                              comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc;

   313                              comment_reader.scanCommentChar();

   314                              break;

   315                          case FF:

   316                              comment_reader.col = 0;

   317                              comment_reader.scanCommentChar();

   318                              break;

   319          // Treat newline at beginning of line (blank line, no star)

   320          // as comment text.  Old Javadoc compatibility requires this.

   321          /*---------------------------------*

   322                          case CR: // (Spec 3.4)

   323                              doc_reader.scanCommentChar();

   324                              if (ch == LF) {

   325                                  col = 0;

   326                                  doc_reader.scanCommentChar();

   327                              }

   328                              break;

   329                          case LF: // (Spec 3.4)

   330                              doc_reader.scanCommentChar();

   331                              break;

   332          *---------------------------------*/

   333                          default:

   334                              // we've seen something that isn't whitespace;

   335                              // jump out.

   336                              break wsLoop;

   337                          }

   338                      }

   340                      // Are there stars here?  If so, consume them all

   341                      // and check for the end of comment.

   342                      if (comment_reader.ch == '*') {

   343                          // skip all of the stars

   344                          do {

   345                              comment_reader.scanCommentChar();

   346                          } while (comment_reader.ch == '*');

   348                          // check for the closing slash.

   349                          if (comment_reader.ch == '/') {

   350                              // We're done with the doc comment

   351                              // scanChar() and breakout.

   352                              break outerLoop;

   353                          }

   354                      } else if (! firstLine) {

   355                          // The current line does not begin with a '*' so we will

   356                          // treat it as comment

   357                          comment_reader.bp = begin_bp;

   358                          comment_reader.ch = begin_ch;

   359                      }

   360                      // The textLoop processes the rest of the characters

   361                      // on the line, adding them to our buffer.

   362                  textLoop:

   363                      while (comment_reader.bp < comment_reader.buflen) {

   364                          switch (comment_reader.ch) {

   365                          case '*':

   366                              // Is this just a star?  Or is this the

   367                              // end of a comment?

   368                              comment_reader.scanCommentChar();

   369                              if (comment_reader.ch == '/') {

   370                                  // This is the end of the comment,

   371                                  // set ch and return our buffer.

   372                                  break outerLoop;

   373                              }

   374                              // This is just an ordinary star.  Add it to

   375                              // the buffer.

   376                              comment_reader.putChar('*', false);

   377                              break;

   378                          case ' ':

   379                          case '\t':

   380                              comment_reader.putChar(comment_reader.ch, false);

   381                              comment_reader.scanCommentChar();

   382                              break;

   383                          case FF:

   384                              comment_reader.scanCommentChar();

   385                              break textLoop; // treat as end of line

   386                          case CR: // (Spec 3.4)

   387                              comment_reader.scanCommentChar();

   388                              if (comment_reader.ch != LF) {

   389                                  // Canonicalize CR-only line terminator to LF

   390                                  comment_reader.putChar((char)LF, false);

   391                                  break textLoop;

   392                              }

   393                              /* fall through to LF case */

   394                          case LF: // (Spec 3.4)

   395                              // We've seen a newline.  Add it to our

   396                              // buffer and break out of this loop,

   397                              // starting fresh on a new line.

   398                              comment_reader.putChar(comment_reader.ch, false);

   399                              comment_reader.scanCommentChar();

   400                              break textLoop;

   401                          default:

   402                              // Add the character to our buffer.

   403                              comment_reader.putChar(comment_reader.ch, false);

   404                              comment_reader.scanCommentChar();

   405                          }

   406                      } // end textLoop

   407                      firstLine = false;

   408                  } // end outerLoop

   410                  if (comment_reader.sp > 0) {

   411                      int i = comment_reader.sp - 1;

   412                  trailLoop:

   413                      while (i > -1) {

   414                          switch (comment_reader.sbuf[i]) {

   415                          case '*':

   416                              i--;

   417                              break;

   418                          default:

   419                              break trailLoop;

   420                          }

   421                      }

   422                      comment_reader.sp = i + 1;

   424                      // Store the text of the doc comment

   425                     docComment = comment_reader.chars();

   426                     docPosns = new int[comment_reader.pp];

   427                     System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length);

   428                 } else {

   429                     docComment = "";

   430                 }

   431             } finally {

   432                 scanned = true;

   433                 comment_reader = null;

   434                 if (docComment != null &&

   435                         docComment.matches("(?sm).*^\\s*@deprecated( |$).*")) {

   436                     deprecatedFlag = true;

   437                 }

   438             }

   439         }

   440     }

   442     @Override

   443     public Position.LineMap getLineMap() {

   444         char[] buf = reader.getRawCharacters();

   445         return Position.makeLineMap(buf, buf.length, true);

   446     }

   447 }

Mercurial > jdk8-mips64-public > langtools / file revision

src/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java@af8417e590f4

src/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java