mcimadamore@1113: /* jjg@1281: * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. mcimadamore@1113: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. mcimadamore@1113: * mcimadamore@1113: * This code is free software; you can redistribute it and/or modify it mcimadamore@1113: * under the terms of the GNU General Public License version 2 only, as mcimadamore@1113: * published by the Free Software Foundation. Oracle designates this mcimadamore@1113: * particular file as subject to the "Classpath" exception as provided mcimadamore@1113: * by Oracle in the LICENSE file that accompanied this code. mcimadamore@1113: * mcimadamore@1113: * This code is distributed in the hope that it will be useful, but WITHOUT mcimadamore@1113: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or mcimadamore@1113: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License mcimadamore@1113: * version 2 for more details (a copy is included in the LICENSE file that mcimadamore@1113: * accompanied this code). mcimadamore@1113: * mcimadamore@1113: * You should have received a copy of the GNU General Public License version mcimadamore@1113: * 2 along with this work; if not, write to the Free Software Foundation, mcimadamore@1113: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. mcimadamore@1113: * mcimadamore@1113: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA mcimadamore@1113: * or visit www.oracle.com if you need additional information or have any mcimadamore@1113: * questions. mcimadamore@1113: */ mcimadamore@1113: mcimadamore@1113: package com.sun.tools.javac.parser; mcimadamore@1113: mcimadamore@1125: import com.sun.tools.javac.parser.Tokens.Comment; mcimadamore@1125: import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; mcimadamore@1113: import com.sun.tools.javac.util.*; mcimadamore@1113: mcimadamore@1113: import java.nio.*; mcimadamore@1113: mcimadamore@1113: import static com.sun.tools.javac.util.LayoutCharacters.*; mcimadamore@1113: mcimadamore@1113: /** An extension to the base lexical analyzer that captures mcimadamore@1113: * and processes the contents of doc comments. It does so by mcimadamore@1113: * translating Unicode escape sequences and by stripping the mcimadamore@1113: * leading whitespace and starts from each line of the comment. mcimadamore@1113: * mcimadamore@1113: *

This is NOT part of any supported API. mcimadamore@1113: * If you write code that depends on this, you do so at your own risk. mcimadamore@1113: * This code and its internal interfaces are subject to change or mcimadamore@1113: * deletion without notice. mcimadamore@1113: */ mcimadamore@1113: public class JavadocTokenizer extends JavaTokenizer { mcimadamore@1113: mcimadamore@1113: /** Create a scanner from the input buffer. buffer must implement mcimadamore@1113: * array() and compact(), and remaining() must be less than limit(). mcimadamore@1113: */ mcimadamore@1113: protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) { mcimadamore@1113: super(fac, buffer); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: /** Create a scanner from the input array. The array must have at mcimadamore@1113: * least a single character of extra space. mcimadamore@1113: */ mcimadamore@1113: protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) { mcimadamore@1113: super(fac, input, inputLength); mcimadamore@1113: } mcimadamore@1113: mcimadamore@1125: @Override mcimadamore@1125: protected Comment processComment(int pos, int endPos, CommentStyle style) { mcimadamore@1125: char[] buf = reader.getRawCharacters(pos, endPos); jjg@1281: return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style); mcimadamore@1125: } mcimadamore@1113: mcimadamore@1113: /** mcimadamore@1125: * This is a specialized version of UnicodeReader that keeps track of the jjg@1281: * column position within a given character stream (used for Javadoc processing), jjg@1281: * and which builds a table for mapping positions in the comment string to jjg@1281: * positions in the source file. mcimadamore@1113: */ jjg@1281: static class DocReader extends UnicodeReader { mcimadamore@1113: mcimadamore@1125: int col; jjg@1281: int startPos; mcimadamore@1113: jjg@1281: /** jjg@1281: * A buffer for building a table for mapping positions in {@link #sbuf} jjg@1281: * to positions in the source buffer. jjg@1281: * jjg@1281: * The array is organized as a series of pairs of integers: the first jjg@1281: * number in each pair specifies a position in the comment text, jjg@1281: * the second number in each pair specifies the corresponding position jjg@1281: * in the source buffer. The pairs are sorted in ascending order. jjg@1281: * jjg@1281: * Since the mapping function is generally continuous, with successive jjg@1281: * positions in the string corresponding to successive positions in the jjg@1281: * source buffer, the table only needs to record discontinuities in jjg@1281: * the mapping. The values of intermediate positions can be inferred. jjg@1281: * jjg@1281: * Discontinuities may occur in a number of places: when a newline jjg@1281: * is followed by whitespace and asterisks (which are ignored), jjg@1281: * when a tab is expanded into spaces, and when unicode escapes jjg@1281: * are used in the source buffer. jjg@1281: * jjg@1281: * Thus, to find the source position of any position, p, in the comment jjg@1281: * string, find the index, i, of the pair whose string offset jjg@1281: * ({@code pbuf[i] }) is closest to but not greater than p. Then, jjg@1281: * {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }. jjg@1281: */ jjg@1281: int[] pbuf = new int[128]; jjg@1281: jjg@1281: /** jjg@1281: * The index of the next empty slot in the pbuf buffer. jjg@1281: */ jjg@1281: int pp = 0; jjg@1281: jjg@1281: DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) { mcimadamore@1125: super(fac, input, inputLength); jjg@1281: this.startPos = startPos; mcimadamore@1125: } mcimadamore@1113: mcimadamore@1125: @Override mcimadamore@1125: protected void convertUnicode() { mcimadamore@1125: if (ch == '\\' && unicodeConversionBp != bp) { mcimadamore@1125: bp++; ch = buf[bp]; col++; mcimadamore@1125: if (ch == 'u') { mcimadamore@1125: do { mcimadamore@1125: bp++; ch = buf[bp]; col++; mcimadamore@1125: } while (ch == 'u'); mcimadamore@1125: int limit = bp + 3; mcimadamore@1125: if (limit < buflen) { mcimadamore@1125: int d = digit(bp, 16); mcimadamore@1125: int code = d; mcimadamore@1125: while (bp < limit && d >= 0) { mcimadamore@1125: bp++; ch = buf[bp]; col++; mcimadamore@1125: d = digit(bp, 16); mcimadamore@1125: code = (code << 4) + d; mcimadamore@1125: } mcimadamore@1125: if (d >= 0) { mcimadamore@1125: ch = (char)code; mcimadamore@1125: unicodeConversionBp = bp; mcimadamore@1125: return; mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: // "illegal.Unicode.esc", reported by base scanner mcimadamore@1125: } else { mcimadamore@1125: bp--; mcimadamore@1125: ch = '\\'; mcimadamore@1125: col--; mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1113: mcimadamore@1125: @Override mcimadamore@1125: protected void scanCommentChar() { mcimadamore@1125: scanChar(); mcimadamore@1125: if (ch == '\\') { mcimadamore@1125: if (peekChar() == '\\' && !isUnicode()) { mcimadamore@1125: putChar(ch, false); mcimadamore@1125: bp++; col++; mcimadamore@1125: } else { mcimadamore@1125: convertUnicode(); mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1113: mcimadamore@1125: @Override mcimadamore@1125: protected void scanChar() { mcimadamore@1125: bp++; mcimadamore@1125: ch = buf[bp]; mcimadamore@1125: switch (ch) { mcimadamore@1125: case '\r': // return mcimadamore@1125: col = 0; mcimadamore@1125: break; mcimadamore@1125: case '\n': // newline mcimadamore@1125: if (bp == 0 || buf[bp-1] != '\r') { mcimadamore@1125: col = 0; mcimadamore@1125: } mcimadamore@1125: break; mcimadamore@1125: case '\t': // tab mcimadamore@1125: col = (col / TabInc * TabInc) + TabInc; mcimadamore@1125: break; mcimadamore@1125: case '\\': // possible Unicode mcimadamore@1125: col++; mcimadamore@1125: convertUnicode(); mcimadamore@1125: break; mcimadamore@1125: default: mcimadamore@1125: col++; mcimadamore@1125: break; mcimadamore@1125: } mcimadamore@1125: } jjg@1281: jjg@1281: @Override jjg@1281: public void putChar(char ch, boolean scan) { jjg@1281: // At this point, bp is the position of the current character in buf, jjg@1281: // and sp is the position in sbuf where this character will be put. jjg@1281: // Record a new entry in pbuf if pbuf is empty or if sp and its jjg@1281: // corresponding source position are not equidistant from the jjg@1281: // corresponding values in the latest entry in the pbuf array. jjg@1281: // (i.e. there is a discontinuity in the map function.) jjg@1281: if ((pp == 0) jjg@1281: || (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) { jjg@1281: if (pp + 1 >= pbuf.length) { jjg@1281: int[] new_pbuf = new int[pbuf.length * 2]; jjg@1281: System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length); jjg@1281: pbuf = new_pbuf; jjg@1281: } jjg@1281: pbuf[pp] = sp; jjg@1281: pbuf[pp + 1] = startPos + bp; jjg@1281: pp += 2; jjg@1281: } jjg@1281: super.putChar(ch, scan); jjg@1281: } mcimadamore@1125: } mcimadamore@1125: jjg@1281: protected class JavadocComment extends JavaTokenizer.BasicComment { mcimadamore@1125: mcimadamore@1125: /** mcimadamore@1125: * Translated and stripped contents of doc comment mcimadamore@1125: */ mcimadamore@1125: private String docComment = null; jjg@1281: private int[] docPosns = null; mcimadamore@1125: jjg@1281: JavadocComment(DocReader reader, CommentStyle cs) { jjg@1281: super(reader, cs); mcimadamore@1113: } mcimadamore@1113: jjg@1281: @Override mcimadamore@1125: public String getText() { mcimadamore@1125: if (!scanned && cs == CommentStyle.JAVADOC) { mcimadamore@1125: scanDocComment(); mcimadamore@1125: } mcimadamore@1125: return docComment; mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: @Override jjg@1281: public int getSourcePos(int pos) { jjg@1281: // Binary search to find the entry for which the string index is jjg@1281: // less than pos. Since docPosns is a list of pairs of integers jjg@1281: // we must make sure the index is always even. jjg@1281: // If we find an exact match for pos, the other item in the pair jjg@1281: // gives the source pos; otherwise, compute the source position jjg@1281: // relative to the best match found in the array. jjg@1409: if (pos == Position.NOPOS) jjg@1409: return Position.NOPOS; jjg@1281: if (pos < 0 || pos >= docComment.length()) jjg@1409: throw new StringIndexOutOfBoundsException(String.valueOf(pos)); jjg@1281: if (docPosns == null) jjg@1409: return Position.NOPOS; jjg@1281: int start = 0; jjg@1281: int end = docPosns.length; jjg@1281: while (start < end - 2) { jjg@1281: // find an even index midway between start and end jjg@1281: int index = ((start + end) / 4) * 2; jjg@1281: if (docPosns[index] < pos) jjg@1281: start = index; jjg@1281: else if (docPosns[index] == pos) jjg@1281: return docPosns[index + 1]; jjg@1281: else jjg@1281: end = index; jjg@1281: } jjg@1281: return docPosns[start + 1] + (pos - docPosns[start]); jjg@1281: } jjg@1281: jjg@1281: @Override mcimadamore@1125: @SuppressWarnings("fallthrough") mcimadamore@1125: protected void scanDocComment() { mcimadamore@1125: try { mcimadamore@1125: boolean firstLine = true; mcimadamore@1125: mcimadamore@1125: // Skip over first slash mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: // Skip over first star mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: mcimadamore@1125: // consume any number of stars mcimadamore@1125: while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') { mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: } mcimadamore@1125: // is the comment in the form /**/, /***/, /****/, etc. ? mcimadamore@1125: if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') { mcimadamore@1125: docComment = ""; mcimadamore@1125: return; mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: // skip a newline on the first line of the comment. mcimadamore@1125: if (comment_reader.bp < comment_reader.buflen) { mcimadamore@1125: if (comment_reader.ch == LF) { mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: firstLine = false; mcimadamore@1125: } else if (comment_reader.ch == CR) { mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: if (comment_reader.ch == LF) { mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: firstLine = false; mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: outerLoop: mcimadamore@1125: mcimadamore@1125: // The outerLoop processes the doc comment, looping once mcimadamore@1125: // for each line. For each line, it first strips off mcimadamore@1125: // whitespace, then it consumes any stars, then it mcimadamore@1125: // puts the rest of the line into our buffer. mcimadamore@1125: while (comment_reader.bp < comment_reader.buflen) { jjg@1281: int begin_bp = comment_reader.bp; jjg@1281: char begin_ch = comment_reader.ch; mcimadamore@1125: // The wsLoop consumes whitespace from the beginning mcimadamore@1125: // of each line. mcimadamore@1125: wsLoop: mcimadamore@1125: mcimadamore@1125: while (comment_reader.bp < comment_reader.buflen) { mcimadamore@1125: switch(comment_reader.ch) { mcimadamore@1125: case ' ': mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: break; mcimadamore@1125: case '\t': mcimadamore@1125: comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc; mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: break; mcimadamore@1125: case FF: mcimadamore@1125: comment_reader.col = 0; mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: break; mcimadamore@1125: // Treat newline at beginning of line (blank line, no star) mcimadamore@1125: // as comment text. Old Javadoc compatibility requires this. mcimadamore@1125: /*---------------------------------* mcimadamore@1125: case CR: // (Spec 3.4) mcimadamore@1125: doc_reader.scanCommentChar(); mcimadamore@1125: if (ch == LF) { mcimadamore@1125: col = 0; mcimadamore@1125: doc_reader.scanCommentChar(); mcimadamore@1125: } mcimadamore@1125: break; mcimadamore@1125: case LF: // (Spec 3.4) mcimadamore@1125: doc_reader.scanCommentChar(); mcimadamore@1125: break; mcimadamore@1125: *---------------------------------*/ mcimadamore@1125: default: mcimadamore@1125: // we've seen something that isn't whitespace; mcimadamore@1125: // jump out. mcimadamore@1125: break wsLoop; mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: mcimadamore@1125: // Are there stars here? If so, consume them all mcimadamore@1125: // and check for the end of comment. mcimadamore@1125: if (comment_reader.ch == '*') { mcimadamore@1125: // skip all of the stars mcimadamore@1125: do { mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: } while (comment_reader.ch == '*'); mcimadamore@1125: mcimadamore@1125: // check for the closing slash. mcimadamore@1125: if (comment_reader.ch == '/') { mcimadamore@1125: // We're done with the doc comment mcimadamore@1125: // scanChar() and breakout. mcimadamore@1125: break outerLoop; mcimadamore@1125: } mcimadamore@1125: } else if (! firstLine) { jjg@1281: // The current line does not begin with a '*' so we will jjg@1281: // treat it as comment jjg@1281: comment_reader.bp = begin_bp; jjg@1281: comment_reader.ch = begin_ch; mcimadamore@1125: } mcimadamore@1125: // The textLoop processes the rest of the characters mcimadamore@1125: // on the line, adding them to our buffer. mcimadamore@1125: textLoop: mcimadamore@1125: while (comment_reader.bp < comment_reader.buflen) { mcimadamore@1125: switch (comment_reader.ch) { mcimadamore@1125: case '*': mcimadamore@1125: // Is this just a star? Or is this the mcimadamore@1125: // end of a comment? mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: if (comment_reader.ch == '/') { mcimadamore@1125: // This is the end of the comment, mcimadamore@1125: // set ch and return our buffer. mcimadamore@1125: break outerLoop; mcimadamore@1125: } mcimadamore@1125: // This is just an ordinary star. Add it to mcimadamore@1125: // the buffer. mcimadamore@1125: comment_reader.putChar('*', false); mcimadamore@1125: break; mcimadamore@1125: case ' ': mcimadamore@1125: case '\t': mcimadamore@1125: comment_reader.putChar(comment_reader.ch, false); mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: break; mcimadamore@1125: case FF: mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: break textLoop; // treat as end of line mcimadamore@1125: case CR: // (Spec 3.4) mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: if (comment_reader.ch != LF) { mcimadamore@1125: // Canonicalize CR-only line terminator to LF mcimadamore@1125: comment_reader.putChar((char)LF, false); mcimadamore@1125: break textLoop; mcimadamore@1125: } mcimadamore@1125: /* fall through to LF case */ mcimadamore@1125: case LF: // (Spec 3.4) mcimadamore@1125: // We've seen a newline. Add it to our mcimadamore@1125: // buffer and break out of this loop, mcimadamore@1125: // starting fresh on a new line. mcimadamore@1125: comment_reader.putChar(comment_reader.ch, false); mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: break textLoop; mcimadamore@1125: default: mcimadamore@1125: // Add the character to our buffer. mcimadamore@1125: comment_reader.putChar(comment_reader.ch, false); mcimadamore@1125: comment_reader.scanCommentChar(); mcimadamore@1125: } mcimadamore@1125: } // end textLoop mcimadamore@1125: firstLine = false; mcimadamore@1125: } // end outerLoop mcimadamore@1125: mcimadamore@1125: if (comment_reader.sp > 0) { mcimadamore@1125: int i = comment_reader.sp - 1; mcimadamore@1125: trailLoop: mcimadamore@1125: while (i > -1) { mcimadamore@1125: switch (comment_reader.sbuf[i]) { mcimadamore@1125: case '*': mcimadamore@1125: i--; mcimadamore@1125: break; mcimadamore@1125: default: mcimadamore@1125: break trailLoop; mcimadamore@1125: } mcimadamore@1125: } mcimadamore@1125: comment_reader.sp = i + 1; mcimadamore@1125: mcimadamore@1125: // Store the text of the doc comment mcimadamore@1125: docComment = comment_reader.chars(); jjg@1281: docPosns = new int[comment_reader.pp]; jjg@1281: System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length); mcimadamore@1125: } else { mcimadamore@1125: docComment = ""; mcimadamore@1113: } mcimadamore@1125: } finally { mcimadamore@1125: scanned = true; jjg@1282: comment_reader = null; mcimadamore@1125: if (docComment != null && mcimadamore@1125: docComment.matches("(?sm).*^\\s*@deprecated( |$).*")) { mcimadamore@1125: deprecatedFlag = true; mcimadamore@1125: } mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: } mcimadamore@1113: mcimadamore@1113: @Override mcimadamore@1113: public Position.LineMap getLineMap() { mcimadamore@1113: char[] buf = reader.getRawCharacters(); mcimadamore@1113: return Position.makeLineMap(buf, buf.length, true); mcimadamore@1113: } mcimadamore@1113: }