duke@1: /* duke@1: * Copyright 2004-2006 Sun Microsystems, Inc. All Rights Reserved. duke@1: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. duke@1: * duke@1: * This code is free software; you can redistribute it and/or modify it duke@1: * under the terms of the GNU General Public License version 2 only, as duke@1: * published by the Free Software Foundation. Sun designates this duke@1: * particular file as subject to the "Classpath" exception as provided duke@1: * by Sun in the LICENSE file that accompanied this code. duke@1: * duke@1: * This code is distributed in the hope that it will be useful, but WITHOUT duke@1: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or duke@1: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License duke@1: * version 2 for more details (a copy is included in the LICENSE file that duke@1: * accompanied this code). duke@1: * duke@1: * You should have received a copy of the GNU General Public License version duke@1: * 2 along with this work; if not, write to the Free Software Foundation, duke@1: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. duke@1: * duke@1: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, duke@1: * CA 95054 USA or visit www.sun.com if you need additional information or duke@1: * have any questions. duke@1: */ duke@1: duke@1: package com.sun.tools.javac.parser; duke@1: duke@1: import java.nio.*; duke@1: duke@1: import com.sun.tools.javac.util.*; duke@1: import static com.sun.tools.javac.util.LayoutCharacters.*; duke@1: duke@1: /** An extension to the base lexical analyzer that captures duke@1: * and processes the contents of doc comments. It does so by duke@1: * translating Unicode escape sequences and by stripping the duke@1: * leading whitespace and starts from each line of the comment. duke@1: * duke@1: *

This is NOT part of any API supported by Sun Microsystems. If duke@1: * you write code that depends on this, you do so at your own risk. duke@1: * This code and its internal interfaces are subject to change or duke@1: * deletion without notice. duke@1: */ duke@1: public class DocCommentScanner extends Scanner { duke@1: duke@1: /** A factory for creating scanners. */ duke@1: public static class Factory extends Scanner.Factory { duke@1: duke@1: public static void preRegister(final Context context) { duke@1: context.put(scannerFactoryKey, new Context.Factory() { duke@1: public Factory make() { duke@1: return new Factory(context); duke@1: } duke@1: }); duke@1: } duke@1: duke@1: /** Create a new scanner factory. */ duke@1: protected Factory(Context context) { duke@1: super(context); duke@1: } duke@1: duke@1: @Override duke@1: public Scanner newScanner(CharSequence input) { duke@1: if (input instanceof CharBuffer) { duke@1: return new DocCommentScanner(this, (CharBuffer)input); duke@1: } else { duke@1: char[] array = input.toString().toCharArray(); duke@1: return newScanner(array, array.length); duke@1: } duke@1: } duke@1: duke@1: @Override duke@1: public Scanner newScanner(char[] input, int inputLength) { duke@1: return new DocCommentScanner(this, input, inputLength); duke@1: } duke@1: } duke@1: duke@1: duke@1: /** Create a scanner from the input buffer. buffer must implement duke@1: * array() and compact(), and remaining() must be less than limit(). duke@1: */ duke@1: protected DocCommentScanner(Factory fac, CharBuffer buffer) { duke@1: super(fac, buffer); duke@1: } duke@1: duke@1: /** Create a scanner from the input array. The array must have at duke@1: * least a single character of extra space. duke@1: */ duke@1: protected DocCommentScanner(Factory fac, char[] input, int inputLength) { duke@1: super(fac, input, inputLength); duke@1: } duke@1: duke@1: /** Starting position of the comment in original source duke@1: */ duke@1: private int pos; duke@1: duke@1: /** The comment input buffer, index of next chacter to be read, duke@1: * index of one past last character in buffer. duke@1: */ duke@1: private char[] buf; duke@1: private int bp; duke@1: private int buflen; duke@1: duke@1: /** The current character. duke@1: */ duke@1: private char ch; duke@1: duke@1: /** The column number position of the current character. duke@1: */ duke@1: private int col; duke@1: duke@1: /** The buffer index of the last converted Unicode character duke@1: */ duke@1: private int unicodeConversionBp = 0; duke@1: duke@1: /** duke@1: * Buffer for doc comment. duke@1: */ duke@1: private char[] docCommentBuffer = new char[1024]; duke@1: duke@1: /** duke@1: * Number of characters in doc comment buffer. duke@1: */ duke@1: private int docCommentCount; duke@1: duke@1: /** duke@1: * Translated and stripped contents of doc comment duke@1: */ duke@1: private String docComment = null; duke@1: duke@1: duke@1: /** Unconditionally expand the comment buffer. duke@1: */ duke@1: private void expandCommentBuffer() { duke@1: char[] newBuffer = new char[docCommentBuffer.length * 2]; duke@1: System.arraycopy(docCommentBuffer, 0, newBuffer, duke@1: 0, docCommentBuffer.length); duke@1: docCommentBuffer = newBuffer; duke@1: } duke@1: duke@1: /** Convert an ASCII digit from its base (8, 10, or 16) duke@1: * to its value. duke@1: */ duke@1: private int digit(int base) { duke@1: char c = ch; duke@1: int result = Character.digit(c, base); duke@1: if (result >= 0 && c > 0x7f) { duke@1: ch = "0123456789abcdef".charAt(result); duke@1: } duke@1: return result; duke@1: } duke@1: duke@1: /** Convert Unicode escape; bp points to initial '\' character duke@1: * (Spec 3.3). duke@1: */ duke@1: private void convertUnicode() { duke@1: if (ch == '\\' && unicodeConversionBp != bp) { duke@1: bp++; ch = buf[bp]; col++; duke@1: if (ch == 'u') { duke@1: do { duke@1: bp++; ch = buf[bp]; col++; duke@1: } while (ch == 'u'); duke@1: int limit = bp + 3; duke@1: if (limit < buflen) { duke@1: int d = digit(16); duke@1: int code = d; duke@1: while (bp < limit && d >= 0) { duke@1: bp++; ch = buf[bp]; col++; duke@1: d = digit(16); duke@1: code = (code << 4) + d; duke@1: } duke@1: if (d >= 0) { duke@1: ch = (char)code; duke@1: unicodeConversionBp = bp; duke@1: return; duke@1: } duke@1: } duke@1: // "illegal.Unicode.esc", reported by base scanner duke@1: } else { duke@1: bp--; duke@1: ch = '\\'; duke@1: col--; duke@1: } duke@1: } duke@1: } duke@1: duke@1: duke@1: /** Read next character. duke@1: */ duke@1: private void scanChar() { duke@1: bp++; duke@1: ch = buf[bp]; duke@1: switch (ch) { duke@1: case '\r': // return duke@1: col = 0; duke@1: break; duke@1: case '\n': // newline duke@1: if (bp == 0 || buf[bp-1] != '\r') { duke@1: col = 0; duke@1: } duke@1: break; duke@1: case '\t': // tab duke@1: col = (col / TabInc * TabInc) + TabInc; duke@1: break; duke@1: case '\\': // possible Unicode duke@1: col++; duke@1: convertUnicode(); duke@1: break; duke@1: default: duke@1: col++; duke@1: break; duke@1: } duke@1: } duke@1: duke@1: /** duke@1: * Read next character in doc comment, skipping over double '\' characters. duke@1: * If a double '\' is skipped, put in the buffer and update buffer count. duke@1: */ duke@1: private void scanDocCommentChar() { duke@1: scanChar(); duke@1: if (ch == '\\') { duke@1: if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { duke@1: if (docCommentCount == docCommentBuffer.length) duke@1: expandCommentBuffer(); duke@1: docCommentBuffer[docCommentCount++] = ch; duke@1: bp++; col++; duke@1: } else { duke@1: convertUnicode(); duke@1: } duke@1: } duke@1: } duke@1: duke@1: /* Reset doc comment before reading each new token duke@1: */ duke@1: public void nextToken() { duke@1: docComment = null; duke@1: super.nextToken(); duke@1: } duke@1: duke@1: /** duke@1: * Returns the documentation string of the current token. duke@1: */ duke@1: public String docComment() { duke@1: return docComment; duke@1: } duke@1: duke@1: /** duke@1: * Process a doc comment and make the string content available. duke@1: * Strips leading whitespace and stars. duke@1: */ duke@1: @SuppressWarnings("fallthrough") duke@1: protected void processComment(CommentStyle style) { duke@1: if (style != CommentStyle.JAVADOC) { duke@1: return; duke@1: } duke@1: duke@1: pos = pos(); duke@1: buf = getRawCharacters(pos, endPos()); duke@1: buflen = buf.length; duke@1: bp = 0; duke@1: col = 0; duke@1: duke@1: docCommentCount = 0; duke@1: duke@1: boolean firstLine = true; duke@1: duke@1: // Skip over first slash duke@1: scanDocCommentChar(); duke@1: // Skip over first star duke@1: scanDocCommentChar(); duke@1: duke@1: // consume any number of stars duke@1: while (bp < buflen && ch == '*') { duke@1: scanDocCommentChar(); duke@1: } duke@1: // is the comment in the form /**/, /***/, /****/, etc. ? duke@1: if (bp < buflen && ch == '/') { duke@1: docComment = ""; duke@1: return; duke@1: } duke@1: duke@1: // skip a newline on the first line of the comment. duke@1: if (bp < buflen) { duke@1: if (ch == LF) { duke@1: scanDocCommentChar(); duke@1: firstLine = false; duke@1: } else if (ch == CR) { duke@1: scanDocCommentChar(); duke@1: if (ch == LF) { duke@1: scanDocCommentChar(); duke@1: firstLine = false; duke@1: } duke@1: } duke@1: } duke@1: duke@1: outerLoop: duke@1: duke@1: // The outerLoop processes the doc comment, looping once duke@1: // for each line. For each line, it first strips off duke@1: // whitespace, then it consumes any stars, then it duke@1: // puts the rest of the line into our buffer. duke@1: while (bp < buflen) { duke@1: duke@1: // The wsLoop consumes whitespace from the beginning duke@1: // of each line. duke@1: wsLoop: duke@1: duke@1: while (bp < buflen) { duke@1: switch(ch) { duke@1: case ' ': duke@1: scanDocCommentChar(); duke@1: break; duke@1: case '\t': duke@1: col = ((col - 1) / TabInc * TabInc) + TabInc; duke@1: scanDocCommentChar(); duke@1: break; duke@1: case FF: duke@1: col = 0; duke@1: scanDocCommentChar(); duke@1: break; duke@1: // Treat newline at beginning of line (blank line, no star) duke@1: // as comment text. Old Javadoc compatibility requires this. duke@1: /*---------------------------------* duke@1: case CR: // (Spec 3.4) duke@1: scanDocCommentChar(); duke@1: if (ch == LF) { duke@1: col = 0; duke@1: scanDocCommentChar(); duke@1: } duke@1: break; duke@1: case LF: // (Spec 3.4) duke@1: scanDocCommentChar(); duke@1: break; duke@1: *---------------------------------*/ duke@1: default: duke@1: // we've seen something that isn't whitespace; duke@1: // jump out. duke@1: break wsLoop; duke@1: } duke@1: } duke@1: duke@1: // Are there stars here? If so, consume them all duke@1: // and check for the end of comment. duke@1: if (ch == '*') { duke@1: // skip all of the stars duke@1: do { duke@1: scanDocCommentChar(); duke@1: } while (ch == '*'); duke@1: duke@1: // check for the closing slash. duke@1: if (ch == '/') { duke@1: // We're done with the doc comment duke@1: // scanChar() and breakout. duke@1: break outerLoop; duke@1: } duke@1: } else if (! firstLine) { duke@1: //The current line does not begin with a '*' so we will indent it. duke@1: for (int i = 1; i < col; i++) { duke@1: if (docCommentCount == docCommentBuffer.length) duke@1: expandCommentBuffer(); duke@1: docCommentBuffer[docCommentCount++] = ' '; duke@1: } duke@1: } duke@1: duke@1: // The textLoop processes the rest of the characters duke@1: // on the line, adding them to our buffer. duke@1: textLoop: duke@1: while (bp < buflen) { duke@1: switch (ch) { duke@1: case '*': duke@1: // Is this just a star? Or is this the duke@1: // end of a comment? duke@1: scanDocCommentChar(); duke@1: if (ch == '/') { duke@1: // This is the end of the comment, duke@1: // set ch and return our buffer. duke@1: break outerLoop; duke@1: } duke@1: // This is just an ordinary star. Add it to duke@1: // the buffer. duke@1: if (docCommentCount == docCommentBuffer.length) duke@1: expandCommentBuffer(); duke@1: docCommentBuffer[docCommentCount++] = '*'; duke@1: break; duke@1: case ' ': duke@1: case '\t': duke@1: if (docCommentCount == docCommentBuffer.length) duke@1: expandCommentBuffer(); duke@1: docCommentBuffer[docCommentCount++] = ch; duke@1: scanDocCommentChar(); duke@1: break; duke@1: case FF: duke@1: scanDocCommentChar(); duke@1: break textLoop; // treat as end of line duke@1: case CR: // (Spec 3.4) duke@1: scanDocCommentChar(); duke@1: if (ch != LF) { duke@1: // Canonicalize CR-only line terminator to LF duke@1: if (docCommentCount == docCommentBuffer.length) duke@1: expandCommentBuffer(); duke@1: docCommentBuffer[docCommentCount++] = (char)LF; duke@1: break textLoop; duke@1: } duke@1: /* fall through to LF case */ duke@1: case LF: // (Spec 3.4) duke@1: // We've seen a newline. Add it to our duke@1: // buffer and break out of this loop, duke@1: // starting fresh on a new line. duke@1: if (docCommentCount == docCommentBuffer.length) duke@1: expandCommentBuffer(); duke@1: docCommentBuffer[docCommentCount++] = ch; duke@1: scanDocCommentChar(); duke@1: break textLoop; duke@1: default: duke@1: // Add the character to our buffer. duke@1: if (docCommentCount == docCommentBuffer.length) duke@1: expandCommentBuffer(); duke@1: docCommentBuffer[docCommentCount++] = ch; duke@1: scanDocCommentChar(); duke@1: } duke@1: } // end textLoop duke@1: firstLine = false; duke@1: } // end outerLoop duke@1: duke@1: if (docCommentCount > 0) { duke@1: int i = docCommentCount - 1; duke@1: trailLoop: duke@1: while (i > -1) { duke@1: switch (docCommentBuffer[i]) { duke@1: case '*': duke@1: i--; duke@1: break; duke@1: default: duke@1: break trailLoop; duke@1: } duke@1: } duke@1: docCommentCount = i + 1; duke@1: duke@1: // Store the text of the doc comment duke@1: docComment = new String(docCommentBuffer, 0 , docCommentCount); duke@1: } else { duke@1: docComment = ""; duke@1: } duke@1: } duke@1: duke@1: /** Build a map for translating between line numbers and duke@1: * positions in the input. duke@1: * duke@1: * @return a LineMap */ duke@1: public Position.LineMap getLineMap() { duke@1: char[] buf = getRawCharacters(); duke@1: return Position.makeLineMap(buf, buf.length, true); duke@1: } duke@1: }