duke@1: /*
xdono@117: * Copyright 2004-2008 Sun Microsystems, Inc. All Rights Reserved.
duke@1: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@1: *
duke@1: * This code is free software; you can redistribute it and/or modify it
duke@1: * under the terms of the GNU General Public License version 2 only, as
duke@1: * published by the Free Software Foundation. Sun designates this
duke@1: * particular file as subject to the "Classpath" exception as provided
duke@1: * by Sun in the LICENSE file that accompanied this code.
duke@1: *
duke@1: * This code is distributed in the hope that it will be useful, but WITHOUT
duke@1: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@1: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@1: * version 2 for more details (a copy is included in the LICENSE file that
duke@1: * accompanied this code).
duke@1: *
duke@1: * You should have received a copy of the GNU General Public License version
duke@1: * 2 along with this work; if not, write to the Free Software Foundation,
duke@1: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@1: *
duke@1: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
duke@1: * CA 95054 USA or visit www.sun.com if you need additional information or
duke@1: * have any questions.
duke@1: */
duke@1:
duke@1: package com.sun.tools.javac.parser;
duke@1:
duke@1: import java.nio.*;
duke@1:
duke@1: import com.sun.tools.javac.util.*;
duke@1: import static com.sun.tools.javac.util.LayoutCharacters.*;
duke@1:
duke@1: /** An extension to the base lexical analyzer that captures
duke@1: * and processes the contents of doc comments. It does so by
duke@1: * translating Unicode escape sequences and by stripping the
duke@1: * leading whitespace and starts from each line of the comment.
duke@1: *
duke@1: *
This is NOT part of any API supported by Sun Microsystems. If
duke@1: * you write code that depends on this, you do so at your own risk.
duke@1: * This code and its internal interfaces are subject to change or
duke@1: * deletion without notice.
duke@1: */
duke@1: public class DocCommentScanner extends Scanner {
duke@1:
duke@1: /** A factory for creating scanners. */
duke@1: public static class Factory extends Scanner.Factory {
duke@1:
duke@1: public static void preRegister(final Context context) {
duke@1: context.put(scannerFactoryKey, new Context.Factory() {
duke@1: public Factory make() {
duke@1: return new Factory(context);
duke@1: }
duke@1: });
duke@1: }
duke@1:
duke@1: /** Create a new scanner factory. */
duke@1: protected Factory(Context context) {
duke@1: super(context);
duke@1: }
duke@1:
duke@1: @Override
duke@1: public Scanner newScanner(CharSequence input) {
duke@1: if (input instanceof CharBuffer) {
duke@1: return new DocCommentScanner(this, (CharBuffer)input);
duke@1: } else {
duke@1: char[] array = input.toString().toCharArray();
duke@1: return newScanner(array, array.length);
duke@1: }
duke@1: }
duke@1:
duke@1: @Override
duke@1: public Scanner newScanner(char[] input, int inputLength) {
duke@1: return new DocCommentScanner(this, input, inputLength);
duke@1: }
duke@1: }
duke@1:
duke@1:
duke@1: /** Create a scanner from the input buffer. buffer must implement
duke@1: * array() and compact(), and remaining() must be less than limit().
duke@1: */
duke@1: protected DocCommentScanner(Factory fac, CharBuffer buffer) {
duke@1: super(fac, buffer);
duke@1: }
duke@1:
duke@1: /** Create a scanner from the input array. The array must have at
duke@1: * least a single character of extra space.
duke@1: */
duke@1: protected DocCommentScanner(Factory fac, char[] input, int inputLength) {
duke@1: super(fac, input, inputLength);
duke@1: }
duke@1:
duke@1: /** Starting position of the comment in original source
duke@1: */
duke@1: private int pos;
duke@1:
duke@1: /** The comment input buffer, index of next chacter to be read,
duke@1: * index of one past last character in buffer.
duke@1: */
duke@1: private char[] buf;
duke@1: private int bp;
duke@1: private int buflen;
duke@1:
duke@1: /** The current character.
duke@1: */
duke@1: private char ch;
duke@1:
duke@1: /** The column number position of the current character.
duke@1: */
duke@1: private int col;
duke@1:
duke@1: /** The buffer index of the last converted Unicode character
duke@1: */
duke@1: private int unicodeConversionBp = 0;
duke@1:
duke@1: /**
duke@1: * Buffer for doc comment.
duke@1: */
duke@1: private char[] docCommentBuffer = new char[1024];
duke@1:
duke@1: /**
duke@1: * Number of characters in doc comment buffer.
duke@1: */
duke@1: private int docCommentCount;
duke@1:
duke@1: /**
duke@1: * Translated and stripped contents of doc comment
duke@1: */
duke@1: private String docComment = null;
duke@1:
duke@1:
duke@1: /** Unconditionally expand the comment buffer.
duke@1: */
duke@1: private void expandCommentBuffer() {
duke@1: char[] newBuffer = new char[docCommentBuffer.length * 2];
duke@1: System.arraycopy(docCommentBuffer, 0, newBuffer,
duke@1: 0, docCommentBuffer.length);
duke@1: docCommentBuffer = newBuffer;
duke@1: }
duke@1:
duke@1: /** Convert an ASCII digit from its base (8, 10, or 16)
duke@1: * to its value.
duke@1: */
duke@1: private int digit(int base) {
duke@1: char c = ch;
duke@1: int result = Character.digit(c, base);
duke@1: if (result >= 0 && c > 0x7f) {
duke@1: ch = "0123456789abcdef".charAt(result);
duke@1: }
duke@1: return result;
duke@1: }
duke@1:
duke@1: /** Convert Unicode escape; bp points to initial '\' character
duke@1: * (Spec 3.3).
duke@1: */
duke@1: private void convertUnicode() {
duke@1: if (ch == '\\' && unicodeConversionBp != bp) {
duke@1: bp++; ch = buf[bp]; col++;
duke@1: if (ch == 'u') {
duke@1: do {
duke@1: bp++; ch = buf[bp]; col++;
duke@1: } while (ch == 'u');
duke@1: int limit = bp + 3;
duke@1: if (limit < buflen) {
duke@1: int d = digit(16);
duke@1: int code = d;
duke@1: while (bp < limit && d >= 0) {
duke@1: bp++; ch = buf[bp]; col++;
duke@1: d = digit(16);
duke@1: code = (code << 4) + d;
duke@1: }
duke@1: if (d >= 0) {
duke@1: ch = (char)code;
duke@1: unicodeConversionBp = bp;
duke@1: return;
duke@1: }
duke@1: }
duke@1: // "illegal.Unicode.esc", reported by base scanner
duke@1: } else {
duke@1: bp--;
duke@1: ch = '\\';
duke@1: col--;
duke@1: }
duke@1: }
duke@1: }
duke@1:
duke@1:
duke@1: /** Read next character.
duke@1: */
duke@1: private void scanChar() {
duke@1: bp++;
duke@1: ch = buf[bp];
duke@1: switch (ch) {
duke@1: case '\r': // return
duke@1: col = 0;
duke@1: break;
duke@1: case '\n': // newline
duke@1: if (bp == 0 || buf[bp-1] != '\r') {
duke@1: col = 0;
duke@1: }
duke@1: break;
duke@1: case '\t': // tab
duke@1: col = (col / TabInc * TabInc) + TabInc;
duke@1: break;
duke@1: case '\\': // possible Unicode
duke@1: col++;
duke@1: convertUnicode();
duke@1: break;
duke@1: default:
duke@1: col++;
duke@1: break;
duke@1: }
duke@1: }
duke@1:
duke@1: /**
duke@1: * Read next character in doc comment, skipping over double '\' characters.
duke@1: * If a double '\' is skipped, put in the buffer and update buffer count.
duke@1: */
duke@1: private void scanDocCommentChar() {
duke@1: scanChar();
duke@1: if (ch == '\\') {
duke@1: if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
duke@1: if (docCommentCount == docCommentBuffer.length)
duke@1: expandCommentBuffer();
duke@1: docCommentBuffer[docCommentCount++] = ch;
duke@1: bp++; col++;
duke@1: } else {
duke@1: convertUnicode();
duke@1: }
duke@1: }
duke@1: }
duke@1:
duke@1: /* Reset doc comment before reading each new token
duke@1: */
duke@1: public void nextToken() {
duke@1: docComment = null;
duke@1: super.nextToken();
duke@1: }
duke@1:
duke@1: /**
duke@1: * Returns the documentation string of the current token.
duke@1: */
duke@1: public String docComment() {
duke@1: return docComment;
duke@1: }
duke@1:
duke@1: /**
duke@1: * Process a doc comment and make the string content available.
duke@1: * Strips leading whitespace and stars.
duke@1: */
duke@1: @SuppressWarnings("fallthrough")
duke@1: protected void processComment(CommentStyle style) {
duke@1: if (style != CommentStyle.JAVADOC) {
duke@1: return;
duke@1: }
duke@1:
duke@1: pos = pos();
duke@1: buf = getRawCharacters(pos, endPos());
duke@1: buflen = buf.length;
duke@1: bp = 0;
duke@1: col = 0;
duke@1:
duke@1: docCommentCount = 0;
duke@1:
duke@1: boolean firstLine = true;
duke@1:
duke@1: // Skip over first slash
duke@1: scanDocCommentChar();
duke@1: // Skip over first star
duke@1: scanDocCommentChar();
duke@1:
duke@1: // consume any number of stars
duke@1: while (bp < buflen && ch == '*') {
duke@1: scanDocCommentChar();
duke@1: }
duke@1: // is the comment in the form /**/, /***/, /****/, etc. ?
duke@1: if (bp < buflen && ch == '/') {
duke@1: docComment = "";
duke@1: return;
duke@1: }
duke@1:
duke@1: // skip a newline on the first line of the comment.
duke@1: if (bp < buflen) {
duke@1: if (ch == LF) {
duke@1: scanDocCommentChar();
duke@1: firstLine = false;
duke@1: } else if (ch == CR) {
duke@1: scanDocCommentChar();
duke@1: if (ch == LF) {
duke@1: scanDocCommentChar();
duke@1: firstLine = false;
duke@1: }
duke@1: }
duke@1: }
duke@1:
duke@1: outerLoop:
duke@1:
duke@1: // The outerLoop processes the doc comment, looping once
duke@1: // for each line. For each line, it first strips off
duke@1: // whitespace, then it consumes any stars, then it
duke@1: // puts the rest of the line into our buffer.
duke@1: while (bp < buflen) {
duke@1:
duke@1: // The wsLoop consumes whitespace from the beginning
duke@1: // of each line.
duke@1: wsLoop:
duke@1:
duke@1: while (bp < buflen) {
duke@1: switch(ch) {
duke@1: case ' ':
duke@1: scanDocCommentChar();
duke@1: break;
duke@1: case '\t':
duke@1: col = ((col - 1) / TabInc * TabInc) + TabInc;
duke@1: scanDocCommentChar();
duke@1: break;
duke@1: case FF:
duke@1: col = 0;
duke@1: scanDocCommentChar();
duke@1: break;
duke@1: // Treat newline at beginning of line (blank line, no star)
duke@1: // as comment text. Old Javadoc compatibility requires this.
duke@1: /*---------------------------------*
duke@1: case CR: // (Spec 3.4)
duke@1: scanDocCommentChar();
duke@1: if (ch == LF) {
duke@1: col = 0;
duke@1: scanDocCommentChar();
duke@1: }
duke@1: break;
duke@1: case LF: // (Spec 3.4)
duke@1: scanDocCommentChar();
duke@1: break;
duke@1: *---------------------------------*/
duke@1: default:
duke@1: // we've seen something that isn't whitespace;
duke@1: // jump out.
duke@1: break wsLoop;
duke@1: }
duke@1: }
duke@1:
duke@1: // Are there stars here? If so, consume them all
duke@1: // and check for the end of comment.
duke@1: if (ch == '*') {
duke@1: // skip all of the stars
duke@1: do {
duke@1: scanDocCommentChar();
duke@1: } while (ch == '*');
duke@1:
duke@1: // check for the closing slash.
duke@1: if (ch == '/') {
duke@1: // We're done with the doc comment
duke@1: // scanChar() and breakout.
duke@1: break outerLoop;
duke@1: }
duke@1: } else if (! firstLine) {
duke@1: //The current line does not begin with a '*' so we will indent it.
duke@1: for (int i = 1; i < col; i++) {
duke@1: if (docCommentCount == docCommentBuffer.length)
duke@1: expandCommentBuffer();
duke@1: docCommentBuffer[docCommentCount++] = ' ';
duke@1: }
duke@1: }
duke@1:
duke@1: // The textLoop processes the rest of the characters
duke@1: // on the line, adding them to our buffer.
duke@1: textLoop:
duke@1: while (bp < buflen) {
duke@1: switch (ch) {
duke@1: case '*':
duke@1: // Is this just a star? Or is this the
duke@1: // end of a comment?
duke@1: scanDocCommentChar();
duke@1: if (ch == '/') {
duke@1: // This is the end of the comment,
duke@1: // set ch and return our buffer.
duke@1: break outerLoop;
duke@1: }
duke@1: // This is just an ordinary star. Add it to
duke@1: // the buffer.
duke@1: if (docCommentCount == docCommentBuffer.length)
duke@1: expandCommentBuffer();
duke@1: docCommentBuffer[docCommentCount++] = '*';
duke@1: break;
duke@1: case ' ':
duke@1: case '\t':
duke@1: if (docCommentCount == docCommentBuffer.length)
duke@1: expandCommentBuffer();
duke@1: docCommentBuffer[docCommentCount++] = ch;
duke@1: scanDocCommentChar();
duke@1: break;
duke@1: case FF:
duke@1: scanDocCommentChar();
duke@1: break textLoop; // treat as end of line
duke@1: case CR: // (Spec 3.4)
duke@1: scanDocCommentChar();
duke@1: if (ch != LF) {
duke@1: // Canonicalize CR-only line terminator to LF
duke@1: if (docCommentCount == docCommentBuffer.length)
duke@1: expandCommentBuffer();
duke@1: docCommentBuffer[docCommentCount++] = (char)LF;
duke@1: break textLoop;
duke@1: }
duke@1: /* fall through to LF case */
duke@1: case LF: // (Spec 3.4)
duke@1: // We've seen a newline. Add it to our
duke@1: // buffer and break out of this loop,
duke@1: // starting fresh on a new line.
duke@1: if (docCommentCount == docCommentBuffer.length)
duke@1: expandCommentBuffer();
duke@1: docCommentBuffer[docCommentCount++] = ch;
duke@1: scanDocCommentChar();
duke@1: break textLoop;
duke@1: default:
duke@1: // Add the character to our buffer.
duke@1: if (docCommentCount == docCommentBuffer.length)
duke@1: expandCommentBuffer();
duke@1: docCommentBuffer[docCommentCount++] = ch;
duke@1: scanDocCommentChar();
duke@1: }
duke@1: } // end textLoop
duke@1: firstLine = false;
duke@1: } // end outerLoop
duke@1:
duke@1: if (docCommentCount > 0) {
duke@1: int i = docCommentCount - 1;
duke@1: trailLoop:
duke@1: while (i > -1) {
duke@1: switch (docCommentBuffer[i]) {
duke@1: case '*':
duke@1: i--;
duke@1: break;
duke@1: default:
duke@1: break trailLoop;
duke@1: }
duke@1: }
duke@1: docCommentCount = i + 1;
duke@1:
duke@1: // Store the text of the doc comment
duke@1: docComment = new String(docCommentBuffer, 0 , docCommentCount);
duke@1: } else {
duke@1: docComment = "";
duke@1: }
duke@1: }
duke@1:
duke@1: /** Build a map for translating between line numbers and
duke@1: * positions in the input.
duke@1: *
duke@1: * @return a LineMap */
duke@1: public Position.LineMap getLineMap() {
duke@1: char[] buf = getRawCharacters();
duke@1: return Position.makeLineMap(buf, buf.length, true);
duke@1: }
duke@1: }