mcimadamore@1113: /* jjg@1281: * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. mcimadamore@1113: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. mcimadamore@1113: * mcimadamore@1113: * This code is free software; you can redistribute it and/or modify it mcimadamore@1113: * under the terms of the GNU General Public License version 2 only, as mcimadamore@1113: * published by the Free Software Foundation. Oracle designates this mcimadamore@1113: * particular file as subject to the "Classpath" exception as provided mcimadamore@1113: * by Oracle in the LICENSE file that accompanied this code. mcimadamore@1113: * mcimadamore@1113: * This code is distributed in the hope that it will be useful, but WITHOUT mcimadamore@1113: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or mcimadamore@1113: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License mcimadamore@1113: * version 2 for more details (a copy is included in the LICENSE file that mcimadamore@1113: * accompanied this code). mcimadamore@1113: * mcimadamore@1113: * You should have received a copy of the GNU General Public License version mcimadamore@1113: * 2 along with this work; if not, write to the Free Software Foundation, mcimadamore@1113: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. mcimadamore@1113: * mcimadamore@1113: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA mcimadamore@1113: * or visit www.oracle.com if you need additional information or have any mcimadamore@1113: * questions. 
mcimadamore@1113: */ mcimadamore@1113: mcimadamore@1113: package com.sun.tools.javac.parser; mcimadamore@1113: mcimadamore@1125: import com.sun.tools.javac.parser.Tokens.Comment; mcimadamore@1125: import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; mcimadamore@1113: import com.sun.tools.javac.util.*; mcimadamore@1113: mcimadamore@1113: import java.nio.*; mcimadamore@1113: mcimadamore@1113: import static com.sun.tools.javac.util.LayoutCharacters.*; mcimadamore@1113: mcimadamore@1113: /** An extension to the base lexical analyzer that captures mcimadamore@1113: * and processes the contents of doc comments. It does so by mcimadamore@1113: * translating Unicode escape sequences and by stripping the mcimadamore@1113: * leading whitespace and starts from each line of the comment. mcimadamore@1113: * mcimadamore@1113: *
<p><b>This is NOT part of any supported API.
mcimadamore@1113: * If you write code that depends on this, you do so at your own risk.
mcimadamore@1113: * This code and its internal interfaces are subject to change or
mcimadamore@1113: * deletion without notice.</b>
mcimadamore@1113: */
mcimadamore@1113: public class JavadocTokenizer extends JavaTokenizer {
mcimadamore@1113:
mcimadamore@1113: /** Create a scanner from the input buffer. buffer must implement
mcimadamore@1113: * array() and compact(), and remaining() must be less than limit().
 * Both arguments are forwarded unchanged to the JavaTokenizer constructor;
 * this constructor does no work of its own.
 *
 * @param fac the scanner factory handed to the superclass
 * @param buffer the input buffer handed to the superclass
mcimadamore@1113: */
mcimadamore@1113: protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
mcimadamore@1113: super(fac, buffer);
mcimadamore@1113: }
mcimadamore@1113:
mcimadamore@1113: /** Create a scanner from the input array. The array must have at
mcimadamore@1113: * least a single character of extra space.
 * All arguments are forwarded unchanged to the JavaTokenizer constructor;
 * this constructor does no work of its own.
 *
 * @param fac the scanner factory handed to the superclass
 * @param input the input array handed to the superclass
 * @param inputLength handed to the superclass; presumably the number of
 *        meaningful characters in input (confirm against JavaTokenizer)
mcimadamore@1113: */
mcimadamore@1113: protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
mcimadamore@1113: super(fac, input, inputLength);
mcimadamore@1113: }
mcimadamore@1113:
/**
 * Builds the comment object for the source range pos..endPos.
 * The raw characters for that range are fetched from reader and wrapped
 * in a DocReader whose startPos is pos, which is what lets the DocReader
 * map positions in the comment text back to positions in the source file.
 *
 * @param pos start position passed to reader.getRawCharacters; also used
 *        as the DocReader's startPos
 * @param endPos end position passed to reader.getRawCharacters
 * @param style passed through to the JavadocComment constructor
 * @return a new JavadocComment backed by the freshly created DocReader
 */
mcimadamore@1125: @Override
mcimadamore@1125: protected Comment processComment(int pos, int endPos, CommentStyle style) {
// The DocReader gets its own array covering just this comment.
mcimadamore@1125: char[] buf = reader.getRawCharacters(pos, endPos);
jjg@1281: return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style);
mcimadamore@1125: }
mcimadamore@1113:
mcimadamore@1113: /**
mcimadamore@1125: * This is a specialized version of UnicodeReader that keeps track of the
jjg@1281: * column position within a given character stream (used for Javadoc processing),
jjg@1281: * and which builds a table for mapping positions in the comment string to
jjg@1281: * positions in the source file.
mcimadamore@1113: */
jjg@1281: static class DocReader extends UnicodeReader {
mcimadamore@1113:
/**
 * Column counter kept in step with bp: reset to 0 on a carriage return
 * or newline, advanced to the next multiple of TabInc on a tab, and
 * bumped by one for each other buffer character consumed.
 */
mcimadamore@1125: int col;
/**
 * Offset added to bp in putChar to form the source position stored in
 * pbuf. Assigned in the constructor.
 */
jjg@1281: int startPos;
mcimadamore@1113:
jjg@1281: /**
jjg@1281: * A buffer for building a table for mapping positions in {@link #sbuf}
jjg@1281: * to positions in the source buffer.
jjg@1281: *
jjg@1281: * The array is organized as a series of pairs of integers: the first
jjg@1281: * number in each pair specifies a position in the comment text,
jjg@1281: * the second number in each pair specifies the corresponding position
jjg@1281: * in the source buffer. The pairs are sorted in ascending order.
jjg@1281: *
jjg@1281: * Since the mapping function is generally continuous, with successive
jjg@1281: * positions in the string corresponding to successive positions in the
jjg@1281: * source buffer, the table only needs to record discontinuities in
jjg@1281: * the mapping. The values of intermediate positions can be inferred.
jjg@1281: *
jjg@1281: * Discontinuities may occur in a number of places: when a newline
jjg@1281: * is followed by whitespace and asterisks (which are ignored),
jjg@1281: * when a tab is expanded into spaces, and when unicode escapes
jjg@1281: * are used in the source buffer.
jjg@1281: *
jjg@1281: * Thus, to find the source position of any position, p, in the comment
jjg@1281: * string, find the index, i, of the pair whose string offset
jjg@1281: * ({@code pbuf[i] }) is closest to but not greater than p. Then,
jjg@1281: * {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }.
jjg@1281: */
jjg@1281: int[] pbuf = new int[128];
jjg@1281:
jjg@1281: /**
jjg@1281: * The index of the next empty slot in the pbuf buffer.
jjg@1281: */
jjg@1281: int pp = 0;
jjg@1281:
/**
 * Creates a reader over the given characters and remembers where they
 * start in the source file.
 *
 * @param fac forwarded to the UnicodeReader constructor
 * @param input forwarded to the UnicodeReader constructor
 * @param inputLength forwarded to the UnicodeReader constructor
 * @param startPos stored in the startPos field; putChar adds it to bp
 *        when recording source positions
 */
jjg@1281: DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) {
mcimadamore@1125: super(fac, input, inputLength);
jjg@1281: this.startPos = startPos;
mcimadamore@1125: }
mcimadamore@1113:
/**
 * Attempts to decode a Unicode escape that starts at the current
 * backslash, adjusting col alongside every change to bp.
 *
 * Nothing happens unless ch is a backslash and bp differs from
 * unicodeConversionBp, which this method sets to the final position of
 * an escape it has just decoded. There are three outcomes:
 * (1) a backslash, one or more 'u' characters and four accepted hex
 * digits: ch becomes the decoded character and bp rests on the last
 * digit; (2) the backslash is not followed by 'u': bp, ch and col are
 * restored to their values on entry; (3) a 'u' follows but the digits
 * are not accepted or do not fit before buflen: bp and col stay
 * advanced and nothing is reported from here.
 */
mcimadamore@1125: @Override
mcimadamore@1125: protected void convertUnicode() {
mcimadamore@1125: if (ch == '\\' && unicodeConversionBp != bp) {
// Step onto the character that follows the backslash.
mcimadamore@1125: bp++; ch = buf[bp]; col++;
mcimadamore@1125: if (ch == 'u') {
// Any number of consecutive 'u' characters is accepted.
mcimadamore@1125: do {
mcimadamore@1125: bp++; ch = buf[bp]; col++;
mcimadamore@1125: } while (ch == 'u');
// bp is now at the first candidate digit; limit is the index of the fourth.
mcimadamore@1125: int limit = bp + 3;
mcimadamore@1125: if (limit < buflen) {
mcimadamore@1125: int d = digit(bp, 16);
mcimadamore@1125: int code = d;
// Consume up to three more digits, stopping at the first one for
// which digit() yields a negative value (treated here as invalid).
mcimadamore@1125: while (bp < limit && d >= 0) {
mcimadamore@1125: bp++; ch = buf[bp]; col++;
mcimadamore@1125: d = digit(bp, 16);
mcimadamore@1125: code = (code << 4) + d;
mcimadamore@1125: }
// d >= 0 here means the loop ran all the way to limit with every
// digit accepted, so code holds the full four-digit value.
mcimadamore@1125: if (d >= 0) {
mcimadamore@1125: ch = (char)code;
// Remember where this escape ended so the guard at the top of this
// method skips a decoded backslash instead of converting it again.
mcimadamore@1125: unicodeConversionBp = bp;
mcimadamore@1125: return;
mcimadamore@1125: }
mcimadamore@1125: }
mcimadamore@1125: // "illegal.Unicode.esc", reported by base scanner
mcimadamore@1125: } else {
// Not an escape: undo the look-ahead step taken above.
mcimadamore@1125: bp--;
mcimadamore@1125: ch = '\\';
mcimadamore@1125: col--;
mcimadamore@1125: }
mcimadamore@1125: }
mcimadamore@1125: }
mcimadamore@1113:
/**
 * Reads the next character of a comment via scanChar(). When the result
 * is a backslash, one of two things happens: if peekChar() also yields a
 * backslash and isUnicode() is false, the current backslash is stored
 * with putChar(ch, false) and bp and col step past it; otherwise
 * convertUnicode() is invoked.
 */
mcimadamore@1125: @Override
mcimadamore@1125: protected void scanCommentChar() {
mcimadamore@1125: scanChar();
mcimadamore@1125: if (ch == '\\') {
mcimadamore@1125: if (peekChar() == '\\' && !isUnicode()) {
mcimadamore@1125: putChar(ch, false);
// ch is left as a backslash; only the indices move forward.
mcimadamore@1125: bp++; col++;
mcimadamore@1125: } else {
mcimadamore@1125: convertUnicode();
mcimadamore@1125: }
mcimadamore@1125: }
mcimadamore@1125: }
mcimadamore@1113:
/**
 * Advances bp by one, loads ch from buf, and updates col for the
 * character just read. A backslash is additionally handed to
 * convertUnicode().
 */
mcimadamore@1125: @Override
mcimadamore@1125: protected void scanChar() {
mcimadamore@1125: bp++;
mcimadamore@1125: ch = buf[bp];
mcimadamore@1125: switch (ch) {
mcimadamore@1125: case '\r': // return
mcimadamore@1125: col = 0;
mcimadamore@1125: break;
mcimadamore@1125: case '\n': // newline
// A newline directly after '\r' leaves col alone; the '\r' already reset it.
mcimadamore@1125: if (bp == 0 || buf[bp-1] != '\r') {
mcimadamore@1125: col = 0;
mcimadamore@1125: }
mcimadamore@1125: break;
mcimadamore@1125: case '\t': // tab
// Round col down to a multiple of TabInc, then move one TabInc further.
mcimadamore@1125: col = (col / TabInc * TabInc) + TabInc;
mcimadamore@1125: break;
mcimadamore@1125: case '\\': // possible Unicode
mcimadamore@1125: col++;
mcimadamore@1125: convertUnicode();
mcimadamore@1125: break;
mcimadamore@1125: default:
mcimadamore@1125: col++;
mcimadamore@1125: break;
mcimadamore@1125: }
mcimadamore@1125: }
jjg@1281:
/**
 * Appends a (sp, startPos + bp) pair to pbuf when the mapping has a
 * discontinuity at this character, then delegates to super.putChar.
 *
 * @param ch passed through to the superclass
 * @param scan passed through to the superclass
 */
jjg@1281: @Override
jjg@1281: public void putChar(char ch, boolean scan) {
jjg@1281: // At this point, bp is the position of the current character in buf,
jjg@1281: // and sp is the position in sbuf where this character will be put.
jjg@1281: // Record a new entry in pbuf if pbuf is empty or if sp and its
jjg@1281: // corresponding source position are not equidistant from the
jjg@1281: // corresponding values in the latest entry in the pbuf array.
jjg@1281: // (i.e. there is a discontinuity in the map function.)
jjg@1281: if ((pp == 0)
jjg@1281: || (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) {
// pp only ever grows by 2, so pp + 1 is the last slot this pair needs;
// double the table when that slot would fall outside it.
jjg@1281: if (pp + 1 >= pbuf.length) {
jjg@1281: int[] new_pbuf = new int[pbuf.length * 2];
jjg@1281: System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length);
jjg@1281: pbuf = new_pbuf;
jjg@1281: }
jjg@1281: pbuf[pp] = sp;
jjg@1281: pbuf[pp + 1] = startPos + bp;
jjg@1281: pp += 2;
jjg@1281: }
jjg@1281: super.putChar(ch, scan);
jjg@1281: }
mcimadamore@1125: }
mcimadamore@1125:
jjg@1281: protected class JavadocComment extends JavaTokenizer.BasicComment