src/share/classes/com/sun/tools/javac/parser/DocCommentParser.java

Mon, 25 Nov 2013 17:42:28 -0800

author
jjg
date
Mon, 25 Nov 2013 17:42:28 -0800
changeset 2204
a78f51d6bd5e
parent 1704
ed918a442b83
child 2268
b4e592c5314d
permissions
-rw-r--r--

8028318: [doclint] doclint will reject existing user-written doc comments using custom tags that follow the recommended rules
Reviewed-by: darcy

     1 /*
     2  * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import java.text.BreakIterator;
    29 import java.util.Arrays;
    30 import java.util.HashMap;
    31 import java.util.HashSet;
    32 import java.util.Locale;
    33 import java.util.Map;
    34 import java.util.Set;
    36 import com.sun.source.doctree.AttributeTree.ValueKind;
    37 import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
    38 import com.sun.tools.javac.parser.Tokens.Comment;
    39 import com.sun.tools.javac.parser.Tokens.TokenKind;
    40 import com.sun.tools.javac.tree.DCTree;
    41 import com.sun.tools.javac.tree.DCTree.DCAttribute;
    42 import com.sun.tools.javac.tree.DCTree.DCDocComment;
    43 import com.sun.tools.javac.tree.DCTree.DCEndElement;
    44 import com.sun.tools.javac.tree.DCTree.DCEndPosTree;
    45 import com.sun.tools.javac.tree.DCTree.DCErroneous;
    46 import com.sun.tools.javac.tree.DCTree.DCIdentifier;
    47 import com.sun.tools.javac.tree.DCTree.DCReference;
    48 import com.sun.tools.javac.tree.DCTree.DCStartElement;
    49 import com.sun.tools.javac.tree.DCTree.DCText;
    50 import com.sun.tools.javac.tree.DocTreeMaker;
    51 import com.sun.tools.javac.tree.JCTree;
    52 import com.sun.tools.javac.util.DiagnosticSource;
    53 import com.sun.tools.javac.util.List;
    54 import com.sun.tools.javac.util.ListBuffer;
    55 import com.sun.tools.javac.util.Log;
    56 import com.sun.tools.javac.util.Name;
    57 import com.sun.tools.javac.util.Names;
    58 import com.sun.tools.javac.util.Options;
    59 import com.sun.tools.javac.util.Position;
    60 import static com.sun.tools.javac.util.LayoutCharacters.*;
    62 /**
    63  *
    64  *  <p><b>This is NOT part of any supported API.
    65  *  If you write code that depends on this, you do so at your own risk.
    66  *  This code and its internal interfaces are subject to change or
    67  *  deletion without notice.</b>
    68  */
    69 public class DocCommentParser {
    70     static class ParseException extends Exception {
    71         private static final long serialVersionUID = 0;
    72         ParseException(String key) {
    73             super(key);
    74         }
    75     }
    77     final ParserFactory fac;
    78     final DiagnosticSource diagSource;
           // The comment being parsed; parse() reads its text via getText().
    79     final Comment comment;
           // Tree factory taken from fac.docTreeMaker; m.at(pos) is called before each node is created.
    80     final DocTreeMaker m;
           // Name table taken from fac.names.
    81     final Names names;
           // Only assigned by the constructor when the "breakIterator" option is set
           // or the locale's language is not English; otherwise left null.
    83     BreakIterator sentenceBreaker;
    85     /** The input buffer, index of most recent character read,
    86      *  index of one past last character in buffer.
    87      */
    88     protected char[] buf;
    89     protected int bp;
    90     protected int buflen;
    92     /** The current character.
    93      */
    94     protected char ch;
           // Index in buf where the currently pending run of plain text starts,
           // or -1 when no text is pending (see addPendingText).
    96     int textStart = -1;
           // Index of the last non-whitespace character recorded for the pending text,
           // or -1 if none; used to trim trailing whitespace when the text is flushed.
    97     int lastNonWhite = -1;
           // Set by nextChar() when a line terminator is read and cleared by the scanning
           // methods on non-whitespace input; an '@' seen while this is true is treated
           // as the start of a block tag.
    98     boolean newline = true;
           // Tag parsers looked up by tag name in blockTag() and inlineTag();
           // presumably populated by initTagParsers(), which the constructor calls.
   100     Map<Name, TagParser> tagParsers;
   102     DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
   103         this.fac = fac;
   104         this.diagSource = diagSource;
   105         this.comment = comment;
   106         names = fac.names;
   107         m = fac.docTreeMaker;
   109         Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale;
   111         Options options = fac.options;
   112         boolean useBreakIterator = options.isSet("breakIterator");
   113         if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage()))
   114             sentenceBreaker = BreakIterator.getSentenceInstance(locale);
   116         initTagParsers();
   117     }
   119     DCDocComment parse() {
               // Entry point: parses the whole comment text into a DCDocComment made of
               // a first sentence, the remaining body and the block tags.
   120         String c = comment.getText();
               // Copy the text into buf with one extra trailing slot holding the EOI
               // sentinel; buflen is the index of that sentinel.
   121         buf = new char[c.length() + 1];
   122         c.getChars(0, c.length(), buf, 0);
   123         buf[buf.length - 1] = EOI;
   124         buflen = buf.length - 1;
               // Start one before the first character so nextChar() loads buf[0] into ch.
   125         bp = -1;
   126         nextChar();
   128         List<DCTree> body = blockContent();
   129         List<DCTree> tags = blockTags();
   131         // split body into first sentence and body
               // fs collects the first-sentence trees; body is advanced past
               // everything that has been moved into fs.
   132         ListBuffer<DCTree> fs = new ListBuffer<DCTree>();
   133         loop:
   134         for (; body.nonEmpty(); body = body.tail) {
   135             DCTree t = body.head;
   136             switch (t.getKind()) {
   137                 case TEXT:
   138                     String s = ((DCText) t).getBody();
   139                     int i = getSentenceBreak(s);
   140                     if (i > 0) {
                               // Break found inside this text node: the part before it
                               // (trailing whitespace trimmed) closes the first sentence.
   141                         int i0 = i;
   142                         while (i0 > 0 && isWhitespace(s.charAt(i0 - 1)))
   143                             i0--;
   144                         fs.add(m.at(t.pos).Text(s.substring(0, i0)));
                               // Skip whitespace after the break; any remainder goes back
                               // onto the body as a new text node positioned at t.pos + i1.
   145                         int i1 = i;
   146                         while (i1 < s.length() && isWhitespace(s.charAt(i1)))
   147                             i1++;
   148                         body = body.tail;
   149                         if (i1 < s.length())
   150                             body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1)));
   151                         break loop;
   152                     } else if (body.tail.nonEmpty()) {
                               // No break in the text itself, but the next tree is one that
                               // isSentenceBreak accepts: the text, trimmed of trailing
                               // whitespace, ends the first sentence.
   153                         if (isSentenceBreak(body.tail.head)) {
   154                             int i0 = s.length() - 1;
   155                             while (i0 > 0 && isWhitespace(s.charAt(i0)))
   156                                 i0--;
   157                             fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1)));
   158                             body = body.tail;
   159                             break loop;
   160                         }
   161                     }
   162                     break;
   164                 case START_ELEMENT:
   165                 case END_ELEMENT:
                           // A sentence-breaking element ends the first sentence and
                           // stays at the head of the body (it is not added to fs).
   166                     if (isSentenceBreak(t))
   167                         break loop;
   168                     break;
   169             }
                   // Reached only when t did not end the first sentence.
   170             fs.add(t);
   171         }
               // The comment's position is that of the tree returned by getFirst,
               // or NOPOS when getFirst returns null.
   173         @SuppressWarnings("unchecked")
   174         DCTree first = getFirst(fs.toList(), body, tags);
   175         int pos = (first == null) ? Position.NOPOS : first.pos;
   177         DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags);
   178         return dc;
   179     }
   181     void nextChar() {
   182         ch = buf[bp < buflen ? ++bp : buflen];
   183         switch (ch) {
   184             case '\f': case '\n': case '\r':
   185                 newline = true;
   186         }
   187     }
   189     /**
   190      * Read block content, consisting of text, html and inline tags.
   191      * Terminated by the end of input, or the beginning of the next block tag:
   192      * i.e. @ as the first non-whitespace character on a line.
            *
            * Plain text is accumulated lazily: textStart marks where the pending run
            * begins and lastNonWhite where its last non-whitespace character is, and
            * the run is turned into a text node by addPendingText whenever another
            * kind of tree has to be emitted or the content ends.
            *
            * @return the trees read, in source order
   193      */
   194     @SuppressWarnings("fallthrough")
   195     protected List<DCTree> blockContent() {
   196         ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
   197         textStart = -1;
   199         loop:
   200         while (bp < buflen) {
   201             switch (ch) {
   202                 case '\n': case '\r': case '\f':
   203                     newline = true;
   204                     // fallthrough
                       // Whitespace is stepped over without updating lastNonWhite, so
                       // trailing whitespace is excluded when the text is flushed there.
   206                 case ' ': case '\t':
   207                     nextChar();
   208                     break;
   210                 case '&':
   211                     entity(trees);
   212                     break;
   214                 case '<':
   215                     newline = false;
                           // Flush text up to the character before '<', then parse the element.
   216                     addPendingText(trees, bp - 1);
   217                     trees.add(html());
                           // Any following text starts right after whatever html() consumed.
   218                     if (textStart == -1) {
   219                         textStart = bp;
   220                         lastNonWhite = -1;
   221                     }
   222                     break;
   224                 case '>':
   225                     newline = false;
                           // A '>' reached here is reported as a one-character erroneous node.
   226                     addPendingText(trees, bp - 1);
   227                     trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt"));
   228                     nextChar();
   229                     if (textStart == -1) {
   230                         textStart = bp;
   231                         lastNonWhite = -1;
   232                     }
   233                     break;
   235                 case '{':
   236                     inlineTag(trees);
   237                     break;
   239                 case '@':
                           // '@' with newline still set starts a block tag: flush the text
                           // (trimmed to lastNonWhite) and stop without consuming the '@'.
   240                     if (newline) {
   241                         addPendingText(trees, lastNonWhite);
   242                         break loop;
   243                     }
   244                     // fallthrough
   246                 default:
                           // Ordinary non-whitespace character: it opens a text run if none
                           // is pending and becomes the new lastNonWhite.
   247                     newline = false;
   248                     if (textStart == -1)
   249                         textStart = bp;
   250                     lastNonWhite = bp;
   251                     nextChar();
   252             }
   253         }
               // End of input: flush whatever text is still pending.
   255         if (lastNonWhite != -1)
   256             addPendingText(trees, lastNonWhite);
   258         return trees.toList();
   259     }
   261     /**
   262      * Read a series of block tags, including their content.
   263      * Standard tags parse their content appropriately.
   264      * Non-standard tags are represented by {@link UnknownBlockTag}.
   265      */
   266     protected List<DCTree> blockTags() {
   267         ListBuffer<DCTree> tags = new ListBuffer<DCTree>();
   268         while (ch == '@')
   269             tags.add(blockTag());
   270         return tags.toList();
   271     }
   273     /**
   274      * Read a single block tag, including its content.
   275      * Standard tags parse their content appropriately.
   276      * Non-standard tags are represented by {@link UnknownBlockTag}.
   277      */
   278     protected DCTree blockTag() {
   279         int p = bp;
   280         try {
   281             nextChar();
   282             if (isIdentifierStart(ch)) {
   283                 Name name = readTagName();
   284                 TagParser tp = tagParsers.get(name);
   285                 if (tp == null) {
   286                     List<DCTree> content = blockContent();
   287                     return m.at(p).UnknownBlockTag(name, content);
   288                 } else {
   289                     switch (tp.getKind()) {
   290                         case BLOCK:
   291                             return tp.parse(p);
   292                         case INLINE:
   293                             return erroneous("dc.bad.inline.tag", p);
   294                     }
   295                 }
   296             }
   297             blockContent();
   299             return erroneous("dc.no.tag.name", p);
   300         } catch (ParseException e) {
   301             blockContent();
   302             return erroneous(e.getMessage(), p);
   303         }
   304     }
   306     protected void inlineTag(ListBuffer<DCTree> list) {
   307         newline = false;
   308         nextChar();
   309         if (ch == '@') {
   310             addPendingText(list, bp - 2);
   311             list.add(inlineTag());
   312             textStart = bp;
   313             lastNonWhite = -1;
   314         } else {
   315             if (textStart == -1)
   316                 textStart = bp - 1;
   317             lastNonWhite = bp;
   318         }
   319     }
   321     /**
   322      * Read a single inline tag, including its content.
   323      * Standard tags parse their content appropriately.
   324      * Non-standard tags are represented by {@link UnknownBlockTag}.
   325      * Malformed tags may be returned as {@link Erroneous}.
   326      */
   327     protected DCTree inlineTag() {
   328         int p = bp - 1;
   329         try {
   330             nextChar();
   331             if (isIdentifierStart(ch)) {
   332                 Name name = readTagName();
   333                 skipWhitespace();
   335                 TagParser tp = tagParsers.get(name);
   336                 if (tp == null) {
   337                     DCTree text = inlineText();
   338                     if (text != null) {
   339                         nextChar();
   340                         return m.at(p).UnknownInlineTag(name, List.of(text)).setEndPos(bp);
   341                     }
   342                 } else if (tp.getKind() == TagParser.Kind.INLINE) {
   343                     DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p);
   344                     if (tree != null) {
   345                         return tree.setEndPos(bp);
   346                     }
   347                 } else {
   348                     inlineText(); // skip content
   349                     nextChar();
   350                 }
   351             }
   352             return erroneous("dc.no.tag.name", p);
   353         } catch (ParseException e) {
   354             return erroneous(e.getMessage(), p);
   355         }
   356     }
   358     /**
   359      * Read plain text content of an inline tag.
   360      * Matching pairs of { } are skipped; the text is terminated by the first
   361      * unmatched }. It is an error if the beginning of the next tag is detected.
   362      */
   363     protected DCTree inlineText() throws ParseException {
   364         skipWhitespace();
   365         int pos = bp;
   366         int depth = 1;
   368         loop:
   369         while (bp < buflen) {
   370             switch (ch) {
   371                 case '\n': case '\r': case '\f':
   372                     newline = true;
   373                     break;
   375                 case ' ': case '\t':
   376                     break;
   378                 case '{':
   379                     newline = false;
   380                     lastNonWhite = bp;
   381                     depth++;
   382                     break;
   384                 case '}':
   385                     if (--depth == 0) {
   386                         return m.at(pos).Text(newString(pos, bp));
   387                     }
   388                     newline = false;
   389                     lastNonWhite = bp;
   390                     break;
   392                 case '@':
   393                     if (newline)
   394                         break loop;
   395                     newline = false;
   396                     lastNonWhite = bp;
   397                     break;
   399                 default:
   400                     newline = false;
   401                     lastNonWhite = bp;
   402                     break;
   403             }
   404             nextChar();
   405         }
   406         throw new ParseException("dc.unterminated.inline.tag");
   407     }
   409     /**
   410      * Read Java class name, possibly followed by member.
   411      * Matching pairs of < > are skipped. The text is terminated by the first
   412      * unmatched }. It is an error if the beginning of the next tag is detected.
            *
            * The signature text has the general shape {@code type#member(params)}, in
            * which each of the three parts may be absent. Returns null when a } is
            * found immediately at the starting position, i.e. the reference is empty.
            * The allowMember parameter is not consulted by this method body.
            *
            * @throws ParseException if ( ) or < > are unbalanced, if the closing
            *         parenthesis is not the last character of the signature, or if
            *         parsing any part of the signature fails or reports a diagnostic
   413      */
   414     // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
   415     // TODO: improve quality of parse to forbid bad constructions.
   416     @SuppressWarnings("fallthrough")
   417     protected DCReference reference(boolean allowMember) throws ParseException {
   418         int pos = bp;
   419         int depth = 0;
   421         // scan to find the end of the signature, by looking for the first
   422         // whitespace not enclosed in () or <>, or the end of the tag
   423         loop:
   424         while (bp < buflen) {
   425             switch (ch) {
   426                 case '\n': case '\r': case '\f':
   427                     newline = true;
   428                     // fallthrough
   430                 case ' ': case '\t':
   431                     if (depth == 0)
   432                         break loop;
   433                     break;
   435                 case '(':
   436                 case '<':
   437                     newline = false;
   438                     depth++;
   439                     break;
   441                 case ')':
   442                 case '>':
   443                     newline = false;
   444                     --depth;
   445                     break;
   447                 case '}':
                           // '}' at the very first position means there is no reference at all.
   448                     if (bp == pos)
   449                         return null;
   450                     newline = false;
   451                     break loop;
   453                 case '@':
   454                     if (newline)
   455                         break loop;
   456                     // fallthrough
   458                 default:
   459                     newline = false;
   461             }
   462             nextChar();
   463         }
   465         if (depth != 0)
   466             throw new ParseException("dc.unterminated.signature");
   468         String sig = newString(pos, bp);
   470         // Break sig apart into qualifiedExpr member paramTypes.
   471         JCTree qualExpr;
   472         Name member;
   473         List<JCTree> paramTypes;
               // Diagnostics raised by the nested parsers are captured by this handler
               // instead of being reported; it is popped again in the finally block.
   475         Log.DeferredDiagnosticHandler deferredDiagnosticHandler
   476                 = new Log.DeferredDiagnosticHandler(fac.log);
   478         try {
   479             int hash = sig.indexOf("#");
   480             int lparen = sig.indexOf("(", hash + 1);
   481             if (hash == -1) {
   482                 if (lparen == -1) {
                           // No '#' and no '(': the whole signature is a type.
   483                     qualExpr = parseType(sig);
   484                     member = null;
   485                 } else {
                           // No '#' but a '(': an unqualified member with parameters.
   486                     qualExpr = null;
   487                     member = parseMember(sig.substring(0, lparen));
   488                 }
   489             } else {
                       // Text before '#' (if any) is the type; text after it, up to any '(',
                       // is the member name.
   490                 qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
   491                 if (lparen == -1)
   492                     member = parseMember(sig.substring(hash + 1));
   493                 else
   494                     member = parseMember(sig.substring(hash + 1, lparen));
   495             }
   497             if (lparen < 0) {
   498                 paramTypes = null;
   499             } else {
                       // The first ')' after '(' must be the final character of the signature.
   500                 int rparen = sig.indexOf(")", lparen);
   501                 if (rparen != sig.length() - 1)
   502                     throw new ParseException("dc.ref.bad.parens");
   503                 paramTypes = parseParams(sig.substring(lparen + 1, rparen));
   504             }
                   // Any diagnostic captured while parsing the pieces makes the reference invalid.
   506             if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
   507                 throw new ParseException("dc.ref.syntax.error");
   509         } finally {
   510             fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
   511         }
   513         return m.at(pos).Reference(sig, qualExpr, member, paramTypes).setEndPos(bp);
   514     }
   516     JCTree parseType(String s) throws ParseException {
   517         JavacParser p = fac.newParser(s, false, false, false);
   518         JCTree tree = p.parseType();
   519         if (p.token().kind != TokenKind.EOF)
   520             throw new ParseException("dc.ref.unexpected.input");
   521         return tree;
   522     }
   524     Name parseMember(String s) throws ParseException {
   525         JavacParser p = fac.newParser(s, false, false, false);
   526         Name name = p.ident();
   527         if (p.token().kind != TokenKind.EOF)
   528             throw new ParseException("dc.ref.unexpected.input");
   529         return name;
   530     }
   532     List<JCTree> parseParams(String s) throws ParseException {
   533         if (s.trim().isEmpty())
   534             return List.nil();
   536         JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
   537         ListBuffer<JCTree> paramTypes = new ListBuffer<JCTree>();
   538         paramTypes.add(p.parseType());
   540         if (p.token().kind == TokenKind.IDENTIFIER)
   541             p.nextToken();
   543         while (p.token().kind == TokenKind.COMMA) {
   544             p.nextToken();
   545             paramTypes.add(p.parseType());
   547             if (p.token().kind == TokenKind.IDENTIFIER)
   548                 p.nextToken();
   549         }
   551         if (p.token().kind != TokenKind.EOF)
   552             throw new ParseException("dc.ref.unexpected.input");
   554         return paramTypes.toList();
   555     }
   557     /**
   558      * Read Java identifier
   559      * Matching pairs of { } are skipped; the text is terminated by the first
   560      * unmatched }. It is an error if the beginning of the next tag is detected.
   561      */
   562     @SuppressWarnings("fallthrough")
   563     protected DCIdentifier identifier() throws ParseException {
   564         skipWhitespace();
   565         int pos = bp;
   567         if (isJavaIdentifierStart(ch)) {
   568             Name name = readJavaIdentifier();
   569             return m.at(pos).Identifier(name);
   570         }
   572         throw new ParseException("dc.identifier.expected");
   573     }
   575     /**
   576      * Read a quoted string.
   577      * It is an error if the beginning of the next tag is detected.
   578      */
   579     @SuppressWarnings("fallthrough")
   580     protected DCText quotedString() {
   581         int pos = bp;
   582         nextChar();
   584         loop:
   585         while (bp < buflen) {
   586             switch (ch) {
   587                 case '\n': case '\r': case '\f':
   588                     newline = true;
   589                     break;
   591                 case ' ': case '\t':
   592                     break;
   594                 case '"':
   595                     nextChar();
   596                     // trim trailing white-space?
   597                     return m.at(pos).Text(newString(pos, bp));
   599                 case '@':
   600                     if (newline)
   601                         break loop;
   603             }
   604             nextChar();
   605         }
   606         return null;
   607     }
   609     /**
   610      * Read general text content of an inline tag, including HTML entities and elements.
   611      * Matching pairs of { } are skipped; the text is terminated by the first
   612      * unmatched }. It is an error if the beginning of the next tag is detected.
   613      */
   614     @SuppressWarnings("fallthrough")
   615     protected List<DCTree> inlineContent() {
   616         ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
   618         skipWhitespace();
   619         int pos = bp;
   620         int depth = 1;
   621         textStart = -1;
   623         loop:
   624         while (bp < buflen) {
   626             switch (ch) {
   627                 case '\n': case '\r': case '\f':
   628                     newline = true;
   629                     // fall through
   631                 case ' ': case '\t':
   632                     nextChar();
   633                     break;
   635                 case '&':
   636                     entity(trees);
   637                     break;
   639                 case '<':
   640                     newline = false;
   641                     addPendingText(trees, bp - 1);
   642                     trees.add(html());
   643                     break;
   645                 case '{':
   646                     newline = false;
   647                     depth++;
   648                     nextChar();
   649                     break;
   651                 case '}':
   652                     newline = false;
   653                     if (--depth == 0) {
   654                         addPendingText(trees, bp - 1);
   655                         nextChar();
   656                         return trees.toList();
   657                     }
   658                     nextChar();
   659                     break;
   661                 case '@':
   662                     if (newline)
   663                         break loop;
   664                     // fallthrough
   666                 default:
   667                     if (textStart == -1)
   668                         textStart = bp;
   669                     nextChar();
   670                     break;
   671             }
   672         }
   674         return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos));
   675     }
   677     protected void entity(ListBuffer<DCTree> list) {
   678         newline = false;
   679         addPendingText(list, bp - 1);
   680         list.add(entity());
   681         if (textStart == -1) {
   682             textStart = bp;
   683             lastNonWhite = -1;
   684         }
   685     }
   687     /**
   688      * Read an HTML entity.
   689      * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
   690      */
   691     protected DCTree entity() {
   692         int p = bp;
   693         nextChar();
   694         Name name = null;
   695         boolean checkSemi = false;
   696         if (ch == '#') {
   697             int namep = bp;
   698             nextChar();
   699             if (isDecimalDigit(ch)) {
   700                 nextChar();
   701                 while (isDecimalDigit(ch))
   702                     nextChar();
   703                 name = names.fromChars(buf, namep, bp - namep);
   704             } else if (ch == 'x' || ch == 'X') {
   705                 nextChar();
   706                 if (isHexDigit(ch)) {
   707                     nextChar();
   708                     while (isHexDigit(ch))
   709                         nextChar();
   710                     name = names.fromChars(buf, namep, bp - namep);
   711                 }
   712             }
   713         } else if (isIdentifierStart(ch)) {
   714             name = readIdentifier();
   715         }
   717         if (name == null)
   718             return erroneous("dc.bad.entity", p);
   719         else {
   720             if (ch != ';')
   721                 return erroneous("dc.missing.semicolon", p);
   722             nextChar();
   723             return m.at(p).Entity(name);
   724         }
   725     }
   727     /**
   728      * Read the start or end of an HTML tag, or an HTML comment
   729      * {@literal <identifier attrs> } or {@literal </identifier> }
            *
            * On entry the current character is the opening angle bracket. If none of
            * the three forms can be read completely, the scan position is reset to
            * the character after that bracket and an erroneous tree
            * ("dc.malformed.html") is returned.
   730      */
   731     protected DCTree html() {
   732         int p = bp;
   733         nextChar();
   734         if (isIdentifierStart(ch)) {
                   // Start tag: name, attributes, optional '/', then '>'.
   735             Name name = readIdentifier();
   736             List<DCTree> attrs = htmlAttrs();
   737             if (attrs != null) {
   738                 boolean selfClosing = false;
   739                 if (ch == '/') {
   740                     nextChar();
   741                     selfClosing = true;
   742                 }
   743                 if (ch == '>') {
   744                     nextChar();
   745                     return m.at(p).StartElement(name, attrs, selfClosing).setEndPos(bp);
   746                 }
   747             }
   748         } else if (ch == '/') {
                   // End tag: '/', name, optional whitespace, then '>'.
   749             nextChar();
   750             if (isIdentifierStart(ch)) {
   751                 Name name = readIdentifier();
   752                 skipWhitespace();
   753                 if (ch == '>') {
   754                     nextChar();
   755                     return m.at(p).EndElement(name);
   756                 }
   757             }
   758         } else if (ch == '!') {
                   // Comment: must open with "!--"; scan until a run of at least two
                   // dashes is immediately followed by '>'.
   759             nextChar();
   760             if (ch == '-') {
   761                 nextChar();
   762                 if (ch == '-') {
   763                     nextChar();
   764                     while (bp < buflen) {
   765                         int dash = 0;
   766                         while (ch == '-') {
   767                             dash++;
   768                             nextChar();
   769                         }
   770                         // strictly speaking, a comment should not contain "--"
   771                         // so dash > 2 is an error, dash == 2 implies ch == '>'
   772                         if (dash >= 2 && ch == '>') {
   773                             nextChar();
   774                             return m.at(p).Comment(newString(p, bp));
   775                         }
   777                         nextChar();
   778                     }
   779                 }
   780             }
   781         }
               // Nothing matched: back up to just after the '<' so scanning resumes
               // there. Note that newline is not restored by this reset.
   783         bp = p + 1;
   784         ch = buf[bp];
   785         return erroneous("dc.malformed.html", p);
   786     }
   788     /**
   789      * Read a series of HTML attributes, terminated by {@literal > }.
   790      * Each attribute is of the form {@literal identifier[=value] }.
   791      * "value" may be unquoted, single-quoted, or double-quoted.
            *
            * Reading stops at the first character that cannot start an identifier;
            * that character is left for the caller to inspect. Attribute values
            * are built as lists of trees, so they may contain text, entities and
            * inline tags. This method reuses the shared textStart field while
            * reading values.
            *
            * @return the attributes read, in source order
   792      */
   793     protected List<DCTree> htmlAttrs() {
   794         ListBuffer<DCTree> attrs = new ListBuffer<DCTree>();
   795         skipWhitespace();
   797         loop:
   798         while (isIdentifierStart(ch)) {
   799             int namePos = bp;
   800             Name name = readIdentifier();
   801             skipWhitespace();
                   // Defaults describe an attribute without '=': no value, kind EMPTY.
   802             List<DCTree> value = null;
   803             ValueKind vkind = ValueKind.EMPTY;
   804             if (ch == '=') {
   805                 ListBuffer<DCTree> v = new ListBuffer<DCTree>();
   806                 nextChar();
   807                 skipWhitespace();
   808                 if (ch == '\'' || ch == '"') {
   809                     vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
   810                     char quote = ch;
   811                     nextChar();
   812                     textStart = bp;
                           // Read up to the matching quote character.
   813                     while (bp < buflen && ch != quote) {
   814                         if (newline && ch == '@') {
   815                             attrs.add(erroneous("dc.unterminated.string", namePos));
   816                             // No point trying to read more.
   817                             // In fact, all attrs get discarded by the caller
   818                             // and superseded by a malformed.html node because
   819                             // the html tag itself is not terminated correctly.
   820                             break loop;
   821                         }
   822                         attrValueChar(v);
   823                     }
                           // Flush the text before the closing quote, then step over the quote.
   824                     addPendingText(v, bp - 1);
   825                     nextChar();
   826                 } else {
   827                     vkind = ValueKind.UNQUOTED;
   828                     textStart = bp;
   829                     while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
   830                         attrValueChar(v);
   831                     }
   832                     addPendingText(v, bp - 1);
   833                 }
   834                 skipWhitespace();
   835                 value = v.toList();
   836             }
   837             DCAttribute attr = m.at(namePos).Attribute(name, vkind, value);
   838             attrs.add(attr);
   839         }
   841         return attrs.toList();
   842     }
   844     protected void attrValueChar(ListBuffer<DCTree> list) {
   845         switch (ch) {
   846             case '&':
   847                 entity(list);
   848                 break;
   850             case '{':
   851                 inlineTag(list);
   852                 break;
   854             default:
   855                 nextChar();
   856         }
   857     }
   859     protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
   860         if (textStart != -1) {
   861             if (textStart <= textEnd) {
   862                 list.add(m.at(textStart).Text(newString(textStart, textEnd + 1)));
   863             }
   864             textStart = -1;
   865         }
   866     }
   868     protected DCErroneous erroneous(String code, int pos) {
   869         int i = bp - 1;
   870         loop:
   871         while (i > pos) {
   872             switch (buf[i]) {
   873                 case '\f': case '\n': case '\r':
   874                     newline = true;
   875                     break;
   876                 case '\t': case ' ':
   877                     break;
   878                 default:
   879                     break loop;
   880             }
   881             i--;
   882         }
   883         textStart = -1;
   884         return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code);
   885     }
   887     @SuppressWarnings("unchecked")
   888     <T> T getFirst(List<T>... lists) {
   889         for (List<T> list: lists) {
   890             if (list.nonEmpty())
   891                 return list.head;
   892         }
   893         return null;
   894     }
   896     protected boolean isIdentifierStart(char ch) {
   897         return Character.isUnicodeIdentifierStart(ch);
   898     }
   900     protected Name readIdentifier() {
   901         int start = bp;
   902         nextChar();
   903         while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
   904             nextChar();
   905         return names.fromChars(buf, start, bp - start);
   906     }
   908     protected Name readTagName() {
   909         int start = bp;
   910         nextChar();
   911         while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '.'))
   912             nextChar();
   913         return names.fromChars(buf, start, bp - start);
   914     }
   916     protected boolean isJavaIdentifierStart(char ch) {
   917         return Character.isJavaIdentifierStart(ch);
   918     }
   920     protected Name readJavaIdentifier() {
   921         int start = bp;
   922         nextChar();
   923         while (bp < buflen && Character.isJavaIdentifierPart(ch))
   924             nextChar();
   925         return names.fromChars(buf, start, bp - start);
   926     }
   928     protected boolean isDecimalDigit(char ch) {
   929         return ('0' <= ch && ch <= '9');
   930     }
   932     protected boolean isHexDigit(char ch) {
   933         return ('0' <= ch && ch <= '9')
   934                 || ('a' <= ch && ch <= 'f')
   935                 || ('A' <= ch && ch <= 'F');
   936     }
   938     protected boolean isUnquotedAttrValueTerminator(char ch) {
   939         switch (ch) {
   940             case '\f': case '\n': case '\r': case '\t':
   941             case ' ':
   942             case '"': case '\'': case '`':
   943             case '=': case '<': case '>':
   944                 return true;
   945             default:
   946                 return false;
   947         }
   948     }
   950     protected boolean isWhitespace(char ch) {
   951         return Character.isWhitespace(ch);
   952     }
   954     protected void skipWhitespace() {
   955         while (isWhitespace(ch))
   956             nextChar();
   957     }
   959     protected int getSentenceBreak(String s) {
   960         if (sentenceBreaker != null) {
   961             sentenceBreaker.setText(s);
   962             int i = sentenceBreaker.next();
   963             return (i == s.length()) ? -1 : i;
   964         }
   966         // scan for period followed by whitespace
   967         boolean period = false;
   968         for (int i = 0; i < s.length(); i++) {
   969             switch (s.charAt(i)) {
   970                 case '.':
   971                     period = true;
   972                     break;
   974                 case ' ':
   975                 case '\f':
   976                 case '\n':
   977                 case '\r':
   978                 case '\t':
   979                     if (period)
   980                         return i;
   981                     break;
   983                 default:
   984                     period = false;
   985                     break;
   986             }
   987         }
   988         return -1;
   989     }
   992     Set<String> htmlBlockTags = new HashSet<String>(Arrays.asList(
   993                     "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre"));
   995     protected boolean isSentenceBreak(Name n) {
   996         return htmlBlockTags.contains(n.toString().toLowerCase());
   997     }
   999     protected boolean isSentenceBreak(DCTree t) {
  1000         switch (t.getKind()) {
  1001             case START_ELEMENT:
  1002                 return isSentenceBreak(((DCStartElement) t).getName());
  1004             case END_ELEMENT:
  1005                 return isSentenceBreak(((DCEndElement) t).getName());
  1007         return false;
  1010     /**
  1011      * @param start position of first character of string
  1012      * @param end position of character beyond last character to be included
  1013      */
  1014     String newString(int start, int end) {
  1015         return new String(buf, start, end - start);
  1018     static abstract class TagParser {
  1019         enum Kind { INLINE, BLOCK }
  1021         Kind kind;
  1022         DCTree.Kind treeKind;
  1024         TagParser(Kind k, DCTree.Kind tk) {
  1025             kind = k;
  1026             treeKind = tk;
  1029         Kind getKind() {
  1030             return kind;
  1033         DCTree.Kind getTreeKind() {
  1034             return treeKind;
  1037         abstract DCTree parse(int pos) throws ParseException;
  1040     /**
  1041      * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>
  1042      */
  1043     private void initTagParsers() {
  1044         TagParser[] parsers = {
  1045             // @author name-text
  1046             new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
  1047                 public DCTree parse(int pos) {
  1048                     List<DCTree> name = blockContent();
  1049                     return m.at(pos).Author(name);
  1051             },
  1053             // {@code text}
  1054             new TagParser(Kind.INLINE, DCTree.Kind.CODE) {
  1055                 public DCTree parse(int pos) throws ParseException {
  1056                     DCTree text = inlineText();
  1057                     nextChar();
  1058                     return m.at(pos).Code((DCText) text);
  1060             },
  1062             // @deprecated deprecated-text
  1063             new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
  1064                 public DCTree parse(int pos) {
  1065                     List<DCTree> reason = blockContent();
  1066                     return m.at(pos).Deprecated(reason);
  1068             },
  1070             // {@docRoot}
  1071             new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
  1072                 public DCTree parse(int pos) throws ParseException {
  1073                     if (ch == '}') {
  1074                         nextChar();
  1075                         return m.at(pos).DocRoot();
  1077                     inlineText(); // skip unexpected content
  1078                     nextChar();
  1079                     throw new ParseException("dc.unexpected.content");
  1081             },
  1083             // @exception class-name description
  1084             new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
  1085                 public DCTree parse(int pos) throws ParseException {
  1086                     skipWhitespace();
  1087                     DCReference ref = reference(false);
  1088                     List<DCTree> description = blockContent();
  1089                     return m.at(pos).Exception(ref, description);
  1091             },
  1093             // {@inheritDoc}
  1094             new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
  1095                 public DCTree parse(int pos) throws ParseException {
  1096                     if (ch == '}') {
  1097                         nextChar();
  1098                         return m.at(pos).InheritDoc();
  1100                     inlineText(); // skip unexpected content
  1101                     nextChar();
  1102                     throw new ParseException("dc.unexpected.content");
  1104             },
  1106             // {@link package.class#member label}
  1107             new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
  1108                 public DCTree parse(int pos) throws ParseException {
  1109                     DCReference ref = reference(true);
  1110                     List<DCTree> label = inlineContent();
  1111                     return m.at(pos).Link(ref, label);
  1113             },
  1115             // {@linkplain package.class#member label}
  1116             new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
  1117                 public DCTree parse(int pos) throws ParseException {
  1118                     DCReference ref = reference(true);
  1119                     List<DCTree> label = inlineContent();
  1120                     return m.at(pos).LinkPlain(ref, label);
  1122             },
  1124             // {@literal text}
  1125             new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) {
  1126                 public DCTree parse(int pos) throws ParseException {
  1127                     DCTree text = inlineText();
  1128                     nextChar();
  1129                     return m.at(pos).Literal((DCText) text);
  1131             },
  1133             // @param parameter-name description
  1134             new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
  1135                 public DCTree parse(int pos) throws ParseException {
  1136                     skipWhitespace();
  1138                     boolean typaram = false;
  1139                     if (ch == '<') {
  1140                         typaram = true;
  1141                         nextChar();
  1144                     DCIdentifier id = identifier();
  1146                     if (typaram) {
  1147                         if (ch != '>')
  1148                             throw new ParseException("dc.gt.expected");
  1149                         nextChar();
  1152                     skipWhitespace();
  1153                     List<DCTree> desc = blockContent();
  1154                     return m.at(pos).Param(typaram, id, desc);
  1156             },
  1158             // @return description
  1159             new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
  1160                 public DCTree parse(int pos) {
  1161                     List<DCTree> description = blockContent();
  1162                     return m.at(pos).Return(description);
  1164             },
  1166             // @see reference | quoted-string | HTML
  1167             new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
  1168                 public DCTree parse(int pos) throws ParseException {
  1169                     skipWhitespace();
  1170                     switch (ch) {
  1171                         case '"':
  1172                             DCText string = quotedString();
  1173                             if (string != null) {
  1174                                 skipWhitespace();
  1175                                 if (ch == '@')
  1176                                     return m.at(pos).See(List.<DCTree>of(string));
  1178                             break;
  1180                         case '<':
  1181                             List<DCTree> html = blockContent();
  1182                             if (html != null)
  1183                                 return m.at(pos).See(html);
  1184                             break;
  1186                         case '@':
  1187                             if (newline)
  1188                                 throw new ParseException("dc.no.content");
  1189                             break;
  1191                         case EOI:
  1192                             if (bp == buf.length - 1)
  1193                                 throw new ParseException("dc.no.content");
  1194                             break;
  1196                         default:
  1197                             if (isJavaIdentifierStart(ch) || ch == '#') {
  1198                                 DCReference ref = reference(true);
  1199                                 List<DCTree> description = blockContent();
  1200                                 return m.at(pos).See(description.prepend(ref));
  1203                     throw new ParseException("dc.unexpected.content");
  1205             },
  1207             // @serialData data-description
  1208             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
  1209                 public DCTree parse(int pos) {
  1210                     List<DCTree> description = blockContent();
  1211                     return m.at(pos).SerialData(description);
  1213             },
  1215             // @serialField field-name field-type description
  1216             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
  1217                 public DCTree parse(int pos) throws ParseException {
  1218                     skipWhitespace();
  1219                     DCIdentifier name = identifier();
  1220                     skipWhitespace();
  1221                     DCReference type = reference(false);
  1222                     List<DCTree> description = null;
  1223                     if (isWhitespace(ch)) {
  1224                         skipWhitespace();
  1225                         description = blockContent();
  1227                     return m.at(pos).SerialField(name, type, description);
  1229             },
  1231             // @serial field-description | include | exclude
  1232             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
  1233                 public DCTree parse(int pos) {
  1234                     List<DCTree> description = blockContent();
  1235                     return m.at(pos).Serial(description);
  1237             },
  1239             // @since since-text
  1240             new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
  1241                 public DCTree parse(int pos) {
  1242                     List<DCTree> description = blockContent();
  1243                     return m.at(pos).Since(description);
  1245             },
  1247             // @throws class-name description
  1248             new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
  1249                 public DCTree parse(int pos) throws ParseException {
  1250                     skipWhitespace();
  1251                     DCReference ref = reference(false);
  1252                     List<DCTree> description = blockContent();
  1253                     return m.at(pos).Throws(ref, description);
  1255             },
  1257             // {@value package.class#field}
  1258             new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
  1259                 public DCTree parse(int pos) throws ParseException {
  1260                     DCReference ref = reference(true);
  1261                     skipWhitespace();
  1262                     if (ch == '}') {
  1263                         nextChar();
  1264                         return m.at(pos).Value(ref);
  1266                     nextChar();
  1267                     throw new ParseException("dc.unexpected.content");
  1269             },
  1271             // @version version-text
  1272             new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
  1273                 public DCTree parse(int pos) {
  1274                     List<DCTree> description = blockContent();
  1275                     return m.at(pos).Version(description);
  1277             },
  1278         };
  1280         tagParsers = new HashMap<Name,TagParser>();
  1281         for (TagParser p: parsers)
  1282             tagParsers.put(names.fromString(p.getTreeKind().tagName), p);

mercurial