src/share/classes/com/sun/tools/javac/parser/DocCommentParser.java

Wed, 27 Apr 2016 01:34:52 +0800

author
aoqi
date
Wed, 27 Apr 2016 01:34:52 +0800
changeset 0
959103a6100f
child 2525
2eb010b6cb22
permissions
-rw-r--r--

Initial load
http://hg.openjdk.java.net/jdk8u/jdk8u/langtools/
changeset: 2573:53ca196be1ae
tag: jdk8u25-b17

     1 /*
     2  * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import java.text.BreakIterator;
    29 import java.util.Arrays;
    30 import java.util.HashMap;
    31 import java.util.HashSet;
    32 import java.util.Locale;
    33 import java.util.Map;
    34 import java.util.Set;
    36 import com.sun.source.doctree.AttributeTree.ValueKind;
    37 import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
    38 import com.sun.tools.javac.parser.Tokens.Comment;
    39 import com.sun.tools.javac.parser.Tokens.TokenKind;
    40 import com.sun.tools.javac.tree.DCTree;
    41 import com.sun.tools.javac.tree.DCTree.DCAttribute;
    42 import com.sun.tools.javac.tree.DCTree.DCDocComment;
    43 import com.sun.tools.javac.tree.DCTree.DCEndElement;
    44 import com.sun.tools.javac.tree.DCTree.DCEndPosTree;
    45 import com.sun.tools.javac.tree.DCTree.DCErroneous;
    46 import com.sun.tools.javac.tree.DCTree.DCIdentifier;
    47 import com.sun.tools.javac.tree.DCTree.DCReference;
    48 import com.sun.tools.javac.tree.DCTree.DCStartElement;
    49 import com.sun.tools.javac.tree.DCTree.DCText;
    50 import com.sun.tools.javac.tree.DocTreeMaker;
    51 import com.sun.tools.javac.tree.JCTree;
    52 import com.sun.tools.javac.util.DiagnosticSource;
    53 import com.sun.tools.javac.util.List;
    54 import com.sun.tools.javac.util.ListBuffer;
    55 import com.sun.tools.javac.util.Log;
    56 import com.sun.tools.javac.util.Name;
    57 import com.sun.tools.javac.util.Names;
    58 import com.sun.tools.javac.util.Options;
    59 import com.sun.tools.javac.util.Position;
    60 import com.sun.tools.javac.util.StringUtils;
    61 import static com.sun.tools.javac.util.LayoutCharacters.*;
    63 /**
    64  *
    65  *  <p><b>This is NOT part of any supported API.
    66  *  If you write code that depends on this, you do so at your own risk.
    67  *  This code and its internal interfaces are subject to change or
    68  *  deletion without notice.</b>
    69  */
    70 public class DocCommentParser {
    71     static class ParseException extends Exception {
    72         private static final long serialVersionUID = 0;
    73         ParseException(String key) {
    74             super(key);
    75         }
    76     }
    78     final ParserFactory fac; // factory supplying names, tree maker, locale, options, log and sub-parsers
    79     final DiagnosticSource diagSource; // passed along when Erroneous nodes are created
    80     final Comment comment; // the comment whose text is parsed by parse()
    81     final DocTreeMaker m; // tree factory; m.at(pos) is called before each node is created
    82     final Names names; // name table, taken from fac.names
    84     BreakIterator sentenceBreaker; // only assigned in the constructor for the "breakIterator" option or a non-English locale
    86     /** The input buffer, index of most recent character read,
    87      *  index of one past last character in buffer.
    88      */
    89     protected char[] buf;
    90     protected int bp;
    91     protected int buflen;
    93     /** The current character.
    94      */
    95     protected char ch;
    97     int textStart = -1; // index in buf where the pending (not yet emitted) text run starts, or -1 if none
    98     int lastNonWhite = -1; // index of the last non-whitespace character recorded for the pending text, or -1
    99     boolean newline = true; // set when a line terminator is read; cleared by most non-whitespace handling; gates '@' as a block-tag start
   101     Map<Name, TagParser> tagParsers; // tag name -> parser lookup; presumably filled by initTagParsers() (not visible here)
   103     DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
   104         this.fac = fac;
   105         this.diagSource = diagSource;
   106         this.comment = comment;
   107         names = fac.names;
   108         m = fac.docTreeMaker;
   110         Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale;
   112         Options options = fac.options;
   113         boolean useBreakIterator = options.isSet("breakIterator");
   114         if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage()))
   115             sentenceBreaker = BreakIterator.getSentenceInstance(locale);
   117         initTagParsers();
   118     }
   120     DCDocComment parse() {
   121         String c = comment.getText();
   122         buf = new char[c.length() + 1];
   123         c.getChars(0, c.length(), buf, 0);
   124         buf[buf.length - 1] = EOI;
   125         buflen = buf.length - 1;
   126         bp = -1;
   127         nextChar();
   129         List<DCTree> body = blockContent();
   130         List<DCTree> tags = blockTags();
   132         // split body into first sentence and body
   133         ListBuffer<DCTree> fs = new ListBuffer<DCTree>();
   134         loop:
   135         for (; body.nonEmpty(); body = body.tail) {
   136             DCTree t = body.head;
   137             switch (t.getKind()) {
   138                 case TEXT:
   139                     String s = ((DCText) t).getBody();
   140                     int i = getSentenceBreak(s);
   141                     if (i > 0) {
   142                         int i0 = i;
   143                         while (i0 > 0 && isWhitespace(s.charAt(i0 - 1)))
   144                             i0--;
   145                         fs.add(m.at(t.pos).Text(s.substring(0, i0)));
   146                         int i1 = i;
   147                         while (i1 < s.length() && isWhitespace(s.charAt(i1)))
   148                             i1++;
   149                         body = body.tail;
   150                         if (i1 < s.length())
   151                             body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1)));
   152                         break loop;
   153                     } else if (body.tail.nonEmpty()) {
   154                         if (isSentenceBreak(body.tail.head)) {
   155                             int i0 = s.length() - 1;
   156                             while (i0 > 0 && isWhitespace(s.charAt(i0)))
   157                                 i0--;
   158                             fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1)));
   159                             body = body.tail;
   160                             break loop;
   161                         }
   162                     }
   163                     break;
   165                 case START_ELEMENT:
   166                 case END_ELEMENT:
   167                     if (isSentenceBreak(t))
   168                         break loop;
   169                     break;
   170             }
   171             fs.add(t);
   172         }
   174         @SuppressWarnings("unchecked")
   175         DCTree first = getFirst(fs.toList(), body, tags);
   176         int pos = (first == null) ? Position.NOPOS : first.pos;
   178         DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags);
   179         return dc;
   180     }
   182     void nextChar() {
   183         ch = buf[bp < buflen ? ++bp : buflen];
   184         switch (ch) {
   185             case '\f': case '\n': case '\r':
   186                 newline = true;
   187         }
   188     }
   190     /**
   191      * Read block content, consisting of text, html and inline tags.
   192      * Terminated by the end of input, or the beginning of the next block tag:
   193      * i.e. @ as the first non-whitespace character on a line.
   194      */
   195     @SuppressWarnings("fallthrough")
   196     protected List<DCTree> blockContent() {
   197         ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
   198         textStart = -1;
   200         loop:
   201         while (bp < buflen) {
   202             switch (ch) {
   203                 case '\n': case '\r': case '\f':
   204                     newline = true;
   205                     // fallthrough
   207                 case ' ': case '\t':
   208                     nextChar();
   209                     break;
   211                 case '&':
   212                     entity(trees);
   213                     break;
   215                 case '<':
   216                     newline = false;
   217                     addPendingText(trees, bp - 1);
   218                     trees.add(html());
   219                     if (textStart == -1) {
   220                         textStart = bp;
   221                         lastNonWhite = -1;
   222                     }
   223                     break;
   225                 case '>':
   226                     newline = false;
   227                     addPendingText(trees, bp - 1);
   228                     trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt"));
   229                     nextChar();
   230                     if (textStart == -1) {
   231                         textStart = bp;
   232                         lastNonWhite = -1;
   233                     }
   234                     break;
   236                 case '{':
   237                     inlineTag(trees);
   238                     break;
   240                 case '@':
   241                     if (newline) {
   242                         addPendingText(trees, lastNonWhite);
   243                         break loop;
   244                     }
   245                     // fallthrough
   247                 default:
   248                     newline = false;
   249                     if (textStart == -1)
   250                         textStart = bp;
   251                     lastNonWhite = bp;
   252                     nextChar();
   253             }
   254         }
   256         if (lastNonWhite != -1)
   257             addPendingText(trees, lastNonWhite);
   259         return trees.toList();
   260     }
   262     /**
   263      * Read a series of block tags, including their content.
   264      * Standard tags parse their content appropriately.
   265      * Non-standard tags are represented by {@link UnknownBlockTag}.
   266      */
   267     protected List<DCTree> blockTags() {
   268         ListBuffer<DCTree> tags = new ListBuffer<DCTree>();
   269         while (ch == '@')
   270             tags.add(blockTag());
   271         return tags.toList();
   272     }
   274     /**
   275      * Read a single block tag, including its content.
   276      * Standard tags parse their content appropriately.
   277      * Non-standard tags are represented by {@link UnknownBlockTag}.
   278      */
   279     protected DCTree blockTag() {
   280         int p = bp;
   281         try {
   282             nextChar();
   283             if (isIdentifierStart(ch)) {
   284                 Name name = readTagName();
   285                 TagParser tp = tagParsers.get(name);
   286                 if (tp == null) {
   287                     List<DCTree> content = blockContent();
   288                     return m.at(p).UnknownBlockTag(name, content);
   289                 } else {
   290                     switch (tp.getKind()) {
   291                         case BLOCK:
   292                             return tp.parse(p);
   293                         case INLINE:
   294                             return erroneous("dc.bad.inline.tag", p);
   295                     }
   296                 }
   297             }
   298             blockContent();
   300             return erroneous("dc.no.tag.name", p);
   301         } catch (ParseException e) {
   302             blockContent();
   303             return erroneous(e.getMessage(), p);
   304         }
   305     }
   307     protected void inlineTag(ListBuffer<DCTree> list) {
   308         newline = false;
   309         nextChar();
   310         if (ch == '@') {
   311             addPendingText(list, bp - 2);
   312             list.add(inlineTag());
   313             textStart = bp;
   314             lastNonWhite = -1;
   315         } else {
   316             if (textStart == -1)
   317                 textStart = bp - 1;
   318             lastNonWhite = bp;
   319         }
   320     }
   322     /**
   323      * Read a single inline tag, including its content.
   324      * Standard tags parse their content appropriately.
   325      * Non-standard tags are represented by {@link UnknownBlockTag}.
   326      * Malformed tags may be returned as {@link Erroneous}.
   327      */
   328     protected DCTree inlineTag() {
   329         int p = bp - 1;
   330         try {
   331             nextChar();
   332             if (isIdentifierStart(ch)) {
   333                 Name name = readTagName();
   334                 skipWhitespace();
   336                 TagParser tp = tagParsers.get(name);
   337                 if (tp == null) {
   338                     DCTree text = inlineText();
   339                     if (text != null) {
   340                         nextChar();
   341                         return m.at(p).UnknownInlineTag(name, List.of(text)).setEndPos(bp);
   342                     }
   343                 } else if (tp.getKind() == TagParser.Kind.INLINE) {
   344                     DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p);
   345                     if (tree != null) {
   346                         return tree.setEndPos(bp);
   347                     }
   348                 } else {
   349                     inlineText(); // skip content
   350                     nextChar();
   351                 }
   352             }
   353             return erroneous("dc.no.tag.name", p);
   354         } catch (ParseException e) {
   355             return erroneous(e.getMessage(), p);
   356         }
   357     }
   359     /**
   360      * Read plain text content of an inline tag.
   361      * Matching pairs of { } are skipped; the text is terminated by the first
   362      * unmatched }. It is an error if the beginning of the next tag is detected.
   363      */
   364     protected DCTree inlineText() throws ParseException {
   365         skipWhitespace();
   366         int pos = bp;
   367         int depth = 1;
   369         loop:
   370         while (bp < buflen) {
   371             switch (ch) {
   372                 case '\n': case '\r': case '\f':
   373                     newline = true;
   374                     break;
   376                 case ' ': case '\t':
   377                     break;
   379                 case '{':
   380                     newline = false;
   381                     lastNonWhite = bp;
   382                     depth++;
   383                     break;
   385                 case '}':
   386                     if (--depth == 0) {
   387                         return m.at(pos).Text(newString(pos, bp));
   388                     }
   389                     newline = false;
   390                     lastNonWhite = bp;
   391                     break;
   393                 case '@':
   394                     if (newline)
   395                         break loop;
   396                     newline = false;
   397                     lastNonWhite = bp;
   398                     break;
   400                 default:
   401                     newline = false;
   402                     lastNonWhite = bp;
   403                     break;
   404             }
   405             nextChar();
   406         }
   407         throw new ParseException("dc.unterminated.inline.tag");
   408     }
   410     /**
   411      * Read Java class name, possibly followed by member
   412      * Matching pairs of < > are skipped. The text is terminated by the first
   413      * unmatched }. It is an error if the beginning of the next tag is detected.
   414      */
   415     // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
   416     // TODO: improve quality of parse to forbid bad constructions.
   417     @SuppressWarnings("fallthrough")
   418     protected DCReference reference(boolean allowMember) throws ParseException {
   419         int pos = bp;
   420         int depth = 0;
   422         // scan to find the end of the signature, by looking for the first
   423         // whitespace not enclosed in () or <>, or the end of the tag
   424         loop:
   425         while (bp < buflen) {
   426             switch (ch) {
   427                 case '\n': case '\r': case '\f':
   428                     newline = true;
   429                     // fallthrough
   431                 case ' ': case '\t':
   432                     if (depth == 0)
   433                         break loop;
   434                     break;
   436                 case '(':
   437                 case '<':
   438                     newline = false;
   439                     depth++;
   440                     break;
   442                 case ')':
   443                 case '>':
   444                     newline = false;
   445                     --depth;
   446                     break;
   448                 case '}':
   449                     if (bp == pos)
   450                         return null;
   451                     newline = false;
   452                     break loop;
   454                 case '@':
   455                     if (newline)
   456                         break loop;
   457                     // fallthrough
   459                 default:
   460                     newline = false;
   462             }
   463             nextChar();
   464         }
   466         if (depth != 0)
   467             throw new ParseException("dc.unterminated.signature");
   469         String sig = newString(pos, bp);
   471         // Break sig apart into qualifiedExpr member paramTypes.
   472         JCTree qualExpr;
   473         Name member;
   474         List<JCTree> paramTypes;
   476         Log.DeferredDiagnosticHandler deferredDiagnosticHandler
   477                 = new Log.DeferredDiagnosticHandler(fac.log);
   479         try {
   480             int hash = sig.indexOf("#");
   481             int lparen = sig.indexOf("(", hash + 1);
   482             if (hash == -1) {
   483                 if (lparen == -1) {
   484                     qualExpr = parseType(sig);
   485                     member = null;
   486                 } else {
   487                     qualExpr = null;
   488                     member = parseMember(sig.substring(0, lparen));
   489                 }
   490             } else {
   491                 qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
   492                 if (lparen == -1)
   493                     member = parseMember(sig.substring(hash + 1));
   494                 else
   495                     member = parseMember(sig.substring(hash + 1, lparen));
   496             }
   498             if (lparen < 0) {
   499                 paramTypes = null;
   500             } else {
   501                 int rparen = sig.indexOf(")", lparen);
   502                 if (rparen != sig.length() - 1)
   503                     throw new ParseException("dc.ref.bad.parens");
   504                 paramTypes = parseParams(sig.substring(lparen + 1, rparen));
   505             }
   507             if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
   508                 throw new ParseException("dc.ref.syntax.error");
   510         } finally {
   511             fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
   512         }
   514         return m.at(pos).Reference(sig, qualExpr, member, paramTypes).setEndPos(bp);
   515     }
   517     JCTree parseType(String s) throws ParseException {
   518         JavacParser p = fac.newParser(s, false, false, false);
   519         JCTree tree = p.parseType();
   520         if (p.token().kind != TokenKind.EOF)
   521             throw new ParseException("dc.ref.unexpected.input");
   522         return tree;
   523     }
   525     Name parseMember(String s) throws ParseException {
   526         JavacParser p = fac.newParser(s, false, false, false);
   527         Name name = p.ident();
   528         if (p.token().kind != TokenKind.EOF)
   529             throw new ParseException("dc.ref.unexpected.input");
   530         return name;
   531     }
   533     List<JCTree> parseParams(String s) throws ParseException {
   534         if (s.trim().isEmpty())
   535             return List.nil();
   537         JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
   538         ListBuffer<JCTree> paramTypes = new ListBuffer<JCTree>();
   539         paramTypes.add(p.parseType());
   541         if (p.token().kind == TokenKind.IDENTIFIER)
   542             p.nextToken();
   544         while (p.token().kind == TokenKind.COMMA) {
   545             p.nextToken();
   546             paramTypes.add(p.parseType());
   548             if (p.token().kind == TokenKind.IDENTIFIER)
   549                 p.nextToken();
   550         }
   552         if (p.token().kind != TokenKind.EOF)
   553             throw new ParseException("dc.ref.unexpected.input");
   555         return paramTypes.toList();
   556     }
   558     /**
   559      * Read Java identifier
   560      * Matching pairs of { } are skipped; the text is terminated by the first
   561      * unmatched }. It is an error if the beginning of the next tag is detected.
   562      */
   563     @SuppressWarnings("fallthrough")
   564     protected DCIdentifier identifier() throws ParseException {
   565         skipWhitespace();
   566         int pos = bp;
   568         if (isJavaIdentifierStart(ch)) {
   569             Name name = readJavaIdentifier();
   570             return m.at(pos).Identifier(name);
   571         }
   573         throw new ParseException("dc.identifier.expected");
   574     }
   576     /**
   577      * Read a quoted string.
   578      * It is an error if the beginning of the next tag is detected.
   579      */
   580     @SuppressWarnings("fallthrough")
   581     protected DCText quotedString() {
   582         int pos = bp;
   583         nextChar();
   585         loop:
   586         while (bp < buflen) {
   587             switch (ch) {
   588                 case '\n': case '\r': case '\f':
   589                     newline = true;
   590                     break;
   592                 case ' ': case '\t':
   593                     break;
   595                 case '"':
   596                     nextChar();
   597                     // trim trailing white-space?
   598                     return m.at(pos).Text(newString(pos, bp));
   600                 case '@':
   601                     if (newline)
   602                         break loop;
   604             }
   605             nextChar();
   606         }
   607         return null;
   608     }
   610     /**
   611      * Read general text content of an inline tag, including HTML entities and elements.
   612      * Matching pairs of { } are skipped; the text is terminated by the first
   613      * unmatched }. It is an error if the beginning of the next tag is detected.
   614      */
   615     @SuppressWarnings("fallthrough")
   616     protected List<DCTree> inlineContent() {
   617         ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
   619         skipWhitespace();
   620         int pos = bp;
   621         int depth = 1;
   622         textStart = -1;
   624         loop:
   625         while (bp < buflen) {
   627             switch (ch) {
   628                 case '\n': case '\r': case '\f':
   629                     newline = true;
   630                     // fall through
   632                 case ' ': case '\t':
   633                     nextChar();
   634                     break;
   636                 case '&':
   637                     entity(trees);
   638                     break;
   640                 case '<':
   641                     newline = false;
   642                     addPendingText(trees, bp - 1);
   643                     trees.add(html());
   644                     break;
   646                 case '{':
   647                     newline = false;
   648                     depth++;
   649                     nextChar();
   650                     break;
   652                 case '}':
   653                     newline = false;
   654                     if (--depth == 0) {
   655                         addPendingText(trees, bp - 1);
   656                         nextChar();
   657                         return trees.toList();
   658                     }
   659                     nextChar();
   660                     break;
   662                 case '@':
   663                     if (newline)
   664                         break loop;
   665                     // fallthrough
   667                 default:
   668                     if (textStart == -1)
   669                         textStart = bp;
   670                     nextChar();
   671                     break;
   672             }
   673         }
   675         return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos));
   676     }
   678     protected void entity(ListBuffer<DCTree> list) {
   679         newline = false;
   680         addPendingText(list, bp - 1);
   681         list.add(entity());
   682         if (textStart == -1) {
   683             textStart = bp;
   684             lastNonWhite = -1;
   685         }
   686     }
   688     /**
   689      * Read an HTML entity.
   690      * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
   691      */
   692     protected DCTree entity() {
   693         int p = bp;
   694         nextChar();
   695         Name name = null;
   696         boolean checkSemi = false;
   697         if (ch == '#') {
   698             int namep = bp;
   699             nextChar();
   700             if (isDecimalDigit(ch)) {
   701                 nextChar();
   702                 while (isDecimalDigit(ch))
   703                     nextChar();
   704                 name = names.fromChars(buf, namep, bp - namep);
   705             } else if (ch == 'x' || ch == 'X') {
   706                 nextChar();
   707                 if (isHexDigit(ch)) {
   708                     nextChar();
   709                     while (isHexDigit(ch))
   710                         nextChar();
   711                     name = names.fromChars(buf, namep, bp - namep);
   712                 }
   713             }
   714         } else if (isIdentifierStart(ch)) {
   715             name = readIdentifier();
   716         }
   718         if (name == null)
   719             return erroneous("dc.bad.entity", p);
   720         else {
   721             if (ch != ';')
   722                 return erroneous("dc.missing.semicolon", p);
   723             nextChar();
   724             return m.at(p).Entity(name);
   725         }
   726     }
   728     /**
   729      * Read the start or end of an HTML tag, or an HTML comment
   730      * {@literal <identifier attrs> } or {@literal </identifier> }
   731      */
   732     protected DCTree html() {
   733         int p = bp;
   734         nextChar();
   735         if (isIdentifierStart(ch)) {
   736             Name name = readIdentifier();
   737             List<DCTree> attrs = htmlAttrs();
   738             if (attrs != null) {
   739                 boolean selfClosing = false;
   740                 if (ch == '/') {
   741                     nextChar();
   742                     selfClosing = true;
   743                 }
   744                 if (ch == '>') {
   745                     nextChar();
   746                     return m.at(p).StartElement(name, attrs, selfClosing).setEndPos(bp);
   747                 }
   748             }
   749         } else if (ch == '/') {
   750             nextChar();
   751             if (isIdentifierStart(ch)) {
   752                 Name name = readIdentifier();
   753                 skipWhitespace();
   754                 if (ch == '>') {
   755                     nextChar();
   756                     return m.at(p).EndElement(name);
   757                 }
   758             }
   759         } else if (ch == '!') {
   760             nextChar();
   761             if (ch == '-') {
   762                 nextChar();
   763                 if (ch == '-') {
   764                     nextChar();
   765                     while (bp < buflen) {
   766                         int dash = 0;
   767                         while (ch == '-') {
   768                             dash++;
   769                             nextChar();
   770                         }
   771                         // strictly speaking, a comment should not contain "--"
   772                         // so dash > 2 is an error, dash == 2 implies ch == '>'
   773                         if (dash >= 2 && ch == '>') {
   774                             nextChar();
   775                             return m.at(p).Comment(newString(p, bp));
   776                         }
   778                         nextChar();
   779                     }
   780                 }
   781             }
   782         }
   784         bp = p + 1;
   785         ch = buf[bp];
   786         return erroneous("dc.malformed.html", p);
   787     }
   789     /**
   790      * Read a series of HTML attributes, terminated by {@literal > }.
   791      * Each attribute is of the form {@literal identifier[=value] }.
   792      * "value" may be unquoted, single-quoted, or double-quoted.
   793      */
   794     protected List<DCTree> htmlAttrs() {
   795         ListBuffer<DCTree> attrs = new ListBuffer<DCTree>();
   796         skipWhitespace();
   798         loop:
   799         while (isIdentifierStart(ch)) {
   800             int namePos = bp;
   801             Name name = readIdentifier();
   802             skipWhitespace();
   803             List<DCTree> value = null;
   804             ValueKind vkind = ValueKind.EMPTY;
   805             if (ch == '=') {
   806                 ListBuffer<DCTree> v = new ListBuffer<DCTree>();
   807                 nextChar();
   808                 skipWhitespace();
   809                 if (ch == '\'' || ch == '"') {
   810                     vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
   811                     char quote = ch;
   812                     nextChar();
   813                     textStart = bp;
   814                     while (bp < buflen && ch != quote) {
   815                         if (newline && ch == '@') {
   816                             attrs.add(erroneous("dc.unterminated.string", namePos));
   817                             // No point trying to read more.
   818                             // In fact, all attrs get discarded by the caller
   819                             // and superseded by a malformed.html node because
   820                             // the html tag itself is not terminated correctly.
   821                             break loop;
   822                         }
   823                         attrValueChar(v);
   824                     }
   825                     addPendingText(v, bp - 1);
   826                     nextChar();
   827                 } else {
   828                     vkind = ValueKind.UNQUOTED;
   829                     textStart = bp;
   830                     while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
   831                         attrValueChar(v);
   832                     }
   833                     addPendingText(v, bp - 1);
   834                 }
   835                 skipWhitespace();
   836                 value = v.toList();
   837             }
   838             DCAttribute attr = m.at(namePos).Attribute(name, vkind, value);
   839             attrs.add(attr);
   840         }
   842         return attrs.toList();
   843     }
   845     protected void attrValueChar(ListBuffer<DCTree> list) {
   846         switch (ch) {
   847             case '&':
   848                 entity(list);
   849                 break;
   851             case '{':
   852                 inlineTag(list);
   853                 break;
   855             default:
   856                 nextChar();
   857         }
   858     }
   860     protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
   861         if (textStart != -1) {
   862             if (textStart <= textEnd) {
   863                 list.add(m.at(textStart).Text(newString(textStart, textEnd + 1)));
   864             }
   865             textStart = -1;
   866         }
   867     }
   869     protected DCErroneous erroneous(String code, int pos) {
   870         int i = bp - 1;
   871         loop:
   872         while (i > pos) {
   873             switch (buf[i]) {
   874                 case '\f': case '\n': case '\r':
   875                     newline = true;
   876                     break;
   877                 case '\t': case ' ':
   878                     break;
   879                 default:
   880                     break loop;
   881             }
   882             i--;
   883         }
   884         textStart = -1;
   885         return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code);
   886     }
   888     @SuppressWarnings("unchecked")
   889     <T> T getFirst(List<T>... lists) {
   890         for (List<T> list: lists) {
   891             if (list.nonEmpty())
   892                 return list.head;
   893         }
   894         return null;
   895     }
   897     protected boolean isIdentifierStart(char ch) {
   898         return Character.isUnicodeIdentifierStart(ch);
   899     }
   901     protected Name readIdentifier() {
   902         int start = bp;
   903         nextChar();
   904         while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
   905             nextChar();
   906         return names.fromChars(buf, start, bp - start);
   907     }
   909     protected Name readTagName() {
   910         int start = bp;
   911         nextChar();
   912         while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '.'))
   913             nextChar();
   914         return names.fromChars(buf, start, bp - start);
   915     }
   917     protected boolean isJavaIdentifierStart(char ch) {
   918         return Character.isJavaIdentifierStart(ch);
   919     }
   921     protected Name readJavaIdentifier() {
   922         int start = bp;
   923         nextChar();
   924         while (bp < buflen && Character.isJavaIdentifierPart(ch))
   925             nextChar();
   926         return names.fromChars(buf, start, bp - start);
   927     }
   929     protected boolean isDecimalDigit(char ch) {
   930         return ('0' <= ch && ch <= '9');
   931     }
   933     protected boolean isHexDigit(char ch) {
   934         return ('0' <= ch && ch <= '9')
   935                 || ('a' <= ch && ch <= 'f')
   936                 || ('A' <= ch && ch <= 'F');
   937     }
   939     protected boolean isUnquotedAttrValueTerminator(char ch) {
   940         switch (ch) {
   941             case '\f': case '\n': case '\r': case '\t':
   942             case ' ':
   943             case '"': case '\'': case '`':
   944             case '=': case '<': case '>':
   945                 return true;
   946             default:
   947                 return false;
   948         }
   949     }
   951     protected boolean isWhitespace(char ch) {
   952         return Character.isWhitespace(ch);
   953     }
   955     protected void skipWhitespace() {
   956         while (isWhitespace(ch))
   957             nextChar();
   958     }
   960     protected int getSentenceBreak(String s) {
   961         if (sentenceBreaker != null) {
   962             sentenceBreaker.setText(s);
   963             int i = sentenceBreaker.next();
   964             return (i == s.length()) ? -1 : i;
   965         }
   967         // scan for period followed by whitespace
   968         boolean period = false;
   969         for (int i = 0; i < s.length(); i++) {
   970             switch (s.charAt(i)) {
   971                 case '.':
   972                     period = true;
   973                     break;
   975                 case ' ':
   976                 case '\f':
   977                 case '\n':
   978                 case '\r':
   979                 case '\t':
   980                     if (period)
   981                         return i;
   982                     break;
   984                 default:
   985                     period = false;
   986                     break;
   987             }
   988         }
   989         return -1;
   990     }
   993     Set<String> htmlBlockTags = new HashSet<String>(Arrays.asList(
   994                     "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre"));
   996     protected boolean isSentenceBreak(Name n) {
   997         return htmlBlockTags.contains(StringUtils.toLowerCase(n.toString()));
   998     }
  1000     protected boolean isSentenceBreak(DCTree t) {
  1001         switch (t.getKind()) {
  1002             case START_ELEMENT:
  1003                 return isSentenceBreak(((DCStartElement) t).getName());
  1005             case END_ELEMENT:
  1006                 return isSentenceBreak(((DCEndElement) t).getName());
  1008         return false;
  1011     /**
  1012      * @param start position of first character of string
  1013      * @param end position of character beyond last character to be included
  1014      */
  1015     String newString(int start, int end) {
  1016         return new String(buf, start, end - start);
  1019     static abstract class TagParser {
  1020         enum Kind { INLINE, BLOCK }
  1022         Kind kind;
  1023         DCTree.Kind treeKind;
  1025         TagParser(Kind k, DCTree.Kind tk) {
  1026             kind = k;
  1027             treeKind = tk;
  1030         Kind getKind() {
  1031             return kind;
  1034         DCTree.Kind getTreeKind() {
  1035             return treeKind;
  1038         abstract DCTree parse(int pos) throws ParseException;
  1041     /**
  1042      * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>
  1043      */
  1044     private void initTagParsers() {
  1045         TagParser[] parsers = {
  1046             // @author name-text
  1047             new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
  1048                 public DCTree parse(int pos) {
  1049                     List<DCTree> name = blockContent();
  1050                     return m.at(pos).Author(name);
  1052             },
  1054             // {@code text}
  1055             new TagParser(Kind.INLINE, DCTree.Kind.CODE) {
  1056                 public DCTree parse(int pos) throws ParseException {
  1057                     DCTree text = inlineText();
  1058                     nextChar();
  1059                     return m.at(pos).Code((DCText) text);
  1061             },
  1063             // @deprecated deprecated-text
  1064             new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
  1065                 public DCTree parse(int pos) {
  1066                     List<DCTree> reason = blockContent();
  1067                     return m.at(pos).Deprecated(reason);
  1069             },
  1071             // {@docRoot}
  1072             new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
  1073                 public DCTree parse(int pos) throws ParseException {
  1074                     if (ch == '}') {
  1075                         nextChar();
  1076                         return m.at(pos).DocRoot();
  1078                     inlineText(); // skip unexpected content
  1079                     nextChar();
  1080                     throw new ParseException("dc.unexpected.content");
  1082             },
  1084             // @exception class-name description
  1085             new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
  1086                 public DCTree parse(int pos) throws ParseException {
  1087                     skipWhitespace();
  1088                     DCReference ref = reference(false);
  1089                     List<DCTree> description = blockContent();
  1090                     return m.at(pos).Exception(ref, description);
  1092             },
  1094             // {@inheritDoc}
  1095             new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
  1096                 public DCTree parse(int pos) throws ParseException {
  1097                     if (ch == '}') {
  1098                         nextChar();
  1099                         return m.at(pos).InheritDoc();
  1101                     inlineText(); // skip unexpected content
  1102                     nextChar();
  1103                     throw new ParseException("dc.unexpected.content");
  1105             },
  1107             // {@link package.class#member label}
  1108             new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
  1109                 public DCTree parse(int pos) throws ParseException {
  1110                     DCReference ref = reference(true);
  1111                     List<DCTree> label = inlineContent();
  1112                     return m.at(pos).Link(ref, label);
  1114             },
  1116             // {@linkplain package.class#member label}
  1117             new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
  1118                 public DCTree parse(int pos) throws ParseException {
  1119                     DCReference ref = reference(true);
  1120                     List<DCTree> label = inlineContent();
  1121                     return m.at(pos).LinkPlain(ref, label);
  1123             },
  1125             // {@literal text}
  1126             new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) {
  1127                 public DCTree parse(int pos) throws ParseException {
  1128                     DCTree text = inlineText();
  1129                     nextChar();
  1130                     return m.at(pos).Literal((DCText) text);
  1132             },
  1134             // @param parameter-name description
  1135             new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
  1136                 public DCTree parse(int pos) throws ParseException {
  1137                     skipWhitespace();
  1139                     boolean typaram = false;
  1140                     if (ch == '<') {
  1141                         typaram = true;
  1142                         nextChar();
  1145                     DCIdentifier id = identifier();
  1147                     if (typaram) {
  1148                         if (ch != '>')
  1149                             throw new ParseException("dc.gt.expected");
  1150                         nextChar();
  1153                     skipWhitespace();
  1154                     List<DCTree> desc = blockContent();
  1155                     return m.at(pos).Param(typaram, id, desc);
  1157             },
  1159             // @return description
  1160             new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
  1161                 public DCTree parse(int pos) {
  1162                     List<DCTree> description = blockContent();
  1163                     return m.at(pos).Return(description);
  1165             },
  1167             // @see reference | quoted-string | HTML
  1168             new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
  1169                 public DCTree parse(int pos) throws ParseException {
  1170                     skipWhitespace();
  1171                     switch (ch) {
  1172                         case '"':
  1173                             DCText string = quotedString();
  1174                             if (string != null) {
  1175                                 skipWhitespace();
  1176                                 if (ch == '@'
  1177                                         || ch == EOI && bp == buf.length - 1) {
  1178                                     return m.at(pos).See(List.<DCTree>of(string));
  1181                             break;
  1183                         case '<':
  1184                             List<DCTree> html = blockContent();
  1185                             if (html != null)
  1186                                 return m.at(pos).See(html);
  1187                             break;
  1189                         case '@':
  1190                             if (newline)
  1191                                 throw new ParseException("dc.no.content");
  1192                             break;
  1194                         case EOI:
  1195                             if (bp == buf.length - 1)
  1196                                 throw new ParseException("dc.no.content");
  1197                             break;
  1199                         default:
  1200                             if (isJavaIdentifierStart(ch) || ch == '#') {
  1201                                 DCReference ref = reference(true);
  1202                                 List<DCTree> description = blockContent();
  1203                                 return m.at(pos).See(description.prepend(ref));
  1206                     throw new ParseException("dc.unexpected.content");
  1208             },
  1210             // @serialData data-description
  1211             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
  1212                 public DCTree parse(int pos) {
  1213                     List<DCTree> description = blockContent();
  1214                     return m.at(pos).SerialData(description);
  1216             },
  1218             // @serialField field-name field-type description
  1219             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
  1220                 public DCTree parse(int pos) throws ParseException {
  1221                     skipWhitespace();
  1222                     DCIdentifier name = identifier();
  1223                     skipWhitespace();
  1224                     DCReference type = reference(false);
  1225                     List<DCTree> description = null;
  1226                     if (isWhitespace(ch)) {
  1227                         skipWhitespace();
  1228                         description = blockContent();
  1230                     return m.at(pos).SerialField(name, type, description);
  1232             },
  1234             // @serial field-description | include | exclude
  1235             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
  1236                 public DCTree parse(int pos) {
  1237                     List<DCTree> description = blockContent();
  1238                     return m.at(pos).Serial(description);
  1240             },
  1242             // @since since-text
  1243             new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
  1244                 public DCTree parse(int pos) {
  1245                     List<DCTree> description = blockContent();
  1246                     return m.at(pos).Since(description);
  1248             },
  1250             // @throws class-name description
  1251             new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
  1252                 public DCTree parse(int pos) throws ParseException {
  1253                     skipWhitespace();
  1254                     DCReference ref = reference(false);
  1255                     List<DCTree> description = blockContent();
  1256                     return m.at(pos).Throws(ref, description);
  1258             },
  1260             // {@value package.class#field}
  1261             new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
  1262                 public DCTree parse(int pos) throws ParseException {
  1263                     DCReference ref = reference(true);
  1264                     skipWhitespace();
  1265                     if (ch == '}') {
  1266                         nextChar();
  1267                         return m.at(pos).Value(ref);
  1269                     nextChar();
  1270                     throw new ParseException("dc.unexpected.content");
  1272             },
  1274             // @version version-text
  1275             new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
  1276                 public DCTree parse(int pos) {
  1277                     List<DCTree> description = blockContent();
  1278                     return m.at(pos).Version(description);
  1280             },
  1281         };
  1283         tagParsers = new HashMap<Name,TagParser>();
  1284         for (TagParser p: parsers)
  1285             tagParsers.put(names.fromString(p.getTreeKind().tagName), p);

mercurial