src/share/classes/com/sun/tools/javac/parser/DocCommentParser.java

Sun, 17 Feb 2013 16:44:55 -0500

author
dholmes
date
Sun, 17 Feb 2013 16:44:55 -0500
changeset 1571
af8417e590f4
parent 1529
950d8195a5a4
child 1704
ed918a442b83
permissions
-rw-r--r--

Merge

     1 /*
     2  * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import java.text.BreakIterator;
    29 import java.util.Arrays;
    30 import java.util.HashMap;
    31 import java.util.HashSet;
    32 import java.util.Locale;
    33 import java.util.Map;
    34 import java.util.Set;
    36 import com.sun.source.doctree.AttributeTree.ValueKind;
    37 import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
    38 import com.sun.tools.javac.parser.Tokens.Comment;
    39 import com.sun.tools.javac.parser.Tokens.TokenKind;
    40 import com.sun.tools.javac.tree.DCTree;
    41 import com.sun.tools.javac.tree.DCTree.DCAttribute;
    42 import com.sun.tools.javac.tree.DCTree.DCDocComment;
    43 import com.sun.tools.javac.tree.DCTree.DCEndElement;
    44 import com.sun.tools.javac.tree.DCTree.DCErroneous;
    45 import com.sun.tools.javac.tree.DCTree.DCIdentifier;
    46 import com.sun.tools.javac.tree.DCTree.DCReference;
    47 import com.sun.tools.javac.tree.DCTree.DCStartElement;
    48 import com.sun.tools.javac.tree.DCTree.DCText;
    49 import com.sun.tools.javac.tree.DocTreeMaker;
    50 import com.sun.tools.javac.tree.JCTree;
    51 import com.sun.tools.javac.util.DiagnosticSource;
    52 import com.sun.tools.javac.util.List;
    53 import com.sun.tools.javac.util.ListBuffer;
    54 import com.sun.tools.javac.util.Log;
    55 import com.sun.tools.javac.util.Name;
    56 import com.sun.tools.javac.util.Names;
    57 import com.sun.tools.javac.util.Options;
    58 import com.sun.tools.javac.util.Position;
    59 import static com.sun.tools.javac.util.LayoutCharacters.*;
    61 /**
    62  *
    63  *  <p><b>This is NOT part of any supported API.
    64  *  If you write code that depends on this, you do so at your own risk.
    65  *  This code and its internal interfaces are subject to change or
    66  *  deletion without notice.</b>
    67  */
    68 public class DocCommentParser {
    69     static class ParseException extends Exception {
    70         private static final long serialVersionUID = 0;
    71         ParseException(String key) {
    72             super(key);
    73         }
    74     }
    /** Factory that created this parser; read for names, tree maker, locale, options, log and nested Java parsers. */
    76     final ParserFactory fac;
    /** Diagnostic source passed to the tree maker when an erroneous node is created. */
    77     final DiagnosticSource diagSource;
    /** The comment whose text is parsed by {@code parse()}. */
    78     final Comment comment;
    /** Tree maker (taken from the factory) used to build the doc tree nodes. */
    79     final DocTreeMaker m;
    /** Name table (taken from the factory) used to create names from buffer characters. */
    80     final Names names;
    /**
     * Sentence breaker; only assigned by the constructor when the "breakIterator"
     * option is set or the locale's language is not English.
     */
    82     BreakIterator sentenceBreaker;
    84     /** The input buffer, index of most recent character read,
    85      *  index of one past last character in buffer.
    86      */
    87     protected char[] buf;
    88     protected int bp;
    89     protected int buflen;
    91     /** The current character.
    92      */
    93     protected char ch;
    /** Index in {@code buf} where the pending (not yet emitted) text starts, or -1 if no text is pending. */
    95     int textStart = -1;
    /** Index of the last non-whitespace character recorded for the pending text, or -1 if none. */
    96     int lastNonWhite = -1;
    /**
     * Set when a line terminator is read and cleared by the scanners when they
     * see other non-whitespace content; an '@' seen while this is true is
     * treated as the start of a block tag.
     */
    97     boolean newline = true;
    /** Parsers for the known tags, keyed by tag name; presumably filled in by {@code initTagParsers()} (not shown here). */
    99     Map<Name, TagParser> tagParsers;
   101     DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
   102         this.fac = fac;
   103         this.diagSource = diagSource;
   104         this.comment = comment;
   105         names = fac.names;
   106         m = fac.docTreeMaker;
   108         Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale;
   110         Options options = fac.options;
   111         boolean useBreakIterator = options.isSet("breakIterator");
   112         if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage()))
   113             sentenceBreaker = BreakIterator.getSentenceInstance(locale);
   115         initTagParsers();
   116     }
   118     DCDocComment parse() {
   119         String c = comment.getText();
   120         buf = new char[c.length() + 1];
   121         c.getChars(0, c.length(), buf, 0);
   122         buf[buf.length - 1] = EOI;
   123         buflen = buf.length - 1;
   124         bp = -1;
   125         nextChar();
   127         List<DCTree> body = blockContent();
   128         List<DCTree> tags = blockTags();
   130         // split body into first sentence and body
   131         ListBuffer<DCTree> fs = new ListBuffer<DCTree>();
   132         loop:
   133         for (; body.nonEmpty(); body = body.tail) {
   134             DCTree t = body.head;
   135             switch (t.getKind()) {
   136                 case TEXT:
   137                     String s = ((DCText) t).getBody();
   138                     int i = getSentenceBreak(s);
   139                     if (i > 0) {
   140                         int i0 = i;
   141                         while (i0 > 0 && isWhitespace(s.charAt(i0 - 1)))
   142                             i0--;
   143                         fs.add(m.at(t.pos).Text(s.substring(0, i0)));
   144                         int i1 = i;
   145                         while (i1 < s.length() && isWhitespace(s.charAt(i1)))
   146                             i1++;
   147                         body = body.tail;
   148                         if (i1 < s.length())
   149                             body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1)));
   150                         break loop;
   151                     } else if (body.tail.nonEmpty()) {
   152                         if (isSentenceBreak(body.tail.head)) {
   153                             int i0 = s.length() - 1;
   154                             while (i0 > 0 && isWhitespace(s.charAt(i0)))
   155                                 i0--;
   156                             fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1)));
   157                             body = body.tail;
   158                             break loop;
   159                         }
   160                     }
   161                     break;
   163                 case START_ELEMENT:
   164                 case END_ELEMENT:
   165                     if (isSentenceBreak(t))
   166                         break loop;
   167                     break;
   168             }
   169             fs.add(t);
   170         }
   172         @SuppressWarnings("unchecked")
   173         DCTree first = getFirst(fs.toList(), body, tags);
   174         int pos = (first == null) ? Position.NOPOS : first.pos;
   176         DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags);
   177         return dc;
   178     }
   180     void nextChar() {
   181         ch = buf[bp < buflen ? ++bp : buflen];
   182         switch (ch) {
   183             case '\f': case '\n': case '\r':
   184                 newline = true;
   185         }
   186     }
   188     /**
   189      * Read block content, consisting of text, html and inline tags.
   190      * Terminated by the end of input, or the beginning of the next block tag:
   191      * i.e. @ as the first non-whitespace character on a line.
   192      */
   193     @SuppressWarnings("fallthrough")
   194     protected List<DCTree> blockContent() {
   195         ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
   196         textStart = -1;
   198         loop:
   199         while (bp < buflen) {
   200             switch (ch) {
   201                 case '\n': case '\r': case '\f':
   202                     newline = true;
   203                     // fallthrough
   205                 case ' ': case '\t':
   206                     nextChar();
   207                     break;
   209                 case '&':
   210                     entity(trees);
   211                     break;
   213                 case '<':
   214                     newline = false;
   215                     addPendingText(trees, bp - 1);
   216                     trees.add(html());
   217                     if (textStart == -1) {
   218                         textStart = bp;
   219                         lastNonWhite = -1;
   220                     }
   221                     break;
   223                 case '>':
   224                     newline = false;
   225                     addPendingText(trees, bp - 1);
   226                     trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt"));
   227                     nextChar();
   228                     if (textStart == -1) {
   229                         textStart = bp;
   230                         lastNonWhite = -1;
   231                     }
   232                     break;
   234                 case '{':
   235                     inlineTag(trees);
   236                     break;
   238                 case '@':
   239                     if (newline) {
   240                         addPendingText(trees, lastNonWhite);
   241                         break loop;
   242                     }
   243                     // fallthrough
   245                 default:
   246                     newline = false;
   247                     if (textStart == -1)
   248                         textStart = bp;
   249                     lastNonWhite = bp;
   250                     nextChar();
   251             }
   252         }
   254         if (lastNonWhite != -1)
   255             addPendingText(trees, lastNonWhite);
   257         return trees.toList();
   258     }
   260     /**
   261      * Read a series of block tags, including their content.
   262      * Standard tags parse their content appropriately.
   263      * Non-standard tags are represented by {@link UnknownBlockTag}.
   264      */
   265     protected List<DCTree> blockTags() {
   266         ListBuffer<DCTree> tags = new ListBuffer<DCTree>();
   267         while (ch == '@')
   268             tags.add(blockTag());
   269         return tags.toList();
   270     }
   272     /**
   273      * Read a single block tag, including its content.
   274      * Standard tags parse their content appropriately.
   275      * Non-standard tags are represented by {@link UnknownBlockTag}.
   276      */
   277     protected DCTree blockTag() {
   278         int p = bp;
   279         try {
   280             nextChar();
   281             if (isIdentifierStart(ch)) {
   282                 Name name = readIdentifier();
   283                 TagParser tp = tagParsers.get(name);
   284                 if (tp == null) {
   285                     List<DCTree> content = blockContent();
   286                     return m.at(p).UnknownBlockTag(name, content);
   287                 } else {
   288                     switch (tp.getKind()) {
   289                         case BLOCK:
   290                             return tp.parse(p);
   291                         case INLINE:
   292                             return erroneous("dc.bad.inline.tag", p);
   293                     }
   294                 }
   295             }
   296             blockContent();
   298             return erroneous("dc.no.tag.name", p);
   299         } catch (ParseException e) {
   300             blockContent();
   301             return erroneous(e.getMessage(), p);
   302         }
   303     }
   305     protected void inlineTag(ListBuffer<DCTree> list) {
   306         newline = false;
   307         nextChar();
   308         if (ch == '@') {
   309             addPendingText(list, bp - 2);
   310             list.add(inlineTag());
   311             textStart = bp;
   312             lastNonWhite = -1;
   313         } else {
   314             if (textStart == -1)
   315                 textStart = bp - 1;
   316             lastNonWhite = bp;
   317         }
   318     }
   320     /**
   321      * Read a single inline tag, including its content.
   322      * Standard tags parse their content appropriately.
   323      * Non-standard tags are represented by {@link UnknownBlockTag}.
   324      * Malformed tags may be returned as {@link Erroneous}.
   325      */
   326     protected DCTree inlineTag() {
   327         int p = bp - 1;
   328         try {
   329             nextChar();
   330             if (isIdentifierStart(ch)) {
   331                 Name name = readIdentifier();
   332                 skipWhitespace();
   334                 TagParser tp = tagParsers.get(name);
   335                 if (tp == null) {
   336                     DCTree text = inlineText();
   337                     if (text != null) {
   338                         nextChar();
   339                         return m.at(p).UnknownInlineTag(name, List.of(text));
   340                     }
   341                 } else if (tp.getKind() == TagParser.Kind.INLINE) {
   342                     DCTree tree =  tp.parse(p);
   343                     if (tree != null) {
   344                         return tree;
   345                     }
   346                 } else {
   347                     inlineText(); // skip content
   348                     nextChar();
   349                 }
   350             }
   351             return erroneous("dc.no.tag.name", p);
   352         } catch (ParseException e) {
   353             return erroneous(e.getMessage(), p);
   354         }
   355     }
   357     /**
   358      * Read plain text content of an inline tag.
   359      * Matching pairs of { } are skipped; the text is terminated by the first
   360      * unmatched }. It is an error if the beginning of the next tag is detected.
   361      */
   362     protected DCTree inlineText() throws ParseException {
   363         skipWhitespace();
   364         int pos = bp;
   365         int depth = 1;
   367         loop:
   368         while (bp < buflen) {
   369             switch (ch) {
   370                 case '\n': case '\r': case '\f':
   371                     newline = true;
   372                     break;
   374                 case ' ': case '\t':
   375                     break;
   377                 case '{':
   378                     newline = false;
   379                     lastNonWhite = bp;
   380                     depth++;
   381                     break;
   383                 case '}':
   384                     if (--depth == 0) {
   385                         return m.at(pos).Text(newString(pos, bp));
   386                     }
   387                     newline = false;
   388                     lastNonWhite = bp;
   389                     break;
   391                 case '@':
   392                     if (newline)
   393                         break loop;
   394                     newline = false;
   395                     lastNonWhite = bp;
   396                     break;
   398                 default:
   399                     newline = false;
   400                     lastNonWhite = bp;
   401                     break;
   402             }
   403             nextChar();
   404         }
   405         throw new ParseException("dc.unterminated.inline.tag");
   406     }
   408     /**
   409      * Read Java class name, possibly followed by member
   410      * Matching pairs of < > are skipped. The text is terminated by the first
   411      * unmatched }. It is an error if the beginning of the next tag is detected.
   412      */
   413     // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
   414     // TODO: improve quality of parse to forbid bad constructions.
   415     @SuppressWarnings("fallthrough")
   416     protected DCReference reference(boolean allowMember) throws ParseException {
   417         int pos = bp;
   418         int depth = 0;
   420         // scan to find the end of the signature, by looking for the first
   421         // whitespace not enclosed in () or <>, or the end of the tag
   422         loop:
   423         while (bp < buflen) {
   424             switch (ch) {
   425                 case '\n': case '\r': case '\f':
   426                     newline = true;
   427                     // fallthrough
   429                 case ' ': case '\t':
   430                     if (depth == 0)
   431                         break loop;
   432                     break;
   434                 case '(':
   435                 case '<':
   436                     newline = false;
   437                     depth++;
   438                     break;
   440                 case ')':
   441                 case '>':
   442                     newline = false;
   443                     --depth;
   444                     break;
   446                 case '}':
   447                     if (bp == pos)
   448                         return null;
   449                     newline = false;
   450                     break loop;
   452                 case '@':
   453                     if (newline)
   454                         break loop;
   455                     // fallthrough
   457                 default:
   458                     newline = false;
   460             }
   461             nextChar();
   462         }
   464         if (depth != 0)
   465             throw new ParseException("dc.unterminated.signature");
   467         String sig = newString(pos, bp);
   469         // Break sig apart into qualifiedExpr member paramTypes.
   470         JCTree qualExpr;
   471         Name member;
   472         List<JCTree> paramTypes;
   474         Log.DeferredDiagnosticHandler deferredDiagnosticHandler
   475                 = new Log.DeferredDiagnosticHandler(fac.log);
   477         try {
   478             int hash = sig.indexOf("#");
   479             int lparen = sig.indexOf("(", hash + 1);
   480             if (hash == -1) {
   481                 if (lparen == -1) {
   482                     qualExpr = parseType(sig);
   483                     member = null;
   484                 } else {
   485                     qualExpr = null;
   486                     member = parseMember(sig.substring(0, lparen));
   487                 }
   488             } else {
   489                 qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
   490                 if (lparen == -1)
   491                     member = parseMember(sig.substring(hash + 1));
   492                 else
   493                     member = parseMember(sig.substring(hash + 1, lparen));
   494             }
   496             if (lparen < 0) {
   497                 paramTypes = null;
   498             } else {
   499                 int rparen = sig.indexOf(")", lparen);
   500                 if (rparen != sig.length() - 1)
   501                     throw new ParseException("dc.ref.bad.parens");
   502                 paramTypes = parseParams(sig.substring(lparen + 1, rparen));
   503             }
   505             if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
   506                 throw new ParseException("dc.ref.syntax.error");
   508         } finally {
   509             fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
   510         }
   512         return m.at(pos).Reference(sig, qualExpr, member, paramTypes);
   513     }
   515     JCTree parseType(String s) throws ParseException {
   516         JavacParser p = fac.newParser(s, false, false, false);
   517         JCTree tree = p.parseType();
   518         if (p.token().kind != TokenKind.EOF)
   519             throw new ParseException("dc.ref.unexpected.input");
   520         return tree;
   521     }
   523     Name parseMember(String s) throws ParseException {
   524         JavacParser p = fac.newParser(s, false, false, false);
   525         Name name = p.ident();
   526         if (p.token().kind != TokenKind.EOF)
   527             throw new ParseException("dc.ref.unexpected.input");
   528         return name;
   529     }
   531     List<JCTree> parseParams(String s) throws ParseException {
   532         if (s.trim().isEmpty())
   533             return List.nil();
   535         JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
   536         ListBuffer<JCTree> paramTypes = new ListBuffer<JCTree>();
   537         paramTypes.add(p.parseType());
   539         if (p.token().kind == TokenKind.IDENTIFIER)
   540             p.nextToken();
   542         while (p.token().kind == TokenKind.COMMA) {
   543             p.nextToken();
   544             paramTypes.add(p.parseType());
   546             if (p.token().kind == TokenKind.IDENTIFIER)
   547                 p.nextToken();
   548         }
   550         if (p.token().kind != TokenKind.EOF)
   551             throw new ParseException("dc.ref.unexpected.input");
   553         return paramTypes.toList();
   554     }
   556     /**
   557      * Read Java identifier
   558      * Matching pairs of { } are skipped; the text is terminated by the first
   559      * unmatched }. It is an error if the beginning of the next tag is detected.
   560      */
   561     @SuppressWarnings("fallthrough")
   562     protected DCIdentifier identifier() throws ParseException {
   563         skipWhitespace();
   564         int pos = bp;
   566         if (isJavaIdentifierStart(ch)) {
   567             Name name = readJavaIdentifier();
   568             return m.at(pos).Identifier(name);
   569         }
   571         throw new ParseException("dc.identifier.expected");
   572     }
   574     /**
   575      * Read a quoted string.
   576      * It is an error if the beginning of the next tag is detected.
   577      */
   578     @SuppressWarnings("fallthrough")
   579     protected DCText quotedString() {
   580         int pos = bp;
   581         nextChar();
   583         loop:
   584         while (bp < buflen) {
   585             switch (ch) {
   586                 case '\n': case '\r': case '\f':
   587                     newline = true;
   588                     break;
   590                 case ' ': case '\t':
   591                     break;
   593                 case '"':
   594                     nextChar();
   595                     // trim trailing white-space?
   596                     return m.at(pos).Text(newString(pos, bp));
   598                 case '@':
   599                     if (newline)
   600                         break loop;
   602             }
   603             nextChar();
   604         }
   605         return null;
   606     }
   608     /**
   609      * Read general text content of an inline tag, including HTML entities and elements.
   610      * Matching pairs of { } are skipped; the text is terminated by the first
   611      * unmatched }. It is an error if the beginning of the next tag is detected.
   612      */
   613     @SuppressWarnings("fallthrough")
   614     protected List<DCTree> inlineContent() {
   615         ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
   617         skipWhitespace();
   618         int pos = bp;
   619         int depth = 1;
   620         textStart = -1;
   622         loop:
   623         while (bp < buflen) {
   625             switch (ch) {
   626                 case '\n': case '\r': case '\f':
   627                     newline = true;
   628                     // fall through
   630                 case ' ': case '\t':
   631                     nextChar();
   632                     break;
   634                 case '&':
   635                     entity(trees);
   636                     break;
   638                 case '<':
   639                     newline = false;
   640                     addPendingText(trees, bp - 1);
   641                     trees.add(html());
   642                     break;
   644                 case '{':
   645                     newline = false;
   646                     depth++;
   647                     nextChar();
   648                     break;
   650                 case '}':
   651                     newline = false;
   652                     if (--depth == 0) {
   653                         addPendingText(trees, bp - 1);
   654                         nextChar();
   655                         return trees.toList();
   656                     }
   657                     nextChar();
   658                     break;
   660                 case '@':
   661                     if (newline)
   662                         break loop;
   663                     // fallthrough
   665                 default:
   666                     if (textStart == -1)
   667                         textStart = bp;
   668                     nextChar();
   669                     break;
   670             }
   671         }
   673         return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos));
   674     }
   676     protected void entity(ListBuffer<DCTree> list) {
   677         newline = false;
   678         addPendingText(list, bp - 1);
   679         list.add(entity());
   680         if (textStart == -1) {
   681             textStart = bp;
   682             lastNonWhite = -1;
   683         }
   684     }
   686     /**
   687      * Read an HTML entity.
   688      * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
   689      */
   690     protected DCTree entity() {
   691         int p = bp;
   692         nextChar();
   693         Name name = null;
   694         boolean checkSemi = false;
   695         if (ch == '#') {
   696             int namep = bp;
   697             nextChar();
   698             if (isDecimalDigit(ch)) {
   699                 nextChar();
   700                 while (isDecimalDigit(ch))
   701                     nextChar();
   702                 name = names.fromChars(buf, namep, bp - namep);
   703             } else if (ch == 'x' || ch == 'X') {
   704                 nextChar();
   705                 if (isHexDigit(ch)) {
   706                     nextChar();
   707                     while (isHexDigit(ch))
   708                         nextChar();
   709                     name = names.fromChars(buf, namep, bp - namep);
   710                 }
   711             }
   712         } else if (isIdentifierStart(ch)) {
   713             name = readIdentifier();
   714         }
   716         if (name == null)
   717             return erroneous("dc.bad.entity", p);
   718         else {
   719             if (ch != ';')
   720                 return erroneous("dc.missing.semicolon", p);
   721             nextChar();
   722             return m.at(p).Entity(name);
   723         }
   724     }
   726     /**
   727      * Read the start or end of an HTML tag, or an HTML comment
   728      * {@literal <identifier attrs> } or {@literal </identifier> }
   729      */
   730     protected DCTree html() {
   731         int p = bp;
   732         nextChar();
   733         if (isIdentifierStart(ch)) {
   734             Name name = readIdentifier();
   735             List<DCTree> attrs = htmlAttrs();
   736             if (attrs != null) {
   737                 boolean selfClosing = false;
   738                 if (ch == '/') {
   739                     nextChar();
   740                     selfClosing = true;
   741                 }
   742                 if (ch == '>') {
   743                     nextChar();
   744                     return m.at(p).StartElement(name, attrs, selfClosing);
   745                 }
   746             }
   747         } else if (ch == '/') {
   748             nextChar();
   749             if (isIdentifierStart(ch)) {
   750                 Name name = readIdentifier();
   751                 skipWhitespace();
   752                 if (ch == '>') {
   753                     nextChar();
   754                     return m.at(p).EndElement(name);
   755                 }
   756             }
   757         } else if (ch == '!') {
   758             nextChar();
   759             if (ch == '-') {
   760                 nextChar();
   761                 if (ch == '-') {
   762                     nextChar();
   763                     while (bp < buflen) {
   764                         int dash = 0;
   765                         while (ch == '-') {
   766                             dash++;
   767                             nextChar();
   768                         }
   769                         // strictly speaking, a comment should not contain "--"
   770                         // so dash > 2 is an error, dash == 2 implies ch == '>'
   771                         if (dash >= 2 && ch == '>') {
   772                             nextChar();
   773                             return m.at(p).Comment(newString(p, bp));
   774                         }
   776                         nextChar();
   777                     }
   778                 }
   779             }
   780         }
   782         bp = p + 1;
   783         ch = buf[bp];
   784         return erroneous("dc.malformed.html", p);
   785     }
   787     /**
   788      * Read a series of HTML attributes, terminated by {@literal > }.
   789      * Each attribute is of the form {@literal identifier[=value] }.
   790      * "value" may be unquoted, single-quoted, or double-quoted.
   791      */
   792     protected List<DCTree> htmlAttrs() {
   793         ListBuffer<DCTree> attrs = new ListBuffer<DCTree>();
   794         skipWhitespace();
   796         loop:
   797         while (isIdentifierStart(ch)) {
   798             int namePos = bp;
   799             Name name = readIdentifier();
   800             skipWhitespace();
   801             List<DCTree> value = null;
   802             ValueKind vkind = ValueKind.EMPTY;
   803             if (ch == '=') {
   804                 ListBuffer<DCTree> v = new ListBuffer<DCTree>();
   805                 nextChar();
   806                 skipWhitespace();
   807                 if (ch == '\'' || ch == '"') {
   808                     vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
   809                     char quote = ch;
   810                     nextChar();
   811                     textStart = bp;
   812                     while (bp < buflen && ch != quote) {
   813                         if (newline && ch == '@') {
   814                             attrs.add(erroneous("dc.unterminated.string", namePos));
   815                             // No point trying to read more.
   816                             // In fact, all attrs get discarded by the caller
   817                             // and superseded by a malformed.html node because
   818                             // the html tag itself is not terminated correctly.
   819                             break loop;
   820                         }
   821                         attrValueChar(v);
   822                     }
   823                     addPendingText(v, bp - 1);
   824                     nextChar();
   825                 } else {
   826                     vkind = ValueKind.UNQUOTED;
   827                     textStart = bp;
   828                     while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
   829                         attrValueChar(v);
   830                     }
   831                     addPendingText(v, bp - 1);
   832                 }
   833                 skipWhitespace();
   834                 value = v.toList();
   835             }
   836             DCAttribute attr = m.at(namePos).Attribute(name, vkind, value);
   837             attrs.add(attr);
   838         }
   840         return attrs.toList();
   841     }
   843     protected void attrValueChar(ListBuffer<DCTree> list) {
   844         switch (ch) {
   845             case '&':
   846                 entity(list);
   847                 break;
   849             case '{':
   850                 inlineTag(list);
   851                 break;
   853             default:
   854                 nextChar();
   855         }
   856     }
   858     protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
   859         if (textStart != -1) {
   860             if (textStart <= textEnd) {
   861                 list.add(m.at(textStart).Text(newString(textStart, textEnd + 1)));
   862             }
   863             textStart = -1;
   864         }
   865     }
   867     protected DCErroneous erroneous(String code, int pos) {
   868         int i = bp - 1;
   869         loop:
   870         while (i > pos) {
   871             switch (buf[i]) {
   872                 case '\f': case '\n': case '\r':
   873                     newline = true;
   874                     break;
   875                 case '\t': case ' ':
   876                     break;
   877                 default:
   878                     break loop;
   879             }
   880             i--;
   881         }
   882         textStart = -1;
   883         return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code);
   884     }
   886     @SuppressWarnings("unchecked")
   887     <T> T getFirst(List<T>... lists) {
   888         for (List<T> list: lists) {
   889             if (list.nonEmpty())
   890                 return list.head;
   891         }
   892         return null;
   893     }
   895     protected boolean isIdentifierStart(char ch) {
   896         return Character.isUnicodeIdentifierStart(ch);
   897     }
   899     protected Name readIdentifier() {
   900         int start = bp;
   901         nextChar();
   902         while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
   903             nextChar();
   904         return names.fromChars(buf, start, bp - start);
   905     }
   907     protected boolean isJavaIdentifierStart(char ch) {
   908         return Character.isJavaIdentifierStart(ch);
   909     }
   911     protected Name readJavaIdentifier() {
   912         int start = bp;
   913         nextChar();
   914         while (bp < buflen && Character.isJavaIdentifierPart(ch))
   915             nextChar();
   916         return names.fromChars(buf, start, bp - start);
   917     }
   919     protected boolean isDecimalDigit(char ch) {
   920         return ('0' <= ch && ch <= '9');
   921     }
   923     protected boolean isHexDigit(char ch) {
   924         return ('0' <= ch && ch <= '9')
   925                 || ('a' <= ch && ch <= 'f')
   926                 || ('A' <= ch && ch <= 'F');
   927     }
   929     protected boolean isUnquotedAttrValueTerminator(char ch) {
   930         switch (ch) {
   931             case '\f': case '\n': case '\r': case '\t':
   932             case ' ':
   933             case '"': case '\'': case '`':
   934             case '=': case '<': case '>':
   935                 return true;
   936             default:
   937                 return false;
   938         }
   939     }
   941     protected boolean isWhitespace(char ch) {
   942         return Character.isWhitespace(ch);
   943     }
   945     protected void skipWhitespace() {
   946         while (isWhitespace(ch))
   947             nextChar();
   948     }
   950     protected int getSentenceBreak(String s) {
   951         if (sentenceBreaker != null) {
   952             sentenceBreaker.setText(s);
   953             int i = sentenceBreaker.next();
   954             return (i == s.length()) ? -1 : i;
   955         }
   957         // scan for period followed by whitespace
   958         boolean period = false;
   959         for (int i = 0; i < s.length(); i++) {
   960             switch (s.charAt(i)) {
   961                 case '.':
   962                     period = true;
   963                     break;
   965                 case ' ':
   966                 case '\f':
   967                 case '\n':
   968                 case '\r':
   969                 case '\t':
   970                     if (period)
   971                         return i;
   972                     break;
   974                 default:
   975                     period = false;
   976                     break;
   977             }
   978         }
   979         return -1;
   980     }
   983     Set<String> htmlBlockTags = new HashSet<String>(Arrays.asList(
   984                     "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre"));
   986     protected boolean isSentenceBreak(Name n) {
   987         return htmlBlockTags.contains(n.toString().toLowerCase());
   988     }
   990     protected boolean isSentenceBreak(DCTree t) {
   991         switch (t.getKind()) {
   992             case START_ELEMENT:
   993                 return isSentenceBreak(((DCStartElement) t).getName());
   995             case END_ELEMENT:
   996                 return isSentenceBreak(((DCEndElement) t).getName());
   997         }
   998         return false;
   999     }
  1001     /**
  1002      * @param start position of first character of string
  1003      * @param end position of character beyond last character to be included
  1004      */
  1005     String newString(int start, int end) {
  1006         return new String(buf, start, end - start);
  1009     static abstract class TagParser {
  1010         enum Kind { INLINE, BLOCK }
  1012         Kind kind;
  1013         DCTree.Kind treeKind;
  1015         TagParser(Kind k, DCTree.Kind tk) {
  1016             kind = k;
  1017             treeKind = tk;
  1020         Kind getKind() {
  1021             return kind;
  1024         DCTree.Kind getTreeKind() {
  1025             return treeKind;
  1028         abstract DCTree parse(int pos) throws ParseException;
  1031     /**
  1032      * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>
  1033      */
  1034     private void initTagParsers() {
  1035         TagParser[] parsers = {
  1036             // @author name-text
  1037             new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
  1038                 public DCTree parse(int pos) {
  1039                     List<DCTree> name = blockContent();
  1040                     return m.at(pos).Author(name);
  1042             },
  1044             // {@code text}
  1045             new TagParser(Kind.INLINE, DCTree.Kind.CODE) {
  1046                 public DCTree parse(int pos) throws ParseException {
  1047                     DCTree text = inlineText();
  1048                     nextChar();
  1049                     return m.at(pos).Code((DCText) text);
  1051             },
  1053             // @deprecated deprecated-text
  1054             new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
  1055                 public DCTree parse(int pos) {
  1056                     List<DCTree> reason = blockContent();
  1057                     return m.at(pos).Deprecated(reason);
  1059             },
  1061             // {@docRoot}
  1062             new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
  1063                 public DCTree parse(int pos) throws ParseException {
  1064                     if (ch == '}') {
  1065                         nextChar();
  1066                         return m.at(pos).DocRoot();
  1068                     inlineText(); // skip unexpected content
  1069                     nextChar();
  1070                     throw new ParseException("dc.unexpected.content");
  1072             },
  1074             // @exception class-name description
  1075             new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
  1076                 public DCTree parse(int pos) throws ParseException {
  1077                     skipWhitespace();
  1078                     DCReference ref = reference(false);
  1079                     List<DCTree> description = blockContent();
  1080                     return m.at(pos).Exception(ref, description);
  1082             },
  1084             // {@inheritDoc}
  1085             new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
  1086                 public DCTree parse(int pos) throws ParseException {
  1087                     if (ch == '}') {
  1088                         nextChar();
  1089                         return m.at(pos).InheritDoc();
  1091                     inlineText(); // skip unexpected content
  1092                     nextChar();
  1093                     throw new ParseException("dc.unexpected.content");
  1095             },
  1097             // {@link package.class#member label}
  1098             new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
  1099                 public DCTree parse(int pos) throws ParseException {
  1100                     DCReference ref = reference(true);
  1101                     List<DCTree> label = inlineContent();
  1102                     return m.at(pos).Link(ref, label);
  1104             },
  1106             // {@linkplain package.class#member label}
  1107             new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
  1108                 public DCTree parse(int pos) throws ParseException {
  1109                     DCReference ref = reference(true);
  1110                     List<DCTree> label = inlineContent();
  1111                     return m.at(pos).LinkPlain(ref, label);
  1113             },
  1115             // {@literal text}
  1116             new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) {
  1117                 public DCTree parse(int pos) throws ParseException {
  1118                     DCTree text = inlineText();
  1119                     nextChar();
  1120                     return m.at(pos).Literal((DCText) text);
  1122             },
  1124             // @param parameter-name description
  1125             new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
  1126                 public DCTree parse(int pos) throws ParseException {
  1127                     skipWhitespace();
  1129                     boolean typaram = false;
  1130                     if (ch == '<') {
  1131                         typaram = true;
  1132                         nextChar();
  1135                     DCIdentifier id = identifier();
  1137                     if (typaram) {
  1138                         if (ch != '>')
  1139                             throw new ParseException("dc.gt.expected");
  1140                         nextChar();
  1143                     skipWhitespace();
  1144                     List<DCTree> desc = blockContent();
  1145                     return m.at(pos).Param(typaram, id, desc);
  1147             },
  1149             // @return description
  1150             new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
  1151                 public DCTree parse(int pos) {
  1152                     List<DCTree> description = blockContent();
  1153                     return m.at(pos).Return(description);
  1155             },
  1157             // @see reference | quoted-string | HTML
  1158             new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
  1159                 public DCTree parse(int pos) throws ParseException {
  1160                     skipWhitespace();
  1161                     switch (ch) {
  1162                         case '"':
  1163                             DCText string = quotedString();
  1164                             if (string != null) {
  1165                                 skipWhitespace();
  1166                                 if (ch == '@')
  1167                                     return m.at(pos).See(List.<DCTree>of(string));
  1169                             break;
  1171                         case '<':
  1172                             List<DCTree> html = blockContent();
  1173                             if (html != null)
  1174                                 return m.at(pos).See(html);
  1175                             break;
  1177                         case '@':
  1178                             if (newline)
  1179                                 throw new ParseException("dc.no.content");
  1180                             break;
  1182                         case EOI:
  1183                             if (bp == buf.length - 1)
  1184                                 throw new ParseException("dc.no.content");
  1185                             break;
  1187                         default:
  1188                             if (isJavaIdentifierStart(ch) || ch == '#') {
  1189                                 DCReference ref = reference(true);
  1190                                 List<DCTree> description = blockContent();
  1191                                 return m.at(pos).See(description.prepend(ref));
  1194                     throw new ParseException("dc.unexpected.content");
  1196             },
  1198             // @serialData data-description
  1199             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
  1200                 public DCTree parse(int pos) {
  1201                     List<DCTree> description = blockContent();
  1202                     return m.at(pos).SerialData(description);
  1204             },
  1206             // @serialField field-name field-type description
  1207             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
  1208                 public DCTree parse(int pos) throws ParseException {
  1209                     skipWhitespace();
  1210                     DCIdentifier name = identifier();
  1211                     skipWhitespace();
  1212                     DCReference type = reference(false);
  1213                     List<DCTree> description = null;
  1214                     if (isWhitespace(ch)) {
  1215                         skipWhitespace();
  1216                         description = blockContent();
  1218                     return m.at(pos).SerialField(name, type, description);
  1220             },
  1222             // @serial field-description | include | exclude
  1223             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
  1224                 public DCTree parse(int pos) {
  1225                     List<DCTree> description = blockContent();
  1226                     return m.at(pos).Serial(description);
  1228             },
  1230             // @since since-text
  1231             new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
  1232                 public DCTree parse(int pos) {
  1233                     List<DCTree> description = blockContent();
  1234                     return m.at(pos).Since(description);
  1236             },
  1238             // @throws class-name description
  1239             new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
  1240                 public DCTree parse(int pos) throws ParseException {
  1241                     skipWhitespace();
  1242                     DCReference ref = reference(false);
  1243                     List<DCTree> description = blockContent();
  1244                     return m.at(pos).Throws(ref, description);
  1246             },
  1248             // {@value package.class#field}
  1249             new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
  1250                 public DCTree parse(int pos) throws ParseException {
  1251                     DCReference ref = reference(true);
  1252                     skipWhitespace();
  1253                     if (ch == '}') {
  1254                         nextChar();
  1255                         return m.at(pos).Value(ref);
  1257                     nextChar();
  1258                     throw new ParseException("dc.unexpected.content");
  1260             },
  1262             // @version version-text
  1263             new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
  1264                 public DCTree parse(int pos) {
  1265                     List<DCTree> description = blockContent();
  1266                     return m.at(pos).Version(description);
  1268             },
  1269         };
  1271         tagParsers = new HashMap<Name,TagParser>();
  1272         for (TagParser p: parsers)
  1273             tagParsers.put(names.fromString(p.getTreeKind().tagName), p);

mercurial