src/share/classes/com/sun/tools/javac/parser/DocCommentParser.java

Mon, 21 Jan 2013 20:19:53 +0000

author
mcimadamore
date
Mon, 21 Jan 2013 20:19:53 +0000
changeset 1513
cf84b07a82db
parent 1455
75ab654b5cd5
child 1529
950d8195a5a4
permissions
-rw-r--r--

8005166: Add support for static interface methods
Summary: Support public static interface methods
Reviewed-by: jjg

     1 /*
     2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import java.text.BreakIterator;
    29 import java.util.Arrays;
    30 import java.util.HashMap;
    31 import java.util.HashSet;
    32 import java.util.Locale;
    33 import java.util.Map;
    34 import java.util.Set;
    36 import com.sun.source.doctree.AttributeTree.ValueKind;
    37 import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
    38 import com.sun.tools.javac.parser.Tokens.Comment;
    39 import com.sun.tools.javac.parser.Tokens.TokenKind;
    40 import com.sun.tools.javac.tree.DCTree;
    41 import com.sun.tools.javac.tree.DCTree.DCAttribute;
    42 import com.sun.tools.javac.tree.DCTree.DCDocComment;
    43 import com.sun.tools.javac.tree.DCTree.DCEndElement;
    44 import com.sun.tools.javac.tree.DCTree.DCErroneous;
    45 import com.sun.tools.javac.tree.DCTree.DCIdentifier;
    46 import com.sun.tools.javac.tree.DCTree.DCReference;
    47 import com.sun.tools.javac.tree.DCTree.DCStartElement;
    48 import com.sun.tools.javac.tree.DCTree.DCText;
    49 import com.sun.tools.javac.tree.DocTreeMaker;
    50 import com.sun.tools.javac.tree.JCTree;
    51 import com.sun.tools.javac.util.DiagnosticSource;
    52 import com.sun.tools.javac.util.List;
    53 import com.sun.tools.javac.util.ListBuffer;
    54 import com.sun.tools.javac.util.Log;
    55 import com.sun.tools.javac.util.Name;
    56 import com.sun.tools.javac.util.Names;
    57 import com.sun.tools.javac.util.Options;
    58 import com.sun.tools.javac.util.Position;
    59 import static com.sun.tools.javac.util.LayoutCharacters.*;
    61 /**
    62  *
    63  *  <p><b>This is NOT part of any supported API.
    64  *  If you write code that depends on this, you do so at your own risk.
    65  *  This code and its internal interfaces are subject to change or
    66  *  deletion without notice.</b>
    67  */
    68 public class DocCommentParser {
    69     static class ParseException extends Exception {
    70         private static final long serialVersionUID = 0;
    71         ParseException(String key) {
    72             super(key);
    73         }
    74     }
    /** Factory giving access to the shared names table, tree maker, log, options and locale. */
    final ParserFactory fac;
    /** Diagnostic source attached to the erroneous nodes created by this parser. */
    final DiagnosticSource diagSource;
    /** The comment whose text is parsed by {@code parse()}. */
    final Comment comment;
    /** Tree maker used to create all doc tree nodes. */
    final DocTreeMaker m;
    /** Name table used to turn character ranges of the buffer into names. */
    final Names names;
    /**
     * Sentence iterator created by the constructor when the "breakIterator"
     * option is set or the locale's language is not English; otherwise left unset.
     */
    BreakIterator sentenceBreaker;
    /** The input buffer, index of most recent character read,
     *  index of one past last character in buffer.
     */
    protected char[] buf;
    protected int bp;
    protected int buflen;
    /** The current character.
     */
    protected char ch;
    /** Index in {@code buf} at which the pending (not yet emitted) text starts, or -1 if none. */
    int textStart = -1;
    /** Index in {@code buf} of the most recent non-whitespace text character, or -1 if none. */
    int lastNonWhite = -1;
    /**
     * Set when a line terminator is read and cleared when non-whitespace content
     * is consumed; used to recognize '@' as the first character on a line.
     */
    boolean newline = true;
    /** Parsers for the known tags, keyed by tag name; presumably filled in by initTagParsers(). */
    Map<Name, TagParser> tagParsers;
   101     DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
   102         this.fac = fac;
   103         this.diagSource = diagSource;
   104         this.comment = comment;
   105         names = fac.names;
   106         m = fac.docTreeMaker;
   108         Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale;
   110         Options options = fac.options;
   111         boolean useBreakIterator = options.isSet("breakIterator");
   112         if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage()))
   113             sentenceBreaker = BreakIterator.getSentenceInstance(locale);
   115         initTagParsers();
   116     }
   118     DCDocComment parse() {
   119         String c = comment.getText();
   120         buf = new char[c.length() + 1];
   121         c.getChars(0, c.length(), buf, 0);
   122         buf[buf.length - 1] = EOI;
   123         buflen = buf.length - 1;
   124         bp = -1;
   125         nextChar();
   127         List<DCTree> body = blockContent();
   128         List<DCTree> tags = blockTags();
   130         // split body into first sentence and body
   131         ListBuffer<DCTree> fs = new ListBuffer<DCTree>();
   132         loop:
   133         for (; body.nonEmpty(); body = body.tail) {
   134             DCTree t = body.head;
   135             switch (t.getKind()) {
   136                 case TEXT:
   137                     String s = ((DCText) t).getBody();
   138                     int i = getSentenceBreak(s);
   139                     if (i > 0) {
   140                         int i0 = i;
   141                         while (i0 > 0 && isWhitespace(s.charAt(i0 - 1)))
   142                             i0--;
   143                         fs.add(m.at(t.pos).Text(s.substring(0, i0)));
   144                         int i1 = i;
   145                         while (i1 < s.length() && isWhitespace(s.charAt(i1)))
   146                             i1++;
   147                         body = body.tail;
   148                         if (i1 < s.length())
   149                             body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1)));
   150                         break loop;
   151                     } else if (body.tail.nonEmpty()) {
   152                         if (isSentenceBreak(body.tail.head)) {
   153                             int i0 = s.length() - 1;
   154                             while (i0 > 0 && isWhitespace(s.charAt(i0)))
   155                                 i0--;
   156                             fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1)));
   157                             body = body.tail;
   158                             break loop;
   159                         }
   160                     }
   161                     break;
   163                 case START_ELEMENT:
   164                 case END_ELEMENT:
   165                     if (isSentenceBreak(t))
   166                         break loop;
   167                     break;
   168             }
   169             fs.add(t);
   170         }
   172         @SuppressWarnings("unchecked")
   173         DCTree first = getFirst(fs.toList(), body, tags);
   174         int pos = (first == null) ? Position.NOPOS : first.pos;
   176         DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags);
   177         return dc;
   178     }
   180     void nextChar() {
   181         ch = buf[bp < buflen ? ++bp : buflen];
   182         switch (ch) {
   183             case '\f': case '\n': case '\r':
   184                 newline = true;
   185         }
   186     }
   188     /**
   189      * Read block content, consisting of text, html and inline tags.
   190      * Terminated by the end of input, or the beginning of the next block tag:
   191      * i.e. @ as the first non-whitespace character on a line.
   192      */
   193     @SuppressWarnings("fallthrough")
   194     protected List<DCTree> blockContent() {
   195         ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
   196         textStart = -1;
   198         loop:
   199         while (bp < buflen) {
   200             switch (ch) {
   201                 case '\n': case '\r': case '\f':
   202                     newline = true;
   203                     // fallthrough
   205                 case ' ': case '\t':
   206                     nextChar();
   207                     break;
   209                 case '&':
   210                     entity(trees);
   211                     break;
   213                 case '<':
   214                     newline = false;
   215                     addPendingText(trees, bp - 1);
   216                     trees.add(html());
   217                     if (textStart == -1) {
   218                         textStart = bp;
   219                         lastNonWhite = -1;
   220                     }
   221                     break;
   223                 case '>':
   224                     newline = false;
   225                     addPendingText(trees, bp - 1);
   226                     trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt"));
   227                     nextChar();
   228                     if (textStart == -1) {
   229                         textStart = bp;
   230                         lastNonWhite = -1;
   231                     }
   232                     break;
   234                 case '{':
   235                     inlineTag(trees);
   236                     break;
   238                 case '@':
   239                     if (newline) {
   240                         addPendingText(trees, lastNonWhite);
   241                         break loop;
   242                     }
   243                     // fallthrough
   245                 default:
   246                     newline = false;
   247                     if (textStart == -1)
   248                         textStart = bp;
   249                     lastNonWhite = bp;
   250                     nextChar();
   251             }
   252         }
   254         if (lastNonWhite != -1)
   255             addPendingText(trees, lastNonWhite);
   257         return trees.toList();
   258     }
   260     /**
   261      * Read a series of block tags, including their content.
   262      * Standard tags parse their content appropriately.
   263      * Non-standard tags are represented by {@link UnknownBlockTag}.
   264      */
   265     protected List<DCTree> blockTags() {
   266         ListBuffer<DCTree> tags = new ListBuffer<DCTree>();
   267         while (ch == '@')
   268             tags.add(blockTag());
   269         return tags.toList();
   270     }
   272     /**
   273      * Read a single block tag, including its content.
   274      * Standard tags parse their content appropriately.
   275      * Non-standard tags are represented by {@link UnknownBlockTag}.
   276      */
   277     protected DCTree blockTag() {
   278         int p = bp;
   279         try {
   280             nextChar();
   281             if (isIdentifierStart(ch)) {
   282                 int namePos = bp;
   283                 nextChar();
   284                 while (isIdentifierPart(ch))
   285                     nextChar();
   286                 int nameLen = bp - namePos;
   288                 Name name = names.fromChars(buf, namePos, nameLen);
   289                 TagParser tp = tagParsers.get(name);
   290                 if (tp == null) {
   291                     List<DCTree> content = blockContent();
   292                     return m.at(p).UnknownBlockTag(name, content);
   293                 } else {
   294                     switch (tp.getKind()) {
   295                         case BLOCK:
   296                             return tp.parse(p);
   297                         case INLINE:
   298                             return erroneous("dc.bad.inline.tag", p);
   299                     }
   300                 }
   301             }
   302             blockContent();
   304             return erroneous("dc.no.tag.name", p);
   305         } catch (ParseException e) {
   306             blockContent();
   307             return erroneous(e.getMessage(), p);
   308         }
   309     }
   311     protected void inlineTag(ListBuffer<DCTree> list) {
   312         newline = false;
   313         nextChar();
   314         if (ch == '@') {
   315             addPendingText(list, bp - 2);
   316             list.add(inlineTag());
   317             textStart = bp;
   318             lastNonWhite = -1;
   319         } else {
   320             if (textStart == -1)
   321                 textStart = bp - 1;
   322             lastNonWhite = bp;
   323         }
   324     }
   326     /**
   327      * Read a single inline tag, including its content.
   328      * Standard tags parse their content appropriately.
   329      * Non-standard tags are represented by {@link UnknownBlockTag}.
   330      * Malformed tags may be returned as {@link Erroneous}.
   331      */
   332     protected DCTree inlineTag() {
   333         int p = bp - 1;
   334         try {
   335             nextChar();
   336             if (isIdentifierStart(ch)) {
   337                 int namePos = bp;
   338                 nextChar();
   339                 while (isIdentifierPart(ch))
   340                     nextChar();
   341                 int nameLen = bp - namePos;
   342                 skipWhitespace();
   344                 Name name = names.fromChars(buf, namePos, nameLen);
   345                 TagParser tp = tagParsers.get(name);
   346                 if (tp == null) {
   347                     DCTree text = inlineText();
   348                     if (text != null) {
   349                         nextChar();
   350                         return m.at(p).UnknownInlineTag(name, List.of(text));
   351                     }
   352                 } else if (tp.getKind() == TagParser.Kind.INLINE) {
   353                     DCTree tree =  tp.parse(p);
   354                     if (tree != null) {
   355                         return tree;
   356                     }
   357                 } else {
   358                     inlineText(); // skip content
   359                     nextChar();
   360                 }
   361             }
   362             return erroneous("dc.no.tag.name", p);
   363         } catch (ParseException e) {
   364             return erroneous(e.getMessage(), p);
   365         }
   366     }
   368     /**
   369      * Read plain text content of an inline tag.
   370      * Matching pairs of { } are skipped; the text is terminated by the first
   371      * unmatched }. It is an error if the beginning of the next tag is detected.
   372      */
   373     protected DCTree inlineText() throws ParseException {
   374         skipWhitespace();
   375         int pos = bp;
   376         int depth = 1;
   378         loop:
   379         while (bp < buflen) {
   380             switch (ch) {
   381                 case '\n': case '\r': case '\f':
   382                     newline = true;
   383                     break;
   385                 case ' ': case '\t':
   386                     break;
   388                 case '{':
   389                     newline = false;
   390                     lastNonWhite = bp;
   391                     depth++;
   392                     break;
   394                 case '}':
   395                     if (--depth == 0) {
   396                         return m.at(pos).Text(newString(pos, bp));
   397                     }
   398                     newline = false;
   399                     lastNonWhite = bp;
   400                     break;
   402                 case '@':
   403                     if (newline)
   404                         break loop;
   405                     newline = false;
   406                     lastNonWhite = bp;
   407                     break;
   409                 default:
   410                     newline = false;
   411                     lastNonWhite = bp;
   412                     break;
   413             }
   414             nextChar();
   415         }
   416         throw new ParseException("dc.unterminated.inline.tag");
   417     }
   419     /**
   420      * Read Java class name, possibly followed by member
   421      * Matching pairs of < > are skipped. The text is terminated by the first
   422      * unmatched }. It is an error if the beginning of the next tag is detected.
   423      */
   424     // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
   425     // TODO: improve quality of parse to forbid bad constructions.
   426     @SuppressWarnings("fallthrough")
   427     protected DCReference reference(boolean allowMember) throws ParseException {
   428         int pos = bp;
   429         int depth = 0;
   431         // scan to find the end of the signature, by looking for the first
   432         // whitespace not enclosed in () or <>, or the end of the tag
   433         loop:
   434         while (bp < buflen) {
   435             switch (ch) {
   436                 case '\n': case '\r': case '\f':
   437                     newline = true;
   438                     // fallthrough
   440                 case ' ': case '\t':
   441                     if (depth == 0)
   442                         break loop;
   443                     break;
   445                 case '(':
   446                 case '<':
   447                     newline = false;
   448                     depth++;
   449                     break;
   451                 case ')':
   452                 case '>':
   453                     newline = false;
   454                     --depth;
   455                     break;
   457                 case '}':
   458                     if (bp == pos)
   459                         return null;
   460                     newline = false;
   461                     break loop;
   463                 case '@':
   464                     if (newline)
   465                         break loop;
   466                     // fallthrough
   468                 default:
   469                     newline = false;
   471             }
   472             nextChar();
   473         }
   475         if (depth != 0)
   476             throw new ParseException("dc.unterminated.signature");
   478         String sig = newString(pos, bp);
   480         // Break sig apart into qualifiedExpr member paramTypes.
   481         JCTree qualExpr;
   482         Name member;
   483         List<JCTree> paramTypes;
   485         Log.DeferredDiagnosticHandler deferredDiagnosticHandler
   486                 = new Log.DeferredDiagnosticHandler(fac.log);
   488         try {
   489             int hash = sig.indexOf("#");
   490             int lparen = sig.indexOf("(", hash + 1);
   491             if (hash == -1) {
   492                 if (lparen == -1) {
   493                     qualExpr = parseType(sig);
   494                     member = null;
   495                 } else {
   496                     qualExpr = null;
   497                     member = parseMember(sig.substring(0, lparen));
   498                 }
   499             } else {
   500                 qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
   501                 if (lparen == -1)
   502                     member = parseMember(sig.substring(hash + 1));
   503                 else
   504                     member = parseMember(sig.substring(hash + 1, lparen));
   505             }
   507             if (lparen < 0) {
   508                 paramTypes = null;
   509             } else {
   510                 int rparen = sig.indexOf(")", lparen);
   511                 if (rparen != sig.length() - 1)
   512                     throw new ParseException("dc.ref.bad.parens");
   513                 paramTypes = parseParams(sig.substring(lparen + 1, rparen));
   514             }
   516             if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
   517                 throw new ParseException("dc.ref.syntax.error");
   519         } finally {
   520             fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
   521         }
   523         return m.at(pos).Reference(sig, qualExpr, member, paramTypes);
   524     }
   526     JCTree parseType(String s) throws ParseException {
   527         JavacParser p = fac.newParser(s, false, false, false);
   528         JCTree tree = p.parseType();
   529         if (p.token().kind != TokenKind.EOF)
   530             throw new ParseException("dc.ref.unexpected.input");
   531         return tree;
   532     }
   534     Name parseMember(String s) throws ParseException {
   535         JavacParser p = fac.newParser(s, false, false, false);
   536         Name name = p.ident();
   537         if (p.token().kind != TokenKind.EOF)
   538             throw new ParseException("dc.ref.unexpected.input");
   539         return name;
   540     }
   542     List<JCTree> parseParams(String s) throws ParseException {
   543         if (s.trim().isEmpty())
   544             return List.nil();
   546         JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
   547         ListBuffer<JCTree> paramTypes = new ListBuffer<JCTree>();
   548         paramTypes.add(p.parseType());
   550         if (p.token().kind == TokenKind.IDENTIFIER)
   551             p.nextToken();
   553         while (p.token().kind == TokenKind.COMMA) {
   554             p.nextToken();
   555             paramTypes.add(p.parseType());
   557             if (p.token().kind == TokenKind.IDENTIFIER)
   558                 p.nextToken();
   559         }
   561         if (p.token().kind != TokenKind.EOF)
   562             throw new ParseException("dc.ref.unexpected.input");
   564         return paramTypes.toList();
   565     }
   567     /**
   568      * Read Java identifier
   569      * Matching pairs of { } are skipped; the text is terminated by the first
   570      * unmatched }. It is an error if the beginning of the next tag is detected.
   571      */
   572     @SuppressWarnings("fallthrough")
   573     protected DCIdentifier identifier() throws ParseException {
   574         skipWhitespace();
   575         int pos = bp;
   577         if (isJavaIdentifierStart(ch)) {
   578             nextChar();
   579             while (isJavaIdentifierPart(ch))
   580                 nextChar();
   581             return m.at(pos).Identifier(names.fromChars(buf, pos, bp - pos));
   582         }
   584         throw new ParseException("dc.identifier.expected");
   585     }
   587     /**
   588      * Read a quoted string.
   589      * It is an error if the beginning of the next tag is detected.
   590      */
   591     @SuppressWarnings("fallthrough")
   592     protected DCText quotedString() {
   593         int pos = bp;
   594         nextChar();
   596         loop:
   597         while (bp < buflen) {
   598             switch (ch) {
   599                 case '\n': case '\r': case '\f':
   600                     newline = true;
   601                     break;
   603                 case ' ': case '\t':
   604                     break;
   606                 case '"':
   607                     nextChar();
   608                     // trim trailing white-space?
   609                     return m.at(pos).Text(newString(pos, bp));
   611                 case '@':
   612                     if (newline)
   613                         break loop;
   615             }
   616             nextChar();
   617         }
   618         return null;
   619     }
   621     /**
   622      * Read general text content of an inline tag, including HTML entities and elements.
   623      * Matching pairs of { } are skipped; the text is terminated by the first
   624      * unmatched }. It is an error if the beginning of the next tag is detected.
   625      */
   626     @SuppressWarnings("fallthrough")
   627     protected List<DCTree> inlineContent() {
   628         ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
   630         skipWhitespace();
   631         int pos = bp;
   632         int depth = 1;
   633         textStart = -1;
   635         loop:
   636         while (bp < buflen) {
   638             switch (ch) {
   639                 case '\n': case '\r': case '\f':
   640                     newline = true;
   641                     // fall through
   643                 case ' ': case '\t':
   644                     nextChar();
   645                     break;
   647                 case '&':
   648                     entity(trees);
   649                     break;
   651                 case '<':
   652                     newline = false;
   653                     addPendingText(trees, bp - 1);
   654                     trees.add(html());
   655                     break;
   657                 case '{':
   658                     newline = false;
   659                     depth++;
   660                     nextChar();
   661                     break;
   663                 case '}':
   664                     newline = false;
   665                     if (--depth == 0) {
   666                         addPendingText(trees, bp - 1);
   667                         nextChar();
   668                         return trees.toList();
   669                     }
   670                     nextChar();
   671                     break;
   673                 case '@':
   674                     if (newline)
   675                         break loop;
   676                     // fallthrough
   678                 default:
   679                     if (textStart == -1)
   680                         textStart = bp;
   681                     nextChar();
   682                     break;
   683             }
   684         }
   686         return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos));
   687     }
   689     protected void entity(ListBuffer<DCTree> list) {
   690         newline = false;
   691         addPendingText(list, bp - 1);
   692         list.add(entity());
   693         if (textStart == -1) {
   694             textStart = bp;
   695             lastNonWhite = -1;
   696         }
   697     }
   699     /**
   700      * Read an HTML entity.
   701      * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
   702      */
   703     protected DCTree entity() {
   704         int p = bp;
   705         nextChar();
   706         int namep = bp;
   707         boolean checkSemi = false;
   708         if (ch == '#') {
   709             nextChar();
   710             if (isDecimalDigit(ch)) {
   711                 nextChar();
   712                 while (isDecimalDigit(ch))
   713                     nextChar();
   714                 checkSemi = true;
   715             } else if (ch == 'x' || ch == 'X') {
   716                 nextChar();
   717                 if (isHexDigit(ch)) {
   718                     nextChar();
   719                     while (isHexDigit(ch))
   720                         nextChar();
   721                     checkSemi = true;
   722                 }
   723             }
   724         } else if (isIdentifierStart(ch)) {
   725             nextChar();
   726             while (isIdentifierPart(ch))
   727                 nextChar();
   728             checkSemi = true;
   729         }
   731         if (checkSemi && ch == ';') {
   732             nextChar();
   733             return m.at(p).Entity(names.fromChars(buf, namep, bp - namep - 1));
   734         } else {
   735             String code = checkSemi
   736                     ? "dc.missing.semicolon"
   737                     : "dc.bad.entity";
   738             return erroneous(code, p);
   739         }
   740     }
   742     /**
   743      * Read the start or end of an HTML tag, or an HTML comment
   744      * {@literal <identifier attrs> } or {@literal </identifier> }
   745      */
   746     protected DCTree html() {
   747         int p = bp;
   748         nextChar();
   749         if (isIdentifierStart(ch)) {
   750             int namePos = bp;
   751             nextChar();
   752             while (isIdentifierPart(ch))
   753                 nextChar();
   754             int nameLen = bp - namePos;
   755             List<DCTree> attrs = htmlAttrs();
   756             if (attrs != null) {
   757                 boolean selfClosing = false;
   758                 if (ch == '/') {
   759                     nextChar();
   760                     selfClosing = true;
   761                 }
   762                 if (ch == '>') {
   763                     nextChar();
   764                     Name name = names.fromChars(buf, namePos, nameLen);
   765                     return m.at(p).StartElement(name, attrs, selfClosing);
   766                 }
   767             }
   768         } else if (ch == '/') {
   769             nextChar();
   770             if (isIdentifierStart(ch)) {
   771                 int namePos = bp;
   772                 nextChar();
   773                 while (isIdentifierPart(ch))
   774                     nextChar();
   775                 int nameLen = bp - namePos;
   776                 skipWhitespace();
   777                 if (ch == '>') {
   778                     nextChar();
   779                     Name name = names.fromChars(buf, namePos, nameLen);
   780                     return m.at(p).EndElement(name);
   781                 }
   782             }
   783         } else if (ch == '!') {
   784             nextChar();
   785             if (ch == '-') {
   786                 nextChar();
   787                 if (ch == '-') {
   788                     nextChar();
   789                     while (bp < buflen) {
   790                         int dash = 0;
   791                         while (ch == '-') {
   792                             dash++;
   793                             nextChar();
   794                         }
   795                         // strictly speaking, a comment should not contain "--"
   796                         // so dash > 2 is an error, dash == 2 implies ch == '>'
   797                         if (dash >= 2 && ch == '>') {
   798                             nextChar();
   799                             return m.at(p).Comment(newString(p, bp));
   800                         }
   802                         nextChar();
   803                     }
   804                 }
   805             }
   806         }
   808         bp = p + 1;
   809         ch = buf[bp];
   810         return erroneous("dc.malformed.html", p);
   811     }
   813     /**
   814      * Read a series of HTML attributes, terminated by {@literal > }.
   815      * Each attribute is of the form {@literal identifier[=value] }.
   816      * "value" may be unquoted, single-quoted, or double-quoted.
   817      */
   818     protected List<DCTree> htmlAttrs() {
   819         ListBuffer<DCTree> attrs = new ListBuffer<DCTree>();
   820         skipWhitespace();
   822         loop:
   823         while (isIdentifierStart(ch)) {
   824             int namePos = bp;
   825             nextChar();
   826             while (isIdentifierPart(ch))
   827                 nextChar();
   828             int nameLen = bp - namePos;
   829             skipWhitespace();
   830             List<DCTree> value = null;
   831             ValueKind vkind = ValueKind.EMPTY;
   832             if (ch == '=') {
   833                 ListBuffer<DCTree> v = new ListBuffer<DCTree>();
   834                 nextChar();
   835                 skipWhitespace();
   836                 if (ch == '\'' || ch == '"') {
   837                     vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
   838                     char quote = ch;
   839                     nextChar();
   840                     textStart = bp;
   841                     while (bp < buflen && ch != quote) {
   842                         if (newline && ch == '@') {
   843                             attrs.add(erroneous("dc.unterminated.string", namePos));
   844                             // No point trying to read more.
   845                             // In fact, all attrs get discarded by the caller
   846                             // and superseded by a malformed.html node because
   847                             // the html tag itself is not terminated correctly.
   848                             break loop;
   849                         }
   850                         attrValueChar(v);
   851                     }
   852                     addPendingText(v, bp - 1);
   853                     nextChar();
   854                 } else {
   855                     vkind = ValueKind.UNQUOTED;
   856                     textStart = bp;
   857                     while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
   858                         attrValueChar(v);
   859                     }
   860                     addPendingText(v, bp - 1);
   861                 }
   862                 skipWhitespace();
   863                 value = v.toList();
   864             }
   865             Name name = names.fromChars(buf, namePos, nameLen);
   866             DCAttribute attr = m.at(namePos).Attribute(name, vkind, value);
   867             attrs.add(attr);
   868         }
   870         return attrs.toList();
   871     }
   873     protected void attrValueChar(ListBuffer<DCTree> list) {
   874         switch (ch) {
   875             case '&':
   876                 entity(list);
   877                 break;
   879             case '{':
   880                 inlineTag(list);
   881                 break;
   883             default:
   884                 nextChar();
   885         }
   886     }
   888     protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
   889         if (textStart != -1) {
   890             if (textStart <= textEnd) {
   891                 list.add(m.at(textStart).Text(newString(textStart, textEnd + 1)));
   892             }
   893             textStart = -1;
   894         }
   895     }
   897     protected DCErroneous erroneous(String code, int pos) {
   898         int i = bp - 1;
   899         loop:
   900         while (i > 0) {
   901             switch (buf[i]) {
   902                 case '\f': case '\n': case '\r':
   903                     newline = true;
   904                     break;
   905                 case '\t': case ' ':
   906                     break;
   907                 default:
   908                     break loop;
   909             }
   910             i--;
   911         }
   912         textStart = -1;
   913         return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code);
   914     }
   916     @SuppressWarnings("unchecked")
   917     <T> T getFirst(List<T>... lists) {
   918         for (List<T> list: lists) {
   919             if (list.nonEmpty())
   920                 return list.head;
   921         }
   922         return null;
   923     }
   925     protected boolean isIdentifierStart(char ch) {
   926         return Character.isUnicodeIdentifierStart(ch);
   927     }
   929     protected boolean isIdentifierPart(char ch) {
   930         return Character.isUnicodeIdentifierPart(ch);
   931     }
   933     protected boolean isJavaIdentifierStart(char ch) {
   934         return Character.isJavaIdentifierStart(ch);
   935     }
   937     protected boolean isJavaIdentifierPart(char ch) {
   938         return Character.isJavaIdentifierPart(ch);
   939     }
   941     protected boolean isDecimalDigit(char ch) {
   942         return ('0' <= ch && ch <= '9');
   943     }
   945     protected boolean isHexDigit(char ch) {
   946         return ('0' <= ch && ch <= '9')
   947                 || ('a' <= ch && ch <= 'f')
   948                 || ('A' <= ch && ch <= 'F');
   949     }
   951     protected boolean isUnquotedAttrValueTerminator(char ch) {
   952         switch (ch) {
   953             case '\f': case '\n': case '\r': case '\t':
   954             case ' ':
   955             case '"': case '\'': case '`':
   956             case '=': case '<': case '>':
   957                 return true;
   958             default:
   959                 return false;
   960         }
   961     }
   963     protected boolean isWhitespace(char ch) {
   964         return Character.isWhitespace(ch);
   965     }
   967     protected void skipWhitespace() {
   968         while (isWhitespace(ch))
   969             nextChar();
   970     }
   972     protected int getSentenceBreak(String s) {
   973         if (sentenceBreaker != null) {
   974             sentenceBreaker.setText(s);
   975             int i = sentenceBreaker.next();
   976             return (i == s.length()) ? -1 : i;
   977         }
   979         // scan for period followed by whitespace
   980         boolean period = false;
   981         for (int i = 0; i < s.length(); i++) {
   982             switch (s.charAt(i)) {
   983                 case '.':
   984                     period = true;
   985                     break;
   987                 case ' ':
   988                 case '\f':
   989                 case '\n':
   990                 case '\r':
   991                 case '\t':
   992                     if (period)
   993                         return i;
   994                     break;
   996                 default:
   997                     period = false;
   998                     break;
   999             }
  1001         return -1;
  1005     Set<String> htmlBlockTags = new HashSet<String>(Arrays.asList(
  1006                     "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre"));
  1008     protected boolean isSentenceBreak(Name n) {
  1009         return htmlBlockTags.contains(n.toString().toLowerCase());
  1012     protected boolean isSentenceBreak(DCTree t) {
  1013         switch (t.getKind()) {
  1014             case START_ELEMENT:
  1015                 return isSentenceBreak(((DCStartElement) t).getName());
  1017             case END_ELEMENT:
  1018                 return isSentenceBreak(((DCEndElement) t).getName());
  1020         return false;
  1023     /**
  1024      * @param start position of first character of string
  1025      * @param end position of character beyond last character to be included
  1026      */
  1027     String newString(int start, int end) {
  1028         return new String(buf, start, end - start);
  1031     static abstract class TagParser {
  1032         enum Kind { INLINE, BLOCK }
  1034         Kind kind;
  1035         DCTree.Kind treeKind;
  1037         TagParser(Kind k, DCTree.Kind tk) {
  1038             kind = k;
  1039             treeKind = tk;
  1042         Kind getKind() {
  1043             return kind;
  1046         DCTree.Kind getTreeKind() {
  1047             return treeKind;
  1050         abstract DCTree parse(int pos) throws ParseException;
  1053     /**
  1054      * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>
  1055      */
  1056     private void initTagParsers() {
  1057         TagParser[] parsers = {
  1058             // @author name-text
  1059             new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
  1060                 public DCTree parse(int pos) {
  1061                     List<DCTree> name = blockContent();
  1062                     return m.at(pos).Author(name);
  1064             },
  1066             // {@code text}
  1067             new TagParser(Kind.INLINE, DCTree.Kind.CODE) {
  1068                 public DCTree parse(int pos) throws ParseException {
  1069                     DCTree text = inlineText();
  1070                     nextChar();
  1071                     return m.at(pos).Code((DCText) text);
  1073             },
  1075             // @deprecated deprecated-text
  1076             new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
  1077                 public DCTree parse(int pos) {
  1078                     List<DCTree> reason = blockContent();
  1079                     return m.at(pos).Deprecated(reason);
  1081             },
  1083             // {@docRoot}
  1084             new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
  1085                 public DCTree parse(int pos) throws ParseException {
  1086                     if (ch == '}') {
  1087                         nextChar();
  1088                         return m.at(pos).DocRoot();
  1090                     inlineText(); // skip unexpected content
  1091                     nextChar();
  1092                     throw new ParseException("dc.unexpected.content");
  1094             },
  1096             // @exception class-name description
  1097             new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
  1098                 public DCTree parse(int pos) throws ParseException {
  1099                     skipWhitespace();
  1100                     DCReference ref = reference(false);
  1101                     List<DCTree> description = blockContent();
  1102                     return m.at(pos).Exception(ref, description);
  1104             },
  1106             // {@inheritDoc}
  1107             new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
  1108                 public DCTree parse(int pos) throws ParseException {
  1109                     if (ch == '}') {
  1110                         nextChar();
  1111                         return m.at(pos).InheritDoc();
  1113                     inlineText(); // skip unexpected content
  1114                     nextChar();
  1115                     throw new ParseException("dc.unexpected.content");
  1117             },
  1119             // {@link package.class#member label}
  1120             new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
  1121                 public DCTree parse(int pos) throws ParseException {
  1122                     DCReference ref = reference(true);
  1123                     List<DCTree> label = inlineContent();
  1124                     return m.at(pos).Link(ref, label);
  1126             },
  1128             // {@linkplain package.class#member label}
  1129             new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
  1130                 public DCTree parse(int pos) throws ParseException {
  1131                     DCReference ref = reference(true);
  1132                     List<DCTree> label = inlineContent();
  1133                     return m.at(pos).LinkPlain(ref, label);
  1135             },
  1137             // {@literal text}
  1138             new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) {
  1139                 public DCTree parse(int pos) throws ParseException {
  1140                     DCTree text = inlineText();
  1141                     nextChar();
  1142                     return m.at(pos).Literal((DCText) text);
  1144             },
  1146             // @param parameter-name description
  1147             new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
  1148                 public DCTree parse(int pos) throws ParseException {
  1149                     skipWhitespace();
  1151                     boolean typaram = false;
  1152                     if (ch == '<') {
  1153                         typaram = true;
  1154                         nextChar();
  1157                     DCIdentifier id = identifier();
  1159                     if (typaram) {
  1160                         if (ch != '>')
  1161                             throw new ParseException("dc.gt.expected");
  1162                         nextChar();
  1165                     skipWhitespace();
  1166                     List<DCTree> desc = blockContent();
  1167                     return m.at(pos).Param(typaram, id, desc);
  1169             },
  1171             // @return description
  1172             new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
  1173                 public DCTree parse(int pos) {
  1174                     List<DCTree> description = blockContent();
  1175                     return m.at(pos).Return(description);
  1177             },
  1179             // @see reference | quoted-string | HTML
  1180             new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
  1181                 public DCTree parse(int pos) throws ParseException {
  1182                     skipWhitespace();
  1183                     switch (ch) {
  1184                         case '"':
  1185                             DCText string = quotedString();
  1186                             if (string != null) {
  1187                                 skipWhitespace();
  1188                                 if (ch == '@')
  1189                                     return m.at(pos).See(List.<DCTree>of(string));
  1191                             break;
  1193                         case '<':
  1194                             List<DCTree> html = blockContent();
  1195                             if (html != null)
  1196                                 return m.at(pos).See(html);
  1197                             break;
  1199                         case '@':
  1200                             if (newline)
  1201                                 throw new ParseException("dc.no.content");
  1202                             break;
  1204                         case EOI:
  1205                             if (bp == buf.length - 1)
  1206                                 throw new ParseException("dc.no.content");
  1207                             break;
  1209                         default:
  1210                             if (isJavaIdentifierStart(ch) || ch == '#') {
  1211                                 DCReference ref = reference(true);
  1212                                 List<DCTree> description = blockContent();
  1213                                 return m.at(pos).See(description.prepend(ref));
  1216                     throw new ParseException("dc.unexpected.content");
  1218             },
  1220             // @serialData data-description
  1221             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
  1222                 public DCTree parse(int pos) {
  1223                     List<DCTree> description = blockContent();
  1224                     return m.at(pos).SerialData(description);
  1226             },
  1228             // @serialField field-name field-type description
  1229             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
  1230                 public DCTree parse(int pos) throws ParseException {
  1231                     skipWhitespace();
  1232                     DCIdentifier name = identifier();
  1233                     skipWhitespace();
  1234                     DCReference type = reference(false);
  1235                     List<DCTree> description = null;
  1236                     if (isWhitespace(ch)) {
  1237                         skipWhitespace();
  1238                         description = blockContent();
  1240                     return m.at(pos).SerialField(name, type, description);
  1242             },
  1244             // @serial field-description | include | exclude
  1245             new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
  1246                 public DCTree parse(int pos) {
  1247                     List<DCTree> description = blockContent();
  1248                     return m.at(pos).Serial(description);
  1250             },
  1252             // @since since-text
  1253             new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
  1254                 public DCTree parse(int pos) {
  1255                     List<DCTree> description = blockContent();
  1256                     return m.at(pos).Since(description);
  1258             },
  1260             // @throws class-name description
  1261             new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
  1262                 public DCTree parse(int pos) throws ParseException {
  1263                     skipWhitespace();
  1264                     DCReference ref = reference(false);
  1265                     List<DCTree> description = blockContent();
  1266                     return m.at(pos).Throws(ref, description);
  1268             },
  1270             // {@value package.class#field}
  1271             new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
  1272                 public DCTree parse(int pos) throws ParseException {
  1273                     DCReference ref = reference(true);
  1274                     skipWhitespace();
  1275                     if (ch == '}') {
  1276                         nextChar();
  1277                         return m.at(pos).Value(ref);
  1279                     nextChar();
  1280                     throw new ParseException("dc.unexpected.content");
  1282             },
  1284             // @version version-text
  1285             new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
  1286                 public DCTree parse(int pos) {
  1287                     List<DCTree> description = blockContent();
  1288                     return m.at(pos).Version(description);
  1290             },
  1291         };
  1293         tagParsers = new HashMap<Name,TagParser>();
  1294         for (TagParser p: parsers)
  1295             tagParsers.put(names.fromString(p.getTreeKind().tagName), p);

mercurial