/*
 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.tools.javadoc;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import com.sun.tools.javadoc.JavaScriptScanner.TagParser.Kind;

import static com.sun.tools.javac.util.LayoutCharacters.EOI;

/**
 * Parser to detect use of JavaScript in documentation comments.
 *
 * <p>The scanner walks a comment character by character. It reports
 * (through {@link Reporter#report()}) every {@code script} start tag, every
 * attribute whose name starts with {@code on}, and every URI-valued attribute
 * whose value starts with {@code javascript:}.
 *
 * <p>Instances keep the scan position in mutable fields and are therefore
 * not safe for concurrent use.
 */
@Deprecated
public class JavaScriptScanner {
    /** Callback invoked once for every JavaScript construct that is found. */
    public static interface Reporter {
        void report();
    }

    /** Signals malformed tag content; the message is a diagnostic key. */
    static class ParseException extends Exception {
        private static final long serialVersionUID = 0;
        ParseException(String key) {
            super(key);
        }
    }

    private Reporter reporter;

    /** The input buffer, index of most recent character read,
     *  index of one past last character in buffer.
     */
    protected char[] buf;
    protected int bp;
    protected int buflen;

    /** The current character.
     */
    protected char ch;

    /**
     * True while only whitespace has been seen since the last line
     * terminator (or since the start of input). An '@' read while this
     * flag is set starts a block tag.
     */
    private boolean newline = true;

    /** Tag parsers, keyed by tag name without the leading '@'. */
    Map<String, TagParser> tagParsers;

    /** Lower-case names of the attributes whose value is a URI. */
    Set<String> uriAttrs;

    public JavaScriptScanner() {
        initTagParsers();
        initURIAttrs();
    }

    /**
     * Scans a documentation comment and reports any use of JavaScript.
     *
     * @param comment the text of the comment, without the comment delimiters
     * @param r       the reporter to notify for each occurrence found
     */
    public void parse(String comment, Reporter r) {
        reporter = r;
        String c = comment;
        // One extra slot holds the EOI sentinel so that reading at buflen is safe.
        buf = new char[c.length() + 1];
        c.getChars(0, c.length(), buf, 0);
        buf[buf.length - 1] = EOI;
        buflen = buf.length - 1;
        bp = -1;
        newline = true;
        nextChar();

        blockContent();
        blockTags();
    }

    /** Reports the tag if it is a {@code script} element (case-insensitive). */
    private void checkHtmlTag(String tag) {
        if (tag.equalsIgnoreCase("script")) {
            reporter.report();
        }
    }

    /**
     * Reports the attribute if it is an event handler, or if it is a
     * URI-valued attribute whose value is a {@code javascript:} URL.
     */
    private void checkHtmlAttr(String name, String value) {
        String n = name.toLowerCase(Locale.ENGLISH);
        // https://www.w3.org/TR/html52/fullindex.html#attributes-table
        // See https://www.w3.org/TR/html52/webappapis.html#events-event-handlers
        // An event handler has a name, which always starts with "on" and is followed by
        // the name of the event for which it is intended.
        if (n.startsWith("on")
                || (uriAttrs.contains(n) && isJavaScriptUrl(value))) {
            reporter.report();
        }
    }

    /**
     * Tests whether an attribute value is a {@code javascript:} URL.
     * TAB, LF and CR are removed from the whole value and leading/trailing
     * characters up to U+0020 are trimmed before the scheme is compared,
     * because URL parsers in browsers discard those characters too.
     * Character references (such as {@code &#106;}) are not decoded here.
     */
    private static boolean isJavaScriptUrl(String value) {
        if (value == null) {
            return false;
        }
        String v = value.replace("\t", "").replace("\n", "").replace("\r", "");
        return v.toLowerCase(Locale.ENGLISH).trim().startsWith("javascript:");
    }

    /**
     * Advances to the next character, or stays on the EOI sentinel once the
     * end of the buffer has been reached. Sets {@code newline} when the
     * character read is a line terminator.
     */
    void nextChar() {
        ch = buf[bp < buflen ? ++bp : buflen];
        switch (ch) {
            case '\f': case '\n': case '\r':
                newline = true;
        }
    }

    /**
     * Read block content, consisting of text, html and inline tags.
     * Terminated by the end of input, or the beginning of the next block tag:
     * i.e. @ as the first non-whitespace character on a line.
     */
    @SuppressWarnings("fallthrough")
    protected void blockContent() {

        loop:
        while (bp < buflen) {
            switch (ch) {
                case '\n': case '\r': case '\f':
                    newline = true;
                    // fallthrough

                case ' ': case '\t':
                    nextChar();
                    break;

                case '&':
                    entity(null);
                    break;

                case '<':
                    // '<' is a non-whitespace character: an '@' met later on
                    // this line (e.g. inside an attribute value) is not the
                    // start of a block tag.
                    newline = false;
                    html();
                    break;

                case '>':
                    newline = false;
                    nextChar();
                    break;

                case '{':
                    inlineTag(null);
                    break;

                case '@':
                    if (newline) {
                        break loop;
                    }
                    // fallthrough

                default:
                    newline = false;
                    nextChar();
            }
        }
    }

    /**
     * Read a series of block tags, including their content.
     * Each tag is handled by {@link #blockTag()}.
     */
    protected void blockTags() {
        while (ch == '@')
            blockTag();
    }

    /**
     * Read a single block tag, including its content.
     * Tags with a registered block parser parse their content with it.
     * Unknown tags, inline tag names used in block position and malformed
     * tags have their content scanned as ordinary block content.
     *
     * <p>Whatever the tag, the remaining content up to the next block tag
     * (or end of input) is always scanned, so that {@link #blockTags()} never
     * stops before the end of the comment.
     */
    protected void blockTag() {
        int p = bp;
        try {
            nextChar();
            if (isIdentifierStart(ch)) {
                String name = readTagName();
                TagParser tp = tagParsers.get(name);
                if (tp != null && tp.getKind() == Kind.BLOCK) {
                    tp.parse(p);
                }
            }
        } catch (ParseException e) {
            // Malformed tag: the rest of its content is scanned below.
        }
        // A no-op when the tag parser already consumed everything.
        blockContent();
    }

    /**
     * Handles a '{' in content: consumes it and, when it is followed by
     * '@', reads the inline tag.
     */
    protected void inlineTag(Void list) {
        newline = false;
        nextChar();
        if (ch == '@') {
            inlineTag();
        }
    }

    /**
     * Read a single inline tag, including its content.
     * Tags with a registered inline parser parse their content with it.
     * The content of any other tag is skipped as plain text.
     * Malformed tags are abandoned at the point where the error was found.
     */
    protected void inlineTag() {
        int p = bp - 1;
        try {
            nextChar();
            if (isIdentifierStart(ch)) {
                String name = readTagName();
                TagParser tp = tagParsers.get(name);

                skipWhitespace();
                if (tp != null && tp.getKind() == TagParser.Kind.INLINE) {
                    tp.parse(p);
                } else {
                    // unknown tag, or block tag (ex: @see) in inline content:
                    // skip the content and the closing '}'
                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL);
                    nextChar();
                }
            }
        } catch (ParseException e) {
            // Deliberately ignored: the caller resumes scanning at the
            // current position.
        }
    }

    private static enum WhitespaceRetentionPolicy {
        RETAIN_ALL,
        REMOVE_FIRST_SPACE,
        REMOVE_ALL
    }

    /**
     * Read plain text content of an inline tag.
     * Matching pairs of { } are skipped; the text is terminated by the first
     * unmatched }, which is left as the current character.
     * It is an error if the beginning of the next tag is detected.
     *
     * @throws ParseException if the closing } is not found
     */
    private void inlineText(WhitespaceRetentionPolicy whitespacePolicy) throws ParseException {
        switch (whitespacePolicy) {
            case REMOVE_ALL:
                skipWhitespace();
                break;
            case REMOVE_FIRST_SPACE:
                if (ch == ' ')
                    nextChar();
                break;
            case RETAIN_ALL:
            default:
                // do nothing
                break;

        }
        int depth = 1;

        loop:
        while (bp < buflen) {
            switch (ch) {
                case '\n': case '\r': case '\f':
                    newline = true;
                    break;

                case ' ': case '\t':
                    break;

                case '{':
                    newline = false;
                    depth++;
                    break;

                case '}':
                    if (--depth == 0) {
                        return;
                    }
                    newline = false;
                    break;

                case '@':
                    if (newline)
                        break loop;
                    newline = false;
                    break;

                default:
                    newline = false;
                    break;
            }
            nextChar();
        }
        throw new ParseException("dc.unterminated.inline.tag");
    }

    /**
     * Read Java class name, possibly followed by member.
     * Matching pairs of {@literal ( )} and {@literal < >} are skipped.
     * The text is terminated by the first whitespace outside such a pair,
     * by }, or by the beginning of the next block tag.
     *
     * @param allowMember not used by this scanner
     * @throws ParseException if brackets are unbalanced at the end of the
     *         reference
     */
    // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
    // TODO: improve quality of parse to forbid bad constructions.
    // TODO: update to use ReferenceParser
    @SuppressWarnings("fallthrough")
    protected void reference(boolean allowMember) throws ParseException {
        int pos = bp;
        int depth = 0;

        // scan to find the end of the signature, by looking for the first
        // whitespace not enclosed in () or <>, or the end of the tag
        loop:
        while (bp < buflen) {
            switch (ch) {
                case '\n': case '\r': case '\f':
                    newline = true;
                    // fallthrough

                case ' ': case '\t':
                    if (depth == 0)
                        break loop;
                    break;

                case '(':
                case '<':
                    newline = false;
                    depth++;
                    break;

                case ')':
                case '>':
                    newline = false;
                    --depth;
                    break;

                case '}':
                    if (bp == pos)
                        return;
                    newline = false;
                    break loop;

                case '@':
                    if (newline)
                        break loop;
                    // fallthrough

                default:
                    newline = false;

            }
            nextChar();
        }

        if (depth != 0)
            throw new ParseException("dc.unterminated.signature");
    }

    /**
     * Read a Java identifier, after skipping leading whitespace.
     *
     * @throws ParseException if the current character cannot start a Java
     *         identifier
     */
    protected void identifier() throws ParseException {
        skipWhitespace();

        if (isJavaIdentifierStart(ch)) {
            readJavaIdentifier();
            return;
        }

        throw new ParseException("dc.identifier.expected");
    }

    /**
     * Read a quoted string, up to and including the closing quote.
     * Stops early at the end of input or at the beginning of the next
     * block tag.
     */
    protected void quotedString() {
        nextChar();

        loop:
        while (bp < buflen) {
            switch (ch) {
                case '\n': case '\r': case '\f':
                    newline = true;
                    break;

                case ' ': case '\t':
                    break;

                case '"':
                    nextChar();
                    // trim trailing white-space?
                    return;

                case '@':
                    if (newline)
                        break loop;

            }
            nextChar();
        }
    }

    /**
     * Read a term ie. one word.
     * The word ends at whitespace, at the } that closes the enclosing tag,
     * at the end of input, or at the beginning of the next block tag.
     * Matching pairs of { } inside the word are skipped.
     */
    @SuppressWarnings("fallthrough")
    protected void inlineWord() {
        int depth = 0;
        loop:
        while (bp < buflen) {
            switch (ch) {
                case '\n':
                    newline = true;
                    // fallthrough

                case '\r': case '\f': case ' ': case '\t':
                    return;

                case '@':
                    if (newline)
                        break loop;
                    // A mid-line '@' is an ordinary character. It must not
                    // reach the '{' case below: a bumped depth would let the
                    // word run past the closing '}' of the tag.
                    break;

                case '{':
                    depth++;
                    break;

                case '}':
                    if (depth == 0 || --depth == 0)
                        return;
                    break;
            }
            newline = false;
            nextChar();
        }
    }

    /**
     * Read general text content of an inline tag, including HTML entities and elements.
     * Matching pairs of { } are skipped; the text is terminated by the first
     * unmatched }, which is consumed. Stops early at the end of input or at
     * the beginning of the next block tag.
     */
    @SuppressWarnings("fallthrough")
    private void inlineContent() {

        skipWhitespace();
        int depth = 1;

        loop:
        while (bp < buflen) {

            switch (ch) {
                case '\n': case '\r': case '\f':
                    newline = true;
                    // fall through

                case ' ': case '\t':
                    nextChar();
                    break;

                case '&':
                    entity(null);
                    break;

                case '<':
                    newline = false;
                    html();
                    break;

                case '{':
                    newline = false;
                    depth++;
                    nextChar();
                    break;

                case '}':
                    newline = false;
                    if (--depth == 0) {
                        nextChar();
                        return;
                    }
                    nextChar();
                    break;

                case '@':
                    if (newline)
                        break loop;
                    // fallthrough

                default:
                    nextChar();
                    break;
            }
        }

    }

    /** Handles a '&' in content: clears {@code newline} and reads the entity. */
    protected void entity(Void list) {
        newline = false;
        entity();
    }

    /**
     * Read an HTML entity.
     * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
     * The terminating ';' is consumed when present.
     */
    protected void entity() {
        nextChar();
        boolean recognized = false;
        if (ch == '#') {
            nextChar();
            if (isDecimalDigit(ch)) {
                nextChar();
                while (isDecimalDigit(ch))
                    nextChar();
                recognized = true;
            } else if (ch == 'x' || ch == 'X') {
                nextChar();
                if (isHexDigit(ch)) {
                    nextChar();
                    while (isHexDigit(ch))
                        nextChar();
                    recognized = true;
                }
            }
        } else if (isIdentifierStart(ch)) {
            readIdentifier();
            recognized = true;
        }

        if (recognized && ch == ';') {
            nextChar();
        }
    }

    /**
     * Read the start or end of an HTML tag, or an HTML comment:
     * {@literal <identifier attrs> } or {@literal </identifier> }
     * or {@literal <!-- ... --> }.
     * Start tags and their attributes are checked for JavaScript.
     * If the construct is not well-formed, scanning resumes at the
     * character following the initial {@literal <}.
     */
    protected void html() {
        int p = bp;
        nextChar();
        if (isIdentifierStart(ch)) {
            String name = readIdentifier();
            checkHtmlTag(name);
            htmlAttrs();
            if (ch == '/') {
                nextChar();
            }
            if (ch == '>') {
                nextChar();
                return;
            }
        } else if (ch == '/') {
            nextChar();
            if (isIdentifierStart(ch)) {
                readIdentifier();
                skipWhitespace();
                if (ch == '>') {
                    nextChar();
                    return;
                }
            }
        } else if (ch == '!') {
            nextChar();
            if (ch == '-') {
                nextChar();
                if (ch == '-') {
                    nextChar();
                    while (bp < buflen) {
                        int dash = 0;
                        while (ch == '-') {
                            dash++;
                            nextChar();
                        }
                        // Strictly speaking, a comment should not contain "--"
                        // so dash > 2 is an error, dash == 2 implies ch == '>'
                        // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments
                        // for more details.
                        if (dash >= 2 && ch == '>') {
                            nextChar();
                            return;
                        }

                        nextChar();
                    }
                }
            }
        }

        // Not well-formed: rewind to just after the '<' and let the caller
        // rescan the text.
        bp = p + 1;
        ch = buf[bp];
    }

    /**
     * Read a series of HTML attributes, terminated by {@literal > }.
     * Each attribute is of the form {@literal identifier[=value] }.
     * "value" may be unquoted, single-quoted, or double-quoted.
     * Every attribute that is read completely is checked for JavaScript.
     */
    protected void htmlAttrs() {
        skipWhitespace();

        loop:
        while (isIdentifierStart(ch)) {
            // The attribute name is non-whitespace text, so an '@' later on
            // this line is not at the start of the line.
            newline = false;
            String name = readAttributeName();
            skipWhitespace();
            StringBuilder value = new StringBuilder();
            if (ch == '=') {
                newline = false;
                nextChar();
                skipWhitespace();
                if (ch == '\'' || ch == '"') {
                    char quote = ch;
                    newline = false;
                    nextChar();
                    while (bp < buflen && ch != quote) {
                        if (newline && ch == '@') {
                            // '@' is the first non-whitespace character on a
                            // line: a block tag starts here, so the html tag
                            // itself is not terminated correctly.
                            // No point trying to read more.
                            break loop;
                        }
                        if (!isWhitespace(ch)) {
                            newline = false;
                        }
                        value.append(ch);
                        nextChar();
                    }
                    nextChar();
                } else {
                    while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
                        value.append(ch);
                        nextChar();
                    }
                }
                skipWhitespace();
            }
            checkHtmlAttr(name, value.toString());
        }
    }

    /** Consumes one character of an attribute value, or the entity or inline tag starting there. */
    protected void attrValueChar(Void list) {
        switch (ch) {
            case '&':
                entity(list);
                break;

            case '{':
                inlineTag(list);
                break;

            default:
                nextChar();
        }
    }

    protected boolean isIdentifierStart(char ch) {
        return Character.isUnicodeIdentifierStart(ch);
    }

    /** Reads an identifier starting at the current character and returns it. */
    protected String readIdentifier() {
        int start = bp;
        nextChar();
        while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
            nextChar();
        return new String(buf, start, bp - start);
    }

    /** Reads an attribute name: an identifier that may also contain '-'. */
    protected String readAttributeName() {
        int start = bp;
        nextChar();
        while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '-'))
            nextChar();
        return new String(buf, start, bp - start);
    }

    /** Reads a tag name: an identifier that may also contain '.', '-' and ':'. */
    protected String readTagName() {
        int start = bp;
        nextChar();
        while (bp < buflen
                && (Character.isUnicodeIdentifierPart(ch) || ch == '.'
                || ch == '-' || ch == ':')) {
            nextChar();
        }
        return new String(buf, start, bp - start);
    }

    protected boolean isJavaIdentifierStart(char ch) {
        return Character.isJavaIdentifierStart(ch);
    }

    /** Reads a Java identifier starting at the current character and returns it. */
    protected String readJavaIdentifier() {
        int start = bp;
        nextChar();
        while (bp < buflen && Character.isJavaIdentifierPart(ch))
            nextChar();
        return new String(buf, start, bp - start);
    }

    protected boolean isDecimalDigit(char ch) {
        return ('0' <= ch && ch <= '9');
    }

    protected boolean isHexDigit(char ch) {
        return ('0' <= ch && ch <= '9')
                || ('a' <= ch && ch <= 'f')
                || ('A' <= ch && ch <= 'F');
    }

    protected boolean isUnquotedAttrValueTerminator(char ch) {
        switch (ch) {
            case '\f': case '\n': case '\r': case '\t':
            case ' ':
            case '"': case '\'': case '`':
            case '=': case '<': case '>':
                return true;
            default:
                return false;
        }
    }

    protected boolean isWhitespace(char ch) {
        return Character.isWhitespace(ch);
    }

    protected void skipWhitespace() {
        while (isWhitespace(ch)) {
            nextChar();
        }
    }

    /**
     * @param start position of first character of string
     * @param end position of character beyond last character to be included
     */
    String newString(int start, int end) {
        return new String(buf, start, end - start);
    }

    /** Parser for the content of one javadoc tag, identified by its name. */
    static abstract class TagParser {
        enum Kind { INLINE, BLOCK }

        final Kind kind;
        final String name;


        TagParser(Kind k, String tk) {
            kind = k;
            name = tk;
        }

        // retainWhiteSpace is accepted but not used.
        TagParser(Kind k, String tk, boolean retainWhiteSpace) {
            this(k, tk);
        }

        Kind getKind() {
            return kind;
        }

        String getName() {
            return name;
        }

        /**
         * Parses the content of the tag, starting at the current character.
         *
         * @param pos position at which the tag starts
         * @throws ParseException if the content is malformed
         */
        abstract void parse(int pos) throws ParseException;
    }

    /**
     * Registers the parsers for the standard javadoc tags in
     * {@code tagParsers}, keyed by tag name without the leading '@'.
     */
    @SuppressWarnings("deprecation")
    private void initTagParsers() {
        TagParser[] parsers = {
            // @author name-text
            new TagParser(Kind.BLOCK, "author") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },

            // {@code text}
            new TagParser(Kind.INLINE, "code", true) {
                @Override
                public void parse(int pos) throws ParseException {
                    inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
                    nextChar();
                }
            },

            // @deprecated deprecated-text
            new TagParser(Kind.BLOCK, "deprecated") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },

            // {@docRoot}
            new TagParser(Kind.INLINE, "docRoot") {
                @Override
                public void parse(int pos) throws ParseException {
                    if (ch == '}') {
                        nextChar();
                        return;
                    }
                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
                    nextChar();
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // @exception class-name description
            new TagParser(Kind.BLOCK, "exception") {
                @Override
                public void parse(int pos) throws ParseException {
                    skipWhitespace();
                    reference(false);
                    blockContent();
                }
            },

            // @hidden hidden-text
            new TagParser(Kind.BLOCK, "hidden") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },

            // @index search-term options-description
            new TagParser(Kind.INLINE, "index") {
                @Override
                public void parse(int pos) throws ParseException {
                    skipWhitespace();
                    if (ch == '}') {
                        throw new ParseException("dc.no.content");
                    }
                    if (ch == '"') {
                        quotedString();
                    } else {
                        inlineWord();
                    }
                    skipWhitespace();
                    if (ch != '}') {
                        inlineContent();
                    } else {
                        nextChar();
                    }
                }
            },

            // {@inheritDoc}
            new TagParser(Kind.INLINE, "inheritDoc") {
                @Override
                public void parse(int pos) throws ParseException {
                    if (ch == '}') {
                        nextChar();
                        return;
                    }
                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
                    nextChar();
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // {@link package.class#member label}
            new TagParser(Kind.INLINE, "link") {
                @Override
                public void parse(int pos) throws ParseException {
                    reference(true);
                    inlineContent();
                }
            },

            // {@linkplain package.class#member label}
            new TagParser(Kind.INLINE, "linkplain") {
                @Override
                public void parse(int pos) throws ParseException {
                    reference(true);
                    inlineContent();
                }
            },

            // {@literal text}
            new TagParser(Kind.INLINE, "literal", true) {
                @Override
                public void parse(int pos) throws ParseException {
                    inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
                    nextChar();
                }
            },

            // @param parameter-name description
            new TagParser(Kind.BLOCK, "param") {
                @Override
                public void parse(int pos) throws ParseException {
                    skipWhitespace();

                    boolean typaram = false;
                    if (ch == '<') {
                        typaram = true;
                        nextChar();
                    }

                    identifier();

                    if (typaram) {
                        if (ch != '>')
                            throw new ParseException("dc.gt.expected");
                        nextChar();
                    }

                    skipWhitespace();
                    blockContent();
                }
            },

            // @return description
            new TagParser(Kind.BLOCK, "return") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },

            // @see reference | quoted-string | HTML
            new TagParser(Kind.BLOCK, "see") {
                @Override
                public void parse(int pos) throws ParseException {
                    skipWhitespace();
                    switch (ch) {
                        case '"':
                            quotedString();
                            skipWhitespace();
                            if (ch == '@'
                                    || ch == EOI && bp == buf.length - 1) {
                                return;
                            }
                            break;

                        case '<':
                            blockContent();
                            return;

                        case '@':
                            if (newline)
                                throw new ParseException("dc.no.content");
                            break;

                        case EOI:
                            if (bp == buf.length - 1)
                                throw new ParseException("dc.no.content");
                            break;

                        default:
                            if (isJavaIdentifierStart(ch) || ch == '#') {
                                reference(true);
                                blockContent();
                                return;
                            }
                    }
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // @serialData data-description
            // (registered without '@': readTagName() never includes it)
            new TagParser(Kind.BLOCK, "serialData") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },

            // @serialField field-name field-type description
            new TagParser(Kind.BLOCK, "serialField") {
                @Override
                public void parse(int pos) throws ParseException {
                    skipWhitespace();
                    identifier();
                    skipWhitespace();
                    reference(false);
                    if (isWhitespace(ch)) {
                        skipWhitespace();
                        blockContent();
                    }
                }
            },

            // @serial field-description | include | exclude
            new TagParser(Kind.BLOCK, "serial") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },

            // @since since-text
            new TagParser(Kind.BLOCK, "since") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },

            // @throws class-name description
            new TagParser(Kind.BLOCK, "throws") {
                @Override
                public void parse(int pos) throws ParseException {
                    skipWhitespace();
                    reference(false);
                    blockContent();
                }
            },

            // {@value package.class#field}
            new TagParser(Kind.INLINE, "value") {
                @Override
                public void parse(int pos) throws ParseException {
                    reference(true);
                    skipWhitespace();
                    if (ch == '}') {
                        nextChar();
                        return;
                    }
                    nextChar();
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // @version version-text
            new TagParser(Kind.BLOCK, "version") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },
        };

        tagParsers = new HashMap<>();
        for (TagParser p: parsers)
            tagParsers.put(p.getName(), p);

    }

    /** Fills {@code uriAttrs} with the lower-case names of the URI-valued attributes. */
    private void initURIAttrs() {
        uriAttrs = new HashSet<>(Arrays.asList(
            // See https://www.w3.org/TR/html4/sgml/dtd.html
            //     https://www.w3.org/TR/html5/
            // These are all the attributes that take a %URI or a valid URL potentially surrounded
            // by spaces
            "action",  "cite",  "classid",  "codebase",  "data",
            "datasrc", "for",   "href",     "longdesc",  "profile",
            "src",     "usemap"
        ));
    }

}