test/tools/doclint/tidy/util/Main.java

Wed, 13 Aug 2014 14:50:00 -0700

author
katleman
date
Wed, 13 Aug 2014 14:50:00 -0700
changeset 2549
0b6cc4ea670f
parent 2061
3d61984b077c
child 2525
2eb010b6cb22
permissions
-rw-r--r--

Added tag jdk8u40-b01 for changeset bf89a471779d

jjg@2061 1 /*
jjg@2061 2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
jjg@2061 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
jjg@2061 4 *
jjg@2061 5 * This code is free software; you can redistribute it and/or modify it
jjg@2061 6 * under the terms of the GNU General Public License version 2 only, as
jjg@2061 7 * published by the Free Software Foundation.
jjg@2061 8 *
jjg@2061 9 * This code is distributed in the hope that it will be useful, but WITHOUT
jjg@2061 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
jjg@2061 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
jjg@2061 12 * version 2 for more details (a copy is included in the LICENSE file that
jjg@2061 13 * accompanied this code).
jjg@2061 14 *
jjg@2061 15 * You should have received a copy of the GNU General Public License version
jjg@2061 16 * 2 along with this work; if not, write to the Free Software Foundation,
jjg@2061 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
jjg@2061 18 *
jjg@2061 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
jjg@2061 20 * or visit www.oracle.com if you need additional information or have any
jjg@2061 21 * questions.
jjg@2061 22 */
jjg@2061 23
jjg@2061 24
jjg@1455 25 package tidystats;
jjg@1455 26
jjg@1455 27 import java.io.IOException;
jjg@1455 28 import java.nio.charset.Charset;
jjg@1455 29 import java.nio.file.FileSystem;
jjg@1455 30 import java.nio.file.FileSystems;
jjg@1455 31 import java.nio.file.Files;
jjg@1455 32 import java.nio.file.Path;
jjg@1455 33 import java.util.ArrayList;
jjg@1455 34 import java.util.Comparator;
jjg@1455 35 import java.util.HashMap;
jjg@1455 36 import java.util.List;
jjg@1455 37 import java.util.Map;
jjg@1455 38 import java.util.Set;
jjg@1455 39 import java.util.TreeMap;
jjg@1455 40 import java.util.TreeSet;
jjg@1455 41 import java.util.regex.Matcher;
jjg@1455 42 import java.util.regex.Pattern;
jjg@1455 43
/**
 * Generate statistics from the files generated by tidy.sh.
 *
 * <p>The tidy.sh script is used to run tidy on all the HTML files
 * in a directory, creating files in a new directory, and for each
 * HTML file, it writes the console output from tidy into a file
 * beside the fixed up file, with an additional .tidy extension.
 *
 * <p>This program will scan a directory for *.tidy files and
 * analyze the messages reported by tidy, in order to generate a
 * report with statistics on the various messages that were
 * reported by tidy.
 *
 * <p>Typical usage:
 * <pre>
 * $ bash /path/to/tidy.sh /path/to/htmldir
 * $ javac -d /path/to/classes /path/to/Main.java
 * $ java -cp /path/to/classes tidystats.Main /path/to/htmldir.tidy
 * </pre>
 *
 * <p>Internally, the program works by matching lines in the *.tidy
 * files against a series of regular expressions that are used to
 * categorize the messages. The set of regular expressions was
 * empirically determined by running the program on the output from
 * running tidy.sh on all the generated JDK documentation. It is
 * possible that tidy may generate more/different messages on other
 * doc sets, in which case, the set of regexes in the program should
 * be updated.
 */
public class Main {
    /**
     * Command-line entry point: scans the given paths and prints a report.
     *
     * @param args files and/or directories to scan
     * @throws IOException if a file or directory cannot be read
     * @throws IllegalArgumentException if the first argument starts with "-"
     */
    public static void main(String... args) throws IOException {
        new Main().run(args);
    }

    /**
     * Scans every path named in {@code args}, then prints the summary
     * counters followed by the per-message counts, most frequent first.
     *
     * @param args files and/or directories to scan
     * @throws IOException if a file or directory cannot be read
     * @throws IllegalArgumentException if the first argument starts with "-"
     */
    void run(String... args) throws IOException {
        FileSystem fs = FileSystems.getDefault();
        List<Path> paths = new ArrayList<>();

        // Option parsing: no options are supported, so a leading "-xxx"
        // argument is rejected. The first non-option ends option parsing.
        int i;
        for (i = 0; i < args.length; i++) {
            String arg = args[i];
            if (arg.startsWith("-")) {
                throw new IllegalArgumentException(arg);
            } else {
                break;
            }
        }

        // Everything that remains is a path to scan.
        for ( ; i < args.length; i++) {
            paths.add(fs.getPath(args[i]));
        }

        for (Path p : paths) {
            scan(p);
        }

        print("%6d files read", files);
        print("%6d files had no errors or warnings", ok);
        print("%6d files reported \"Not all warnings/errors were shown.\"", overflow);
        print("%6d errors found", errs);
        print("%6d warnings found", warns);
        print("%6d recommendations to use CSS", css);
        print("");

        // Group the message patterns by hit count, highest count first;
        // within a count the patterns are listed in natural string order.
        Map<Integer, Set<String>> sortedCounts = new TreeMap<>(
                new Comparator<Integer>() {
                    @Override
                    public int compare(Integer o1, Integer o2) {
                        return o2.compareTo(o1);
                    }
                });

        for (Map.Entry<Pattern, Integer> e : counts.entrySet()) {
            Integer n = e.getValue();
            Set<String> set = sortedCounts.get(n);
            if (set == null) {
                set = new TreeSet<>();
                sortedCounts.put(n, set);
            }
            set.add(e.getKey().toString());
        }

        for (Map.Entry<Integer, Set<String>> e : sortedCounts.entrySet()) {
            for (String p : e.getValue()) {
                // Drop the leading ".*" that every message pattern starts with.
                if (p.startsWith(".*")) {
                    p = p.substring(2);
                }
                print("%6d: %s", e.getKey(), p);
            }
        }
    }

    /**
     * Recursively scans a path: directories are descended into, and files
     * accepted by {@link #isTidyFile} are read and analyzed. Anything else
     * is ignored.
     *
     * @param p the file or directory to scan
     * @throws IOException if a directory or file cannot be read
     */
    void scan(Path p) throws IOException {
        if (Files.isDirectory(p)) {
            // The directory stream holds an open directory handle, so it must
            // be closed; otherwise a deep tree leaks one handle per directory.
            try (java.nio.file.DirectoryStream<Path> children = Files.newDirectoryStream(p)) {
                for (Path c : children) {
                    scan(c);
                }
            }
        } else if (isTidyFile(p)) {
            scan(Files.readAllLines(p, Charset.defaultCharset()));
        }
    }

    /**
     * Tells whether a path is a regular file whose name ends with ".tidy".
     *
     * @param p the path to test
     * @return true if {@code p} is a regular file with a ".tidy" suffix
     */
    boolean isTidyFile(Path p) {
        return Files.isRegularFile(p) && p.getFileName().toString().endsWith(".tidy");
    }

    /**
     * Analyzes the lines of one tidy output file and updates the counters.
     * Each line is classified by the first of these that matches: the
     * "no warnings" line, the warning/error summary line, an individual
     * error/warning message, or the CSS recommendation. Individual messages
     * that match none of {@link #patterns} are reported on System.err.
     *
     * @param lines the content of one tidy output file
     */
    void scan(List<String> lines) {
        files++;
        for (String line : lines) {
            Matcher m;
            if (okPattern.matcher(line).matches()) {
                ok++;
            } else if ((m = countPattern.matcher(line)).matches()) {
                warns += Integer.parseInt(m.group(1));
                errs += Integer.parseInt(m.group(2));
                // Group 3 is only set when tidy truncated its output.
                if (m.group(3) != null) {
                    overflow++;
                }
            } else if (guardPattern.matcher(line).matches()) {
                boolean found = false;
                for (Pattern p : patterns) {
                    if (p.matcher(line).matches()) {
                        found = true;
                        count(p);
                        break;
                    }
                }
                if (!found) {
                    System.err.println("Unrecognized line: " + line);
                }
            } else if (cssPattern.matcher(line).matches()) {
                css++;
            }
        }
    }

    /** Number of matching lines seen so far for each message pattern. */
    Map<Pattern, Integer> counts = new HashMap<>();

    /**
     * Increments the hit count for a message pattern.
     *
     * @param p the pattern that matched a line
     */
    void count(Pattern p) {
        Integer i = counts.get(p);
        counts.put(p, (i == null) ? 1 : i + 1);
    }

    /**
     * Prints a formatted line on System.out.
     *
     * @param format a {@link String#format} format string
     * @param args the arguments for the format string
     */
    void print(String format, Object... args) {
        System.out.println(String.format(format, args));
    }

    /** Matches the line tidy writes when a file is clean. */
    Pattern okPattern = Pattern.compile("No warnings or errors were found.");

    /**
     * Matches tidy's summary line. Groups 1 and 2 are the warning and error
     * totals; group 3 is set if the "not all shown" notice follows. The
     * trailing "s" is optional so that singular forms such as
     * "1 warning, 0 errors were found!" are counted as well.
     */
    Pattern countPattern = Pattern.compile("([0-9]+) warnings?, ([0-9]+) errors? were found!.*?(Not all warnings/errors were shown.)?");

    /** Matches tidy's recommendation to use CSS. */
    Pattern cssPattern = Pattern.compile("You are recommended to use CSS.*");

    /** Matches any individual error or warning message line. */
    Pattern guardPattern = Pattern.compile("line [0-9]+ column [0-9]+ - (Error|Warning):.*");

    /**
     * The known message categories. A line is attributed to the FIRST
     * pattern that matches it, so a specific pattern must be listed before
     * any more general pattern that would also match the same line.
     */
    Pattern[] patterns = {
        Pattern.compile(".*Error: <.*> is not recognized!"),
        Pattern.compile(".*Error: missing quote mark for attribute value"),
        Pattern.compile(".*Warning: <.*> anchor \".*\" already defined"),
        Pattern.compile(".*Warning: <.*> attribute \".*\" has invalid value \".*\""),
        Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
        Pattern.compile(".*Warning: <.*> attribute with missing trailing quote mark"),
        Pattern.compile(".*Warning: <.*> dropping value \".*\" for repeated attribute \".*\""),
        Pattern.compile(".*Warning: <.*> inserting \".*\" attribute"),
        Pattern.compile(".*Warning: <.*> is probably intended as </.*>"),
        Pattern.compile(".*Warning: <.*> isn't allowed in <.*> elements"),
        Pattern.compile(".*Warning: <.*> lacks \".*\" attribute"),
        Pattern.compile(".*Warning: <.*> missing '>' for end of tag"),
        // Must precede the general "proprietary attribute" pattern below.
        Pattern.compile(".*Warning: <blockquote> proprietary attribute \"pre\""),
        Pattern.compile(".*Warning: <.*> proprietary attribute \".*\""),
        Pattern.compile(".*Warning: <.*> unexpected or duplicate quote mark"),
        Pattern.compile(".*Warning: <a> cannot copy name attribute to id"),
        Pattern.compile(".*Warning: <a> escaping malformed URI reference"),
        // Must precede "discarding unexpected <.*>", which also matches end tags.
        Pattern.compile(".*Warning: discarding unexpected </.*>"),
        Pattern.compile(".*Warning: discarding unexpected <.*>"),
        Pattern.compile(".*Warning: entity \".*\" doesn't end in ';'"),
        Pattern.compile(".*Warning: inserting implicit <.*>"),
        Pattern.compile(".*Warning: inserting missing 'title' element"),
        Pattern.compile(".*Warning: missing <!DOCTYPE> declaration"),
        // Must precede "missing <.*>", which also matches this longer form.
        Pattern.compile(".*Warning: missing </.*> before <.*>"),
        Pattern.compile(".*Warning: missing <.*>"),
        Pattern.compile(".*Warning: nested emphasis <.*>"),
        Pattern.compile(".*Warning: plain text isn't allowed in <.*> elements"),
        Pattern.compile(".*Warning: replacing <p> by <br>"),
        Pattern.compile(".*Warning: replacing invalid numeric character reference .*"),
        Pattern.compile(".*Warning: replacing unexpected .* by </.*>"),
        Pattern.compile(".*Warning: trimming empty <.*>"),
        Pattern.compile(".*Warning: unescaped & or unknown entity \".*\""),
        Pattern.compile(".*Warning: unescaped & which should be written as &amp;"),
        Pattern.compile(".*Warning: using <br> in place of <p>")
    };

    /** Number of tidy output files analyzed. */
    int files;
    /** Number of "No warnings or errors were found." lines seen. */
    int ok;
    /** Sum of the warning totals from all summary lines. */
    int warns;
    /** Sum of the error totals from all summary lines. */
    int errs;
    /** Number of "You are recommended to use CSS" lines seen. */
    int css;
    /** Number of summary lines carrying the "not all shown" notice. */
    int overflow;
}
jjg@1455 235

mercurial