Wed, 13 Aug 2014 14:50:00 -0700
Added tag jdk8u40-b01 for changeset bf89a471779d
/*
 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

jjg@1455 | 25 | package tidystats; |
jjg@1455 | 26 | |
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.DirectoryStream;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
jjg@1455 | 43 | |
/**
 * Generate statistics from the files generated by tidy.sh.
 *
 * <p>The tidy.sh script is used to run tidy on all the HTML files
 * in a directory, creating files in a new directory, and for each
 * HTML file, it writes the console output from tidy into a file
 * beside the fixed up file, with an additional .tidy extension.
 *
 * <p>This program will scan a directory for *.tidy files and
 * analyze the messages reported by tidy, in order to generate a
 * report with statistics on the various messages that were
 * reported by tidy.
 *
 * <p>Typical usage:
 * <pre>
 * $ bash /path/to/tidy.sh /path/to/htmldir
 * $ javac -d /path/to/classes /path/to/Main.java
 * $ java -cp /path/to/classes tidystats.Main /path/to/htmldir.tidy
 * </pre>
 *
 * <p>Internally, the program works by matching lines in the *.tidy
 * files against a series of regular expressions that are used to
 * categorize the messages. The set of regular expressions was
 * empirically determined by running the program on the output from
 * running tidy.sh on all the generated JDK documentation. It is
 * possible that tidy may generate more/different messages on other
 * doc sets, in which case, the set of regexes in the program should
 * be updated.
 */
public class Main {
    /**
     * Command-line entry point.
     *
     * @param args files and/or directories to scan for *.tidy files
     * @throws IOException if a file or directory cannot be read
     */
    public static void main(String... args) throws IOException {
        new Main().run(args);
    }

    /**
     * Scans every path given on the command line and prints the report
     * to standard output.
     *
     * @param args files and/or directories to scan; no options are supported
     * @throws IllegalArgumentException if the first argument starts with "-"
     * @throws IOException if a file or directory cannot be read
     */
    void run(String... args) throws IOException {
        // No options are defined, so a leading argument that looks like an
        // option is rejected. Only the first argument is ever inspected:
        // anything after the first path is treated as a path as well.
        if (args.length > 0 && args[0].startsWith("-")) {
            throw new IllegalArgumentException(args[0]);
        }

        // Convert all arguments up front so that an invalid path is reported
        // before any scanning has started.
        FileSystem fs = FileSystems.getDefault();
        List<Path> paths = new ArrayList<>();
        for (String arg : args) {
            paths.add(fs.getPath(arg));
        }

        for (Path p : paths) {
            scan(p);
        }

        printReport();
    }

    /**
     * Prints the overall totals followed by the per-message counts, most
     * frequent message first.
     */
    private void printReport() {
        print("%6d files read", files);
        print("%6d files had no errors or warnings", ok);
        print("%6d files reported \"Not all warnings/errors were shown.\"", overflow);
        print("%6d errors found", errs);
        print("%6d warnings found", warns);
        print("%6d recommendations to use CSS", css);
        print("");

        // Invert pattern->count into count->patterns. The reversed comparator
        // puts the highest count first; the TreeSet orders patterns that share
        // a count alphabetically.
        Map<Integer, Set<String>> sortedCounts = new TreeMap<>(
                new Comparator<Integer>() {
                    @Override
                    public int compare(Integer o1, Integer o2) {
                        return o2.compareTo(o1);
                    }
                });

        for (Map.Entry<Pattern, Integer> e : counts.entrySet()) {
            Integer n = e.getValue();
            Set<String> set = sortedCounts.get(n);
            if (set == null) {
                set = new TreeSet<>();
                sortedCounts.put(n, set);
            }
            set.add(e.getKey().toString());
        }

        for (Map.Entry<Integer, Set<String>> e : sortedCounts.entrySet()) {
            for (String p : e.getValue()) {
                // Drop the leading ".*" of the regex so the report shows
                // just the message text.
                if (p.startsWith(".*")) {
                    p = p.substring(2);
                }
                print("%6d: %s", e.getKey(), p);
            }
        }
    }

    /**
     * Scans a path: directories are descended into recursively, *.tidy
     * files are analyzed, and anything else is ignored.
     *
     * @param p the file or directory to scan
     * @throws IOException if a directory cannot be listed or a file cannot be read
     */
    void scan(Path p) throws IOException {
        if (Files.isDirectory(p)) {
            // A DirectoryStream holds an open directory handle, so it must be
            // closed; otherwise one handle is leaked per directory visited.
            try (DirectoryStream<Path> children = Files.newDirectoryStream(p)) {
                for (Path c : children) {
                    scan(c);
                }
            }
        } else if (isTidyFile(p)) {
            scan(Files.readAllLines(p, Charset.defaultCharset()));
        }
    }

    /**
     * Tests whether a path is a regular file whose name ends in ".tidy".
     *
     * @param p the path to test
     * @return true if the path names a *.tidy file
     */
    boolean isTidyFile(Path p) {
        return Files.isRegularFile(p) && p.getFileName().toString().endsWith(".tidy");
    }

    /**
     * Analyzes the contents of one *.tidy file and updates the statistics.
     * Each call counts as one file read.
     *
     * @param lines the lines of the file
     */
    void scan(List<String> lines) {
        files++;
        for (String line : lines) {
            Matcher m;
            if (okPattern.matcher(line).matches()) {
                ok++;
            } else if ((m = countPattern.matcher(line)).matches()) {
                warns += Integer.parseInt(m.group(1));
                errs += Integer.parseInt(m.group(2));
                // Group 3 is only set when tidy truncated its output.
                if (m.group(3) != null) {
                    overflow++;
                }
            } else if (guardPattern.matcher(line).matches()) {
                countMessage(line);
            } else if (cssPattern.matcher(line).matches()) {
                css++;
            }
        }
    }

    /**
     * Counts a diagnostic line against the first entry of {@code patterns}
     * that matches it, or reports the line on standard error if no entry
     * matches.
     */
    private void countMessage(String line) {
        for (Pattern p : patterns) {
            if (p.matcher(line).matches()) {
                count(p);
                return;
            }
        }
        System.err.println("Unrecognized line: " + line);
    }

    /** The number of lines matched so far, for each message pattern. */
    Map<Pattern, Integer> counts = new HashMap<>();

    /**
     * Increments the count for a message pattern.
     *
     * @param p the pattern that matched a line
     */
    void count(Pattern p) {
        Integer i = counts.get(p);
        counts.put(p, (i == null) ? 1 : i + 1);
    }

    /**
     * Prints a formatted line on standard output.
     *
     * @param format a {@link String#format} format string
     * @param args the arguments for the format string
     */
    void print(String format, Object... args) {
        System.out.println(String.format(format, args));
    }

    /** Matches the line reported for a file with no problems. */
    Pattern okPattern = Pattern.compile("No warnings or errors were found.");

    /**
     * Matches the summary line giving the warning count (group 1) and the
     * error count (group 2). Tidy uses the singular form for a count of 1,
     * so the plural "s" is optional. Group 3 is set if the line also says
     * that the output was truncated.
     */
    Pattern countPattern = Pattern.compile("([0-9]+) warnings?, ([0-9]+) errors? were found!.*?(Not all warnings/errors were shown.)?");

    /** Matches the recommendation to use CSS. */
    Pattern cssPattern = Pattern.compile("You are recommended to use CSS.*");

    /** Matches any diagnostic line; such lines are then categorized using {@code patterns}. */
    Pattern guardPattern = Pattern.compile("line [0-9]+ column [0-9]+ - (Error|Warning):.*");

    /**
     * The known diagnostic messages. A line is counted against the first
     * pattern that matches it, so more specific patterns must come before
     * more general ones.
     */
    Pattern[] patterns = {
        Pattern.compile(".*Error: <.*> is not recognized!"),
        Pattern.compile(".*Error: missing quote mark for attribute value"),
        Pattern.compile(".*Warning: <.*> anchor \".*\" already defined"),
        Pattern.compile(".*Warning: <.*> attribute \".*\" has invalid value \".*\""),
        Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
        Pattern.compile(".*Warning: <.*> attribute with missing trailing quote mark"),
        Pattern.compile(".*Warning: <.*> dropping value \".*\" for repeated attribute \".*\""),
        Pattern.compile(".*Warning: <.*> inserting \".*\" attribute"),
        Pattern.compile(".*Warning: <.*> is probably intended as </.*>"),
        Pattern.compile(".*Warning: <.*> isn't allowed in <.*> elements"),
        Pattern.compile(".*Warning: <.*> lacks \".*\" attribute"),
        Pattern.compile(".*Warning: <.*> missing '>' for end of tag"),
        Pattern.compile(".*Warning: <.*> proprietary attribute \".*\""),
        Pattern.compile(".*Warning: <.*> unexpected or duplicate quote mark"),
        Pattern.compile(".*Warning: <a> cannot copy name attribute to id"),
        Pattern.compile(".*Warning: <a> escaping malformed URI reference"),
        Pattern.compile(".*Warning: <blockquote> proprietary attribute \"pre\""),
        Pattern.compile(".*Warning: discarding unexpected <.*>"),
        Pattern.compile(".*Warning: discarding unexpected </.*>"),
        Pattern.compile(".*Warning: entity \".*\" doesn't end in ';'"),
        Pattern.compile(".*Warning: inserting implicit <.*>"),
        Pattern.compile(".*Warning: inserting missing 'title' element"),
        Pattern.compile(".*Warning: missing <!DOCTYPE> declaration"),
        Pattern.compile(".*Warning: missing <.*>"),
        Pattern.compile(".*Warning: missing </.*> before <.*>"),
        Pattern.compile(".*Warning: nested emphasis <.*>"),
        Pattern.compile(".*Warning: plain text isn't allowed in <.*> elements"),
        Pattern.compile(".*Warning: replacing <p> by <br>"),
        Pattern.compile(".*Warning: replacing invalid numeric character reference .*"),
        Pattern.compile(".*Warning: replacing unexpected .* by </.*>"),
        Pattern.compile(".*Warning: trimming empty <.*>"),
        Pattern.compile(".*Warning: unescaped & or unknown entity \".*\""),
        Pattern.compile(".*Warning: unescaped & which should be written as &amp;"),
        Pattern.compile(".*Warning: using <br> in place of <p>")
    };

    /** The number of *.tidy files analyzed. */
    int files;

    /** The number of files for which tidy reported no warnings or errors. */
    int ok;

    /** The total number of warnings, as given by tidy's summary lines. */
    int warns;

    /** The total number of errors, as given by tidy's summary lines. */
    int errs;

    /** The number of recommendations to use CSS. */
    int css;

    /** The number of files for which tidy did not show all warnings/errors. */
    int overflow;
}
jjg@1455 | 235 |