test/tools/doclint/tidy/util/Main.java

changeset 0
959103a6100f
child 2525
2eb010b6cb22
equal deleted inserted replaced
-1:000000000000 0:959103a6100f
1 /*
2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24
25 package tidystats;
26
27 import java.io.IOException;
28 import java.nio.charset.Charset;
29 import java.nio.file.FileSystem;
30 import java.nio.file.FileSystems;
31 import java.nio.file.Files;
32 import java.nio.file.Path;
33 import java.util.ArrayList;
34 import java.util.Comparator;
35 import java.util.HashMap;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.Set;
39 import java.util.TreeMap;
40 import java.util.TreeSet;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43
/**
 * Generate statistics from the files generated by tidy.sh.
 *
 * <p>The tidy.sh script is used to run tidy on all the HTML files
 * in a directory, creating files in a new directory, and for each
 * HTML file, it writes the console output from tidy into a file
 * beside the fixed up file, with an additional .tidy extension.
 *
 * <p>This program will scan a directory for *.tidy files and
 * analyze the messages reported by tidy, in order to generate a
 * report with statistics on the various messages that were
 * reported by tidy.
 *
 * <p>Typical usage:
 * <pre>
 * $ bash /path/to/tidy.sh /path/to/htmldir
 * $ javac -d /path/to/classes /path/to/Main.java
 * $ java -cp /path/to/classes tidystats.Main /path/to/htmldir.tidy
 * </pre>
 *
 * <p>Internally, the program works by matching lines in the *.tidy
 * files against a series of regular expressions that are used to
 * categorize the messages. The set of regular expressions was
 * empirically determined by running the program on the output from
 * running tidy.sh on all the generated JDK documentation. It is
 * possible that tidy may generate more/different messages on other
 * doc sets, in which case, the set of regexes in the program should
 * be updated.
 *
 * <p>The first regular expression that matches a message line wins, so
 * more specific expressions must be listed before the generic ones
 * that would also match the same line.
 */
public class Main {
    /**
     * Program entry point.
     *
     * @param args files and/or directories to scan for *.tidy files
     * @throws IOException if a file or directory cannot be read
     */
    public static void main(String... args) throws IOException {
        new Main().run(args);
    }

    /**
     * Scans every given path and prints the summary report to standard output.
     *
     * @param args files and/or directories to scan; no options are supported
     * @throws IllegalArgumentException if the first argument starts with "-"
     * @throws IOException if a file or directory cannot be read
     */
    void run(String... args) throws IOException {
        // No options are defined, so anything that looks like one is rejected.
        if (args.length > 0 && args[0].startsWith("-")) {
            throw new IllegalArgumentException(args[0]);
        }

        FileSystem fs = FileSystems.getDefault();
        List<Path> paths = new ArrayList<>();
        for (String arg : args) {
            paths.add(fs.getPath(arg));
        }

        for (Path p : paths) {
            scan(p);
        }

        printTotals();
        printMessageCounts();
    }

    /** Prints the overall counters gathered while scanning. */
    private void printTotals() {
        print("%6d files read", files);
        print("%6d files had no errors or warnings", ok);
        print("%6d files reported \"Not all warnings/errors were shown.\"", overflow);
        print("%6d errors found", errs);
        print("%6d warnings found", warns);
        print("%6d recommendations to use CSS", css);
        print("");
    }

    /**
     * Prints one line per message category, most frequent first;
     * categories with the same count are listed in pattern-text order.
     */
    private void printMessageCounts() {
        Map<Integer, Set<String>> sortedCounts = new TreeMap<>(
                new Comparator<Integer>() {
                    @Override
                    public int compare(Integer o1, Integer o2) {
                        // Reversed natural order: largest count first.
                        return o2.compareTo(o1);
                    }
                });

        for (Map.Entry<Pattern, Integer> e : counts.entrySet()) {
            Integer n = e.getValue();
            Set<String> set = sortedCounts.get(n);
            if (set == null) {
                set = new TreeSet<>();
                sortedCounts.put(n, set);
            }
            set.add(e.getKey().toString());
        }

        for (Map.Entry<Integer, Set<String>> e : sortedCounts.entrySet()) {
            for (String p : e.getValue()) {
                // Drop the leading ".*" that only exists to skip the line/column prefix.
                String label = p.startsWith(".*") ? p.substring(2) : p;
                print("%6d: %s", e.getKey(), label);
            }
        }
    }

    /**
     * Scans a path: directories are walked recursively, *.tidy files are
     * analyzed, and anything else is ignored.
     *
     * @param p the file or directory to scan
     * @throws IOException if the directory or file cannot be read
     */
    void scan(Path p) throws IOException {
        if (Files.isDirectory(p)) {
            // The directory stream holds an open handle and must be closed.
            try (java.nio.file.DirectoryStream<Path> children = Files.newDirectoryStream(p)) {
                for (Path c : children) {
                    scan(c);
                }
            }
        } else if (isTidyFile(p)) {
            scan(Files.readAllLines(p, Charset.defaultCharset()));
        }
    }

    /**
     * Tells whether a path is a regular file whose name ends in ".tidy".
     *
     * @param p the path to test
     * @return true if the path should be analyzed
     */
    boolean isTidyFile(Path p) {
        return Files.isRegularFile(p) && p.getFileName().toString().endsWith(".tidy");
    }

    /**
     * Analyzes the lines of one *.tidy file and updates the counters.
     *
     * @param lines the content of the file, one element per line
     */
    void scan(List<String> lines) {
        files++;
        for (String line : lines) {
            Matcher m;
            if (okPattern.matcher(line).matches()) {
                ok++;
            } else if ((m = countPattern.matcher(line)).matches()) {
                warns += Integer.parseInt(m.group(1));
                errs += Integer.parseInt(m.group(2));
                if (m.group(3) != null) {
                    overflow++;
                }
            } else if (guardPattern.matcher(line).matches()) {
                if (!countMessage(line)) {
                    System.err.println("Unrecognized line: " + line);
                }
            } else if (cssPattern.matcher(line).matches()) {
                css++;
            }
        }
    }

    /**
     * Counts a message line against the first pattern that matches it.
     *
     * @return true if some pattern matched the line
     */
    private boolean countMessage(String line) {
        for (Pattern p : patterns) {
            if (p.matcher(line).matches()) {
                count(p);
                return true;
            }
        }
        return false;
    }

    /** Number of message lines matched, per pattern. */
    Map<Pattern, Integer> counts = new HashMap<>();

    /**
     * Increments the number of occurrences recorded for a pattern.
     *
     * @param p the pattern that matched a message line
     */
    void count(Pattern p) {
        Integer i = counts.get(p);
        counts.put(p, (i == null) ? 1 : i + 1);
    }

    /**
     * Prints a formatted line to standard output.
     *
     * @param format a {@link String#format} format string
     * @param args the format arguments
     */
    void print(String format, Object... args) {
        System.out.println(String.format(format, args));
    }

    /** Summary line of a file in which tidy found nothing to report. */
    final Pattern okPattern = Pattern.compile("No warnings or errors were found.");

    /**
     * Summary line giving warning and error totals. The plural "s" is optional
     * so that summaries such as "1 warning, 0 errors were found!" are counted
     * too. Group 3 is non-null when tidy truncated its report.
     */
    final Pattern countPattern = Pattern.compile("([0-9]+) warnings?, ([0-9]+) errors? were found!.*?(Not all warnings/errors were shown.)?");

    /** Line recommending the use of CSS. */
    final Pattern cssPattern = Pattern.compile("You are recommended to use CSS.*");

    /** Any individual error or warning message line. */
    final Pattern guardPattern = Pattern.compile("line [0-9]+ column [0-9]+ - (Error|Warning):.*");

    /**
     * Message categories, tried in order; the first match wins. Specific
     * patterns are placed before the generic patterns that would also
     * match the same line.
     */
    final Pattern[] patterns = {
        Pattern.compile(".*Error: <.*> is not recognized!"),
        Pattern.compile(".*Error: missing quote mark for attribute value"),
        Pattern.compile(".*Warning: <.*> anchor \".*\" already defined"),
        Pattern.compile(".*Warning: <.*> attribute \".*\" has invalid value \".*\""),
        Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
        Pattern.compile(".*Warning: <.*> attribute with missing trailing quote mark"),
        Pattern.compile(".*Warning: <.*> dropping value \".*\" for repeated attribute \".*\""),
        Pattern.compile(".*Warning: <.*> inserting \".*\" attribute"),
        Pattern.compile(".*Warning: <.*> is probably intended as </.*>"),
        Pattern.compile(".*Warning: <.*> isn't allowed in <.*> elements"),
        Pattern.compile(".*Warning: <.*> lacks \".*\" attribute"),
        Pattern.compile(".*Warning: <.*> missing '>' for end of tag"),
        // Must precede the generic "proprietary attribute" pattern.
        Pattern.compile(".*Warning: <blockquote> proprietary attribute \"pre\""),
        Pattern.compile(".*Warning: <.*> proprietary attribute \".*\""),
        Pattern.compile(".*Warning: <.*> unexpected or duplicate quote mark"),
        Pattern.compile(".*Warning: <a> cannot copy name attribute to id"),
        Pattern.compile(".*Warning: <a> escaping malformed URI reference"),
        // End tags must be tried first: "<.*>" also matches "</...>".
        Pattern.compile(".*Warning: discarding unexpected </.*>"),
        Pattern.compile(".*Warning: discarding unexpected <.*>"),
        Pattern.compile(".*Warning: entity \".*\" doesn't end in ';'"),
        Pattern.compile(".*Warning: inserting implicit <.*>"),
        Pattern.compile(".*Warning: inserting missing 'title' element"),
        Pattern.compile(".*Warning: missing <!DOCTYPE> declaration"),
        // Must precede "missing <.*>", which also matches "missing </a> before <b>".
        Pattern.compile(".*Warning: missing </.*> before <.*>"),
        Pattern.compile(".*Warning: missing <.*>"),
        Pattern.compile(".*Warning: nested emphasis <.*>"),
        Pattern.compile(".*Warning: plain text isn't allowed in <.*> elements"),
        Pattern.compile(".*Warning: replacing <p> by <br>"),
        Pattern.compile(".*Warning: replacing invalid numeric character reference .*"),
        Pattern.compile(".*Warning: replacing unexpected .* by </.*>"),
        Pattern.compile(".*Warning: trimming empty <.*>"),
        Pattern.compile(".*Warning: unescaped & or unknown entity \".*\""),
        Pattern.compile(".*Warning: unescaped & which should be written as &amp;"),
        Pattern.compile(".*Warning: using <br> in place of <p>")
    };

    /** Number of *.tidy files analyzed. */
    int files;
    /** Number of files in which tidy found no errors or warnings. */
    int ok;
    /** Total number of warnings, summed from the per-file summary lines. */
    int warns;
    /** Total number of errors, summed from the per-file summary lines. */
    int errs;
    /** Number of lines recommending the use of CSS. */
    int css;
    /** Number of files whose summary says not all warnings/errors were shown. */
    int overflow;
}
235

mercurial