|
1 /* |
|
2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. |
|
8 * |
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 * version 2 for more details (a copy is included in the LICENSE file that |
|
13 * accompanied this code). |
|
14 * |
|
15 * You should have received a copy of the GNU General Public License version |
|
16 * 2 along with this work; if not, write to the Free Software Foundation, |
|
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 * |
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
20 * or visit www.oracle.com if you need additional information or have any |
|
21 * questions. |
|
22 */ |
|
23 |
|
24 |
|
25 package tidystats; |
|
26 |
|
27 import java.io.IOException; |
|
28 import java.nio.charset.Charset; |
|
29 import java.nio.file.FileSystem; |
|
30 import java.nio.file.FileSystems; |
|
31 import java.nio.file.Files; |
|
32 import java.nio.file.Path; |
|
33 import java.util.ArrayList; |
|
34 import java.util.Comparator; |
|
35 import java.util.HashMap; |
|
36 import java.util.List; |
|
37 import java.util.Map; |
|
38 import java.util.Set; |
|
39 import java.util.TreeMap; |
|
40 import java.util.TreeSet; |
|
41 import java.util.regex.Matcher; |
|
42 import java.util.regex.Pattern; |
|
43 |
|
/**
 * Generate statistics from the files generated by tidy.sh.
 *
 * <p>The tidy.sh script is used to run tidy on all the HTML files
 * in a directory, creating files in a new directory, and for each
 * HTML file, it writes the console output from tidy into a file
 * beside the fixed up file, with an additional .tidy extension.
 *
 * <p>This program will scan a directory for *.tidy files and
 * analyze the messages reported by tidy, in order to generate a
 * report with statistics on the various messages that were
 * reported by tidy.
 *
 * <p>Typical usage:
 * <pre>
 * $ bash /path/to/tidy.sh /path/to/htmldir
 * $ javac -d /path/to/classes /path/to/Main.java
 * $ java -cp /path/to/classes tidystats.Main /path/to/htmldir.tidy
 * </pre>
 *
 * <p>Internally, the program works by matching lines in the *.tidy
 * files against a series of regular expressions that are used to
 * categorize the messages. The set of regular expressions was
 * empirically determined by running the program on the output from
 * running tidy.sh on all the generated JDK documentation. It is
 * possible that tidy may generate more/different messages on other
 * doc sets, in which case, the set of regexes in the program should
 * be updated.
 *
 * <p>An instance accumulates its counters across every file it scans,
 * so one instance corresponds to one report.
 */
public class Main {
    /** Matches the summary line of a file in which tidy found nothing to report. */
    final Pattern okPattern = Pattern.compile("No warnings or errors were found.");

    /**
     * Matches the summary line carrying the warning and error totals.
     * Group 1 is the warning count, group 2 the error count, and group 3 is
     * non-null when the "Not all warnings/errors were shown." notice follows.
     *
     * <p>NOTE(review): the optional plural "s" is there because tidy is
     * believed to print "1 warning" / "1 error" for a count of exactly one;
     * confirm against real tidy output.
     */
    final Pattern countPattern = Pattern.compile(
            "([0-9]+) warnings?, ([0-9]+) errors? were found!.*?(Not all warnings/errors were shown.)?");

    /** Matches tidy's recommendation to use CSS. */
    final Pattern cssPattern = Pattern.compile("You are recommended to use CSS.*");

    /** Cheap pre-filter: only lines of this shape are tested against {@link #patterns}. */
    final Pattern guardPattern = Pattern.compile("line [0-9]+ column [0-9]+ - (Error|Warning):.*");

    /**
     * The message categories. They are tried in order and the first full
     * match wins, so a more specific pattern that shares a prefix with a more
     * general one only counts lines the general one does not fully match.
     */
    final Pattern[] patterns = {
        Pattern.compile(".*Error: <.*> is not recognized!"),
        Pattern.compile(".*Error: missing quote mark for attribute value"),
        Pattern.compile(".*Warning: <.*> anchor \".*\" already defined"),
        Pattern.compile(".*Warning: <.*> attribute \".*\" has invalid value \".*\""),
        Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
        Pattern.compile(".*Warning: <.*> attribute with missing trailing quote mark"),
        Pattern.compile(".*Warning: <.*> dropping value \".*\" for repeated attribute \".*\""),
        Pattern.compile(".*Warning: <.*> inserting \".*\" attribute"),
        Pattern.compile(".*Warning: <.*> is probably intended as </.*>"),
        Pattern.compile(".*Warning: <.*> isn't allowed in <.*> elements"),
        Pattern.compile(".*Warning: <.*> lacks \".*\" attribute"),
        Pattern.compile(".*Warning: <.*> missing '>' for end of tag"),
        Pattern.compile(".*Warning: <.*> proprietary attribute \".*\""),
        Pattern.compile(".*Warning: <.*> unexpected or duplicate quote mark"),
        Pattern.compile(".*Warning: <a> cannot copy name attribute to id"),
        Pattern.compile(".*Warning: <a> escaping malformed URI reference"),
        Pattern.compile(".*Warning: <blockquote> proprietary attribute \"pre\""),
        Pattern.compile(".*Warning: discarding unexpected <.*>"),
        Pattern.compile(".*Warning: discarding unexpected </.*>"),
        Pattern.compile(".*Warning: entity \".*\" doesn't end in ';'"),
        Pattern.compile(".*Warning: inserting implicit <.*>"),
        Pattern.compile(".*Warning: inserting missing 'title' element"),
        Pattern.compile(".*Warning: missing <!DOCTYPE> declaration"),
        Pattern.compile(".*Warning: missing <.*>"),
        Pattern.compile(".*Warning: missing </.*> before <.*>"),
        Pattern.compile(".*Warning: nested emphasis <.*>"),
        Pattern.compile(".*Warning: plain text isn't allowed in <.*> elements"),
        Pattern.compile(".*Warning: replacing <p> by <br>"),
        Pattern.compile(".*Warning: replacing invalid numeric character reference .*"),
        Pattern.compile(".*Warning: replacing unexpected .* by </.*>"),
        Pattern.compile(".*Warning: trimming empty <.*>"),
        Pattern.compile(".*Warning: unescaped & or unknown entity \".*\""),
        // NOTE(review): the message is believed to end with the literal text
        // "&amp;"; since lines are tested with matches(), a bare "&" here
        // could never match it. Confirm against real tidy output.
        Pattern.compile(".*Warning: unescaped & which should be written as &amp;"),
        Pattern.compile(".*Warning: using <br> in place of <p>")
    };

    /** Number of matched lines per category; keys are elements of {@link #patterns}. */
    final Map<Pattern, Integer> counts = new HashMap<>();

    /** Number of .tidy files read. */
    int files;
    /** Number of "No warnings or errors were found." lines seen. */
    int ok;
    /** Sum of the warning counts from the summary lines. */
    int warns;
    /** Sum of the error counts from the summary lines. */
    int errs;
    /** Number of "You are recommended to use CSS" lines seen. */
    int css;
    /** Number of summary lines carrying "Not all warnings/errors were shown." */
    int overflow;

    /**
     * Program entry point.
     *
     * @param args files and/or directories to scan
     * @throws IOException if a file or directory cannot be read
     */
    public static void main(String... args) throws IOException {
        new Main().run(args);
    }

    /**
     * Scans every path named in {@code args} and prints the report to
     * standard output.
     *
     * @param args files and/or directories to scan; no options are supported
     * @throws IllegalArgumentException if the first argument starts with "-"
     * @throws IOException if a file or directory cannot be read
     */
    void run(String... args) throws IOException {
        // Only the leading argument is treated as a potential option; anything
        // after the first path is taken as a path, whatever it looks like.
        if (args.length > 0 && args[0].startsWith("-")) {
            throw new IllegalArgumentException(args[0]);
        }

        FileSystem fs = FileSystems.getDefault();
        List<Path> paths = new ArrayList<>();
        for (String arg : args) {
            paths.add(fs.getPath(arg));
        }

        for (Path p : paths) {
            scan(p);
        }

        printTotals();
        printCategories();
    }

    /** Prints the overall counters, followed by an empty line. */
    private void printTotals() {
        print("%6d files read", files);
        print("%6d files had no errors or warnings", ok);
        print("%6d files reported \"Not all warnings/errors were shown.\"", overflow);
        print("%6d errors found", errs);
        print("%6d warnings found", warns);
        print("%6d recommendations to use CSS", css);
        print("");
    }

    /**
     * Prints one line per message category, most frequent first; categories
     * with the same count are listed in the natural order of their regex text.
     */
    private void printCategories() {
        Map<Integer, Set<String>> sortedCounts = new TreeMap<>(
                new Comparator<Integer>() {
                    @Override
                    public int compare(Integer o1, Integer o2) {
                        return o2.compareTo(o1); // descending
                    }
                });

        for (Map.Entry<Pattern, Integer> e : counts.entrySet()) {
            Integer n = e.getValue();
            Set<String> sameCount = sortedCounts.get(n);
            if (sameCount == null) {
                sameCount = new TreeSet<>();
                sortedCounts.put(n, sameCount);
            }
            sameCount.add(e.getKey().toString());
        }

        for (Map.Entry<Integer, Set<String>> e : sortedCounts.entrySet()) {
            for (String regex : e.getValue()) {
                // Drop the leading ".*" so the report shows the message text.
                String label = regex.startsWith(".*") ? regex.substring(2) : regex;
                print("%6d: %s", e.getKey(), label);
            }
        }
    }

    /**
     * Scans a path: directories are walked recursively, *.tidy files are
     * read and analyzed, and anything else is ignored.
     *
     * @param p the file or directory to scan
     * @throws IOException if a file or directory cannot be read
     */
    void scan(Path p) throws IOException {
        if (Files.isDirectory(p)) {
            // A directory stream holds an open handle until it is closed, so
            // close it as soon as this directory has been walked.
            try (java.nio.file.DirectoryStream<Path> children = Files.newDirectoryStream(p)) {
                for (Path c : children) {
                    scan(c);
                }
            }
        } else if (isTidyFile(p)) {
            scan(Files.readAllLines(p, Charset.defaultCharset()));
        }
    }

    /**
     * Returns whether {@code p} is a regular file whose name ends in ".tidy".
     *
     * @param p the path to test
     * @return true if the path should be analyzed
     */
    boolean isTidyFile(Path p) {
        return Files.isRegularFile(p) && p.getFileName().toString().endsWith(".tidy");
    }

    /**
     * Analyzes the contents of one .tidy file and updates the counters.
     * Lines in the "line N column N - Error/Warning" form that match no known
     * category are reported on standard error; other unmatched lines are
     * ignored.
     *
     * @param lines the lines of the file
     */
    void scan(List<String> lines) {
        files++;
        for (String line : lines) {
            Matcher m;
            if (okPattern.matcher(line).matches()) {
                ok++;
            } else if ((m = countPattern.matcher(line)).matches()) {
                warns += Integer.parseInt(m.group(1));
                errs += Integer.parseInt(m.group(2));
                if (m.group(3) != null) {
                    overflow++;
                }
            } else if (guardPattern.matcher(line).matches()) {
                if (!countFirstMatch(line)) {
                    System.err.println("Unrecognized line: " + line);
                }
            } else if (cssPattern.matcher(line).matches()) {
                css++;
            }
        }
    }

    /**
     * Counts {@code line} against the first category that fully matches it.
     *
     * @return false if no category matched
     */
    private boolean countFirstMatch(String line) {
        for (Pattern p : patterns) {
            if (p.matcher(line).matches()) {
                count(p);
                return true;
            }
        }
        return false;
    }

    /**
     * Increments the counter for a category.
     *
     * @param p the category's pattern
     */
    void count(Pattern p) {
        Integer i = counts.get(p);
        counts.put(p, (i == null) ? 1 : i + 1);
    }

    /**
     * Formats a message and prints it as one line on standard output.
     *
     * @param format a {@link String#format} format string
     * @param args the format arguments
     */
    void print(String format, Object... args) {
        System.out.println(String.format(format, args));
    }
}
|
235 |