|
1 /* |
|
2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. |
|
8 * |
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 * version 2 for more details (a copy is included in the LICENSE file that |
|
13 * accompanied this code). |
|
14 * |
|
15 * You should have received a copy of the GNU General Public License version |
|
16 * 2 along with this work; if not, write to the Free Software Foundation, |
|
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 * |
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
20 * or visit www.oracle.com if you need additional information or have any |
|
21 * questions. |
|
22 */ |
|
23 |
|
24 |
1 package tidystats; |
25 package tidystats; |
2 |
26 |
3 import java.io.IOException; |
27 import java.io.IOException; |
4 import java.nio.charset.Charset; |
28 import java.nio.charset.Charset; |
5 import java.nio.file.FileSystem; |
29 import java.nio.file.FileSystem; |
15 import java.util.TreeMap; |
39 import java.util.TreeMap; |
16 import java.util.TreeSet; |
40 import java.util.TreeSet; |
17 import java.util.regex.Matcher; |
41 import java.util.regex.Matcher; |
18 import java.util.regex.Pattern; |
42 import java.util.regex.Pattern; |
19 |
43 |
|
44 /** |
|
45 * Generate statistics from the files generated by tidy.sh. |
|
46 * |
|
47 * <p>The tidy.sh script is used to run tidy on all the HTML files |
|
48 * in a directory, creating files in a new directory, and for each |
|
49 * HTML file, it writes the console output from tidy into a file |
|
50 * beside the fixed-up file, with an additional .tidy extension. |
|
51 * |
|
52 * <p>This program will scan a directory for *.tidy files and |
|
53 * analyze the messages reported by tidy, in order to generate a |
|
54 * report with statistics on the various messages that were |
|
55 * reported by tidy. |
|
56 * |
|
57 * <p>Typical usage: |
|
58 * <pre> |
|
59 * $ bash /path/to/tidy.sh /path/to/htmldir |
|
60 * $ javac -d /path/to/classes /path/to/Main.java |
|
61 * $ java -cp /path/to/classes tidystats.Main /path/to/htmldir.tidy |
|
62 * </pre> |
|
63 * |
|
64 * <p>Internally, the program works by matching lines in the *.tidy |
|
65 * files against a series of regular expressions that are used to |
|
66 * categorize the messages. The set of regular expressions was |
|
67 * empirically determined by running the program on the output from |
|
68 * running tidy.sh on all the generated JDK documentation. It is |
|
69 * possible that tidy may generate more/different messages on other |
|
70 * doc sets, in which case, the set of regexes in the program should |
|
71 * be updated. |
|
72 */ |
20 public class Main { |
73 public class Main { |
/**
 * Command-line entry point: creates a new Main instance and passes the
 * arguments, unchanged, to its run method. Nothing is caught here, so an
 * IOException (the only checked exception declared) propagates to the caller.
 *
 * @param args the command-line arguments, forwarded as-is to run
 * @throws IOException declared on this method; presumably raised by run
 *         while reading files (run is not visible here; confirm)
 */
21 public static void main(String... args) throws IOException {
74 public static void main(String... args) throws IOException {
22 new Main().run(args);
75 new Main().run(args);
23 }
76 }
24 |
77 |