test/tools/doclint/tidy/util/Main.java

Wed, 25 Sep 2013 14:04:24 -0700

author
jjg
date
Wed, 25 Sep 2013 14:04:24 -0700
changeset 2061
3d61984b077c
parent 1455
75ab654b5cd5
child 2525
2eb010b6cb22
permissions
-rw-r--r--

8025412: Add legal header and comments to test/tools/doclint/tidy/util/Main.java
Reviewed-by: bpatel

     1 /*
     2  * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  */
    25 package tidystats;
    27 import java.io.IOException;
    28 import java.nio.charset.Charset;
    29 import java.nio.file.FileSystem;
    30 import java.nio.file.FileSystems;
    31 import java.nio.file.Files;
    32 import java.nio.file.Path;
    33 import java.util.ArrayList;
    34 import java.util.Comparator;
    35 import java.util.HashMap;
    36 import java.util.List;
    37 import java.util.Map;
    38 import java.util.Set;
    39 import java.util.TreeMap;
    40 import java.util.TreeSet;
    41 import java.util.regex.Matcher;
    42 import java.util.regex.Pattern;
    44 /**
    45  * Generate statistics from the files generated by tidy.sh.
    46  *
    47  * <p>The tidy.sh script is used to run tidy on all the HTML files
    48  * in a directory, creating files in a new directory, and for each
    49  * HTML file, it writes the console output from tidy into a file
    50  * beside the fixed up file, with an additional .tidy extension.
    51  *
    52  * <p>This program will scan a directory for *.tidy files and
    53  * analyze the messages reported by tidy, in order to generate a
    54  * report with statistics on the various messages that were
    55  * reported by tidy.
    56  *
    57  * <p>Typical usage:
    58  * <pre>
    59  * $ bash /path/to/tidy.sh /path/to/htmldir
    60  * $ javac -d /path/to/classes /path/to/Main.java
    61  * $ java -cp /path/to/classes tidystats.Main /path/to/htmldir.tidy
    62  * </pre>
    63  *
    64  * <p>Internally, the program works by matching lines in the *.tidy
    65  * files against a series of regular expressions that are used to
    66  * categorize the messages.  The set of regular expressions was
    67  * empirically determined by running the program on the output from
    68  * running tidy.sh on all the generated JDK documentation. It is
    69  * possible that tidy may generate more/different messages on other
    70  * doc sets, in which case, the set of regexes in the program should
    71  * be updated.
    72  */
    public class Main {
        /**
         * Program entry point: creates a {@code Main} and runs it on the given arguments.
         *
         * @param args files and/or directories to scan for {@code *.tidy} files
         * @throws IOException if a file or directory cannot be read
         */
        public static void main(String... args) throws IOException {
            new Main().run(args);
        }

        /**
         * Scans every path named on the command line and prints a report to
         * standard output: the overall totals first, then one line per message
         * category, ordered by decreasing number of occurrences.
         *
         * @param args files and/or directories to scan
         * @throws IllegalArgumentException if the first argument starts with {@code -}
         * @throws IOException if a file or directory cannot be read
         */
        void run(String... args) throws IOException {
            FileSystem fs = FileSystems.getDefault();
            List<Path> paths = new ArrayList<>();

            // Leading options: none are supported, so any argument starting with
            // "-" in this position is rejected. The first non-option ends the loop.
            int i;
            for (i = 0; i < args.length; i++) {
                String arg = args[i];
                if (arg.startsWith("-")) {
                    throw new IllegalArgumentException(arg);
                } else {
                    break;
                }
            }

            // Everything from the first non-option onwards is a path to scan.
            for ( ; i < args.length; i++) {
                paths.add(fs.getPath(args[i]));
            }

            for (Path p : paths) {
                scan(p);
            }

            print("%6d files read", files);
            print("%6d files had no errors or warnings", ok);
            print("%6d files reported \"Not all warnings/errors were shown.\"", overflow);
            print("%6d errors found", errs);
            print("%6d warnings found", warns);
            print("%6d recommendations to use CSS", css);
            print("");

            // Invert the counts map: occurrence count -> pattern texts with that
            // count. The comparator reverses the natural order so that the most
            // frequent messages are listed first; the TreeSet sorts the pattern
            // texts that share a count.
            Map<Integer, Set<String>> sortedCounts = new TreeMap<>(
                    new Comparator<Integer>() {
                        @Override
                        public int compare(Integer o1, Integer o2) {
                            return o2.compareTo(o1);
                        }
                    });

            for (Map.Entry<Pattern, Integer> e : counts.entrySet()) {
                Integer n = e.getValue();
                Set<String> set = sortedCounts.get(n);
                if (set == null) {
                    set = new TreeSet<>();
                    sortedCounts.put(n, set);
                }
                set.add(e.getKey().toString());
            }

            for (Map.Entry<Integer, Set<String>> e : sortedCounts.entrySet()) {
                for (String p : e.getValue()) {
                    // Drop the leading ".*" of the regex to make the report easier to read.
                    if (p.startsWith(".*")) {
                        p = p.substring(2);
                    }
                    print("%6d: %s", e.getKey(), p);
                }
            }
        }

        /**
         * Scans a path: directories are descended into recursively, regular
         * files whose name ends in {@code .tidy} are read and analyzed, and
         * anything else is ignored.
         *
         * @param p the file or directory to scan
         * @throws IOException if the directory or file cannot be read
         */
        void scan(Path p) throws IOException {
            if (Files.isDirectory(p)) {
                // The directory stream holds an open directory handle, so it must be
                // closed; otherwise one handle is leaked per directory visited.
                try (java.nio.file.DirectoryStream<Path> children = Files.newDirectoryStream(p)) {
                    for (Path c : children) {
                        scan(c);
                    }
                }
            } else if (isTidyFile(p)) {
                scan(Files.readAllLines(p, Charset.defaultCharset()));
            }
        }

        /**
         * Returns whether a path is a regular file whose name ends in {@code .tidy}.
         *
         * @param p the path to check
         * @return {@code true} if the path should be analyzed
         */
        boolean isTidyFile(Path p) {
            return Files.isRegularFile(p) && p.getFileName().toString().endsWith(".tidy");
        }

        /**
         * Analyzes the lines of one {@code .tidy} file and updates the running totals.
         * Each line is tested, in order, as: the "no problems" line, the summary
         * line giving the warning and error counts, an individual diagnostic, or
         * the CSS recommendation. Lines matching none of these are ignored.
         * An individual diagnostic that matches none of the known categories is
         * reported on standard error.
         *
         * @param lines the content of the file, one element per line
         */
        void scan(List<String> lines) {
            files++;
            for (String line : lines) {
                Matcher m;
                if (okPattern.matcher(line).matches()) {
                    ok++;
                } else if ((m = countPattern.matcher(line)).matches()) {
                    warns += Integer.parseInt(m.group(1));
                    errs += Integer.parseInt(m.group(2));
                    // Group 3 only participates in the match when the overflow notice is present.
                    if (m.group(3) != null) {
                        overflow++;
                    }
                } else if (guardPattern.matcher(line).matches()) {
                    // The first pattern that matches the whole line decides the category.
                    boolean found = false;
                    for (Pattern p : patterns) {
                        if (p.matcher(line).matches()) {
                            found = true;
                            count(p);
                            break;
                        }
                    }
                    if (!found) {
                        System.err.println("Unrecognized line: " + line);
                    }
                } else if (cssPattern.matcher(line).matches()) {
                    css++;
                }
            }
        }

        /** Number of diagnostics seen so far for each message category. */
        Map<Pattern, Integer> counts = new HashMap<>();

        /**
         * Records one more occurrence of a message category.
         *
         * @param p the pattern identifying the category
         */
        void count(Pattern p) {
            Integer i = counts.get(p);
            counts.put(p, (i == null) ? 1 : i + 1);
        }

        /**
         * Formats a message and writes it, followed by a newline, to standard output.
         *
         * @param format a {@link String#format} format string
         * @param args the arguments for the format string
         */
        void print(String format, Object... args) {
            System.out.println(String.format(format, args));
        }

        /** Matches the line reported for a file with no problems. */
        Pattern okPattern = Pattern.compile("No warnings or errors were found.");

        /**
         * Matches the summary line giving the number of warnings (group 1) and
         * errors (group 2). Group 3 is set when the line also carries the
         * "Not all warnings/errors were shown." notice. Singular and plural nouns
         * are both accepted, so that a summary such as
         * "1 warning, 0 errors were found!" is counted too.
         */
        Pattern countPattern = Pattern.compile("([0-9]+) warnings?, ([0-9]+) errors? were found!.*?(Not all warnings/errors were shown.)?");

        /** Matches the recommendation to use CSS. */
        Pattern cssPattern = Pattern.compile("You are recommended to use CSS.*");

        /** Matches any individual diagnostic: a position followed by "Error:" or "Warning:". */
        Pattern guardPattern = Pattern.compile("line [0-9]+ column [0-9]+ - (Error|Warning):.*");

        /**
         * The known message categories. A diagnostic is counted against the first
         * pattern that matches the whole line, so a specific pattern must be listed
         * before any more general pattern that would also match the same line
         * (note that {@code <.*>} also matches an end tag such as {@code </b>}).
         */
        Pattern[] patterns = {
            Pattern.compile(".*Error: <.*> is not recognized!"),
            Pattern.compile(".*Error: missing quote mark for attribute value"),
            Pattern.compile(".*Warning: <.*> anchor \".*\" already defined"),
            Pattern.compile(".*Warning: <.*> attribute \".*\" has invalid value \".*\""),
            Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
            Pattern.compile(".*Warning: <.*> attribute with missing trailing quote mark"),
            Pattern.compile(".*Warning: <.*> dropping value \".*\" for repeated attribute \".*\""),
            Pattern.compile(".*Warning: <.*> inserting \".*\" attribute"),
            Pattern.compile(".*Warning: <.*> is probably intended as </.*>"),
            Pattern.compile(".*Warning: <.*> isn't allowed in <.*> elements"),
            Pattern.compile(".*Warning: <.*> lacks \".*\" attribute"),
            Pattern.compile(".*Warning: <.*> missing '>' for end of tag"),
            // Specific case first: the general "proprietary attribute" pattern below also matches it.
            Pattern.compile(".*Warning: <blockquote> proprietary attribute \"pre\""),
            Pattern.compile(".*Warning: <.*> proprietary attribute \".*\""),
            Pattern.compile(".*Warning: <.*> unexpected or duplicate quote mark"),
            Pattern.compile(".*Warning: <a> cannot copy name attribute to id"),
            Pattern.compile(".*Warning: <a> escaping malformed URI reference"),
            // End-tag form first: "<.*>" in the next pattern also matches "</...>".
            Pattern.compile(".*Warning: discarding unexpected </.*>"),
            Pattern.compile(".*Warning: discarding unexpected <.*>"),
            Pattern.compile(".*Warning: entity \".*\" doesn't end in ';'"),
            Pattern.compile(".*Warning: inserting implicit <.*>"),
            Pattern.compile(".*Warning: inserting missing 'title' element"),
            Pattern.compile(".*Warning: missing <!DOCTYPE> declaration"),
            // "missing </a> before <b>" first: "missing <.*>" below also matches it.
            Pattern.compile(".*Warning: missing </.*> before <.*>"),
            Pattern.compile(".*Warning: missing <.*>"),
            Pattern.compile(".*Warning: nested emphasis <.*>"),
            Pattern.compile(".*Warning: plain text isn't allowed in <.*> elements"),
            Pattern.compile(".*Warning: replacing <p> by <br>"),
            Pattern.compile(".*Warning: replacing invalid numeric character reference .*"),
            Pattern.compile(".*Warning: replacing unexpected .* by </.*>"),
            Pattern.compile(".*Warning: trimming empty <.*>"),
            Pattern.compile(".*Warning: unescaped & or unknown entity \".*\""),
            Pattern.compile(".*Warning: unescaped & which should be written as &amp;"),
            Pattern.compile(".*Warning: using <br> in place of <p>")
        };

        /** Number of {@code .tidy} files analyzed. */
        int files;
        /** Number of "no warnings or errors" lines seen. */
        int ok;
        /** Sum of the warning counts taken from the summary lines. */
        int warns;
        /** Sum of the error counts taken from the summary lines. */
        int errs;
        /** Number of "recommended to use CSS" lines seen. */
        int css;
        /** Number of summary lines carrying the "Not all warnings/errors were shown." notice. */
        int overflow;
    }

mercurial