3789983e89
Reviewed-by: darcy, ihse
245 lines
9.9 KiB
Java
245 lines
9.9 KiB
Java
/*
|
|
* Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 only, as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* version 2 for more details (a copy is included in the LICENSE file that
|
|
* accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU General Public License version
|
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*
|
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
* or visit www.oracle.com if you need additional information or have any
|
|
* questions.
|
|
*/
|
|
|
|
package tidystats;
|
|
|
|
import java.io.IOException;
|
|
import java.nio.charset.Charset;
|
|
import java.nio.file.FileSystem;
|
|
import java.nio.file.FileSystems;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.util.ArrayList;
|
|
import java.util.Comparator;
|
|
import java.util.HashMap;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Set;
|
|
import java.util.TreeMap;
|
|
import java.util.TreeSet;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
/**
|
|
* Generate statistics from the files generated by tidy.sh.
|
|
*
|
|
* <p>The tidy.sh script is used to run tidy on all the HTML files
|
|
* in a directory, creating files in a new directory, and for each
|
|
* HTML file, it writes the console output from tidy into a file
|
|
* beside the fixed up file, with an additional .tidy extension.
|
|
*
|
|
* <p>This program will scan a directory for *.tidy files and
|
|
* analyze the messages reported by tidy, in order to generate a
|
|
* report with statistics on the various messages that were
|
|
* reported by tidy.
|
|
*
|
|
* <p>Typical usage:
|
|
* <pre>
|
|
* $ bash /path/to/tidy.sh /path/to/htmldir
|
|
* $ javac -d /path/to/classes /path/to/Main.java
|
|
* $ java -cp /path/to/classes tidystats.Main /path/to/htmldir.tidy
|
|
* </pre>
|
|
*
|
|
* <p>Internally, the program works by matching lines in the *.tidy
|
|
* files against a series of regular expressions that are used to
|
|
* categorize the messages. The set of regular expressions was
|
|
* empirically determined by running the program on the output from
|
|
* running tidy.sh on all the generated JDK documentation. It is
|
|
* possible that tidy may generate more/different messages on other
|
|
* doc sets, in which case, the set of regexes in the program should
|
|
* be updated.
|
|
*/
|
|
public class Main {
|
|
public static void main(String... args) throws IOException {
|
|
new Main().run(args);
|
|
}
|
|
|
|
void run(String... args) throws IOException {
|
|
FileSystem fs = FileSystems.getDefault();
|
|
List<Path> paths = new ArrayList<>();
|
|
|
|
int i;
|
|
for (i = 0; i < args.length; i++) {
|
|
String arg = args[i];
|
|
if (arg.startsWith("-"))
|
|
throw new IllegalArgumentException(arg);
|
|
else
|
|
break;
|
|
}
|
|
|
|
for ( ; i < args.length; i++) {
|
|
Path p = fs.getPath(args[i]);
|
|
paths.add(p);
|
|
}
|
|
|
|
for (Path p: paths) {
|
|
scan(p);
|
|
}
|
|
|
|
print("%6d files read", files);
|
|
print("%6d files had no errors or warnings", ok);
|
|
print("%6d files reported \"Not all warnings/errors were shown.\"", overflow);
|
|
print("%6d errors found", errs);
|
|
print("%6d warnings found", warns);
|
|
print("%6d recommendations to use CSS", css);
|
|
print("");
|
|
|
|
Map<Integer, Set<String>> sortedCounts = new TreeMap<>(
|
|
new Comparator<Integer>() {
|
|
@Override
|
|
public int compare(Integer o1, Integer o2) {
|
|
return o2.compareTo(o1);
|
|
}
|
|
});
|
|
|
|
for (Map.Entry<Pattern, Integer> e: counts.entrySet()) {
|
|
Pattern p = e.getKey();
|
|
Integer n = e.getValue();
|
|
Set<String> set = sortedCounts.get(n);
|
|
if (set == null)
|
|
sortedCounts.put(n, (set = new TreeSet<>()));
|
|
set.add(p.toString());
|
|
}
|
|
|
|
for (Map.Entry<Integer, Set<String>> e: sortedCounts.entrySet()) {
|
|
for (String p: e.getValue()) {
|
|
if (p.startsWith(".*")) p = p.substring(2);
|
|
print("%6d: %s", e.getKey(), p);
|
|
}
|
|
}
|
|
}
|
|
|
|
void scan(Path p) throws IOException {
|
|
if (Files.isDirectory(p)) {
|
|
for (Path c: Files.newDirectoryStream(p)) {
|
|
scan(c);
|
|
}
|
|
} else if (isTidyFile(p)) {
|
|
scan(Files.readAllLines(p, Charset.defaultCharset()));
|
|
}
|
|
}
|
|
|
|
boolean isTidyFile(Path p) {
|
|
return Files.isRegularFile(p) && p.getFileName().toString().endsWith(".tidy");
|
|
}
|
|
|
|
void scan(List<String> lines) {
|
|
Matcher m;
|
|
files++;
|
|
for (String line: lines) {
|
|
if (okPattern.matcher(line).matches()) {
|
|
ok++;
|
|
} else if ((m = countPattern.matcher(line)).matches()) {
|
|
warns += Integer.valueOf(m.group(1));
|
|
errs += Integer.valueOf(m.group(2));
|
|
if (m.group(3) != null)
|
|
overflow++;
|
|
} else if ((m = countPattern2.matcher(line)).matches()) {
|
|
warns += Integer.valueOf(m.group(1));
|
|
errs += Integer.valueOf(m.group(2));
|
|
if (m.group(3) != null)
|
|
overflow++;
|
|
} else if ((m = guardPattern.matcher(line)).matches()) {
|
|
boolean found = false;
|
|
for (Pattern p: patterns) {
|
|
if ((m = p.matcher(line)).matches()) {
|
|
found = true;
|
|
count(p);
|
|
break;
|
|
}
|
|
}
|
|
if (!found)
|
|
System.err.println("Unrecognized line: " + line);
|
|
} else if (cssPattern.matcher(line).matches()) {
|
|
css++;
|
|
}
|
|
}
|
|
}
|
|
|
|
Map<Pattern, Integer> counts = new HashMap<>();
|
|
void count(Pattern p) {
|
|
Integer i = counts.get(p);
|
|
counts.put(p, (i == null) ? 1 : i + 1);
|
|
}
|
|
|
|
void print(String format, Object... args) {
|
|
System.out.println(String.format(format, args));
|
|
}
|
|
|
|
Pattern okPattern = Pattern.compile("No warnings or errors were found.");
|
|
Pattern countPattern = Pattern.compile("([0-9]+) warnings, ([0-9]+) errors were found!.*?(Not all warnings/errors were shown.)?");
|
|
Pattern countPattern2 = Pattern.compile("Tidy found ([0-9]+) warning[s]? and ([0-9]+) error[s]?!.*?(Not all warnings/errors were shown.)?");
|
|
Pattern cssPattern = Pattern.compile("You are recommended to use CSS.*");
|
|
Pattern guardPattern = Pattern.compile("line [0-9]+ column [0-9]+ - (Error|Warning):.*");
|
|
|
|
Pattern[] patterns = {
|
|
Pattern.compile(".*Error: <.*> is not recognized!"),
|
|
Pattern.compile(".*Error: missing quote mark for attribute value"),
|
|
Pattern.compile(".*Warning: <.*> anchor \".*\" already defined"),
|
|
Pattern.compile(".*Warning: <.*> attribute \".*\" has invalid value \".*\""),
|
|
Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
|
|
Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
|
|
Pattern.compile(".*Warning: <.*> attribute with missing trailing quote mark"),
|
|
Pattern.compile(".*Warning: <.*> dropping value \".*\" for repeated attribute \".*\""),
|
|
Pattern.compile(".*Warning: <.*> inserting \".*\" attribute"),
|
|
Pattern.compile(".*Warning: <.*> is probably intended as </.*>"),
|
|
Pattern.compile(".*Warning: <.*> isn't allowed in <.*> elements"),
|
|
Pattern.compile(".*Warning: <.*> lacks \".*\" attribute"),
|
|
Pattern.compile(".*Warning: <.*> missing '>' for end of tag"),
|
|
Pattern.compile(".*Warning: <.*> proprietary attribute \".*\""),
|
|
Pattern.compile(".*Warning: <.*> unexpected or duplicate quote mark"),
|
|
Pattern.compile(".*Warning: <a> cannot copy name attribute to id"),
|
|
Pattern.compile(".*Warning: <a> escaping malformed URI reference"),
|
|
Pattern.compile(".*Warning: <blockquote> proprietary attribute \"pre\""),
|
|
Pattern.compile(".*Warning: discarding unexpected <.*>"),
|
|
Pattern.compile(".*Warning: discarding unexpected </.*>"),
|
|
Pattern.compile(".*Warning: entity \".*\" doesn't end in ';'"),
|
|
Pattern.compile(".*Warning: inserting implicit <.*>"),
|
|
Pattern.compile(".*Warning: inserting missing 'title' element"),
|
|
Pattern.compile(".*Warning: missing <!DOCTYPE> declaration"),
|
|
Pattern.compile(".*Warning: missing <.*>"),
|
|
Pattern.compile(".*Warning: missing </.*> before <.*>"),
|
|
Pattern.compile(".*Warning: nested emphasis <.*>"),
|
|
Pattern.compile(".*Warning: plain text isn't allowed in <.*> elements"),
|
|
Pattern.compile(".*Warning: replacing <p> by <br>"),
|
|
Pattern.compile(".*Warning: replacing invalid numeric character reference .*"),
|
|
Pattern.compile(".*Warning: replacing unexpected .* by </.*>"),
|
|
Pattern.compile(".*Warning: trimming empty <.*>"),
|
|
Pattern.compile(".*Warning: unescaped & or unknown entity \".*\""),
|
|
Pattern.compile(".*Warning: unescaped & which should be written as &"),
|
|
Pattern.compile(".*Warning: using <br> in place of <p>"),
|
|
Pattern.compile(".*Warning: <.*> element removed from HTML5"),
|
|
Pattern.compile(".*Warning: <.*> attribute \".*\" not allowed for HTML5"),
|
|
Pattern.compile(".*Warning: The summary attribute on the <table> element is obsolete in HTML5"),
|
|
Pattern.compile(".*Warning: replacing invalid UTF-8 bytes \\(char. code U\\+.*\\)")
|
|
};
|
|
|
|
int files;
|
|
int ok;
|
|
int warns;
|
|
int errs;
|
|
int css;
|
|
int overflow;
|
|
}
|
|
|