8234835: Use UTF-8 charset in fixuppandoc

Reviewed-by: martin, jjg
This commit is contained in:
Dan Smith 2019-12-06 12:33:28 -07:00
parent bf2f855cd7
commit b0f3e76e60

View File

@@ -46,6 +46,7 @@ import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import static java.nio.charset.StandardCharsets.UTF_8;
/**
* Fixup HTML generated by pandoc.
@@ -98,6 +99,10 @@ public class Main {
* If no output file is specified, the program will write to standard output.
* Any error messages will be written to the standard error stream.
*
+* Consistent with the
+* <a href="https://pandoc.org/MANUAL.html#character-encoding">pandoc tool</a>,
+* input and output text is encoded as UTF-8.
+*
* @param args the command-line arguments
*/
public static void main(String... args) {
@@ -184,7 +189,7 @@ public class Main {
if (inFile != null) {
read(inFile);
} else {
-read(new BufferedReader(new InputStreamReader(System.in)));
+read(new BufferedReader(new InputStreamReader(System.in, UTF_8)));
}
}
}
@@ -198,9 +203,9 @@ public class Main {
*/
private Writer openWriter(Path file) throws IOException {
if (file != null) {
-return Files.newBufferedWriter(file);
+return Files.newBufferedWriter(file, UTF_8);
} else {
-return new BufferedWriter(new OutputStreamWriter(System.out) {
+return new BufferedWriter(new OutputStreamWriter(System.out, UTF_8) {
@Override
public void close() throws IOException {
flush();
@@ -615,7 +620,7 @@ public class Main {
* @param file the file
*/
void read(Path file) {
-try (Reader r = Files.newBufferedReader(file)) {
+try (Reader r = Files.newBufferedReader(file, UTF_8)) {
this.file = file;
read(r);
} catch (IOException e) {