jdk-24/test/jdk/sun/nio/cs/mapping/CoderTest.java
Naoto Sato a253b46021 8301119: Support for GB18030-2022
Reviewed-by: alanb, coffeys, lancea
2023-02-27 16:35:59 +00:00

531 lines
18 KiB
Java

/*
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/* @test
@bug 4691554 6221056 6380723 6404504 6419565 6529796 8301119
@summary Test the supported New I/O coders
@modules jdk.charsets
@run main CoderTest
@run main/othervm -Djdk.charset.GB18030=2000 CoderTest
*/
import java.io.*;
import java.nio.*;
import java.nio.charset.*;
import java.util.regex.*;
/**
 * Data-driven test of the charset encoders and decoders.
 *
 * For every {@code <charset>.b2c} mapping file named on the command line (or
 * found in the directory given by the {@code test.src} system property when no
 * arguments are given) the mappings are loaded, grouped by the number of bytes
 * per character, and then block-decoded and block-encoded using both heap and
 * direct buffers. Optional {@code .c2b-irreversible} and
 * {@code .b2c-irreversible} files are used for encode-only and decode-only
 * passes respectively.
 */
public class CoderTest {

    private static final int BUFSIZ = 8192;     // Initial buffer size
    private static final int MAXERRS = 10;      // Errors reported per test

    // Directory in which the mapping files are looked up
    private static final String testRootDir
        = System.getProperty("test.src", ".");
    private static final PrintStream log = System.out;

    // Set by -v on the command line
    private static boolean verbose = false;

    // Test modes
    private static final int ROUNDTRIP = 0;
    private static final int ENCODE = 1;
    private static final int DECODE = 2;

    // True when the most recently loaded mapping file declared #STATEFUL
    private static boolean shiftHackDBCS = false;

    // File extensions, indexed by test mode
    private static final String[] extension
        = new String[] { ".b2c",
                         ".c2b-irreversible",
                         ".b2c-irreversible" };

    private static final boolean IS_2000 =
        "2000".equals(System.getProperty("jdk.charset.GB18030"));


    // Utilities

    /** Returns a buffer of twice the capacity holding the content written to bb so far. */
    private static ByteBuffer expand(ByteBuffer bb) {
        ByteBuffer nbb = ByteBuffer.allocate(bb.capacity() * 2);
        bb.flip();
        nbb.put(bb);
        return nbb;
    }

    /** Returns a buffer of twice the capacity holding the content written to cb so far. */
    private static CharBuffer expand(CharBuffer cb) {
        CharBuffer ncb = CharBuffer.allocate(cb.capacity() * 2);
        cb.flip();
        ncb.put(cb);
        return ncb;
    }

    /**
     * Converts a string of hex digit pairs into the bytes it denotes.
     *
     * @throws RuntimeException if the string has an odd number of digits
     */
    private static byte[] parseBytes(String s) {
        // Every byte needs exactly two digits; a dangling nibble would
        // otherwise be dropped silently.
        if ((s.length() & 1) != 0)
            throw new RuntimeException("Malformed byte string: " + s);
        byte[] bs = new byte[s.length() / 2];
        for (int i = 0; i < bs.length; i++) {
            int j = i * 2;
            bs[i] = (byte)Integer.parseInt(s.substring(j, j + 2), 16);
        }
        return bs;
    }

    /** Formats the bytes as lower-case hex, two digits per byte. */
    private static String printBytes(byte[] bs) {
        StringBuilder sb = new StringBuilder(bs.length * 2);
        for (byte b : bs) {
            sb.append(Character.forDigit((b >> 4) & 0xf, 16));
            sb.append(Character.forDigit(b & 0xf, 16));
        }
        return sb.toString();
    }

    /**
     * Formats a code point as "U+" followed by at least four lower-case hex
     * digits; supplementary code points use as many digits as they need.
     */
    private static String printCodePoint(int cp) {
        return String.format("U+%04x", cp);
    }

    /**
     * Reads the next code point from the buffer, consuming two chars when
     * the first one is a high surrogate.
     */
    private static int getCodePoint(CharBuffer cb) {
        char c = cb.get();
        if (Character.isHighSurrogate(c))
            return Character.toCodePoint(c, cb.get());
        return c;
    }

    private static String plural(int n) {
        return (n == 1 ? "" : "s");
    }

    /** One mapping read from a mapping file. */
    static class Entry {
        byte[] bb;      // encoded bytes
        int cp;         // code point
        int cp2;        // second code point of a "cp+0xcp2" mapping, else 0
    }

    /** Line-oriented reader of mapping files. */
    public static class Parser {
        static Pattern p = Pattern.compile("(0[xX])?(00)?([0-9a-fA-F]+)\\s+(0[xX])?([0-9a-fA-F]+)(\\+0x([0-9a-fA-F]+))?\\s*");
        static final int gBS = 1;
        static final int gCP = 2;
        static final int gCP2 = 3;

        boolean isStateful = false;
        BufferedReader reader;
        boolean closed;
        Matcher matcher;

        public Parser (InputStream in)
            throws IOException
        {
            this.reader = new BufferedReader(new InputStreamReader(in));
            this.closed = false;
            this.matcher = p.matcher("");
        }

        /** Returns true once a #STATEFUL line has been read. */
        public boolean isStateful() {
            return isStateful;
        }

        /**
         * Returns true if the line starts with '#' and must therefore not be
         * parsed as a mapping; records a #STATEFUL marker as a side effect.
         */
        protected boolean isDirective(String line) {
            // Stateful DBCS encodings need special treatment
            if (line.startsWith("#STATEFUL")) {
                isStateful = true;
                return true;
            }
            return line.startsWith("#");
        }

        /** Fills e from a successful match of the mapping pattern and returns it. */
        protected Entry parse(Matcher m, Entry e) {
            e.bb = parseBytes(m.group(3));
            e.cp = Integer.parseInt(m.group(5), 16);
            String second = m.group(7);
            e.cp2 = (second != null) ? Integer.parseInt(second, 16) : 0;
            return e;
        }

        public Entry next() throws Exception {
            return next(new Entry());
        }

        // Returns null and closes the input stream if the eof has been reached.
        // Lines that are directives or do not match the pattern are skipped.
        public Entry next(Entry mapping) throws Exception {
            if (closed)
                return null;
            String line;
            while ((line = reader.readLine()) != null) {
                if (isDirective(line))
                    continue;
                matcher.reset(line);
                if (!matcher.lookingAt()) {
                    //System.out.println("Missed: " + line);
                    continue;
                }
                return parse(matcher, mapping);
            }
            reader.close();
            closed = true;
            return null;
        }
    }


    // CoderTest

    private String encoding;
    private Charset cs;
    private CharsetDecoder decoder = null;
    private CharsetEncoder encoder = null;

    private CoderTest(String enc) throws Exception {
        encoding = enc;
        cs = Charset.forName(enc);
        decoder = cs.newDecoder();
        encoder = cs.newEncoder();
    }

    private class Test {
        // An instance of this class tests all mappings for
        // a particular bytesPerChar value
        private int bytesPerChar;

        // Reference data from .b2c file
        private ByteBuffer refBytes = ByteBuffer.allocate(BUFSIZ);
        private CharBuffer refChars = CharBuffer.allocate(BUFSIZ);

        // Direct-buffer copies of the reference data, filled in run()
        private ByteBuffer dRefBytes = ByteBuffer.allocateDirect(BUFSIZ);
        private CharBuffer dRefChars = ByteBuffer.allocateDirect(BUFSIZ*2).asCharBuffer();

        private Test(int bpc) {
            bytesPerChar = bpc;
        }

        /** Appends one mapping to the reference data, growing the buffers as needed. */
        private void put(byte[] bs, char[] cc) {
            if (bs.length != bytesPerChar)
                throw new IllegalArgumentException(bs.length
                                                   + " != "
                                                   + bytesPerChar);
            if (refBytes.remaining() < bytesPerChar)
                refBytes = expand(refBytes);
            refBytes.put(bs);
            if (refChars.remaining() < cc.length)
                refChars = expand(refChars);
            refChars.put(cc);
        }

        /**
         * Block-decodes the given bytes and compares the result, code point
         * by code point, with the given chars. Both buffers are rewound
         * before returning.
         *
         * The parameter names deliberately differ from the refBytes/refChars
         * fields so that the buffers handed in (heap or direct) are the ones
         * actually exercised.
         *
         * @return true if every decoded code point matched
         */
        private boolean decode(ByteBuffer bytes, CharBuffer chars)
            throws Exception {
            log.println(" decode" + (bytes.isDirect()?" (direct)":""));
            CharBuffer out = decoder.decode(bytes);

            bytes.rewind();
            byte[] bs = new byte[bytesPerChar];
            int e = 0;

            while (bytes.hasRemaining()) {
                bytes.get(bs);
                int rcp = getCodePoint(chars);
                int ocp = getCodePoint(out);
                if (rcp != ocp) {
                    log.println(" Error: "
                                + printBytes(bs)
                                + " --> "
                                + printCodePoint(ocp)
                                + ", expected "
                                + printCodePoint(rcp));
                    if (++e >= MAXERRS) {
                        log.println(" Too many errors, giving up");
                        break;
                    }
                }
                if (verbose) {
                    log.println(" "
                                + printBytes(bs)
                                + " --> "
                                + printCodePoint(rcp));
                }
            }

            if (e == 0 && (chars.hasRemaining() || out.hasRemaining())) {
                // Paranoia: Didn't consume everything
                throw new IllegalStateException();
            }
            bytes.rewind();
            chars.rewind();
            return (e == 0);
        }

        /**
         * Block-encodes the given chars and compares the result with the
         * given bytes, one mapping at a time. Both buffers are rewound
         * before returning.
         *
         * @return true if every encoded byte sequence matched
         */
        private boolean encode(ByteBuffer bytes, CharBuffer chars)
            throws Exception {
            log.println(" encode" + (bytes.isDirect()?" (direct)":""));
            ByteBuffer out = encoder.encode(chars);
            chars.rewind();

            // Stateful b2c files have leading and trailing
            // shift bytes for each mapping. However when
            // block encoded the output will consist of a single
            // leadByte followed by the raw DBCS byte values and
            // a final trail byte. The state variable shiftHackDBCS
            // which is true for stateful DBCS encodings is used
            // to conditionally strip away per-mapping shift bytes
            // from the comparison of expected versus actual encoded
            // byte values. This hack can be eliminated in Mustang
            // when sun.io converters and their associated tests are
            // removed.

            boolean boundaryBytes = false;
            int bytesPC = bytesPerChar;

            if (shiftHackDBCS && bytesPerChar==4) {
                bytesPC = 2;
                boundaryBytes = true;
                // Nothing has been read from bytes/chars yet, so both are
                // still at position zero on this early exit.
                if ((out.get()) != (byte)0x0e) {
                    log.println("Missing lead byte");
                    return(false);
                }
            }

            byte[] rbs = new byte[bytesPC];
            byte[] obs = new byte[bytesPC];
            int e = 0;
            while (chars.hasRemaining()) {
                int cp = getCodePoint(chars);
                // Skip lead shift ref byte for stateful encoding tests
                if (shiftHackDBCS && bytesPC == 2)
                    bytes.get();
                bytes.get(rbs);
                out.get(obs);
                boolean eq = true;
                for (int i = 0; i < bytesPC; i++)
                    eq &= rbs[i] == obs[i];
                if (!eq) {
                    log.println(" Error: "
                                + printCodePoint(cp)
                                + " --> "
                                + printBytes(obs)
                                + ", expected "
                                + printBytes(rbs));
                    if (++e >= MAXERRS) {
                        log.println(" Too many errors, giving up");
                        break;
                    }
                }
                if (verbose) {
                    log.println(" "
                                + printCodePoint(cp)
                                + " --> "
                                + printBytes(rbs));
                }

                // For stateful encodings ignore/exclude per-mapping
                // trail bytes from byte comparison
                if (shiftHackDBCS && bytesPC == 2)
                    bytes.get();
            }

            if (shiftHackDBCS && boundaryBytes) {
                if ((out.get()) != (byte)0x0f) {
                    log.println("Missing trail byte");
                    // Leave the reference data reusable for the next pass
                    bytes.rewind();
                    chars.rewind();
                    return(false);
                }
            }

            if (e == 0 && (bytes.hasRemaining() || out.hasRemaining())) {
                // Paranoia: Didn't consume everything
                throw new IllegalStateException();
            }

            bytes.rewind();
            chars.rewind();
            return (e == 0);
        }

        /**
         * Runs the decode and/or encode checks selected by mode, first on
         * the heap buffers and then on direct copies of them.
         */
        private boolean run(int mode) throws Exception {
            log.println(" " + bytesPerChar
                        + " byte" + plural(bytesPerChar) + "/char");

            if (dRefBytes.capacity() < refBytes.capacity()) {
                dRefBytes = ByteBuffer.allocateDirect(refBytes.capacity());
            }
            if (dRefChars.capacity() < refChars.capacity()) {
                dRefChars = ByteBuffer.allocateDirect(refChars.capacity()*2)
                                      .asCharBuffer();
            }
            refBytes.flip();
            refChars.flip();
            dRefBytes.clear();
            dRefChars.clear();

            dRefBytes.put(refBytes).flip();
            dRefChars.put(refChars).flip();
            // The copies above drained the heap buffers; flipping again
            // puts them back at position zero with the same limit.
            refBytes.flip();
            refChars.flip();

            boolean rv = true;
            if (mode != ENCODE) {
                rv &= decode(refBytes, refChars);
                rv &= decode(dRefBytes, dRefChars);
            }
            if (mode != DECODE) {
                rv &= encode(refBytes, refChars);
                rv &= encode(dRefBytes, dRefChars);
            }
            return rv;
        }
    }

    // Maximum bytes/char being tested
    private int maxBytesPerChar = 0;

    // Tests, indexed by bytesPerChar - 1
    private Test[] tests;

    private void clearTests() {
        maxBytesPerChar = 0;
        tests = new Test[0];
    }

    // Find the test for the given bytes/char value,
    // expanding the test array if needed
    //
    private Test testFor(int bpc) {
        if (bpc > maxBytesPerChar) {
            Test[] ts = new Test[bpc];
            System.arraycopy(tests, 0, ts, 0, maxBytesPerChar);
            for (int i = maxBytesPerChar; i < bpc; i++)
                ts[i] = new Test(i + 1);
            tests = ts;
            maxBytesPerChar = bpc;
        }
        return tests[bpc - 1];
    }

    // Compute the name of the test file for the given encoding and mode. If
    // the file exists then return its name, otherwise return null.
    //
    private File testFile(String encoding, int mode) {
        File f = new File(testRootDir, encoding + extension[mode]);
        return f.exists() ? f : null;
    }

    // Parse the given b2c file and load up the required test objects
    //
    private void loadTests(File f)
        throws Exception
    {
        clearTests();
        try (FileInputStream in = new FileInputStream(f)) {
            Parser p = new Parser(in);
            Entry e = new Entry();
            while ((e = p.next(e)) != null) {
                if (e.cp2 != 0)
                    continue;  // skip composite (base+cc) for now
                byte[] bs = e.bb;
                char[] cc = Character.toChars(e.cp);
                testFor(bs.length).put(bs, cc);
            }
            shiftHackDBCS = p.isStateful();
        }
    }

    /**
     * Runs every available pass for this charset.
     *
     * @return true if no pass reported an error
     */
    private boolean run() throws Exception {
        encoder
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .onMalformedInput(CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
            .onMalformedInput(CodingErrorAction.REPLACE);
        boolean rv = true;

        log.println();
        log.println(cs.name() + " (" + encoding + ")");

        // With -Djdk.charset.GB18030=2000 the GB18030 mappings are read
        // from the GB18030_2000 files instead.
        String fileName = (encoding.equals("GB18030") && IS_2000)
            ? "GB18030_2000"
            : encoding;

        // Outer loop runs three passes: roundtrip, irreversible encodings,
        // and then irreversible decodings
        for (int mode = ROUNDTRIP; mode <= DECODE; mode++) {
            File f = testFile(fileName, mode);
            if (f == null)
                continue;
            loadTests(f);
            for (int i = 0; i < maxBytesPerChar; i++)
                rv &= tests[i].run(mode);
        }
        return rv;
    }

    // For debugging: java CoderTest [-v] foo.b2c bar.b2c ...
    //
    public static void main(String args[])
        throws Exception
    {
        String[] av = args;
        if (av.length == 0) {
            av = new File(testRootDir).list();
            // File.list() yields null when the path is not a readable directory
            if (av == null)
                throw new IOException("Cannot list directory: " + testRootDir);
        }
        int errors = 0;
        int tested = 0;
        int skipped = 0;

        for (String a : av) {
            if (a.equals("-v")) {
                verbose = true;
                continue;
            }
            if (!a.endsWith(".b2c"))
                continue;
            String encoding = a.substring(0, a.length() - 4);

            if (!Charset.isSupported(encoding)) {
                log.println();
                log.println("Not supported: " + encoding);
                skipped++;
                continue;
            }
            tested++;
            if (!new CoderTest(encoding).run())
                errors++;
        }

        log.println();
        log.println(tested + " charset" + plural(tested) + " tested, "
                    + skipped + " not supported");
        log.println();
        if (errors > 0)
            throw new Exception("Errors detected in "
                                + errors + " charset" + plural(errors));
    }
}