/* * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /* @test @bug 4691554 6221056 6380723 6404504 6419565 6529796 8301119 @summary Test the supported New I/O coders @modules jdk.charsets @run main CoderTest @run main/othervm -Djdk.charset.GB18030=2000 CoderTest */ import java.io.*; import java.nio.*; import java.nio.charset.*; import java.util.regex.*; public class CoderTest { private static final int BUFSIZ = 8192; // Initial buffer size private static final int MAXERRS = 10; // Errors reported per test private static final String testRootDir = System.getProperty("test.src", "."); private static final PrintStream log = System.out; // Set by -v on the command line private static boolean verbose = false; // Test modes private static final int ROUNDTRIP = 0; private static final int ENCODE = 1; private static final int DECODE = 2; private static boolean shiftHackDBCS = false; // File extensions, indexed by test mode private static final String[] extension = new String[] { ".b2c", ".c2b-irreversible", ".b2c-irreversible" }; private static final boolean IS_2000 = "2000".equals(System.getProperty("jdk.charset.GB18030")); // Utilities private static ByteBuffer expand(ByteBuffer bb) { ByteBuffer nbb = ByteBuffer.allocate(bb.capacity() * 2); bb.flip(); nbb.put(bb); return nbb; } private static CharBuffer expand(CharBuffer cb) { CharBuffer ncb = CharBuffer.allocate(cb.capacity() * 2); cb.flip(); ncb.put(cb); return ncb; } private static byte[] parseBytes(String s) { int nb = s.length() / 2; byte[] bs = new byte[nb]; for (int i = 0; i < nb; i++) { int j = i * 2; if (j + 2 > s.length()) throw new RuntimeException("Malformed byte string: " + s); bs[i] = (byte)Integer.parseInt(s.substring(j, j + 2), 16); } return bs; } private static String printBytes(byte[] bs) { StringBuffer sb = new StringBuffer(); for (int i = 0; i < bs.length; i++) { sb.append(Integer.toHexString((bs[i] >> 4) & 0xf)); sb.append(Integer.toHexString((bs[i] >> 0) & 0xf)); } return sb.toString(); } private static String printCodePoint(int cp) { StringBuffer sb = new StringBuffer(); sb.append("U+"); if (cp > 0xffff) sb.append(Integer.toHexString((cp >> 16) & 0xf)); sb.append(Integer.toHexString((cp >> 12) & 0xf)); sb.append(Integer.toHexString((cp >> 8) & 0xf)); sb.append(Integer.toHexString((cp >> 4) & 0xf)); sb.append(Integer.toHexString((cp >> 0) & 0xf)); return sb.toString(); } private static int getCodePoint(CharBuffer cb) { char c = cb.get(); if (Character.isHighSurrogate(c)) return Character.toCodePoint(c, cb.get()); else return c; } private static String plural(int n) { return (n == 1 ? "" : "s"); } static class Entry { byte[] bb; int cp; int cp2; } public static class Parser { static Pattern p = Pattern.compile("(0[xX])?(00)?([0-9a-fA-F]+)\\s+(0[xX])?([0-9a-fA-F]+)(\\+0x([0-9a-fA-F]+))?\\s*"); static final int gBS = 1; static final int gCP = 2; static final int gCP2 = 3; boolean isStateful = false; BufferedReader reader; boolean closed; Matcher matcher; public Parser (InputStream in) throws IOException { this.reader = new BufferedReader(new InputStreamReader(in)); this.closed = false; this.matcher = p.matcher(""); } public boolean isStateful() { return isStateful; } protected boolean isDirective(String line) { // Stateful DBCS encodings need special treatment if (line.startsWith("#STATEFUL")) { return isStateful = true; } return line.startsWith("#"); } protected Entry parse(Matcher m, Entry e) { e.bb = parseBytes(m.group(3)); e.cp = Integer.parseInt(m.group(5), 16); if (m.group(7) != null) e.cp2 = Integer.parseInt(m.group(7), 16); else e.cp2 = 0; return e; } public Entry next() throws Exception { return next(new Entry()); } // returns null and closes the input stream if the eof has beenreached. public Entry next(Entry mapping) throws Exception { if (closed) return null; String line; while ((line = reader.readLine()) != null) { if (isDirective(line)) continue; matcher.reset(line); if (!matcher.lookingAt()) { //System.out.println("Missed: " + line); continue; } return parse(matcher, mapping); } reader.close(); closed = true; return null; } } // CoderTest private String encoding; private Charset cs; private CharsetDecoder decoder = null; private CharsetEncoder encoder = null; private CoderTest(String enc) throws Exception { encoding = enc; cs = Charset.forName(enc); decoder = cs.newDecoder(); encoder = cs.newEncoder(); } private class Test { // An instance of this class tests all mappings for // a particular bytesPerChar value private int bytesPerChar; // Reference data from .b2c file private ByteBuffer refBytes = ByteBuffer.allocate(BUFSIZ); private CharBuffer refChars = CharBuffer.allocate(BUFSIZ); private ByteBuffer dRefBytes = ByteBuffer.allocateDirect(BUFSIZ); private CharBuffer dRefChars = ByteBuffer.allocateDirect(BUFSIZ*2).asCharBuffer(); private Test(int bpc) { bytesPerChar = bpc; } private void put(byte[] bs, char[] cc) { if (bs.length != bytesPerChar) throw new IllegalArgumentException(bs.length + " != " + bytesPerChar); if (refBytes.remaining() < bytesPerChar) refBytes = expand(refBytes); refBytes.put(bs); if (refChars.remaining() < cc.length) refChars = expand(refChars); refChars.put(cc); } private boolean decode(ByteBuffer refByte, CharBuffer refChars) throws Exception { log.println(" decode" + (refByte.isDirect()?" (direct)":"")); CharBuffer out = decoder.decode(refBytes); refBytes.rewind(); byte[] bs = new byte[bytesPerChar]; int e = 0; while (refBytes.hasRemaining()) { refBytes.get(bs); int rcp = getCodePoint(refChars); int ocp = getCodePoint(out); if (rcp != ocp) { log.println(" Error: " + printBytes(bs) + " --> " + printCodePoint(ocp) + ", expected " + printCodePoint(rcp)); if (++e >= MAXERRS) { log.println(" Too many errors, giving up"); break; } } if (verbose) { log.println(" " + printBytes(bs) + " --> " + printCodePoint(rcp)); } } if (e == 0 && (refChars.hasRemaining() || out.hasRemaining())) { // Paranoia: Didn't consume everything throw new IllegalStateException(); } refBytes.rewind(); refChars.rewind(); return (e == 0); } private boolean encode(ByteBuffer refByte, CharBuffer refChars) throws Exception { log.println(" encode" + (refByte.isDirect()?" (direct)":"")); ByteBuffer out = encoder.encode(refChars); refChars.rewind(); // Stateful b2c files have leading and trailing // shift bytes for each mapping. However when // block encoded the output will consist of a single // leadByte followed by the raw DBCS byte values and // a final trail byte. The state variable shiftHackDBCS // which is true for stateful DBCS encodings is used // to conditionally strip away per-mapping shift bytes // from the comparison of expected versus actual encoded // byte values. This hack can be eliminated in Mustang // when sun.io converters and their associated tests are // removed. boolean boundaryBytes = false; int bytesPC = bytesPerChar; if (shiftHackDBCS && bytesPerChar==4) { bytesPC = 2; boundaryBytes = true; if ((out.get()) != (byte)0x0e) { log.println("Missing lead byte"); return(false); } } byte[] rbs = new byte[bytesPC]; byte[] obs = new byte[bytesPC]; int e = 0; while (refChars.hasRemaining()) { int cp = getCodePoint(refChars); // Skip lead shift ref byte for stateful encoding tests if (shiftHackDBCS && bytesPC == 2) refBytes.get(); refBytes.get(rbs); out.get(obs); boolean eq = true; for (int i = 0; i < bytesPC; i++) eq &= rbs[i] == obs[i]; if (!eq) { log.println(" Error: " + printCodePoint(cp) + " --> " + printBytes(obs) + ", expected " + printBytes(rbs)); if (++e >= MAXERRS) { log.println(" Too many errors, giving up"); break; } } if (verbose) { log.println(" " + printCodePoint(cp) + " --> " + printBytes(rbs)); } // For stateful encodings ignore/exclude per-mapping // trail bytes from byte comparison if (shiftHackDBCS && bytesPC == 2) refBytes.get(); } if (shiftHackDBCS && boundaryBytes) { if ((out.get()) != (byte)0x0f) { log.println("Missing trail byte"); return(false); } } if (e == 0 && (refBytes.hasRemaining() || out.hasRemaining())) { // Paranoia: Didn't consume everything throw new IllegalStateException(); } refBytes.rewind(); refChars.rewind(); return (e == 0); } private boolean run(int mode) throws Exception { log.println(" " + bytesPerChar + " byte" + plural(bytesPerChar) + "/char"); if (dRefBytes.capacity() < refBytes.capacity()) { dRefBytes = ByteBuffer.allocateDirect(refBytes.capacity()); } if (dRefChars.capacity() < refChars.capacity()) { dRefChars = ByteBuffer.allocateDirect(refChars.capacity()*2) .asCharBuffer(); } refBytes.flip(); refChars.flip(); dRefBytes.clear(); dRefChars.clear(); dRefBytes.put(refBytes).flip(); dRefChars.put(refChars).flip(); refBytes.flip(); refChars.flip(); boolean rv = true; if (mode != ENCODE) { rv &= decode(refBytes, refChars); rv &= decode(dRefBytes, dRefChars); } if (mode != DECODE) { rv &= encode(refBytes, refChars); rv &= encode(dRefBytes, dRefChars); } return rv; } } // Maximum bytes/char being tested private int maxBytesPerChar = 0; // Tests, indexed by bytesPerChar - 1 private Test[] tests; private void clearTests() { maxBytesPerChar = 0; tests = new Test[0]; } // Find the test for the given bytes/char value, // expanding the test array if needed // private Test testFor(int bpc) { if (bpc > maxBytesPerChar) { Test[] ts = new Test[bpc]; System.arraycopy(tests, 0, ts, 0, maxBytesPerChar); for (int i = maxBytesPerChar; i < bpc; i++) ts[i] = new Test(i + 1); tests = ts; maxBytesPerChar = bpc; } return tests[bpc - 1]; } // Compute the name of the test file for the given encoding and mode. If // the file exists then return its name, otherwise return null. // private File testFile(String encoding, int mode) { File f = new File(testRootDir, encoding + extension[mode]); if (!f.exists()) return null; return f; } // Parse the given b2c file and load up the required test objects // private void loadTests(File f) throws Exception { clearTests(); FileInputStream in = new FileInputStream(f); try { Parser p = new Parser(in); Entry e = new Entry(); while ((e = (Entry)p.next(e)) != null) { if (e.cp2 != 0) continue; // skip composite (base+cc) for now byte[] bs = e.bb; char[] cc = Character.toChars(e.cp); testFor(bs.length).put(bs, cc); } shiftHackDBCS = p.isStateful(); } finally { in.close(); } } private boolean run() throws Exception { encoder .onUnmappableCharacter(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE); boolean rv = true; log.println(); log.println(cs.name() + " (" + encoding + ")"); // Outer loop runs three passes: roundtrip, irreversible encodings, // and then irreversible decodings for (int mode = ROUNDTRIP; mode <= DECODE; mode++) { var fileName = encoding; if (fileName.equals("GB18030") && IS_2000) { // tweak the map file name fileName = "GB18030_2000"; } File f = testFile(fileName, mode); if (f == null) continue; loadTests(f); for (int i = 0; i < maxBytesPerChar; i++) rv &= tests[i].run(mode); } return rv; } // For debugging: java CoderTest [-v] foo.b2c bar.b2c ... // public static void main(String args[]) throws Exception { File d = new File(System.getProperty("test.src", ".")); String[] av = (args.length != 0) ? args : d.list(); int errors = 0; int tested = 0; int skipped = 0; for (int i = 0; i < av.length; i++) { String a = av[i]; if (a.equals("-v")) { verbose = true; continue; } if (a.endsWith(".b2c")) { String encoding = a.substring(0, a.length() - 4); if (!Charset.isSupported(encoding)) { log.println(); log.println("Not supported: " + encoding); skipped++; continue; } tested++; if (!new CoderTest(encoding).run()) errors++; } } log.println(); log.println(tested + " charset" + plural(tested) + " tested, " + skipped + " not supported"); log.println(); if (errors > 0) throw new Exception("Errors detected in " + errors + " charset" + plural(errors)); } }