/*
 * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

/*
 * @test
 * @bug 4831163 5053096 5056440
 * @summary NIO charset basic verification of JISAutodetect decoder
 * @author Martin Buchholz
 */

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import static java.lang.System.*;

/**
 * Regression test for the JISAutoDetect decoder.
 *
 * <p>{@link #main} runs a fixed series of scenarios. Each failed check is
 * printed and counted rather than thrown, so that one run reports every
 * problem; {@code main} throws a {@code RuntimeException} at the end if the
 * count is non-zero.
 */
public class NIOJISAutoDetectTest {
    /** Number of failed checks recorded so far in this run. */
    private static int failures = 0;

    /** Prints {@code failureMsg} to standard output and counts one failure. */
    private static void fail(String failureMsg) {
        System.out.println(failureMsg);
        failures++;
    }

    /**
     * Records a failure, and prints a stack trace identifying the caller,
     * when {@code cond} is false.
     *
     * @param cond the condition that is expected to hold
     * @param msg  description appended to the "test failed: " prefix
     */
    private static void check(boolean cond, String msg) {
        if (!cond) {
            fail("test failed: " + msg);
            new Exception().printStackTrace();
        }
    }

    /**
     * Returns the name of the charset that the auto-detecting decoder reports
     * for the fixed sample this test uses as its SJIS reference.
     */
    private static String SJISName() throws Exception {
        return detectingCharset(new byte[] {
                (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2});
    }

    /**
     * Returns the name of the charset that the auto-detecting decoder reports
     * for the fixed sample this test uses as its EUC-JP reference.
     */
    private static String EUCJName() throws Exception {
        return detectingCharset(new byte[] {
                (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9});
    }

    /**
     * Decodes {@code bytes} with a fresh JISAutodetect decoder and returns the
     * name of the charset it detected, exercising the detection-related public
     * methods of {@link CharsetDecoder} along the way.
     *
     * @param bytes sample input handed to the decoder after a single 'A'
     * @return the name of the detected charset
     */
    private static String detectingCharset(byte[] bytes) throws Exception {
        //----------------------------------------------------------------
        // Test special public methods of CharsetDecoder while we're here
        //----------------------------------------------------------------
        CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder();
        check(cd.isAutoDetecting(), "isAutoDetecting()");
        check(! cd.isCharsetDetected(), "isCharsetDetected");

        // A lone 'A' is expected to leave the charset undetected.
        cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'}));
        check(! cd.isCharsetDetected(), "isCharsetDetected");
        try {
            cd.detectedCharset();
            fail("no IllegalStateException");
        } catch (IllegalStateException expected) {
            // Expected: nothing has been detected yet.
        }

        cd.decode(ByteBuffer.wrap(bytes));
        check(cd.isCharsetDetected(), "isCharsetDetected");
        Charset cs = cd.detectedCharset();
        check(cs != null, "cs != null");
        check(! cs.newDecoder().isAutoDetecting(), "isAutodetecting()");
        return cs.name();
    }

    /**
     * Runs every scenario in order.
     *
     * @param argv ignored
     * @throws RuntimeException if any check failed
     */
    public static void main(String[] argv) throws Exception {
        checkSingleByteDecode();
        checkReaderReachesEndOfStream();
        checkAllCharacters();
        checkSpecificByteSequences();
        checkTruncatedFirstCharacter();

        if (failures > 0)
            throw new RuntimeException(failures + " tests failed");
    }

    /** Decodes a one-byte array; this used to throw BufferOverflowException. */
    private static void checkSingleByteDecode() throws Exception {
        out.println(new String(new byte[] {0x61}, "JISAutoDetect"));
    }

    /**
     * Reads one line through an InputStreamReader using JISAutoDetect; this
     * used to loop forever, returning "ABCD" on every read.
     */
    private static void checkReaderReachesEndOfStream() throws Exception {
        // Explicit charset: the input must not depend on the platform default.
        byte[] bytes = "ABCD\n".getBytes("US-ASCII");
        BufferedReader reader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(bytes),
                                  "JISAutoDetect"));
        try {
            // Constant first, so a null line is reported as a failed check
            // instead of aborting the run with a NullPointerException.
            check("ABCD".equals(reader.readLine()), "first read gets text");
            // used to return "ABCD" on second and subsequent reads
            check(reader.readLine() == null, "second read gets null");
        } finally {
            reader.close();
        }
    }

    /**
     * Checks all Japanese chars for sanity: every char that round-trips
     * through the EUC-JP reference charset is decoded by JISAutoDetect from
     * its EUC-JP, ISO-2022-JP and SJIS encodings, and the counts are printed.
     */
    private static void checkAllCharacters() throws Exception {
        String sjisName = SJISName();
        String eucjName = EUCJName();
        out.printf("SJIS charset is %s%n", sjisName);
        out.printf("EUCJ charset is %s%n", eucjName);

        int cnt2022 = 0;
        int cnteucj = 0;
        int cntsjis = 0;
        int cntBAD  = 0;

        // The bound is exclusive: a char counter wraps to zero after its
        // maximum value, so an inclusive bound would never terminate.
        for (char c = '\u0000'; c < '\uffff'; c++) {
            if (c == '\u001b' ||  // ESC
                c == '\u2014')    // Em-Dash?
                continue;

            String s = new String(new char[] {c});

            //------------------------------------------------------------
            // JISAutoDetect can handle all chars that EUC-JP can,
            // unless there is an ambiguity with SJIS.
            //------------------------------------------------------------
            byte[] beucj = s.getBytes(eucjName);
            String seucj = new String(beucj, eucjName);
            if (! seucj.equals(s))
                continue; // Optimization: skip chars EUC-JP cannot encode

            cnteucj++;
            String sauto = new String(beucj, "JISAutoDetect");
            if (! sauto.equals(seucj)) {
                cntBAD++;
                String ssjis = new String(beucj, sjisName);
                if (! sauto.equals(ssjis)) {
                    fail("Autodetection agrees with neither EUC nor SJIS");
                }
            }

            //------------------------------------------------------------
            // JISAutoDetect can handle all chars that ISO-2022-JP can.
            //------------------------------------------------------------
            byte[] b2022 = s.getBytes("ISO-2022-JP");
            if (new String(b2022, "ISO-2022-JP").equals(s)) {
                cnt2022++;
                check(new String(b2022, "JISAutoDetect").equals(s),
                      "ISO2022 autodetection");
            }

            //------------------------------------------------------------
            // JISAutoDetect can handle almost all chars that SJIS can.
            //------------------------------------------------------------
            byte[] bsjis = s.getBytes(sjisName);
            if (new String(bsjis, sjisName).equals(s)) {
                cntsjis++;
                check(new String(bsjis, "JISAutoDetect").equals(s),
                      "SJIS autodetection");
            }
        }

        out.printf("There are %d ISO-2022-JP-encodable characters.%n", cnt2022);
        out.printf("There are %d SJIS-encodable characters.%n",        cntsjis);
        out.printf("There are %d EUC-JP-encodable characters.%n",      cnteucj);
        out.printf("There are %d characters that are " +
                   "misdetected as SJIS after being EUC-encoded.%n", cntBAD);
    }

    /**
     * Tests for specific byte sequences: each one must decode identically
     * with the auto-detecting decoder and with the named charset.
     */
    private static void checkSpecificByteSequences() throws Exception {
        test("ISO-2022-JP", new byte[] {'A', 'B', 'C'});
        test("EUC-JP",      new byte[] {'A', 'B', 'C'});
        test("SJIS",        new byte[] {'A', 'B', 'C'});

        test("SJIS",
             new byte[] { 'C', 'o', 'p', 'y', 'r', 'i', 'g', 'h', 't',
                          ' ', (byte)0xa9, ' ', '1', '9', '9', '8' });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 });

        test("EUC-JP",
             new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9,
                          (byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde});

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd });

        test("SJIS",
             new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa });

        test("EUC-JP",
             new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20});

        test("EUC-JP",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 });

        test("ISO-2022-JP",
             new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' });
    }

    /**
     * Checks handling of ambiguous end-of-input in the middle of the first
     * char: after 'A', a lone 0x8f must stay unconsumed across repeated
     * decode calls until another byte is supplied.
     */
    private static void checkTruncatedFirstCharacter() {
        CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer cb = CharBuffer.allocate(128);
        bb.put((byte)'A').put((byte)0x8f);
        bb.flip();

        CoderResult res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Decoding again without new input must not make progress.
        res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Keep the pending 0x8f, append one more byte and finish the input.
        bb.compact();
        bb.put((byte)0xa1);
        bb.flip();
        res = dc.decode(bb, cb, true);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 2, "bb.position()");
        check(cb.position() == 2, "cb.position()");
    }

    /** Records a failure unless {@code result} is an underflow. */
    static void checkCoderResult(CoderResult result) {
        check(result.isUnderflow(),
              "Unexpected coder result: " + result);
    }

    /**
     * Checks that the auto-detecting decoder and the decoder of
     * {@code expectedCharset} produce the same text for {@code input}.
     *
     * @param expectedCharset name of the charset whose decoder supplies the
     *                        reference result
     * @param input           the bytes to decode; any length is accepted
     */
    static void test(String expectedCharset, byte[] input) throws Exception {
        CharsetDecoder autoDetect =
            Charset.forName("x-JISAutoDetect").newDecoder();
        CharsetDecoder decoder = Charset.forName(expectedCharset).newDecoder();

        String actual = decodeFully(autoDetect, input);
        String expected = decodeFully(decoder, input);

        check(actual.equals(expected),
              String.format("charset=%s actual=%s expected=%s",
                            expectedCharset, actual, expected));
    }

    /**
     * Decodes all of {@code input} in one call with end-of-input set, checks
     * the coder result, and returns the chars produced.
     */
    private static String decodeFully(CharsetDecoder decoder, byte[] input) {
        // Sized from the decoder's own bound so that long input is accepted,
        // with a floor of 128 chars.
        int capacity = Math.max(
            128, (int) Math.ceil(input.length * (double) decoder.maxCharsPerByte()));
        CharBuffer chars = CharBuffer.allocate(capacity);
        CoderResult result = decoder.decode(ByteBuffer.wrap(input), chars, true);
        checkCoderResult(result);
        chars.flip();
        return chars.toString();
    }
}