jdk-24/jdk/test/sun/nio/cs/NIOJISAutoDetectTest.java

283 lines
12 KiB
Java
Raw Normal View History

/*
* Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/*
* @test
* @bug 4831163 5053096 5056440
* @summary NIO charset basic verification of JISAutodetect decoder
* @author Martin Buchholz
*/
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import static java.lang.System.*;
public class NIOJISAutoDetectTest {
private static int failures = 0;
private static void fail(String failureMsg) {
System.out.println(failureMsg);
failures++;
}
private static void check(boolean cond, String msg) {
if (!cond) {
fail("test failed: " + msg);
new Exception().printStackTrace();
}
}
private static String SJISName() throws Exception {
return detectingCharset(new byte[] {(byte)0xbb, (byte)0xdd,
(byte)0xcf, (byte)0xb2});
}
private static String EUCJName() throws Exception {
return detectingCharset(new byte[] {(byte)0xa4, (byte)0xd2,
(byte)0xa4, (byte)0xe9});
}
private static String detectingCharset(byte[] bytes) throws Exception {
//----------------------------------------------------------------
// Test special public methods of CharsetDecoder while we're here
//----------------------------------------------------------------
CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder();
check(cd.isAutoDetecting(), "isAutodecting()");
check(! cd.isCharsetDetected(), "isCharsetDetected");
cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'}));
check(! cd.isCharsetDetected(), "isCharsetDetected");
try {
cd.detectedCharset();
fail("no IllegalStateException");
} catch (IllegalStateException e) {}
cd.decode(ByteBuffer.wrap(bytes));
check(cd.isCharsetDetected(), "isCharsetDetected");
Charset cs = cd.detectedCharset();
check(cs != null, "cs != null");
check(! cs.newDecoder().isAutoDetecting(), "isAutodetecting()");
return cs.name();
}
public static void main(String[] argv) throws Exception {
//----------------------------------------------------------------
// Used to throw BufferOverflowException
//----------------------------------------------------------------
out.println(new String(new byte[] {0x61}, "JISAutoDetect"));
//----------------------------------------------------------------
// InputStreamReader(...JISAutoDetect) used to infloop
//----------------------------------------------------------------
{
byte[] bytes = "ABCD\n".getBytes();
ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
InputStreamReader isr = new InputStreamReader(bais, "JISAutoDetect");
BufferedReader reader = new BufferedReader(isr);
check (reader.readLine().equals("ABCD"), "first read gets text");
// used to return "ABCD" on second and subsequent reads
check (reader.readLine() == null, "second read gets null");
}
//----------------------------------------------------------------
// Check all Japanese chars for sanity
//----------------------------------------------------------------
String SJIS = SJISName();
String EUCJ = EUCJName();
out.printf("SJIS charset is %s%n", SJIS);
out.printf("EUCJ charset is %s%n", EUCJ);
int cnt2022 = 0;
int cnteucj = 0;
int cntsjis = 0;
int cntBAD = 0;
for (char c = '\u0000'; c < '\uffff'; c++) {
if (c == '\u001b' || // ESC
c == '\u2014') // Em-Dash?
continue;
String s = new String (new char[] {c});
//----------------------------------------------------------------
// JISAutoDetect can handle all chars that EUC-JP can,
// unless there is an ambiguity with SJIS.
//----------------------------------------------------------------
byte[] beucj = s.getBytes(EUCJ);
String seucj = new String(beucj, EUCJ);
if (seucj.equals(s)) {
cnteucj++;
String sauto = new String(beucj, "JISAutoDetect");
if (! sauto.equals(seucj)) {
cntBAD++;
String ssjis = new String(beucj, SJIS);
if (! sauto.equals(ssjis)) {
fail("Autodetection agrees with neither EUC nor SJIS");
}
}
} else
continue; // Optimization
//----------------------------------------------------------------
// JISAutoDetect can handle all chars that ISO-2022-JP can.
//----------------------------------------------------------------
byte[] b2022 = s.getBytes("ISO-2022-JP");
if (new String(b2022, "ISO-2022-JP").equals(s)) {
cnt2022++;
check(new String(b2022,"JISAutoDetect").equals(s),
"ISO2022 autodetection");
}
//----------------------------------------------------------------
// JISAutoDetect can handle almost all chars that SJIS can.
//----------------------------------------------------------------
byte[] bsjis = s.getBytes(SJIS);
if (new String(bsjis, SJIS).equals(s)) {
cntsjis++;
check(new String(bsjis,"JISAutoDetect").equals(s),
"SJIS autodetection");
}
}
out.printf("There are %d ISO-2022-JP-encodable characters.%n", cnt2022);
out.printf("There are %d SJIS-encodable characters.%n", cntsjis);
out.printf("There are %d EUC-JP-encodable characters.%n", cnteucj);
out.printf("There are %d characters that are " +
"misdetected as SJIS after being EUC-encoded.%n", cntBAD);
//----------------------------------------------------------------
// tests for specific byte sequences
//----------------------------------------------------------------
test("ISO-2022-JP", new byte[] {'A', 'B', 'C'});
test("EUC-JP", new byte[] {'A', 'B', 'C'});
test("SJIS", new byte[] {'A', 'B', 'C'});
test("SJIS",
new byte[] { 'C', 'o', 'p', 'y', 'r', 'i', 'g', 'h', 't',
' ', (byte)0xa9, ' ', '1', '9', '9', '8' });
test("SJIS",
new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
(byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
(byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
(byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 });
test("EUC-JP",
new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9,
(byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca });
test("SJIS",
new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
(byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
(byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde});
test("SJIS",
new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
(byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
(byte)0xc3, (byte)0xd1, (byte)0xbd });
test("SJIS",
new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa });
test("EUC-JP",
new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20});
test("EUC-JP",
new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
(byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
(byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
(byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 });
test("ISO-2022-JP",
new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' });
//----------------------------------------------------------------
// Check handling of ambiguous end-of-input in middle of first char
//----------------------------------------------------------------
{
CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder();
ByteBuffer bb = ByteBuffer.allocate(128);
CharBuffer cb = CharBuffer.allocate(128);
bb.put((byte)'A').put((byte)0x8f);
bb.flip();
CoderResult res = dc.decode(bb,cb,false);
check(res.isUnderflow(), "isUnderflow");
check(bb.position() == 1, "bb.position()");
check(cb.position() == 1, "cb.position()");
res = dc.decode(bb,cb,false);
check(res.isUnderflow(), "isUnderflow");
check(bb.position() == 1, "bb.position()");
check(cb.position() == 1, "cb.position()");
bb.compact();
bb.put((byte)0xa1);
bb.flip();
res = dc.decode(bb,cb,true);
check(res.isUnderflow(), "isUnderflow");
check(bb.position() == 2, "bb.position()");
check(cb.position() == 2, "cb.position()");
}
if (failures > 0)
throw new RuntimeException(failures + " tests failed");
}
static void checkCoderResult(CoderResult result) {
check(result.isUnderflow(),
"Unexpected coder result: " + result);
}
static void test(String expectedCharset, byte[] input) throws Exception {
Charset cs = Charset.forName("x-JISAutoDetect");
CharsetDecoder autoDetect = cs.newDecoder();
Charset cs2 = Charset.forName(expectedCharset);
CharsetDecoder decoder = cs2.newDecoder();
ByteBuffer bb = ByteBuffer.allocate(128);
CharBuffer charOutput = CharBuffer.allocate(128);
CharBuffer charExpected = CharBuffer.allocate(128);
bb.put(input);
bb.flip();
bb.mark();
CoderResult result = autoDetect.decode(bb, charOutput, true);
checkCoderResult(result);
charOutput.flip();
String actual = charOutput.toString();
bb.reset();
result = decoder.decode(bb, charExpected, true);
checkCoderResult(result);
charExpected.flip();
String expected = charExpected.toString();
check(actual.equals(expected),
String.format("actual=%s expected=%s", actual, expected));
}
}