db85090553
Co-authored-by: Sean Mullan <mullan@openjdk.org> Co-authored-by: Alan Bateman <alanb@openjdk.org> Co-authored-by: Weijun Wang <weijun@openjdk.org> Co-authored-by: Aleksei Efimov <aefimov@openjdk.org> Co-authored-by: Brian Burkhalter <bpb@openjdk.org> Co-authored-by: Daniel Fuchs <dfuchs@openjdk.org> Co-authored-by: Harshitha Onkar <honkar@openjdk.org> Co-authored-by: Joe Wang <joehw@openjdk.org> Co-authored-by: Jorn Vernee <jvernee@openjdk.org> Co-authored-by: Justin Lu <jlu@openjdk.org> Co-authored-by: Kevin Walls <kevinw@openjdk.org> Co-authored-by: Lance Andersen <lancea@openjdk.org> Co-authored-by: Naoto Sato <naoto@openjdk.org> Co-authored-by: Roger Riggs <rriggs@openjdk.org> Co-authored-by: Brent Christian <bchristi@openjdk.org> Co-authored-by: Stuart Marks <smarks@openjdk.org> Co-authored-by: Ian Graves <igraves@openjdk.org> Co-authored-by: Phil Race <prr@openjdk.org> Co-authored-by: Erik Gahlin <egahlin@openjdk.org> Co-authored-by: Jaikiran Pai <jpai@openjdk.org> Reviewed-by: kevinw, aivanov, rriggs, lancea, coffeys, dfuchs, ihse, erikj, cjplummer, coleenp, naoto, mchung, prr, weijun, joehw, azvegint, psadhukhan, bchristi, sundar, attila
246 lines
10 KiB
Java
246 lines
10 KiB
Java
/*
 * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
|
|
|
|
/* @test
 * @bug 6636323 6636319 7040220 7096080 7183053 8080248 8054307
 * @summary Test if StringCoding and NIO result have the same de/encoding result
 * @modules java.base/sun.nio.cs
 * @run main/othervm/timeout=2000 TestStringCoding
 * @key randomness
 */
|
|
|
|
import java.util.*;
|
|
import java.nio.*;
|
|
import java.nio.charset.*;
|
|
|
|
public class TestStringCoding {
|
|
public static void main(String[] args) throws Throwable {
|
|
|
|
// full bmp first
|
|
char[] bmp = new char[0x10000];
|
|
for (int i = 0; i < 0x10000; i++) {
|
|
bmp[i] = (char)i;
|
|
}
|
|
char[] latin = Arrays.copyOf(bmp, 0x100);
|
|
char[] ascii = Arrays.copyOf(bmp, 0x80);
|
|
|
|
byte[] latinBA = new byte[0x100];
|
|
for (int i = 0; i < 0x100; i++) {
|
|
latinBA[i] = (byte)i;
|
|
}
|
|
byte[] asciiBA = Arrays.copyOf(latinBA, 0x80);
|
|
|
|
|
|
for (Charset cs: Charset.availableCharsets().values()) {
|
|
if ("ISO-2022-CN".equals(cs.name()) ||
|
|
"x-COMPOUND_TEXT".equals(cs.name()) ||
|
|
"x-JISAutoDetect".equals(cs.name()))
|
|
continue;
|
|
System.out.println("Testing " + cs.name() + "....");
|
|
|
|
testNewString(cs, testGetBytes(cs, new String(bmp)));
|
|
testNewString(cs, testGetBytes(cs, new String(latin)));
|
|
testNewString(cs, testGetBytes(cs, new String(ascii)));
|
|
testGetBytes(cs, testNewString(cs, latinBA));
|
|
testGetBytes(cs, testNewString(cs, asciiBA));
|
|
|
|
// "randomed" sizes
|
|
Random rnd = new Random();
|
|
for (int i = 0; i < 10; i++) {
|
|
//System.out.printf(" blen=%d, clen=%d%n", blen, clen);
|
|
char[] bmp0 = Arrays.copyOf(bmp, rnd.nextInt(0x10000));
|
|
testNewString(cs, testGetBytes(cs, new String(bmp0)));
|
|
//add a pair of surrogates
|
|
int pos = bmp0.length / 2;
|
|
if ((pos + 1) < bmp0.length) {
|
|
bmp0[pos] = '\uD800';
|
|
bmp0[pos+1] = '\uDC00';
|
|
}
|
|
testNewString(cs, testGetBytes(cs, new String(bmp0)));
|
|
|
|
char[] latin0 = Arrays.copyOf(latin, rnd.nextInt(0x100));
|
|
char[] ascii0 = Arrays.copyOf(ascii, rnd.nextInt(0x80));
|
|
byte[] latinBA0 = Arrays.copyOf(latinBA, rnd.nextInt(0x100));
|
|
byte[] asciiBA0 = Arrays.copyOf(asciiBA, rnd.nextInt(0x80));
|
|
testNewString(cs, testGetBytes(cs, new String(latin0)));
|
|
testNewString(cs, testGetBytes(cs, new String(ascii0)));
|
|
testGetBytes(cs, testNewString(cs, latinBA0));
|
|
testGetBytes(cs, testNewString(cs, asciiBA0));
|
|
}
|
|
testSurrogates(cs);
|
|
testMixed(cs);
|
|
System.out.println("done!");
|
|
}
|
|
}
|
|
|
|
static void testMixed(Charset cs) throws Throwable {
|
|
CharsetDecoder dec = cs.newDecoder()
|
|
.onMalformedInput(CodingErrorAction.REPLACE)
|
|
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
|
CharsetEncoder enc = cs.newEncoder()
|
|
.onMalformedInput(CodingErrorAction.REPLACE)
|
|
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
|
List<Integer> cps = new ArrayList<>(0x10000);
|
|
int off = 0;
|
|
int cp = 0;
|
|
while (cp < 0x10000) {
|
|
if (enc.canEncode((char)cp)) {
|
|
cps.add(cp);
|
|
}
|
|
cp++;
|
|
}
|
|
Collections.shuffle(cps);
|
|
char[] bmpCA = new char[cps.size()];
|
|
for (int i = 0; i < cps.size(); i++)
|
|
bmpCA[i] = (char)(int)cps.get(i);
|
|
String bmpStr = new String(bmpCA);
|
|
//getBytes(csn);
|
|
byte[] bmpBA = bmpStr.getBytes(cs.name());
|
|
ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
|
|
byte[] baNIO = new byte[bf.limit()];
|
|
bf.get(baNIO, 0, baNIO.length);
|
|
if (!Arrays.equals(bmpBA, baNIO)) {
|
|
throw new RuntimeException("getBytes(csn) failed -> " + cs.name());
|
|
}
|
|
|
|
//getBytes(cs);
|
|
bmpBA = bmpStr.getBytes(cs);
|
|
if (!Arrays.equals(bmpBA, baNIO)) {
|
|
throw new RuntimeException("getBytes(cs) failed -> " + cs.name());
|
|
}
|
|
|
|
//new String(csn);
|
|
String strSC = new String(bmpBA, cs.name());
|
|
String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString();
|
|
if(!strNIO.equals(strSC)) {
|
|
throw new RuntimeException("new String(csn) failed -> " + cs.name());
|
|
}
|
|
//new String(cs);
|
|
strSC = new String(bmpBA, cs);
|
|
if (!strNIO.equals(strSC)) {
|
|
throw new RuntimeException("new String(cs) failed -> " + cs.name());
|
|
}
|
|
}
|
|
|
|
static byte[] getBytes(CharsetEncoder enc, String str) throws Throwable {
|
|
ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(str.toCharArray()));
|
|
byte[] ba = new byte[bf.limit()];
|
|
bf.get(ba, 0, ba.length);
|
|
return ba;
|
|
}
|
|
|
|
static byte[] testGetBytes(Charset cs, String str) throws Throwable {
|
|
CharsetEncoder enc = cs.newEncoder()
|
|
.onMalformedInput(CodingErrorAction.REPLACE)
|
|
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
|
//getBytes(csn);
|
|
byte[] baSC = str.getBytes(cs.name());
|
|
byte[] baNIO = getBytes(enc, str);
|
|
if (!Arrays.equals(baSC, baNIO)) {
|
|
throw new RuntimeException("getBytes(csn) failed -> " + cs.name());
|
|
}
|
|
//getBytes(cs);
|
|
baSC = str.getBytes(cs);
|
|
if (!Arrays.equals(baSC, baNIO)) {
|
|
throw new RuntimeException("getBytes(cs) failed -> " + cs.name());
|
|
}
|
|
return baSC;
|
|
}
|
|
|
|
static String testNewString(Charset cs, byte[] ba) throws Throwable {
|
|
CharsetDecoder dec = cs.newDecoder()
|
|
.onMalformedInput(CodingErrorAction.REPLACE)
|
|
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
|
//new String(csn);
|
|
String strSC = new String(ba, cs.name());
|
|
String strNIO = dec.reset().decode(ByteBuffer.wrap(ba)).toString();
|
|
if(!strNIO.equals(strSC)) {
|
|
throw new RuntimeException("new String(csn) failed -> " + cs.name());
|
|
}
|
|
//new String(cs);
|
|
strSC = new String(ba, cs);
|
|
if (!strNIO.equals(strSC)) {
|
|
throw new RuntimeException("new String(cs)/bmp failed -> " + cs.name());
|
|
}
|
|
return strSC;
|
|
}
|
|
|
|
static void testSurrogates(Charset cs) throws Throwable {
|
|
//encode unmappable surrogates
|
|
CharsetEncoder enc = cs.newEncoder()
|
|
.onMalformedInput(CodingErrorAction.REPLACE)
|
|
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
|
if (enc instanceof sun.nio.cs.ArrayEncoder &&
|
|
cs.contains(Charset.forName("ASCII"))) {
|
|
if (cs.name().equals("UTF-8") || // utf8 handles surrogates
|
|
cs.name().equals("CESU-8")) // utf8 handles surrogates
|
|
return;
|
|
enc.replaceWith(new byte[] { (byte)'A'});
|
|
sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc;
|
|
|
|
String str = "ab\uD800\uDC00\uD800\uDC00cd";
|
|
byte[] ba = new byte[str.length() - 2];
|
|
int n = cae.encode(str.toCharArray(), 0, str.length(), ba);
|
|
if (n != 6 || !"abAAcd".equals(new String(ba, cs.name())))
|
|
throw new RuntimeException("encode1(surrogates) failed -> "
|
|
+ cs.name());
|
|
|
|
ba = new byte[str.length()];
|
|
n = cae.encode(str.toCharArray(), 0, str.length(), ba);
|
|
if (n != 6 || !"abAAcd".equals(new String(ba, 0, n,
|
|
cs.name())))
|
|
throw new RuntimeException("encode2(surrogates) failed -> "
|
|
+ cs.name());
|
|
str = "ab\uD800B\uDC00Bcd";
|
|
ba = new byte[str.length()];
|
|
n = cae.encode(str.toCharArray(), 0, str.length(), ba);
|
|
if (n != 8 || !"abABABcd".equals(new String(ba, 0, n,
|
|
cs.name())))
|
|
throw new RuntimeException("encode3(surrogates) failed -> "
|
|
+ cs.name());
|
|
/* sun.nio.cs.ArrayDeEncoder works on the assumption that the
|
|
invoker (StringCoder) allocates enough output buf, utf8
|
|
and double-byte coder does not check the output buffer limit.
|
|
ba = new byte[str.length() - 1];
|
|
n = cae.encode(str.toCharArray(), 0, str.length(), ba);
|
|
if (n != 7 || !"abABABc".equals(new String(ba, 0, n, cs.name()))) {
|
|
throw new RuntimeException("encode4(surrogates) failed -> "
|
|
+ cs.name());
|
|
}
|
|
*/
|
|
}
|
|
|
|
//encode mappable surrogates for hkscs
|
|
if (cs.name().equals("Big5-HKSCS") || cs.name().equals("x-MS950-HKSCS")) {
|
|
String str = "ab\uD840\uDD0Ccd";
|
|
byte[] expected = new byte[] {(byte)'a', (byte)'b',
|
|
(byte)0x88, (byte)0x45, (byte)'c', (byte)'d' };
|
|
if (!Arrays.equals(str.getBytes(cs.name()), expected) ||
|
|
!Arrays.equals(str.getBytes(cs), expected)) {
|
|
throw new RuntimeException("encode(surrogates) failed -> "
|
|
+ cs.name());
|
|
}
|
|
}
|
|
}
|
|
}
|