From a253b4602147633a3d2e83775d1feef4f12a5272 Mon Sep 17 00:00:00 2001 From: Naoto Sato Date: Mon, 27 Feb 2023 16:35:59 +0000 Subject: [PATCH] 8301119: Support for GB18030-2022 Reviewed-by: alanb, coffeys, lancea --- make/data/charsetmapping/charsets | 12 +- make/data/charsetmapping/stdcs-aix | 1 - make/data/charsetmapping/stdcs-linux | 1 - make/data/charsetmapping/stdcs-solaris | 26 --- make/data/charsetmapping/stdcs-windows | 1 - .../build/tools/charsetmapping/SPI.java | 38 +++-- .../share/classes/sun/nio/cs/GB18030.java} | 161 +++++++++++++----- .../sun/nio/cs/StandardCharsets.java.template | 11 +- .../nio/cs/ext/ExtendedCharsets.java.template | 5 +- .../charset/Charset/RegisteredCharsets.java | 10 +- test/jdk/sun/nio/cs/TestGB18030.java | 21 ++- test/jdk/sun/nio/cs/mapping/CoderTest.java | 16 +- test/jdk/sun/nio/cs/mapping/GB18030.b2c | 81 ++++----- .../jdk/sun/nio/cs/mapping/GB18030_2000.b2c | 4 +- 14 files changed, 232 insertions(+), 156 deletions(-) delete mode 100644 make/data/charsetmapping/stdcs-solaris rename src/{jdk.charsets/share/classes/sun/nio/cs/ext/GB18030.java.template => java.base/share/classes/sun/nio/cs/GB18030.java} (99%) rename make/data/charsetmapping/GB18030.map => test/jdk/sun/nio/cs/mapping/GB18030_2000.b2c (99%) diff --git a/make/data/charsetmapping/charsets b/make/data/charsetmapping/charsets index 5932645bfbd..4ee272f0b7a 100644 --- a/make/data/charsetmapping/charsets +++ b/make/data/charsetmapping/charsets @@ -1,5 +1,5 @@ # -# Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -476,6 +476,11 @@ charset x-IBM874 IBM874 alias ibm-874 alias 874 +# alias for GB18030 is generated at runtime +charset GB18030 GB18030 + package sun.nio.cs + type source + ######################################################## # # charsets provided by ExtendedCharsets provider. @@ -563,11 +568,6 @@ charset GBK GBK # Simplified Chinese alias windows-936 alias CP936 -charset GB18030 GB18030 - package sun.nio.cs.ext - type template - alias gb18030-2000 - charset GB2312 EUC_CN package sun.nio.cs.ext type dbcs diff --git a/make/data/charsetmapping/stdcs-aix b/make/data/charsetmapping/stdcs-aix index f17468bbdbc..750308b3bfb 100644 --- a/make/data/charsetmapping/stdcs-aix +++ b/make/data/charsetmapping/stdcs-aix @@ -7,7 +7,6 @@ Big5_HKSCS EUC_CN EUC_KR GBK -GB18030 IBM856 IBM921 IBM922 diff --git a/make/data/charsetmapping/stdcs-linux b/make/data/charsetmapping/stdcs-linux index d7b3b4d9672..b9e80ca6455 100644 --- a/make/data/charsetmapping/stdcs-linux +++ b/make/data/charsetmapping/stdcs-linux @@ -11,7 +11,6 @@ EUC_JP_LINUX EUC_JP_Open EUC_TW GBK -GB18030 ISO_8859_11 ISO_8859_3 ISO_8859_6 diff --git a/make/data/charsetmapping/stdcs-solaris b/make/data/charsetmapping/stdcs-solaris deleted file mode 100644 index cf3c01f12f8..00000000000 --- a/make/data/charsetmapping/stdcs-solaris +++ /dev/null @@ -1,26 +0,0 @@ -# -# generate these charsets into sun.nio.cs -# -Big5 -Big5_Solaris -Big5_HKSCS # always together with Big5 -EUC_CN -EUC_KR -EUC_JP -EUC_JP_LINUX -EUC_JP_Open -EUC_TW -GBK -GB18030 -ISO_8859_11 -ISO_8859_3 -ISO_8859_6 -ISO_8859_8 -Johab -PCK -TIS_620 -JIS_X_0201 -JIS_X_0208 -JIS_X_0212 -JIS_X_0208_Solaris -JIS_X_0212_Solaris diff --git a/make/data/charsetmapping/stdcs-windows b/make/data/charsetmapping/stdcs-windows index 3185e9a966d..482b699bcd1 100644 --- a/make/data/charsetmapping/stdcs-windows +++ b/make/data/charsetmapping/stdcs-windows @@ -2,7 +2,6 @@ # generate these charsets into sun.nio.cs # GBK -GB18030 Johab MS1255 MS1256 diff --git a/make/jdk/src/classes/build/tools/charsetmapping/SPI.java b/make/jdk/src/classes/build/tools/charsetmapping/SPI.java index 3ed09fd938b..15e0b9fcf68 100644 --- a/make/jdk/src/classes/build/tools/charsetmapping/SPI.java +++ b/make/jdk/src/classes/build/tools/charsetmapping/SPI.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -50,18 +50,19 @@ public class SPI { out.println(line); } else { charsets.values() - .stream() - .filter(cs -> cs.pkgName.equals("sun.nio.cs.ext") && - !cs.isInternal && - (cs.os == null || cs.os.equals(os))) - .forEach( cs -> { - out.printf(" charset(\"%s\", \"%s\",%n", cs.csName, cs.clzName); - out.printf(" new String[] {%n"); - for (String alias : cs.aliases) { - out.printf(" \"%s\",%n", alias); - } - out.printf(" });%n%n"); - }); + .stream() + .filter(cs -> cs.pkgName.equals("sun.nio.cs.ext") && + !cs.isInternal && + (cs.os == null || cs.os.equals(os))) + .forEach( cs -> { + out.printf(" charset(\"%s\", \"%s\",%n", cs.csName, cs.clzName); + out.printf(" new String[] {%n"); + for (String alias : cs.aliases) { + out.printf(" \"%s\",%n", + alias); + } + out.printf(" });%n%n"); + }); } } } else if (type.startsWith("stdcs")) { // StandardCharsets.java @@ -93,8 +94,15 @@ public class SPI { .filter(cs -> cs.pkgName.equals("sun.nio.cs")) .forEach( cs -> { if (cs.aliases == null || cs.aliases.length == 0) { - out.printf(" static String[] aliases_%s() { return null; }%n%n", - cs.clzName); + if (cs.csName.equals("GB18030")) { + out.printf(" static String[] aliases_GB18030() { return new String[] {%n"); + out.printf(" GB18030.IS_2000 ? \"gb18030-2000\" : \"gb18030-2022\"%n"); + out.printf(" };%n"); + out.printf(" }%n%n"); + } else { + out.printf(" static String[] aliases_%s() { return null; }%n%n", + cs.clzName); + } } else { boolean methodEnd = true; // non-final for SJIS and MS932 to support sun.nio.cs.map diff --git a/src/jdk.charsets/share/classes/sun/nio/cs/ext/GB18030.java.template b/src/java.base/share/classes/sun/nio/cs/GB18030.java similarity index 99% rename from src/jdk.charsets/share/classes/sun/nio/cs/ext/GB18030.java.template rename to src/java.base/share/classes/sun/nio/cs/GB18030.java index 378e4857ba5..362553a5dbb 100644 --- a/src/jdk.charsets/share/classes/sun/nio/cs/ext/GB18030.java.template +++ b/src/java.base/share/classes/sun/nio/cs/GB18030.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,7 +26,7 @@ /* */ -package $PACKAGE$; +package sun.nio.cs; import java.nio.ByteBuffer; import java.nio.CharBuffer; @@ -34,7 +34,9 @@ import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; +import jdk.internal.misc.VM; import sun.nio.cs.Surrogate; +import sun.security.action.GetPropertyAction; public class GB18030 extends Charset @@ -43,8 +45,14 @@ public class GB18030 private static final int GB18030_DOUBLE_BYTE = 2; private static final int GB18030_FOUR_BYTE = 3; + // Assumes non-2000 standard if initialized during System.initPhase1(), + // as the system property is not ready to be read in that case. + static final boolean IS_2000 = + VM.initLevel() >= 1 && + "2000".equals(GetPropertyAction.privilegedGetProperty("jdk.charset.GB18030", "")); + public GB18030() { - super("GB18030", $ALIASES$); + super("GB18030", StandardCharsets.aliases_GB18030()); } public boolean contains(Charset cs) { @@ -1046,7 +1054,8 @@ public class GB18030 "\u1E26\u1E27\u1E28\u1E29\u1E2A\u1E2B\u1E2C\u1E2D"+ "\u1E2E\u1E2F\u1E30\u1E31\u1E32\u1E33\u1E34\u1E35"+ "\u1E36\u1E37\u1E38\u1E39\u1E3A\u1E3B\u1E3C\u1E3D"+ - "\u1E3E\u1E3F\u1E40\u1E41\u1E42\u1E43\u1E44\u1E45"+ + (IS_2000 ? "\u1E3E\u1E3F\u1E40\u1E41\u1E42\u1E43\u1E44\u1E45" : + "\u1E3E\uE7C7\u1E40\u1E41\u1E42\u1E43\u1E44\u1E45")+ "\u1E46\u1E47\u1E48\u1E49\u1E4A\u1E4B\u1E4C\u1E4D"+ "\u1E4E\u1E4F\u1E50\u1E51\u1E52\u1E53\u1E54\u1E55"+ "\u1E56\u1E57\u1E58\u1E59\u1E5A\u1E5B\u1E5C\u1E5D"+ @@ -2502,8 +2511,10 @@ public class GB18030 "\u4DF5\u4DF6\u4DF7\u4DF8\u4DF9\u4DFA\u4DFB\u4DFC"+ "\u4DFD\u4DFE\u4DFF\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ + (IS_2000 ? "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ + "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" : + "\uFFFD\uE81E\uE826\uE82B\uE82C\uE832\uE843\uE854"+ + "\uE864\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD")+ "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ @@ -2766,8 +2777,10 @@ public class GB18030 "\uFDF4\uFDF5\uFDF6\uFDF7\uFDF8\uFDF9\uFDFA\uFDFB"+ "\uFDFC\uFDFD\uFDFE\uFDFF\uFE00\uFE01\uFE02\uFE03"+ "\uFE04\uFE05\uFE06\uFE07\uFE08\uFE09\uFE0A\uFE0B"+ - "\uFE0C\uFE0D\uFE0E\uFE0F\uFE10\uFE11\uFE12\uFE13"+ - "\uFE14\uFE15\uFE16\uFE17\uFE18\uFE19\uFE1A\uFE1B"+ + (IS_2000 ? "\uFE0C\uFE0D\uFE0E\uFE0F\uFE10\uFE11\uFE12\uFE13"+ + "\uFE14\uFE15\uFE16\uFE17\uFE18\uFE19\uFE1A\uFE1B" : + "\uFE0C\uFE0D\uFE0E\uFE0F\uE78D\uE78F\uE78E\uE790"+ + "\uE791\uE792\uE793\uE794\uE795\uE796\uFE1A\uFE1B")+ "\uFE1C\uFE1D\uFE1E\uFE1F\uFE20\uFE21\uFE22\uFE23"+ "\uFE24\uFE25\uFE26\uFE27\uFE28\uFE29\uFE2A\uFE2B"+ "\uFE2C\uFE2D\uFE2E\uFE2F\uFE32\uFE45\uFE46\uFE47"+ @@ -3773,10 +3786,14 @@ public class GB18030 "\uE78A\uE78B\uE78C\u03B1\u03B2\u03B3\u03B4\u03B5"+ "\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB\u03BC\u03BD"+ "\u03BE\u03BF\u03C0\u03C1\u03C3\u03C4\u03C5\u03C6"+ - "\u03C7\u03C8\u03C9\uE78D\uE78E\uE78F\uE790\uE791"+ - "\uE792\uE793\uFE35\uFE36\uFE39\uFE3A\uFE3F\uFE40"+ - "\uFE3D\uFE3E\uFE41\uFE42\uFE43\uFE44\uE794\uE795"+ - "\uFE3B\uFE3C\uFE37\uFE38\uFE31\uE796\uFE33\uFE34"+ + (IS_2000 ? "\u03C7\u03C8\u03C9\uE78D\uE78E\uE78F\uE790\uE791"+ + "\uE792\uE793\uFE35\uFE36\uFE39\uFE3A\uFE3F\uFE40"+ + "\uFE3D\uFE3E\uFE41\uFE42\uFE43\uFE44\uE794\uE795"+ + "\uFE3B\uFE3C\uFE37\uFE38\uFE31\uE796\uFE33\uFE34" : + "\u03C7\u03C8\u03C9\uFE10\uFE12\uFE11\uFE13\uFE14"+ + "\uFE15\uFE16\uFE35\uFE36\uFE39\uFE3A\uFE3F\uFE40"+ + "\uFE3D\uFE3E\uFE41\uFE42\uFE43\uFE44\uFE17\uFE18"+ + "\uFE3B\uFE3C\uFE37\uFE38\uFE31\uFE19\uFE33\uFE34")+ "\uE797\uE798\uE799\uE79A\uE79B\uE79C\uE79D\uE79E"+ "\uE79F\uE706\uE707\uE708\uE709\uE70A\uE70B\uE70C"+ "\uE70D\uE70E\uE70F\uE710\uE711\uE712\uE713\uE714"+ @@ -3817,7 +3834,8 @@ public class GB18030 "\uE7C6\u0101\u00E1\u01CE\u00E0\u0113\u00E9\u011B"+ "\u00E8\u012B\u00ED\u01D0\u00EC\u014D\u00F3\u01D2"+ "\u00F2\u016B\u00FA\u01D4\u00F9\u01D6\u01D8\u01DA"+ - "\u01DC\u00FC\u00EA\u0251\uE7C7\u0144\u0148\u01F9"+ + (IS_2000 ? "\u01DC\u00FC\u00EA\u0251\uE7C7\u0144\u0148\u01F9" : + "\u01DC\u00FC\u00EA\u0251\u1E3F\u0144\u0148\u01F9")+ "\u0261\uE7C9\uE7CA\uE7CB\uE7CC\u3105\u3106\u3107"+ "\u3108\u3109\u310A\u310B\u310C\u310D\u310E\u310F"+ "\u3110\u3111\u3112\u3113\u3114\u3115\u3116\u3117"+ @@ -5868,16 +5886,22 @@ public class GB18030 "\uE466\uE467\uFA0C\uFA0D\uFA0E\uFA0F\uFA11\uFA13"+ "\uFA14\uFA18\uFA1F\uFA20\uFA21\uFA23\uFA24\uFA27"+ "\uFA28\uFA29\u2E81\uE816\uE817\uE818\u2E84\u3473"+ - "\u3447\u2E88\u2E8B\uE81E\u359E\u361A\u360E\u2E8C"+ - "\u2E97\u396E\u3918\uE826\u39CF\u39DF\u3A73\u39D0"+ - "\uE82B\uE82C\u3B4E\u3C6E\u3CE0\u2EA7\uE831\uE832"+ + (IS_2000 ? "\u3447\u2E88\u2E8B\uE81E\u359E\u361A\u360E\u2E8C"+ + "\u2E97\u396E\u3918\uE826\u39CF\u39DF\u3A73\u39D0"+ + "\uE82B\uE82C\u3B4E\u3C6E\u3CE0\u2EA7\uE831\uE832" : + "\u3447\u2E88\u2E8B\u9FB4\u359E\u361A\u360E\u2E8C"+ + "\u2E97\u396E\u3918\u9FB5\u39CF\u39DF\u3A73\u39D0"+ + "\u9FB6\u9FB7\u3B4E\u3C6E\u3CE0\u2EA7\uE831\u9FB8")+ "\u2EAA\u4056\u415F\u2EAE\u4337\u2EB3\u2EB6\u2EB7"+ "\uE83B\u43B1\u43AC\u2EBB\u43DD\u44D6\u4661\u464C"+ - "\uE843\uFFFD\u4723\u4729\u477C\u478D\u2ECA\u4947"+ + (IS_2000 ? "\uE843\uFFFD\u4723\u4729\u477C\u478D\u2ECA\u4947" : + "\u9FB9\uFFFD\u4723\u4729\u477C\u478D\u2ECA\u4947")+ "\u497A\u497D\u4982\u4983\u4985\u4986\u499F\u499B"+ - "\u49B7\u49B6\uE854\uE855\u4CA3\u4C9F\u4CA0\u4CA1"+ + (IS_2000 ? "\u49B7\u49B6\uE854\uE855\u4CA3\u4C9F\u4CA0\u4CA1" : + "\u49B7\u49B6\u9FBA\uE855\u4CA3\u4C9F\u4CA0\u4CA1")+ "\u4C77\u4CA2\u4D13\u4D14\u4D15\u4D16\u4D17\u4D18"+ - "\u4D19\u4DAE\uE864\uE468\uE469\uE46A\uE46B\uE46C"+ + (IS_2000 ? "\u4D19\u4DAE\uE864\uE468\uE469\uE46A\uE46B\uE46C" : + "\u4D19\u4DAE\u9FBB\uE468\uE469\uE46A\uE46B\uE46C")+ "\uE46D\uE46E\uE46F\uE470\uE471\uE472\uE473\uE474"+ "\uE475\uE476\uE477\uE478\uE479\uE47A\uE47B\uE47C"+ "\uE47D\uE47E\uE47F\uE480\uE481\uE482\uE483\uE484"+ @@ -6895,7 +6919,8 @@ public class GB18030 "\u3D02\u3D03\u3D04\u3D05\u3D06\u3D07\u3D08\u3D09"+ "\u3D0A\u3D0B\u3D0C\u3D0D\u3D0E\u3D0F\u3D10\u3D11"+ "\u3D12\u3D13\u3D14\u3D15\u3D16\u3D17\u3D18\u3D19"+ - "\u3D1A\u3D1B\u3D1C\u3D1D\u3D1E\u3D1F\u3D20\u3D21"+ + (IS_2000 ? "\u3D1A\u3D1B\u3D1C\u3D1D\u3D1E\u3D1F\u3D20\u3D21" : + "\u3D1A\u3D1B\u3D1C\u3D1D\u3D1E\u3D1F\u3D20\uA8BC")+ "\u3D22\u3D23\u3D24\u3D25\u3D26\u3D27\u3D28\u3D29"+ "\u3D2A\u3D2B\u3D2C\u3D2D\u3D2E\u3D2F\u3D30\u3D31"+ "\u3D32\u3D33\u3D34\u3D35\u3D36\u3D37\u3D38\u3D39"+ @@ -11054,8 +11079,10 @@ public class GB18030 "\uFD93\uC1FA\uB9A8\uEDE8\uFD94\uFD95\uFD96\uB9EA"+ "\uD9DF\uFD97\uFD98\uFD99\uFD9A\uFD9B\u6A63\u6A64"+ "\u6A65\u6A66\u6A67\u6A68\u6A69\u6A6A\u6A6B\u6A6C"+ - "\u6A6D\u6A6E\u6A6F\u6A70\u6A71\u6A72\u6A73\u6A74"+ - "\u6A75\u6A76\u6A77\u6A78\u6A79\u6A7A\u6A7B\u6A7C"+ + (IS_2000 ? "\u6A6D\u6A6E\u6A6F\u6A70\u6A71\u6A72\u6A73\u6A74"+ + "\u6A75\u6A76\u6A77\u6A78\u6A79\u6A7A\u6A7B\u6A7C" : + "\u6A6D\u6A6E\u6A6F\u6A70\uFE59\uFE61\uFE66\uFE67"+ + "\uFE6D\uFE7E\uFE90\uFEA0\u6A79\u6A7A\u6A7B\u6A7C")+ "\u6A7D\u6A7E\u6A7F\u6A80\u6A81\u6A82\u6A83\u6A84"+ "\u6A85\u6A86\u6A87\u6A88\u6A89\u6A8A\u6A8B\u6A8C"+ "\u6A8D\u6A8E\u6A8F\u6A90\u6A91\u6A92\u6A93\u6A94"+ @@ -11467,14 +11494,17 @@ public class GB18030 "\uA2FD\uA2FE\uA4F4\uA4F5\uA4F6\uA4F7\uA4F8\uA4F9"+ "\uA4FA\uA4FB\uA4FC\uA4FD\uA4FE\uA5F7\uA5F8\uA5F9"+ "\uA5FA\uA5FB\uA5FC\uA5FD\uA5FE\uA6B9\uA6BA\uA6BB"+ - "\uA6BC\uA6BD\uA6BE\uA6BF\uA6C0\uA6D9\uA6DA\uA6DB"+ - "\uA6DC\uA6DD\uA6DE\uA6DF\uA6EC\uA6ED\uA6F3\uA6F6"+ + (IS_2000 ? "\uA6BC\uA6BD\uA6BE\uA6BF\uA6C0\uA6D9\uA6DA\uA6DB"+ + "\uA6DC\uA6DD\uA6DE\uA6DF\uA6EC\uA6ED\uA6F3\uA6F6" : + "\uA6BC\uA6BD\uA6BE\uA6BF\uA6C0\u35E7\u35E9\u35E8"+ + "\u35EA\u35EB\u35EC\u35ED\u35EE\u35EF\u35F0\uA6F6")+ "\uA6F7\uA6F8\uA6F9\uA6FA\uA6FB\uA6FC\uA6FD\uA6FE"+ "\uA7C2\uA7C3\uA7C4\uA7C5\uA7C6\uA7C7\uA7C8\uA7C9"+ "\uA7CA\uA7CB\uA7CC\uA7CD\uA7CE\uA7CF\uA7D0\uA7F2"+ "\uA7F3\uA7F4\uA7F5\uA7F6\uA7F7\uA7F8\uA7F9\uA7FA"+ "\uA7FB\uA7FC\uA7FD\uA7FE\uA896\uA897\uA898\uA899"+ - "\uA89A\uA89B\uA89C\uA89D\uA89E\uA89F\uA8A0\uA8BC"+ + (IS_2000 ? "\uA89A\uA89B\uA89C\uA89D\uA89E\uA89F\uA8A0\uA8BC" : + "\uA89A\uA89B\uA89C\uA89D\uA89E\uA89F\uA8A0\u3D21")+ "\u2001\uA8C1\uA8C2\uA8C3\uA8C4\uA8EA\uA8EB\uA8EC"+ "\uA8ED\uA8EE\uA8EF\uA8F0\uA8F1\uA8F2\uA8F3\uA8F4"+ "\uA8F5\uA8F6\uA8F7\uA8F8\uA8F9\uA8FA\uA8FB\uA8FC"+ @@ -11485,16 +11515,23 @@ public class GB18030 "\uA9A3\uA9F0\uA9F1\uA9F2\uA9F3\uA9F4\uA9F5\uA9F6"+ "\uA9F7\uA9F8\uA9F9\uA9FA\uA9FB\uA9FC\uA9FD\uA9FE"+ "\uD7FA\uD7FB\uD7FC\uD7FD\uD7FE\u200F\uFE51\uFE52"+ - "\uFE53\u2010\u2011\u2012\u2013\u2014\uFE59\u2015"+ - "\u2016\u2017\u2018\u2019\u201A\u201B\uFE61\u201C"+ - "\u201D\u201E\u201F\uFE66\uFE67\u2020\u2021\u2022"+ - "\u2023\uFE6C\uFE6D\u2024\u2025\u2026\u2027\u2028"+ + (IS_2000 ? "\uFE53\u2010\u2011\u2012\u2013\u2014\uFE59\u2015"+ + "\u2016\u2017\u2018\u2019\u201A\u201B\uFE61\u201C"+ + "\u201D\u201E\u201F\uFE66\uFE67\u2020\u2021\u2022"+ + "\u2023\uFE6C\uFE6D\u2024\u2025\u2026\u2027\u2028" : + "\uFE53\u2010\u2011\u2012\u2013\u2014\u6A71\u2015"+ + "\u2016\u2017\u2018\u2019\u201A\u201B\u6A72\u201C"+ + "\u201D\u201E\u201F\u6A73\u6A74\u2020\u2021\u2022"+ + "\u2023\uFE6C\u6A75\u2024\u2025\u2026\u2027\u2028")+ "\u2029\u202A\u202B\uFE76\u202C\u202D\u202E\u202F"+ - "\u2030\u2031\u2032\uFE7E\u2033\u2034\u2035\u2036"+ + (IS_2000 ? "\u2030\u2031\u2032\uFE7E\u2033\u2034\u2035\u2036" : + "\u2030\u2031\u2032\u6A76\u2033\u2034\u2035\u2036")+ "\u2037\u2038\u2039\u203A\u203B\u203C\u203D\u203E"+ - "\u203F\u2040\u2041\u2042\uFE90\uFE91\u2043\u2044"+ + (IS_2000 ? "\u203F\u2040\u2041\u2042\uFE90\uFE91\u2043\u2044" : + "\u203F\u2040\u2041\u2042\u6A77\uFE91\u2043\u2044")+ "\u2045\u2046\u2047\u2048\u2049\u204A\u204B\u204C"+ - "\u204D\u204E\u204F\u2050\uFEA0\u2051\u2052\u2053"+ + (IS_2000 ? "\u204D\u204E\u204F\u2050\uFEA0\u2051\u2052\u2053" : + "\u204D\u204E\u204F\u2050\u6A78\u2051\u2052\u2053")+ "\u2054\u2055\u2056\u2057\u2058\u2059\u205A\u205B"+ "\u205C\u205D\u205E\u205F\u2060\u2061\u2062\u2063"+ "\u2064\u2065\u2066\u2067\u2068\u2069\u206A\u206B"+ @@ -12192,8 +12229,10 @@ public class GB18030 "\u24E3\u24E4\u24E5\u24E6\u24E7\u24E8\u24E9\u24EA"+ "\u24EB\u24EC\u24ED\u24EE\u24EF\u24F0\u24F1\u24F2"+ "\u24F3\u24F4\u24F5\u24F6\u24F7\u24F8\u24F9\u24FA"+ - "\u24FB\u24FC\u24FD\u24FE\u24FF\u2500\u2501\u2502"+ - "\u2503\u2504\u2505\u2506\u2507\u2508\u2509\u250A"+ + (IS_2000 ? "\u24FB\u24FC\u24FD\u24FE\u24FF\u2500\u2501\u2502"+ + "\u2503\u2504\u2505\u2506\u2507\u2508\u2509\u250A" : + "\uA6D9\uA6DB\uA6DA\uA6DC\uA6DD\uA6DE\uA6DF\uA6EC"+ + "\uA6ED\uA6F3\u2505\u2506\u2507\u2508\u2509\u250A")+ "\u250B\u250C\u250D\u250E\u250F\u2510\u2511\u2512"+ "\u2513\u2514\u2515\u2516\u2517\u2518\u2519\u251A"+ "\uA955\uA6F2\u251B\uA6F4\uA6F5\uA6E0\uA6E1\uA6F0"+ @@ -12406,8 +12445,13 @@ public class GB18030 // BMP Ranges if (offset <= 0x4A62) da[dp++] = getChar(offset); - else if (offset > 0x4A62 && offset <= 0x82BC) - da[dp++] = (char)(offset + 0x5543); + else if (offset > 0x4A62 && offset <= 0x82BC) { + if (offset >= 0x4A71 && offset <= 0x4A78 && !IS_2000) { + da[dp++] = getChar(offset); + } else { + da[dp++] = (char)(offset + 0x5543); + } + } else if (offset >= 0x82BD && offset <= 0x830D) da[dp++] = getChar(offset); else if (offset >= 0x830D && offset <= 0x93A8) @@ -12509,8 +12553,13 @@ public class GB18030 // BMP Ranges if (offset <= 0x4A62) dst.put(getChar(offset)); - else if (offset > 0x4A62 && offset <= 0x82BC) - dst.put((char)(offset + 0x5543)); + else if (offset > 0x4A62 && offset <= 0x82BC) { + if (offset >= 0x4A71 && offset <= 0x4A78 && !IS_2000) { + dst.put(getChar(offset)); + } else { + dst.put((char)(offset + 0x5543)); + } + } else if (offset >= 0x82BD && offset <= 0x830D) dst.put(getChar(offset)); else if (offset >= 0x830D && offset <= 0x93A8) @@ -12620,15 +12669,24 @@ public class GB18030 condensedKey = (hiByte - 0x20) * 256 + loByte; - if (c >= 0xE000 && c < 0xF900) + if (c >= 0xE000 && c < 0xF900) { + if (IS_2000) { condensedKey += 0x82BD; + } else { + condensedKey = switch (c) { + case 0xE7C7, 0xE81E, 0xE826, 0xE82B, 0xE82C, 0xE832, + 0xE843, 0xE854, 0xE864 -> condensedKey; + default -> condensedKey + 0x82BD; + }; + } + } else if (c >= 0xF900) - condensedKey += 0x93A9; + condensedKey += 0x93A9; if (hiByte > 0x80) - currentState = GB18030_DOUBLE_BYTE; + currentState = GB18030_DOUBLE_BYTE; else - currentState = GB18030_FOUR_BYTE; + currentState = GB18030_FOUR_BYTE; } else if (c >= 0xA4C7 && c <= 0xD7FF) { condensedKey = c - 0x5543; @@ -12671,7 +12729,7 @@ public class GB18030 } sp += inputSize; } - return CoderResult.UNDERFLOW; + return CoderResult.UNDERFLOW; } finally { src.position(sp - src.arrayOffset()); dst.position(dp - dst.arrayOffset()); @@ -12711,15 +12769,24 @@ public class GB18030 condensedKey = (hiByte - 0x20) * 256 + loByte; - if (c >= 0xE000 && c < 0xF900) + if (c >= 0xE000 && c < 0xF900) { + if (IS_2000) { condensedKey += 0x82BD; + } else { + condensedKey = switch (c) { + case 0xE7C7, 0xE81E, 0xE826, 0xE82B, 0xE82C, 0xE832, + 0xE843, 0xE854, 0xE864 -> condensedKey; + default -> condensedKey + 0x82BD; + }; + } + } else if (c >= 0xF900) - condensedKey += 0x93A9; + condensedKey += 0x93A9; if (hiByte > 0x80) - currentState = GB18030_DOUBLE_BYTE; + currentState = GB18030_DOUBLE_BYTE; else - currentState = GB18030_FOUR_BYTE; + currentState = GB18030_FOUR_BYTE; } else if (c >= 0xA4C7 && c <= 0xD7FF) { condensedKey = c - 0x5543; diff --git a/src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template b/src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template index b5590880e2d..339e01de39e 100644 --- a/src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template +++ b/src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved. * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -58,8 +58,13 @@ public class StandardCharsets extends CharsetProvider { } private String canonicalize(String csn) { - String acn = aliasMap().get(csn); - return (acn != null) ? acn : csn; + if (csn.startsWith("gb18030-")) { + return csn.equals("gb18030-2022") && !GB18030.IS_2000 || + csn.equals("gb18030-2000") && GB18030.IS_2000 ? "gb18030" : csn; + } else { + String acn = aliasMap().get(csn); + return (acn != null) ? acn : csn; + } } private Map aliasMap() { diff --git a/src/jdk.charsets/share/classes/sun/nio/cs/ext/ExtendedCharsets.java.template b/src/jdk.charsets/share/classes/sun/nio/cs/ext/ExtendedCharsets.java.template index 536f3f4891d..81dd21bb20b 100644 --- a/src/jdk.charsets/share/classes/sun/nio/cs/ext/ExtendedCharsets.java.template +++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/ExtendedCharsets.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved. * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -29,9 +29,6 @@ package sun.nio.cs.ext; -import java.nio.charset.Charset; -import java.nio.charset.spi.CharsetProvider; - /** * Provider for extended charsets. */ diff --git a/test/jdk/java/nio/charset/Charset/RegisteredCharsets.java b/test/jdk/java/nio/charset/Charset/RegisteredCharsets.java index e62ae05a0f7..495d2a32bcd 100644 --- a/test/jdk/java/nio/charset/Charset/RegisteredCharsets.java +++ b/test/jdk/java/nio/charset/Charset/RegisteredCharsets.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,9 +23,11 @@ /* @test * @bug 4473201 4696726 4652234 4482298 4784385 4966197 4267354 5015668 - 6911753 8071447 8186751 8242541 8260265 + 6911753 8071447 8186751 8242541 8260265 8301119 * @summary Check that registered charsets are actually registered * @modules jdk.charsets + * @run main RegisteredCharsets + * @run main/othervm -Djdk.charset.GB18030=2000 RegisteredCharsets */ import java.nio.charset.*; @@ -251,8 +253,12 @@ public class RegisteredCharsets { }); aliasCheck("GB18030", + "2000".equals(System.getProperty("jdk.charset.GB18030")) ? new String[] { "gb18030-2000" + } : + new String[] { + "gb18030-2022" }); aliasCheck("ISO-2022-KR", new String[] {"csISO2022KR"}); diff --git a/test/jdk/sun/nio/cs/TestGB18030.java b/test/jdk/sun/nio/cs/TestGB18030.java index d7183967058..147da36d3c6 100644 --- a/test/jdk/sun/nio/cs/TestGB18030.java +++ b/test/jdk/sun/nio/cs/TestGB18030.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -22,20 +22,22 @@ */ /* @test - * @bug 8211382 + * @bug 8211382 8301119 * @summary Check GB18030 * @modules jdk.charsets + * @run main TestGB18030 + * @run main/othervm -Djdk.charset.GB18030=2000 TestGB18030 */ -import java.io.*; import java.nio.*; import java.nio.charset.*; +import java.util.Set; public class TestGB18030 { + private static final Charset cs = Charset.forName("GB18030"); public static void gb18030_1(boolean useDirect) throws Exception { for(char ch : new char[]{'\uFFFE', '\uFFFF'}) { char[] ca = new char[]{ch}; - Charset cs = Charset.forName("GB18030"); CharsetEncoder ce = cs.newEncoder(); CharsetDecoder cd = cs.newDecoder(); CharBuffer cb = CharBuffer.wrap(ca); @@ -75,8 +77,19 @@ public class TestGB18030 { } } } + + static void checkAlias() { + var IS_2000 = "2000".equals(System.getProperty("jdk.charset.GB18030")); + var expected = IS_2000 ? Set.of("gb18030-2000") : Set.of("gb18030-2022"); + var found = cs.aliases(); + System.out.printf("checkAlias(): IS_2000: %s, expected: %s, found: %s\n", IS_2000, expected, found); + if (!cs.aliases().equals(expected)) { + throw new RuntimeException("Result mismatch"); + } + } public static void main(String args[]) throws Exception { gb18030_1(false); gb18030_1(true); + checkAlias(); } } diff --git a/test/jdk/sun/nio/cs/mapping/CoderTest.java b/test/jdk/sun/nio/cs/mapping/CoderTest.java index d1ec009b30b..05913a40535 100644 --- a/test/jdk/sun/nio/cs/mapping/CoderTest.java +++ b/test/jdk/sun/nio/cs/mapping/CoderTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -22,9 +22,11 @@ */ /* @test - @bug 4691554 6221056 6380723 6404504 6419565 6529796 + @bug 4691554 6221056 6380723 6404504 6419565 6529796 8301119 @summary Test the supported New I/O coders @modules jdk.charsets + @run main CoderTest + @run main/othervm -Djdk.charset.GB18030=2000 CoderTest */ import java.io.*; @@ -56,6 +58,9 @@ public class CoderTest { ".c2b-irreversible", ".b2c-irreversible" }; + private static final boolean IS_2000 = + "2000".equals(System.getProperty("jdk.charset.GB18030")); + // Utilities private static ByteBuffer expand(ByteBuffer bb) { @@ -466,7 +471,12 @@ public class CoderTest { // Outer loop runs three passes: roundtrip, irreversible encodings, // and then irreversible decodings for (int mode = ROUNDTRIP; mode <= DECODE; mode++) { - File f = testFile(encoding, mode); + var fileName = encoding; + if (fileName.equals("GB18030") && IS_2000) { + // tweak the map file name + fileName = "GB18030_2000"; + } + File f = testFile(fileName, mode); if (f == null) continue; loadTests(f); diff --git a/test/jdk/sun/nio/cs/mapping/GB18030.b2c b/test/jdk/sun/nio/cs/mapping/GB18030.b2c index 7bf135cbc35..955d1306aed 100644 --- a/test/jdk/sun/nio/cs/mapping/GB18030.b2c +++ b/test/jdk/sun/nio/cs/mapping/GB18030.b2c @@ -1,3 +1,4 @@ +# based on GB18030-2022 mapping 00 0000 01 0001 02 0002 @@ -7741,7 +7742,7 @@ A7D7 0451 8135F434 1E3C 8135F435 1E3D 8135F436 1E3E -8135F437 1E3F +A8BC 1E3F 8135F438 1E40 8135F439 1E41 8135F530 1E42 @@ -40882,14 +40883,14 @@ FD9B 9FA5 82359034 9FB1 82359035 9FB2 82359036 9FB3 -82359037 9FB4 -82359038 9FB5 -82359039 9FB6 -82359130 9FB7 -82359131 9FB8 -82359132 9FB9 -82359133 9FBA -82359134 9FBB +82359037 E81E +82359038 E826 +82359039 E82B +82359130 E82C +82359131 E832 +82359132 E843 +82359133 E854 +82359134 E864 82359135 9FBC 82359136 9FBD 82359137 9FBE @@ -57227,16 +57228,16 @@ A6BD E789 A6BE E78A A6BF E78B A6C0 E78C -A6D9 E78D -A6DA E78E -A6DB E78F -A6DC E790 -A6DD E791 -A6DE E792 -A6DF E793 -A6EC E794 -A6ED E795 -A6F3 E796 +A6D9 FE10 +A6DA FE12 +A6DB FE11 +A6DC FE13 +A6DD FE14 +A6DE FE15 +A6DF FE16 +A6EC FE17 +A6ED FE18 +A6F3 FE19 A6F6 E797 A6F7 E798 A6F8 E799 @@ -57285,7 +57286,7 @@ A89D E7C3 A89E E7C4 A89F E7C5 A8A0 E7C6 -A8BC E7C7 +8135F437 E7C7 8336C830 E7C8 A8C1 E7C9 A8C2 E7CA @@ -57372,7 +57373,7 @@ FE53 E818 8336C937 E81B 8336C938 E81C 8336C939 E81D -FE59 E81E +FE59 9FB4 8336CA30 E81F 8336CA31 E820 8336CA32 E821 @@ -57380,19 +57381,19 @@ FE59 E81E 8336CA34 E823 8336CA35 E824 8336CA36 E825 -FE61 E826 +FE61 9FB5 8336CA37 E827 8336CA38 E828 8336CA39 E829 8336CB30 E82A -FE66 E82B -FE67 E82C +FE66 9FB6 +FE67 9FB7 8336CB31 E82D 8336CB32 E82E 8336CB33 E82F 8336CB34 E830 FE6C E831 -FE6D E832 +FE6D 9FB8 8336CB35 E833 8336CB36 E834 8336CB37 E835 @@ -57409,7 +57410,7 @@ FE76 E83B 8336CC37 E840 8336CC38 E841 8336CC39 E842 -FE7E E843 +FE7E 9FB9 8336CD30 E844 8336CD31 E845 8336CD32 E846 @@ -57426,7 +57427,7 @@ FE7E E843 8336CE33 E851 8336CE34 E852 8336CE35 E853 -FE90 E854 +FE90 9FBA FE91 E855 8336CE36 E856 8336CE37 E857 @@ -57442,7 +57443,7 @@ FE91 E855 8336CF37 E861 8336CF38 E862 8336CF39 E863 -FEA0 E864 +FEA0 9FBB 8336D030 E865 8336D031 E866 8336D032 E867 @@ -62990,16 +62991,16 @@ FE4F FA29 84318233 FE0D 84318234 FE0E 84318235 FE0F -84318236 FE10 -84318237 FE11 -84318238 FE12 -84318239 FE13 -84318330 FE14 -84318331 FE15 -84318332 FE16 -84318333 FE17 -84318334 FE18 -84318335 FE19 +84318236 E78D +84318237 E78F +84318238 E78E +84318239 E790 +84318330 E791 +84318331 E792 +84318332 E793 +84318333 E794 +84318334 E795 +84318335 E796 84318336 FE1A 84318337 FE1B 84318338 FE1C @@ -63484,5 +63485,5 @@ A3A4 FFE5 8431A435 FFFB 8431A436 FFFC 8431A437 FFFD -#8431A438 FFFE -#8431A439 FFFF +8431A438 FFFE +8431A439 FFFF diff --git a/make/data/charsetmapping/GB18030.map b/test/jdk/sun/nio/cs/mapping/GB18030_2000.b2c similarity index 99% rename from make/data/charsetmapping/GB18030.map rename to test/jdk/sun/nio/cs/mapping/GB18030_2000.b2c index 2303efc9a6b..8eeda1c6dbd 100644 --- a/make/data/charsetmapping/GB18030.map +++ b/test/jdk/sun/nio/cs/mapping/GB18030_2000.b2c @@ -1,6 +1,4 @@ -# GB18030.java is NOT generated from this mapping right now. This -# map is here for testing only. -# +# based on GB18030-2000 mapping 00 0000 01 0001 02 0002