4116222: Errors in Arabic code-conversion tables, part II

Updated the IBM420 datatable

Reviewed-by: alanb
This commit is contained in:
Xueming Shen 2009-08-04 12:44:03 -07:00
parent 2f8af77fdb
commit 6dfe09a9a6
4 changed files with 543 additions and 351 deletions

View File

@ -1 +1,187 @@
#
# The diff of 01A434B0.TXMAP110 and 34B001A4.RXMAP110
#
# Added: 0x15 U+0085
#
0x15 U+0085
0x42 U+FE7C
0x46 U+FE80
0x47 U+FE81
0x49 U+FE83
0x4B U+066C
0x4B U+FF0E
0x4C U+FF1C
0x4D U+FF08
0x4E U+FF0B
0x4F U+FF5C
0x50 U+FF06
0x52 U+FE85
0x52 U+FE86
0x55 U+FE89
0x55 U+FE8A
0x55 U+FE8B
0x55 U+FE8C
0x56 U+0625
0x56 U+FE87
0x56 U+FE8D
0x57 U+FE88
0x58 U+FE8F
0x58 U+FE90
0x59 U+FE92
0x5A U+FF01
0x5B U+FF04
0x5C U+066D
0x5C U+FF0A
0x5D U+FF09
0x5E U+FF1B
0x60 U+FF0D
0x61 U+FF0F
0x62 U+FE93
0x62 U+FE94
0x63 U+FE95
0x63 U+FE96
0x64 U+FE98
0x65 U+FE99
0x65 U+FE9A
0x66 U+FE9C
0x67 U+FE9D
0x67 U+FE9E
0x68 U+FEA0
0x69 U+FEA1
0x69 U+FEA2
0x6B U+066B
0x6B U+FF0C
0x6C U+066A
0x6C U+FF05
0x6D U+FF3F
0x6E U+FF1E
0x6F U+FF1F
0x70 U+FEA4
0x71 U+FEA5
0x71 U+FEA6
0x72 U+FEA8
0x73 U+FEA9
0x73 U+FEAA
0x74 U+FEAB
0x74 U+FEAC
0x75 U+FEAD
0x75 U+FEAE
0x76 U+FEAF
0x76 U+FEB0
0x77 U+FEB1
0x77 U+FEB2
0x78 U+FEB4
0x7A U+FF1A
0x7B U+FF03
0x7C U+FF20
0x7D U+FF07
0x7E U+FF1D
0x7F U+FF02
0x80 U+FEB5
0x80 U+FEB6
0x81 U+FF41
0x82 U+FF42
0x83 U+FF43
0x84 U+FF44
0x85 U+FF45
0x86 U+FF46
0x87 U+FF47
0x88 U+FF48
0x89 U+FF49
0x8A U+FEB8
0x8B U+FEB9
0x8B U+FEBA
0x8C U+FEBC
0x8D U+FEBD
0x8D U+FEBE
0x8E U+FEC0
0x8F U+FEC1
0x8F U+FEC2
0x8F U+FEC3
0x8F U+FEC4
0x90 U+FEC5
0x90 U+FEC6
0x90 U+FEC7
0x90 U+FEC8
0x91 U+FF4A
0x92 U+FF4B
0x93 U+FF4C
0x94 U+FF4D
0x95 U+FF4E
0x96 U+FF4F
0x97 U+FF50
0x98 U+FF51
0x99 U+FF52
0x9A U+FEC9
0x9E U+FECD
0xA2 U+FF53
0xA3 U+FF54
0xA4 U+FF55
0xA5 U+FF56
0xA6 U+FF57
0xA7 U+FF58
0xA8 U+FF59
0xA9 U+FF5A
0xAB U+FED1
0xAB U+FED2
0xAC U+FED4
0xAD U+FED5
0xAD U+FED6
0xAE U+FED8
0xAF U+FED9
0xAF U+FEDA
0xB0 U+FEDC
0xB1 U+FEDD
0xB1 U+FEDE
0xB8 U+FEF9
0xB9 U+FEFA
0xBA U+FEE0
0xBB U+FEE1
0xBB U+FEE2
0xBC U+FEE4
0xBD U+FEE5
0xBD U+FEE6
0xBE U+FEE8
0xBF U+FEE9
0xBF U+FEEA
0xC1 U+FF21
0xC2 U+FF22
0xC3 U+FF23
0xC4 U+FF24
0xC5 U+FF25
0xC6 U+FF26
0xC7 U+FF27
0xC8 U+FF28
0xC9 U+FF29
0xCF U+FEED
0xCF U+FEEE
0xD1 U+FF2A
0xD2 U+FF2B
0xD3 U+FF2C
0xD4 U+FF2D
0xD5 U+FF2E
0xD6 U+FF2F
0xD7 U+FF30
0xD8 U+FF31
0xD9 U+FF32
0xDA U+FEEF
0xDC U+FEF1
0xDE U+FEF4
0xE2 U+FF33
0xE3 U+FF34
0xE4 U+FF35
0xE5 U+FF36
0xE6 U+FF37
0xE7 U+FF38
0xE8 U+FF39
0xE9 U+FF3A
0xF0 U+FF10
0xF1 U+FF11
0xF2 U+FF12
0xF3 U+FF13
0xF4 U+FF14
0xF5 U+FF15
0xF6 U+FF16
0xF7 U+FF17
0xF8 U+FF18
0xF9 U+FF19

View File

@ -1,52 +1,57 @@
#Generated from IBM420.java
#
# From IBMCDC datatable 01A434B0.TXMAP110
#
# Changed
# 0x15 U+0085 -> 0x15 U+000a
#
0x00 U+0000
0x01 U+0001
0x02 U+0002
0x03 U+0003
0x04 U+009c
0x04 U+009C
0x05 U+0009
0x06 U+0086
0x07 U+007f
0x07 U+007F
0x08 U+0097
0x09 U+008d
0x0a U+008e
0x0b U+000b
0x0c U+000c
0x0d U+000d
0x0e U+000e
0x0f U+000f
0x09 U+008D
0x0A U+008E
0x0B U+000B
0x0C U+000C
0x0D U+000D
0x0E U+000E
0x0F U+000F
0x10 U+0010
0x11 U+0011
0x12 U+0012
0x13 U+0013
0x14 U+009d
0x15 U+000a
0x14 U+009D
0x15 U+000A
0x16 U+0008
0x17 U+0087
0x18 U+0018
0x19 U+0019
0x1a U+0092
0x1b U+008f
0x1c U+001c
0x1d U+001d
0x1e U+001e
0x1f U+001f
0x1A U+0092
0x1B U+008F
0x1C U+001C
0x1D U+001D
0x1E U+001E
0x1F U+001F
0x20 U+0080
0x21 U+0081
0x22 U+0082
0x23 U+0083
0x24 U+0084
0x25 U+000a
0x25 U+000A
0x26 U+0017
0x27 U+001b
0x27 U+001B
0x28 U+0088
0x29 U+0089
0x2a U+008a
0x2b U+008b
0x2c U+008c
0x2d U+0005
0x2e U+0006
0x2f U+0007
0x2A U+008A
0x2B U+008B
0x2C U+008C
0x2D U+0005
0x2E U+0006
0x2F U+0007
0x30 U+0090
0x31 U+0091
0x32 U+0016
@ -57,77 +62,75 @@
0x37 U+0004
0x38 U+0098
0x39 U+0099
0x3a U+009a
0x3b U+009b
0x3c U+0014
0x3d U+0015
0x3e U+009e
0x3f U+001a
0x3A U+009A
0x3B U+009B
0x3C U+0014
0x3D U+0015
0x3E U+009E
0x3F U+001A
0x40 U+0020
0x41 U+00a0
0x42 U+fe7c
0x43 U+fe7d
0x41 U+00A0
0x42 U+0651
0x43 U+FE7D
0x44 U+0640
0x45 U+f8fc
0x46 U+fe80
0x47 U+fe81
0x48 U+fe82
0x49 U+fe83
0x4a U+00a2
0x4b U+002e
0x4c U+003c
0x4d U+0028
0x4e U+002b
0x4f U+007c
0x45 U+200B
0x46 U+0621
0x47 U+0622
0x48 U+FE82
0x49 U+0623
0x4A U+00A2
0x4B U+002E
0x4C U+003C
0x4D U+0028
0x4E U+002B
0x4F U+007C
0x50 U+0026
0x51 U+fe84
0x52 U+fe85
0x53 U+fffd
0x54 U+fffd
0x55 U+fe8b
0x56 U+fe8d
0x57 U+fe8e
0x58 U+fe8f
0x59 U+fe91
0x5a U+0021
0x5b U+0024
0x5c U+002a
0x5d U+0029
0x5e U+003b
0x5f U+00ac
0x60 U+002d
0x61 U+002f
0x62 U+fe93
0x63 U+fe95
0x64 U+fe97
0x65 U+fe99
0x66 U+fe9b
0x67 U+fe9d
0x68 U+fe9f
0x69 U+fea1
0x6a U+00a6
0x6b U+002c
0x6c U+0025
0x6d U+005f
0x6e U+003e
0x6f U+003f
0x70 U+fea3
0x71 U+fea5
0x72 U+fea7
0x73 U+fea9
0x74 U+feab
0x75 U+fead
0x76 U+feaf
0x77 U+f8f6
0x78 U+feb3
0x79 U+060c
0x7a U+003a
0x7b U+0023
0x7c U+0040
0x7d U+0027
0x7e U+003d
0x7f U+0022
0x80 U+f8f5
0x51 U+FE84
0x52 U+0624
0x55 U+0626
0x56 U+0627
0x57 U+FE8E
0x58 U+0628
0x59 U+FE91
0x5A U+0021
0x5B U+0024
0x5C U+002A
0x5D U+0029
0x5E U+003B
0x5F U+00AC
0x60 U+002D
0x61 U+002F
0x62 U+0629
0x63 U+062A
0x64 U+FE97
0x65 U+062B
0x66 U+FE9B
0x67 U+062C
0x68 U+FE9F
0x69 U+062D
0x6A U+00A6
0x6B U+002C
0x6C U+0025
0x6D U+005F
0x6E U+003E
0x6F U+003F
0x70 U+FEA3
0x71 U+062E
0x72 U+FEA7
0x73 U+062F
0x74 U+0630
0x75 U+0631
0x76 U+0632
0x77 U+0633
0x78 U+FEB3
0x79 U+060C
0x7A U+003A
0x7B U+0023
0x7C U+0040
0x7D U+0027
0x7E U+003D
0x7F U+0022
0x80 U+0634
0x81 U+0061
0x82 U+0062
0x83 U+0063
@ -137,121 +140,114 @@
0x87 U+0067
0x88 U+0068
0x89 U+0069
0x8a U+feb7
0x8b U+f8f4
0x8c U+febb
0x8d U+f8f7
0x8e U+febf
0x8f U+fec3
0x90 U+fec7
0x91 U+006a
0x92 U+006b
0x93 U+006c
0x94 U+006d
0x95 U+006e
0x96 U+006f
0x8A U+FEB7
0x8B U+0635
0x8C U+FEBB
0x8D U+0636
0x8E U+FEBF
0x8F U+0637
0x90 U+0638
0x91 U+006A
0x92 U+006B
0x93 U+006C
0x94 U+006D
0x95 U+006E
0x96 U+006F
0x97 U+0070
0x98 U+0071
0x99 U+0072
0x9a U+fec9
0x9b U+feca
0x9c U+fecb
0x9d U+fecc
0x9e U+fecd
0x9f U+fece
0xa0 U+fecf
0xa1 U+00f7
0xa2 U+0073
0xa3 U+0074
0xa4 U+0075
0xa5 U+0076
0xa6 U+0077
0xa7 U+0078
0xa8 U+0079
0xa9 U+007a
0xaa U+fed0
0xab U+fed1
0xac U+fed3
0xad U+fed5
0xae U+fed7
0xaf U+fed9
0xb0 U+fedb
0xb1 U+fedd
0xb2 U+fef5
0xb3 U+fef6
0xb4 U+fef7
0xb5 U+fef8
0xb6 U+fffd
0xb7 U+fffd
0xb8 U+fefb
0xb9 U+fefc
0xba U+fedf
0xbb U+fee1
0xbc U+fee3
0xbd U+fee5
0xbe U+fee7
0xbf U+fee9
0xc0 U+061b
0xc1 U+0041
0xc2 U+0042
0xc3 U+0043
0xc4 U+0044
0xc5 U+0045
0xc6 U+0046
0xc7 U+0047
0xc8 U+0048
0xc9 U+0049
0xca U+00ad
0xcb U+feeb
0xcc U+fffd
0xcd U+feec
0xce U+fffd
0xcf U+feed
0xd0 U+061f
0xd1 U+004a
0xd2 U+004b
0xd3 U+004c
0xd4 U+004d
0xd5 U+004e
0xd6 U+004f
0xd7 U+0050
0xd8 U+0051
0xd9 U+0052
0xda U+feef
0xdb U+fef0
0xdc U+fef1
0xdd U+fef2
0xde U+fef3
0xdf U+0660
0xe0 U+00d7
0xe1 U+2007
0xe2 U+0053
0xe3 U+0054
0xe4 U+0055
0xe5 U+0056
0xe6 U+0057
0xe7 U+0058
0xe8 U+0059
0xe9 U+005a
0xea U+0661
0xeb U+0662
0xec U+fffd
0xed U+0663
0xee U+0664
0xef U+0665
0xf0 U+0030
0xf1 U+0031
0xf2 U+0032
0xf3 U+0033
0xf4 U+0034
0xf5 U+0035
0xf6 U+0036
0xf7 U+0037
0xf8 U+0038
0xf9 U+0039
0xfa U+fffd
0xfb U+0666
0xfc U+0667
0xfd U+0668
0xfe U+0669
0xff U+009f
0x9A U+0639
0x9B U+FECA
0x9C U+FECB
0x9D U+FECC
0x9E U+063A
0x9F U+FECE
0xA0 U+FECF
0xA1 U+00F7
0xA2 U+0073
0xA3 U+0074
0xA4 U+0075
0xA5 U+0076
0xA6 U+0077
0xA7 U+0078
0xA8 U+0079
0xA9 U+007A
0xAA U+FED0
0xAB U+0641
0xAC U+FED3
0xAD U+0642
0xAE U+FED7
0xAF U+0643
0xB0 U+FEDB
0xB1 U+0644
0xB2 U+FEF5
0xB3 U+FEF6
0xB4 U+FEF7
0xB5 U+FEF8
0xB8 U+FEFB
0xB9 U+FEFC
0xBA U+FEDF
0xBB U+0645
0xBC U+FEE3
0xBD U+0646
0xBE U+FEE7
0xBF U+0647
0xC0 U+061B
0xC1 U+0041
0xC2 U+0042
0xC3 U+0043
0xC4 U+0044
0xC5 U+0045
0xC6 U+0046
0xC7 U+0047
0xC8 U+0048
0xC9 U+0049
0xCA U+00AD
0xCB U+FEEB
0xCD U+FEEC
0xCF U+0648
0xD0 U+061F
0xD1 U+004A
0xD2 U+004B
0xD3 U+004C
0xD4 U+004D
0xD5 U+004E
0xD6 U+004F
0xD7 U+0050
0xD8 U+0051
0xD9 U+0052
0xDA U+0649
0xDB U+FEF0
0xDC U+064A
0xDD U+FEF2
0xDE U+FEF3
0xDF U+0660
0xE0 U+00D7
0xE2 U+0053
0xE3 U+0054
0xE4 U+0055
0xE5 U+0056
0xE6 U+0057
0xE7 U+0058
0xE8 U+0059
0xE9 U+005A
0xEA U+0661
0xEB U+0662
0xED U+0663
0xEE U+0664
0xEF U+0665
0xF0 U+0030
0xF1 U+0031
0xF2 U+0032
0xF3 U+0033
0xF4 U+0034
0xF5 U+0035
0xF6 U+0036
0xF7 U+0037
0xF8 U+0038
0xF9 U+0039
0xFB U+0666
0xFC U+0667
0xFD U+0668
0xFE U+0669
0xFF U+009F

View File

@ -26,6 +26,7 @@
package build.tools.charsetmapping;
import java.io.*;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.Formatter;
@ -54,33 +55,19 @@ public class GenerateSBCS {
String pkgName = fields[4];
System.out.printf("%s,%s,%s,%b,%s%n", clzName, csName, hisName, isASCII, pkgName);
StringBuilder b2c = new StringBuilder();
int c2bLen = genB2C(
new FileInputStream(new File(args[0], clzName+".map")), b2c);
String b2cNR = null;
File nrF = new File(args[0], clzName+".nr");
if (nrF.exists()) {
b2cNR = genNR(new FileInputStream(nrF));
}
String c2bNR = null;
File c2bF = new File(args[0], clzName+".c2b");
if (c2bF.exists()) {
c2bNR = genC2BNR(new FileInputStream(c2bF));
}
genSBCSClass(args[0], args[1], "SingleByte-X.java",
clzName, csName, hisName, pkgName, isASCII,
b2c.toString(), b2cNR, c2bNR, c2bLen);
genClass(args[0], args[1], "SingleByte-X.java",
clzName, csName, hisName, pkgName, isASCII);
}
}
private static void toString(char[] sb, int off, int end,
Formatter out, String closure) {
Formatter out, String closure,
boolean comment) {
while (off < end) {
out.format(" \"");
for (int j = 0; j < 8; j++) {
if (off == end)
break;
char c = sb[off++];
switch (c) {
case '\b':
@ -103,54 +90,74 @@ public class GenerateSBCS {
out.format("\\u%04X", c & 0xffff);
}
}
if (comment) {
if (off == end)
out.format("\" %s // 0x%02x - 0x%02x%n", closure, off-8, off-1);
out.format("\" %s // 0x%02x - 0x%02x%n",
closure, off-8, off-1);
else
out.format("\" + // 0x%02x - 0x%02x%n", off-8, off-1);
out.format("\" + // 0x%02x - 0x%02x%n",
off-8, off-1);
} else {
if (off == end)
out.format("\"%s%n", closure);
else
out.format("\" +%n");
}
}
}
static Pattern sbmap = Pattern.compile("0x(\\p{XDigit}++)\\s++U\\+(\\p{XDigit}++)(\\s++#.*)?");
private static int genB2C(InputStream in, StringBuilder out)
private static void genClass(String srcDir, String dstDir,
String template,
String clzName,
String csName,
String hisName,
String pkgName,
boolean isASCII)
throws Exception
{
StringBuilder b2cSB = new StringBuilder();
StringBuilder b2cNRSB = new StringBuilder();
StringBuilder c2bNRSB = new StringBuilder();
char[] sb = new char[0x100];
int[] indexC2B = new int[0x100];
char[] c2bIndex = new char[0x100];
int c2bOff = 0;
Arrays.fill(sb, UNMAPPABLE_DECODING);
Arrays.fill(c2bIndex, UNMAPPABLE_DECODING);
for (int i = 0; i < sb.length; i++)
sb[i] = UNMAPPABLE_DECODING;
// parse the b2c mapping table
// (1)read in .map to parse all b->c entries
FileInputStream in = new FileInputStream(
new File(srcDir, clzName + ".map"));
Parser p = new Parser(in, sbmap);
Entry e = null;
int off = 0;
while ((e = p.next()) != null) {
sb[e.bs] = (char)e.cp;
if (indexC2B[e.cp>>8] == 0) {
off += 0x100;
indexC2B[e.cp>>8] = 1;
if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
c2bOff += 0x100;
c2bIndex[e.cp>>8] = 1;
}
}
Formatter fm = new Formatter(out);
Formatter fm = new Formatter(b2cSB);
fm.format("%n");
// vm -server shows cc[byte + 128] access is much faster than
// cc[byte&0xff] so we output the upper segment first
toString(sb, 0x80, 0x100, fm, "+");
toString(sb, 0x00, 0x80, fm, ";");
toString(sb, 0x80, 0x100, fm, "+", true);
toString(sb, 0x00, 0x80, fm, ";", true);
fm.close();
return off;
}
// generate non-roundtrip entries from xxx.nr file
private static String genNR(InputStream in) throws Exception
{
StringBuilder sb = new StringBuilder();
Formatter fm = new Formatter(sb);
Parser p = new Parser(in, sbmap);
Entry e = null;
// (2)now the .nr file which includes "b->c" non-roundtrip entries
File f = new File(srcDir, clzName + ".nr");
if (f.exists()) {
in = new FileInputStream(f);
fm = new Formatter(b2cNRSB);
p = new Parser(in, sbmap);
e = null;
fm.format("// remove non-roundtrip entries%n");
fm.format(" b2cMap = b2cTable.toCharArray();%n");
while ((e = p.next()) != null) {
@ -158,46 +165,49 @@ public class GenerateSBCS {
(e.bs>=0x80)?(e.bs-0x80):(e.bs+0x80));
}
fm.close();
return sb.toString();
}
// generate c2b only entries from xxx.c2b file
private static String genC2BNR(InputStream in) throws Exception
{
StringBuilder sb = new StringBuilder();
Formatter fm = new Formatter(sb);
Parser p = new Parser(in, sbmap);
// (3)finally the .c2b file which includes c->b non-roundtrip entries
f = new File(srcDir, clzName + ".c2b");
if (f.exists()) {
in = new FileInputStream(f);
fm = new Formatter(c2bNRSB);
p = new Parser(in, sbmap);
e = null;
ArrayList<Entry> es = new ArrayList<Entry>();
Entry e = null;
while ((e = p.next()) != null) {
if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
c2bOff += 0x100;
c2bIndex[e.cp>>8] = 1;
}
es.add(e);
}
fm.format("// non-roundtrip c2b only entries%n");
if (es.size() < 100) {
fm.format(" c2bNR = new char[%d];%n", es.size() * 2);
int i = 0;
for (Entry entry: es) {
fm.format(" c2bNR[%d] = 0x%x; c2bNR[%d] = 0x%x;%n",
i++, entry.bs, i++, entry.cp);
}
} else {
char[] cc = new char[es.size() * 2];
int i = 0;
for (Entry entry: es) {
cc[i++] = (char)entry.bs;
cc[i++] = (char)entry.cp;
}
fm.format(" c2bNR = (%n");
toString(cc, 0, i, fm, ").toCharArray();", false);
}
fm.close();
return sb.toString();
}
private static void genSBCSClass(String srcDir,
String dstDir,
String template,
String clzName,
String csName,
String hisName,
String pkgName,
boolean isASCII,
String b2c,
String b2cNR,
String c2bNR,
int c2blen)
throws Exception
{
// (4)it's time to generate the source file
String b2c = b2cSB.toString();
String b2cNR = b2cNRSB.toString();
String c2bNR = c2bNRSB.toString();
Scanner s = new Scanner(new File(srcDir, template));
PrintStream out = new PrintStream(new FileOutputStream(
new File(dstDir, clzName + ".java")));
@ -239,16 +249,16 @@ public class GenerateSBCS {
line = line.replace("$B2CTABLE$", b2c);
}
if (line.indexOf("$C2BLENGTH$") != -1) {
line = line.replace("$C2BLENGTH$", "0x" + Integer.toString(c2blen, 16));
line = line.replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16));
}
if (line.indexOf("$NONROUNDTRIP_B2C$") != -1) {
if (b2cNR == null)
if (b2cNR.length() == 0)
continue;
line = line.replace("$NONROUNDTRIP_B2C$", b2cNR);
}
if (line.indexOf("$NONROUNDTRIP_C2B$") != -1) {
if (c2bNR == null)
if (c2bNR.length() == 0)
continue;
line = line.replace("$NONROUNDTRIP_C2B$", c2bNR);
}