8261418: Reduce decoder creation overheads for sun.nio.cs.ext Charsets

Reviewed-by: naoto
This commit is contained in:
Claes Redestad 2021-02-11 10:43:56 +00:00
parent 5e1b8092d2
commit 8b6ab31d31
15 changed files with 358 additions and 505 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -113,14 +113,10 @@ public class EUC_JP
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int b1 = 0, b2 = 0;
int inputSize = 0;
@ -296,13 +292,10 @@ public class EUC_JP
char[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int outputSize = 0;
byte[] outputByte;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -146,6 +146,19 @@ public class EUC_TW extends Charset implements HistoricallyNamedCharset
return b >= b1Min && b <= b1Max;
}
/**
 * Decodes the double-byte sequence {@code b1 b2} in plane {@code p} to a
 * single {@code char}, falling back to {@code replace} when no single-char
 * result is available.
 *
 * <p>{@code replace} is returned when either byte lies outside
 * {@code [b1Min, b1Max]} / {@code [b2Min, b2Max]}, when the table entry is
 * {@code UNMAPPABLE_DECODING}, or when the bit for plane {@code p} is set in
 * {@code b2cIsSupp} for the entry (presumably marking a supplementary
 * character that does not fit in one {@code char} -- confirm against the
 * table generator).
 *
 * @param b1      first byte value of the sequence
 * @param b2      second byte value of the sequence
 * @param p       plane index used to select the {@code b2c} table and the
 *                bit tested in {@code b2cIsSupp}
 * @param replace value returned when the sequence has no single-char mapping
 * @return the decoded char, or {@code replace}
 */
public static char decodeSingleOrReplace(int b1, int b2, int p, char replace) {
    boolean firstInRange = b1Min <= b1 && b1 <= b1Max;
    boolean secondInRange = b2Min <= b2 && b2 <= b2Max;
    if (!(firstInRange && secondInRange)) {
        return replace;
    }

    int index = (b1 - b1Min) * dbSegSize + b2 - b2Min;
    char decoded = b2c[p].charAt(index);
    if (decoded == UNMAPPABLE_DECODING) {
        return replace;
    }

    // The per-plane flag is only consulted for entries that are mapped.
    boolean flaggedForPlane = (b2cIsSupp[index] & (1 << p)) != 0;
    return flaggedForPlane ? replace : decoded;
}
static char[] decode(int b1, int b2, int p, char[] c1, char[] c2)
{
if (b1 < b1Min || b1 > b1Max || b2 < b2Min || b2 > b2Max)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -12332,14 +12332,10 @@ public class GB18030
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int inputSize = 1;
@ -12585,13 +12581,10 @@ public class GB18030
char[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int condensedKey = 0; // expands to a four byte sequence
int hiByte = 0, loByte = 0;

View File

@ -212,14 +212,10 @@ public class IBM29626C
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int b1 = 0, b2 = 0;
int inputSize = 0;
@ -484,13 +480,10 @@ public class IBM29626C
char[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int outputSize = 0;
byte[] outputByte;

View File

@ -1,6 +1,5 @@
/*
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -65,15 +64,8 @@ public class IBM33722
protected static class Decoder extends CharsetDecoder {
private final int G0 = 0;
private final int G1 = 1;
private final int G2 = 2;
private final int G3 = 3;
private final int G4 = 4;
private final int SS2 = 0x8E;
private final int SS3 = 0x8F;
private int firstByte, state;
private static final int SS2 = 0x8E;
private static final int SS3 = 0x8F;
public Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
@ -83,19 +75,16 @@ public class IBM33722
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try {
while (sp < sl) {
int byte1, byte2;
int inputSize = 1;
char outputChar = '\uFFFD';
char outputChar;
byte1 = sa[sp] & 0xff;
if (byte1 == SS2) {
@ -157,7 +146,7 @@ public class IBM33722
while (src.hasRemaining()) {
int byte1, byte2;
int inputSize = 1;
char outputChar = '\uFFFD';
char outputChar;
byte1 = src.get() & 0xff;
if (byte1 == SS2) {
@ -212,16 +201,16 @@ public class IBM33722
}
protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
if (true && src.hasArray() && dst.hasArray())
if (src.hasArray() && dst.hasArray())
return decodeArrayLoop(src, dst);
else
return decodeBufferLoop(src, dst);
}
private final static String byteToCharTable;
private final static String mappingTableG1;
private final static String mappingTableG2;
private final static String mappingTableG3;
private static final String byteToCharTable;
private static final String mappingTableG1;
private static final String mappingTableG2;
private static final String mappingTableG3;
static {
byteToCharTable =
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
@ -2749,9 +2738,9 @@ public class IBM33722
21792, 192, 73, 21824, 41, 2344, 2344, 0,
};
private final static String index2;
private final static String index2a;
private final static String index2b;
private static final String index2;
private static final String index2a;
private static final String index2b;
static {
index2 =
"\u0000\uA1F1\u0000\uA1F2\u0000\uA2CC\u0000\uA1B1\u0000\uA2C3" + // 0 - 4

View File

@ -1,6 +1,5 @@
/*
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -76,13 +75,10 @@ public class IBM964
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try {
while (sp < sl) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -500,14 +500,10 @@ public class ISCII91 extends Charset implements HistoricallyNamedCharset
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try {
while (sp < sl) {
@ -821,15 +817,10 @@ public class ISCII91 extends Charset implements HistoricallyNamedCharset
char[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int outputSize = 0;
try {
char inputChar;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -39,359 +39,33 @@ import sun.nio.cs.Surrogate;
abstract class ISO2022
extends Charset
{
private static final byte ISO_ESC = 0x1b;
private static final byte ISO_SI = 0x0f;
private static final byte ISO_SO = 0x0e;
private static final byte ISO_SS2_7 = 0x4e;
private static final byte ISO_SS3_7 = 0x4f;
private static final byte MSB = (byte)0x80;
private static final char REPLACE_CHAR = '\uFFFD';
private static final byte minDesignatorLength = 3;
public ISO2022(String csname, String[] aliases) {
super(csname, aliases);
}
public CharsetDecoder newDecoder() {
return new Decoder(this);
}
public abstract CharsetDecoder newDecoder();
public CharsetEncoder newEncoder() {
return new Encoder(this);
}
public abstract CharsetEncoder newEncoder();
protected static class Decoder extends CharsetDecoder {
// Value to be filled by subclass
protected byte SODesig[][];
protected byte SS2Desig[][] = null;
protected byte SS3Desig[][] = null;
protected CharsetDecoder SODecoder[];
protected CharsetDecoder SS2Decoder[] = null;
protected CharsetDecoder SS3Decoder[] = null;
private static final byte SOFlag = 0;
private static final byte SS2Flag = 1;
private static final byte SS3Flag = 2;
private int curSODes, curSS2Des, curSS3Des;
private boolean shiftout;
private CharsetDecoder tmpDecoder[];
protected Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
protected void implReset() {
curSODes = 0;
curSS2Des = 0;
curSS3Des = 0;
shiftout = false;
}
private char decode(byte byte1, byte byte2, byte shiftFlag)
{
byte1 |= MSB;
byte2 |= MSB;
byte[] tmpByte = { byte1,byte2 };
char[] tmpChar = new char[1];
int i = 0,
tmpIndex = 0;
switch(shiftFlag) {
case SOFlag:
tmpIndex = curSODes;
tmpDecoder = SODecoder;
break;
case SS2Flag:
tmpIndex = curSS2Des;
tmpDecoder = SS2Decoder;
break;
case SS3Flag:
tmpIndex = curSS3Des;
tmpDecoder = SS3Decoder;
break;
}
if (tmpDecoder != null) {
for(i = 0; i < tmpDecoder.length; i++) {
if(tmpIndex == i) {
try {
ByteBuffer bb = ByteBuffer.wrap(tmpByte,0,2);
CharBuffer cc = CharBuffer.wrap(tmpChar,0,1);
tmpDecoder[i].decode(bb, cc, true);
cc.flip();
return cc.get();
} catch (Exception e) {}
}
}
}
return REPLACE_CHAR;
}
private int findDesig(byte[] in, int sp, int sl, byte[][] desigs) {
if (desigs == null) return -1;
int i = 0;
while (i < desigs.length) {
if (desigs[i] != null && sl - sp >= desigs[i].length) {
int j = 0;
while (j < desigs[i].length && in[sp+j] == desigs[i][j]) { j++; }
if (j == desigs[i].length)
return i;
}
i++;
}
return -1;
}
private int findDesigBuf(ByteBuffer in, byte[][] desigs) {
if (desigs == null) return -1;
int i = 0;
while (i < desigs.length) {
if (desigs[i] != null && in.remaining() >= desigs[i].length) {
int j = 0;
in.mark();
while (j < desigs[i].length && in.get() == desigs[i][j]) { j++; }
if (j == desigs[i].length)
return i;
in.reset();
}
i++;
}
return -1;
}
private CoderResult decodeArrayLoop(ByteBuffer src,
CharBuffer dst)
{
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int b1 = 0, b2 = 0, b3 = 0;
try {
while (sp < sl) {
b1 = sa[sp] & 0xff;
int inputSize = 1;
switch (b1) {
case ISO_SO:
shiftout = true;
inputSize = 1;
break;
case ISO_SI:
shiftout = false;
inputSize = 1;
break;
case ISO_ESC:
if (sl - sp - 1 < minDesignatorLength)
return CoderResult.UNDERFLOW;
int desig = findDesig(sa, sp + 1, sl, SODesig);
if (desig != -1) {
curSODes = desig;
inputSize = SODesig[desig].length + 1;
break;
}
desig = findDesig(sa, sp + 1, sl, SS2Desig);
if (desig != -1) {
curSS2Des = desig;
inputSize = SS2Desig[desig].length + 1;
break;
}
desig = findDesig(sa, sp + 1, sl, SS3Desig);
if (desig != -1) {
curSS3Des = desig;
inputSize = SS3Desig[desig].length + 1;
break;
}
if (sl - sp < 2)
return CoderResult.UNDERFLOW;
b1 = sa[sp + 1];
switch(b1) {
case ISO_SS2_7:
if (sl - sp < 4)
return CoderResult.UNDERFLOW;
b2 = sa[sp +2];
b3 = sa[sp +3];
if (dl - dp <1)
return CoderResult.OVERFLOW;
da[dp] = decode((byte)b2,
(byte)b3,
SS2Flag);
dp++;
inputSize = 4;
break;
case ISO_SS3_7:
if (sl - sp < 4)
return CoderResult.UNDERFLOW;
b2 = sa[sp + 2];
b3 = sa[sp + 3];
if (dl - dp <1)
return CoderResult.OVERFLOW;
da[dp] = decode((byte)b2,
(byte)b3,
SS3Flag);
dp++;
inputSize = 4;
break;
default:
return CoderResult.malformedForLength(2);
}
break;
default:
if (dl - dp < 1)
return CoderResult.OVERFLOW;
if (!shiftout) {
da[dp++]=(char)(sa[sp] & 0xff);
} else {
if (dl - dp < 1)
return CoderResult.OVERFLOW;
if (sl - sp < 2)
return CoderResult.UNDERFLOW;
b2 = sa[sp+1] & 0xff;
da[dp++] = decode((byte)b1,
(byte)b2,
SOFlag);
inputSize = 2;
}
break;
}
sp += inputSize;
}
return CoderResult.UNDERFLOW;
} finally {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
}
private CoderResult decodeBufferLoop(ByteBuffer src,
CharBuffer dst)
{
int mark = src.position();
int b1 = 0, b2 = 0, b3 = 0;
try {
while (src.hasRemaining()) {
b1 = src.get();
int inputSize = 1;
switch (b1) {
case ISO_SO:
shiftout = true;
break;
case ISO_SI:
shiftout = false;
break;
case ISO_ESC:
if (src.remaining() < minDesignatorLength)
return CoderResult.UNDERFLOW;
int desig = findDesigBuf(src, SODesig);
if (desig != -1) {
curSODes = desig;
inputSize = SODesig[desig].length + 1;
break;
}
desig = findDesigBuf(src, SS2Desig);
if (desig != -1) {
curSS2Des = desig;
inputSize = SS2Desig[desig].length + 1;
break;
}
desig = findDesigBuf(src, SS3Desig);
if (desig != -1) {
curSS3Des = desig;
inputSize = SS3Desig[desig].length + 1;
break;
}
if (src.remaining() < 1)
return CoderResult.UNDERFLOW;
b1 = src.get();
switch(b1) {
case ISO_SS2_7:
if (src.remaining() < 2)
return CoderResult.UNDERFLOW;
b2 = src.get();
b3 = src.get();
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
dst.put(decode((byte)b2,
(byte)b3,
SS2Flag));
inputSize = 4;
break;
case ISO_SS3_7:
if (src.remaining() < 2)
return CoderResult.UNDERFLOW;
b2 = src.get();
b3 = src.get();
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
dst.put(decode((byte)b2,
(byte)b3,
SS3Flag));
inputSize = 4;
break;
default:
return CoderResult.malformedForLength(2);
}
break;
default:
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
if (!shiftout) {
dst.put((char)(b1 & 0xff));
} else {
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
if (src.remaining() < 1)
return CoderResult.UNDERFLOW;
b2 = src.get() & 0xff;
dst.put(decode((byte)b1,
(byte)b2,
SOFlag));
inputSize = 2;
}
break;
}
mark += inputSize;
}
return CoderResult.UNDERFLOW;
} catch (Exception e) { e.printStackTrace(); return CoderResult.OVERFLOW; }
finally {
src.position(mark);
}
}
protected CoderResult decodeLoop(ByteBuffer src,
CharBuffer dst)
{
if (src.hasArray() && dst.hasArray())
return decodeArrayLoop(src, dst);
else
return decodeBufferLoop(src, dst);
}
}
// No default Decoder implementation is provided here; the concrete
// encodings differ enough that most had been specialized for
// performance reasons, leaving the generic implementation that existed
// here before JDK-8261418 unused except by ISO2022_KR. As both a
// simplification and an optimization the implementation was moved
// there and specialized.
protected static class Encoder extends CharsetEncoder {
private static final byte ISO_ESC = 0x1b;
private static final byte ISO_SI = 0x0f;
private static final byte ISO_SO = 0x0e;
private static final byte ISO_SS2_7 = 0x4e;
private static final byte ISO_SS3_7 = 0x4f;
private final Surrogate.Parser sgp = new Surrogate.Parser();
public static final byte SS2 = (byte)0x8e;
public static final byte PLANE2 = (byte)0xA2;
public static final byte PLANE3 = (byte)0xA3;
private final byte MSB = (byte)0x80;
protected final byte maximumDesignatorLength = 4;
@ -428,9 +102,9 @@ abstract class ISO2022
private int unicodeToNative(char unicode, byte ebyte[]) {
int index = 0;
char convChar[] = {unicode};
byte convByte[] = new byte[4];
int converted;
char[] convChar = {unicode};
byte[] convByte = new byte[4];
int converted;
try{
CharBuffer cc = CharBuffer.wrap(convChar);
@ -491,16 +165,13 @@ abstract class ISO2022
char[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int outputSize = 0;
byte[] outputByte = new byte[8];
int outputSize;
byte[] outputByte = new byte[8];
newshiftout = shiftout;
newSODesDefined = SODesDefined;
newSS2DesDefined = SS2DesDefined;
@ -557,9 +228,8 @@ abstract class ISO2022
private CoderResult encodeBufferLoop(CharBuffer src,
ByteBuffer dst)
{
int outputSize = 0;
byte[] outputByte = new byte[8];
int inputSize = 0; // Size of input
int outputSize;
byte[] outputByte = new byte[8];
newshiftout = shiftout;
newSODesDefined = SODesDefined;
newSS2DesDefined = SS2DesDefined;
@ -584,7 +254,7 @@ abstract class ISO2022
outputSize = 1;
outputByte[0] = (byte)(inputChar & 0x7f);
}
if(inputChar == '\n'){
if (inputChar == '\n') {
newSODesDefined = false;
newSS2DesDefined = false;
newSS3DesDefined = false;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,6 @@ import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CharacterCodingException;
import sun.nio.cs.DoubleByte;
import sun.nio.cs.HistoricallyNamedCharset;
import sun.nio.cs.US_ASCII;
@ -86,17 +85,13 @@ public class ISO2022_CN
private boolean shiftOut;
private byte currentSODesig;
private static final Charset gb2312 = new EUC_CN();
private static final Charset cns = new EUC_TW();
private final DoubleByte.Decoder gb2312Decoder;
private final EUC_TW.Decoder cnsDecoder;
private static final DoubleByte.Decoder GB2312 =
(DoubleByte.Decoder)new EUC_CN().newDecoder();
Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
shiftOut = false;
currentSODesig = SODesigGB;
gb2312Decoder = (DoubleByte.Decoder)gb2312.newDecoder();
cnsDecoder = (EUC_TW.Decoder)cns.newDecoder();
}
protected void implReset() {
@ -107,34 +102,30 @@ public class ISO2022_CN
private char cnsDecode(byte byte1, byte byte2, byte SS) {
byte1 |= MSB;
byte2 |= MSB;
int p = 0;
int p;
if (SS == ISO_SS2_7)
p = 1; //plane 2, index -- 1
else if (SS == ISO_SS3_7)
p = 2; //plane 3, index -- 2
else
return REPLACE_CHAR; //never happen.
char[] ret = cnsDecoder.toUnicode(byte1 & 0xff,
byte2 & 0xff,
p);
if (ret == null || ret.length == 2)
return REPLACE_CHAR;
return ret[0];
return EUC_TW.Decoder.decodeSingleOrReplace(byte1 & 0xff,
byte2 & 0xff,
p,
REPLACE_CHAR);
}
private char SODecode(byte byte1, byte byte2, byte SOD) {
byte1 |= MSB;
byte2 |= MSB;
if (SOD == SODesigGB) {
return gb2312Decoder.decodeDouble(byte1 & 0xff,
byte2 & 0xff);
return GB2312.decodeDouble(byte1 & 0xff,
byte2 & 0xff);
} else { // SOD == SODesigCNS
char[] ret = cnsDecoder.toUnicode(byte1 & 0xff,
byte2 & 0xff,
0);
if (ret == null)
return REPLACE_CHAR;
return ret[0];
return EUC_TW.Decoder.decodeSingleOrReplace(byte1 & 0xff,
byte2 & 0xff,
0,
REPLACE_CHAR);
}
}
@ -142,9 +133,9 @@ public class ISO2022_CN
CharBuffer dst)
{
int mark = src.position();
byte b1 = 0, b2 = 0, b3 = 0, b4 = 0;
int inputSize = 0;
char c = REPLACE_CHAR;
byte b1, b2, b3, b4;
int inputSize;
char c;
try {
while (src.hasRemaining()) {
b1 = src.get();
@ -264,21 +255,17 @@ public class ISO2022_CN
private CoderResult decodeArrayLoop(ByteBuffer src,
CharBuffer dst)
{
int inputSize = 0;
byte b1 = 0, b2 = 0, b3 = 0, b4 = 0;
char c = REPLACE_CHAR;
int inputSize;
byte b1, b2, b3, b4;
char c;
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try {
while (sp < sl) {
@ -306,7 +293,7 @@ public class ISO2022_CN
if ((b3 & (byte)0x80) != 0)
return CoderResult.malformedForLength(inputSize);
if (b3 == 'A'){ // "$A"
if (b3 == 'A') { // "$A"
/* <ESC>$A is not a legal designator sequence for
ISO2022_CN, it is listed as an escape sequence
for GB2312 in ISO2022-JP-2. Keep it here just for

View File

@ -29,11 +29,8 @@
package sun.nio.cs.ext;
import java.nio.charset.Charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import sun.nio.cs.HistoricallyNamedCharset;
import sun.nio.cs.*;
@ -66,18 +63,22 @@ public class ISO2022_CN_CNS extends ISO2022 implements HistoricallyNamedCharset
private static final Charset cns = new EUC_TW();
private static final byte[] SOD = new byte[] {'$', ')', 'G' };
private static final byte[] SS2D = new byte[] {'$', '*', 'H' };
private static final byte[] SS3D = new byte[] {'$', '+', 'I' };
public Encoder(Charset cs)
{
super(cs);
SODesig = new byte[] {'$', ')', 'G' };
SS2Desig = new byte[] {'$', '*', 'H' };
SS3Desig = new byte[] {'$', '+', 'I' };
SODesig = SOD;
SS2Desig = SS2D;
SS3Desig = SS3D;
ISOEncoder = cns.newEncoder();
}
private byte[] bb = new byte[4];
private final byte[] bb = new byte[4];
public boolean canEncode(char c) {
int n = 0;
int n;
return (c <= '\u007f' ||
(n = ((EUC_TW.Encoder)ISOEncoder).toEUC(c, bb)) == 2 ||
(n == 4 && bb[0] == SS2 &&

View File

@ -29,11 +29,8 @@
package sun.nio.cs.ext;
import java.nio.charset.Charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import sun.nio.cs.*;
import sun.nio.cs.HistoricallyNamedCharset;
@ -66,10 +63,13 @@ public class ISO2022_CN_GB extends ISO2022 implements HistoricallyNamedCharset
private static class Encoder extends ISO2022.Encoder {
private static final Charset gb2312 = new EUC_CN();
private static final byte[] SOD = new byte[] {'$', ')', 'A' };
public Encoder(Charset cs)
{
super(cs);
SODesig = new byte[] { '$', ')', 'A'};
SODesig = SOD;
ISOEncoder = gb2312.newEncoder();
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -172,14 +172,14 @@ public class ISO2022_JP
static class Decoder extends CharsetDecoder
implements DelegatableDecoder {
final static DoubleByte.Decoder DEC0208 =
static final DoubleByte.Decoder DEC0208 =
(DoubleByte.Decoder)new JIS_X_0208().newDecoder();
private int currentState;
private int previousState;
private DoubleByte.Decoder dec0208;
private DoubleByte.Decoder dec0212;
private final DoubleByte.Decoder dec0208;
private final DoubleByte.Decoder dec0212;
private Decoder(Charset cs) {
this(cs, DEC0208, null);
@ -203,20 +203,16 @@ public class ISO2022_JP
private CoderResult decodeArrayLoop(ByteBuffer src,
CharBuffer dst)
{
int inputSize = 0;
int b1 = 0, b2 = 0, b3 = 0, b4 = 0;
char c = UNMAPPABLE_DECODING;
int inputSize;
int b1, b2, b3, b4;
char c;
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try {
while (sp < sl) {
@ -331,8 +327,8 @@ public class ISO2022_JP
CharBuffer dst)
{
int mark = src.position();
int b1 = 0, b2 = 0, b3 = 0, b4=0;
char c = UNMAPPABLE_DECODING;
int b1, b2, b3, b4;
char c;
int inputSize = 0;
try {
while (src.hasRemaining()) {
@ -465,12 +461,12 @@ public class ISO2022_JP
final static DoubleByte.Encoder ENC0208 =
(DoubleByte.Encoder)new JIS_X_0208().newEncoder();
private static byte[] repl = { (byte)0x21, (byte)0x29 };
private static final byte[] repl = { (byte)0x21, (byte)0x29 };
private int currentMode = ASCII;
private int replaceMode = JISX0208_1983;
private DoubleByte.Encoder enc0208;
private DoubleByte.Encoder enc0212;
private boolean doSBKANA;
private final DoubleByte.Encoder enc0208;
private final DoubleByte.Encoder enc0212;
private final boolean doSBKANA;
private Encoder(Charset cs) {
this(cs, ENC0208, null, true);
@ -535,13 +531,10 @@ public class ISO2022_JP
char[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try {
while (sp < sl) {

View File

@ -67,24 +67,261 @@ implements HistoricallyNamedCharset
return new Encoder(this);
}
private static class Decoder extends ISO2022.Decoder {
public Decoder(Charset cs)
private static class Decoder extends CharsetDecoder {
private static final byte[] SOD = new byte[] {'$', ')', 'C' };
private static final DoubleByte.Decoder KSC5601 = (DoubleByte.Decoder)
new EUC_KR().newDecoder();
private static final byte ISO_ESC = 0x1b;
private static final byte ISO_SI = 0x0f;
private static final byte ISO_SO = 0x0e;
private static final byte ISO_SS2_7 = 0x4e;
private static final byte ISO_SS3_7 = 0x4f;
private static final byte MSB = (byte)0x80;
private static final char REPLACE_CHAR = '\uFFFD';
private static final byte minDesignatorLength = 3;
private static final byte SOFlag = 0;
private static final byte SS2Flag = 1;
private static final byte SS3Flag = 2;
private boolean shiftout;
/**
 * Creates a decoder for the given charset.
 *
 * Both the average and the maximum chars-per-byte ratios handed to
 * CharsetDecoder are 1.0.
 *
 * @param cs the charset this decoder belongs to
 */
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
/**
 * Resets decoder state by clearing the shift-out flag, so subsequent bytes
 * are again emitted one char per byte until the next ISO_SO is seen.
 */
protected void implReset() {
shiftout = false;
}
private char decode(byte byte1, byte byte2, byte shiftFlag)
{
super(cs);
SODesig = new byte[][] {{(byte)'$', (byte)')', (byte)'C'}};
SODecoder = new CharsetDecoder[1];
if (shiftFlag == SOFlag) {
return KSC5601.decodeDouble((byte1 | MSB) & 0xFF, (byte2 | MSB) & 0xFF);
}
return REPLACE_CHAR;
}
/**
 * Tests whether the designator bytes {@code SOD} occur in {@code in}
 * starting at index {@code sp}.
 *
 * @param in array holding the input bytes
 * @param sp index of the first byte to compare
 * @param sl exclusive end index of the valid input
 * @return {@code true} if at least {@code SOD.length} bytes are available
 *         in {@code [sp, sl)} and they all equal {@code SOD}
 */
private boolean findDesig(byte[] in, int sp, int sl) {
    final int desigLength = SOD.length;
    // Not enough input left to hold the full designator.
    if (sl - sp < desigLength) {
        return false;
    }
    for (int k = 0; k < desigLength; k++) {
        if (in[sp + k] != SOD[k]) {
            return false;
        }
    }
    return true;
}
/**
 * Tests whether the next bytes of {@code in} equal the designator
 * {@code SOD}.
 *
 * <p>On a match the designator bytes stay consumed. On a mismatch the
 * buffer is rewound to the position it had on entry. Whenever enough bytes
 * remain to attempt the comparison, the buffer's mark is overwritten.
 *
 * @param in buffer positioned at the first candidate designator byte
 * @return {@code true} if the designator was found and consumed
 */
private boolean findDesigBuf(ByteBuffer in) {
    // Too short to contain the designator: leave position and mark alone.
    if (in.remaining() < SOD.length) {
        return false;
    }
    in.mark();
    for (int k = 0; k < SOD.length; k++) {
        if (in.get() != SOD[k]) {
            // Undo everything read so far, including the mismatching byte.
            in.reset();
            return false;
        }
    }
    return true;
}
private CoderResult decodeArrayLoop(ByteBuffer src,
CharBuffer dst)
{
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
int b1, b2, b3;
try {
SODecoder[0] = Holder.ksc5601_cs.newDecoder();
} catch (Exception e) {};
while (sp < sl) {
b1 = sa[sp] & 0xff;
int inputSize = 1;
switch (b1) {
case ISO_SO:
shiftout = true;
inputSize = 1;
break;
case ISO_SI:
shiftout = false;
inputSize = 1;
break;
case ISO_ESC:
if (sl - sp - 1 < minDesignatorLength)
return CoderResult.UNDERFLOW;
if (findDesig(sa, sp + 1, sl)) {
inputSize = SOD.length + 1;
break;
}
if (sl - sp < 2)
return CoderResult.UNDERFLOW;
b1 = sa[sp + 1];
switch (b1) {
case ISO_SS2_7:
if (sl - sp < 4)
return CoderResult.UNDERFLOW;
b2 = sa[sp +2];
b3 = sa[sp +3];
if (dl - dp <1)
return CoderResult.OVERFLOW;
da[dp] = decode((byte)b2,
(byte)b3,
SS2Flag);
dp++;
inputSize = 4;
break;
case ISO_SS3_7:
if (sl - sp < 4)
return CoderResult.UNDERFLOW;
b2 = sa[sp + 2];
b3 = sa[sp + 3];
if (dl - dp < 1)
return CoderResult.OVERFLOW;
da[dp] = decode((byte)b2,
(byte)b3,
SS3Flag);
dp++;
inputSize = 4;
break;
default:
return CoderResult.malformedForLength(2);
}
break;
default:
if (dl - dp < 1)
return CoderResult.OVERFLOW;
if (!shiftout) {
da[dp++]=(char)(sa[sp] & 0xff);
} else {
if (dl - dp < 1)
return CoderResult.OVERFLOW;
if (sl - sp < 2)
return CoderResult.UNDERFLOW;
b2 = sa[sp+1] & 0xff;
da[dp++] = decode((byte)b1,
(byte)b2,
SOFlag);
inputSize = 2;
}
break;
}
sp += inputSize;
}
return CoderResult.UNDERFLOW;
} finally {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
}
/**
 * Decodes bytes from src into dst using only relative buffer get/put
 * operations (decodeLoop picks this path when either buffer has no
 * accessible backing array).
 *
 * Handles ISO_SO / ISO_SI shift bytes, ISO_ESC sequences (the SOD
 * designator, or ESC followed by ISO_SS2_7 / ISO_SS3_7 and two more bytes),
 * and ordinary data bytes, which produce one char per byte when not shifted
 * out and one char per byte pair when shifted out.
 *
 * On every exit path src is repositioned to the end of the last completely
 * processed input sequence, so a partially read sequence is seen again on
 * the next call. Chars already written to dst are left in place.
 *
 * @param src input bytes
 * @param dst output chars
 * @return UNDERFLOW when the input is exhausted or ends inside a sequence,
 *         OVERFLOW when dst has no room for the next char (or when any
 *         exception is caught, see the catch clause below), or a
 *         malformed-input result of length 2 for an unrecognized ESC
 *         sequence
 */
private CoderResult decodeBufferLoop(ByteBuffer src,
CharBuffer dst)
{
// Position just past the last fully processed input sequence; restored
// into src in the finally block.
int mark = src.position();
int b1, b2, b3;
try {
while (src.hasRemaining()) {
b1 = src.get();
// Number of input bytes the current sequence accounts for.
int inputSize = 1;
switch (b1) {
case ISO_SO:
shiftout = true;
break;
case ISO_SI:
shiftout = false;
break;
case ISO_ESC:
// Wait for more input unless a full designator could follow the ESC.
if (src.remaining() < minDesignatorLength)
return CoderResult.UNDERFLOW;
// findDesigBuf consumes the designator bytes when it matches and rewinds
// src otherwise.
if (findDesigBuf(src)) {
inputSize = SOD.length + 1;
break;
}
if (src.remaining() < 1)
return CoderResult.UNDERFLOW;
b1 = src.get();
switch(b1) {
case ISO_SS2_7:
// ESC + SS2 + two data bytes: four input bytes, one output char.
if (src.remaining() < 2)
return CoderResult.UNDERFLOW;
b2 = src.get();
b3 = src.get();
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
dst.put(decode((byte)b2,
(byte)b3,
SS2Flag));
inputSize = 4;
break;
case ISO_SS3_7:
// Same shape as the SS2 case, decoded with SS3Flag.
if (src.remaining() < 2)
return CoderResult.UNDERFLOW;
b2 = src.get();
b3 = src.get();
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
dst.put(decode((byte)b2,
(byte)b3,
SS3Flag));
inputSize = 4;
break;
default:
// ESC followed by anything else is reported as malformed input.
return CoderResult.malformedForLength(2);
}
break;
default:
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
if (!shiftout) {
// Not shifted out: the byte maps directly to the char with the same
// unsigned value.
dst.put((char)(b1 & 0xff));
} else {
// Shifted out: the byte is the first of a two-byte sequence.
if (src.remaining() < 1)
return CoderResult.UNDERFLOW;
b2 = src.get() & 0xff;
dst.put(decode((byte)b1,
(byte)b2,
SOFlag));
inputSize = 2;
}
break;
}
// The sequence was handled completely; commit its bytes.
mark += inputSize;
}
return CoderResult.UNDERFLOW;
// NOTE(review): every exception is printed to stderr and reported as
// OVERFLOW, which can mask the real failure -- confirm this is intended.
} catch (Exception e) { e.printStackTrace(); return CoderResult.OVERFLOW; }
finally {
// Discard any bytes read past the last committed sequence.
src.position(mark);
}
}
/**
 * Dispatches to the array-based loop when both buffers expose a backing
 * array, and to the buffer-based loop otherwise.
 *
 * @param src input bytes
 * @param dst output chars
 * @return the result produced by the selected loop
 */
protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
    boolean bothArrayBacked = src.hasArray() && dst.hasArray();
    return bothArrayBacked
            ? decodeArrayLoop(src, dst)
            : decodeBufferLoop(src, dst);
}
}
private static class Encoder extends ISO2022.Encoder {
private static final byte[] SOD = new byte[] {'$', ')', 'C' };
public Encoder(Charset cs) {
super(cs);
SODesig = new byte[] {'$', ')', 'C' };
SODesig = SOD;
try {
ISOEncoder = Holder.ksc5601_cs.newEncoder();
} catch (Exception e) { }

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -98,13 +98,10 @@ public abstract class SimpleEUCEncoder
char[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int index;
int spaceNeeded;

View File

@ -54,7 +54,7 @@ public class StringDecode {
@State(Scope.Thread)
public static class WithCharset {
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6"})
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6", "ISO-2022-KR"})
private String charsetName;
private Charset charset;