8261744: Implement CharsetDecoder ASCII and latin-1 fast-paths

Reviewed-by: naoto, alanb
This commit is contained in:
Claes Redestad 2021-02-19 15:05:25 +00:00
parent efbaedeb81
commit 433096a45e
13 changed files with 453 additions and 263 deletions

View File

@ -1008,6 +1008,27 @@ public final class String
//////////////////////////////// utf8 ////////////////////////////////////
/**
 * ASCII fast-path shared with the charset decoders (reached via
 * JavaLangAccess from UTF_8 and others): copies bytes from {@code sa}
 * into {@code da} for as long as they are non-negative.
 *
 * @param sa  source bytes
 * @param sp  index in {@code sa} of the first byte to examine
 * @param da  destination chars
 * @param dp  index in {@code da} at which the first decoded char is stored
 * @param len maximum number of bytes to decode
 * @return the number of bytes successfully decoded, at most {@code len};
 *         less than {@code len} only when a negative byte stops the copy
 */
/* package-private */
static int decodeASCII(byte[] sa, int sp, char[] da, int dp, int len) {
    if (StringCoding.hasNegatives(sa, sp, len)) {
        // The bulk path is not taken: copy byte by byte and stop at the
        // first negative byte or at the end of the range.
        final int limit = sp + len;
        int decoded = 0;
        while (sp + decoded < limit && sa[sp + decoded] >= 0) {
            da[dp + decoded] = (char) sa[sp + decoded];
            decoded++;
        }
        return decoded;
    }
    // No negative byte reported: inflate the whole range in one call.
    StringLatin1.inflate(sa, sp, da, dp, len);
    return len;
}
private static boolean isNotContinuation(int b) {
return (b & 0xc0) != 0x80;

View File

@ -1981,6 +1981,11 @@ public final class System {
* Initialize the system class. Called after thread initialization.
*/
private static void initPhase1() {
// register the shared secrets - do this first, since SystemProps.initProperties
// might initialize CharsetDecoders that rely on it
setJavaLangAccess();
// VM might invoke JNU_NewStringPlatform() to set those encoding
// sensitive properties (user.home, user.name, boot.class.path, etc.)
// during "props" initialization.
@ -2026,8 +2031,6 @@ public final class System {
Thread current = Thread.currentThread();
current.getThreadGroup().add(current);
// register shared secrets
setJavaLangAccess();
// Subsystems that are invoked during initialization can invoke
// VM.isBooted() in order to avoid doing things that should
@ -2277,6 +2280,14 @@ public final class System {
return String.getBytesUTF8NoRepl(s);
}
// Byte-to-char inflating copy exposed through JavaLangAccess; forwards
// all arguments unchanged to StringLatin1.inflate.
public void inflateBytesToChars(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
StringLatin1.inflate(src, srcOff, dst, dstOff, len);
}
// ASCII decoding exposed through JavaLangAccess; forwards all arguments
// unchanged to String.decodeASCII and returns its result.
public int decodeASCII(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
return String.decodeASCII(src, srcOff, dst, dstOff, len);
}
// Forwards to t.setCause(cause).
public void setCause(Throwable t, Throwable cause) {
t.setCause(cause);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -328,6 +328,19 @@ public interface JavaLangAccess {
*/
byte[] getBytesUTF8NoRepl(String s);
/**
* Inflated copy from byte[] to char[], as defined by StringLatin1.inflate
*
* @param src    the source byte array
* @param srcOff the index in src of the first byte to copy
* @param dst    the destination char array
* @param dstOff the index in dst at which the first char is stored
* @param len    the number of bytes to copy
*/
void inflateBytesToChars(byte[] src, int srcOff, char[] dst, int dstOff, int len);
/**
* Decodes ASCII from the source byte array into the destination
* char array.
*
* @param src    the source byte array
* @param srcOff the index in src of the first byte to decode
* @param dst    the destination char array
* @param dstOff the index in dst at which the first decoded char is stored
* @param len    the maximum number of bytes to decode
* @return the number of bytes successfully decoded, at most len
*/
int decodeASCII(byte[] src, int srcOff, char[] dst, int dstOff, int len);
/**
* Set the cause of Throwable
* @param cause set t's cause to new value

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -88,7 +88,7 @@ public class SharedSecrets {
try {
Class.forName("java.util.ImmutableCollections$Access", true, null);
access = javaUtilCollectionAccess;
} catch (ClassNotFoundException e) {};
} catch (ClassNotFoundException e) {}
}
return access;
}
@ -126,7 +126,7 @@ public class SharedSecrets {
try {
Class.forName("java.lang.invoke.MethodHandleImpl", true, null);
access = javaLangInvokeAccess;
} catch (ClassNotFoundException e) {};
} catch (ClassNotFoundException e) {}
}
return access;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,9 @@
package sun.nio.cs;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
@ -75,6 +78,9 @@ class CESU_8 extends Unicode
private static class Decoder extends CharsetDecoder
implements ArrayDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
// Passes 1.0f to CharsetDecoder for both averageCharsPerByte and
// maxCharsPerByte.
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
@ -96,27 +102,6 @@ class CESU_8 extends Unicode
(b2 & 0xc0) != 0x80;
}
// Four-byte sequence layout:
//   [F0]     [90..BF] [80..BF] [80..BF]
//   [F1..F3] [80..BF] [80..BF] [80..BF]
//   [F4]     [80..8F] [80..BF] [80..BF]
// Only the generic 80..BF continuation range is tested here; the
// [0xf0,0x80...] and [0xf4,0x90-...] cases are left to
// Character.isSupplementaryCodePoint(uc).
private static boolean isMalformed4(int b2, int b3, int b4) {
    boolean allContinuations = (b2 & 0xc0) == 0x80
            && (b3 & 0xc0) == 0x80
            && (b4 & 0xc0) == 0x80;
    return !allContinuations;
}
// Only used when there are fewer than 4 bytes left in the src buffer, so
// just the lead byte b1 and the second byte b2 can be checked.
// NOTE(review): assumes both arguments are unsigned values (0x00..0xff) -
// confirm against the callers.
//   [F0]   must be followed by [90..BF]; 80..8F would be an overlong form
//   [F4]   must be followed by [80..8F]; 90..BF would exceed U+10FFFF
//   others must be followed by a continuation byte [80..BF]
// Returns true when the two-byte prefix cannot start a well-formed
// four-byte sequence.
private static boolean isMalformed4_2(int b1, int b2) {
    return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
           (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
           (b2 & 0xc0) != 0x80;
}
// True when b3 is not a continuation byte, i.e. its bits 7..6 are not 10.
private static boolean isMalformed4_3(int b3) {
    final int topTwoBits = b3 & 0xc0;
    return topTwoBits != 0x80;
}
private static CoderResult malformedN(ByteBuffer src, int nb) {
switch (nb) {
case 1:
@ -202,17 +187,19 @@ class CESU_8 extends Unicode
{
// This method is optimized for ASCII input.
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
int soff = src.arrayOffset();
int sp = soff + src.position();
int sl = soff + src.limit();
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
int dlASCII = dp + Math.min(sl - sp, dl - dp);
int doff = dst.arrayOffset();
int dp = doff + dst.position();
int dl = doff + dst.limit();
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp));
sp += n;
dp += n;
// ASCII only loop
while (dp < dlASCII && sa[sp] >= 0)
da[dp++] = (char) sa[sp++];
while (sp < sl) {
int b1 = sa[sp];
if (b1 >= 0) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -32,6 +32,9 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.Arrays;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import sun.nio.cs.Surrogate;
import sun.nio.cs.ArrayDecoder;
import sun.nio.cs.ArrayEncoder;
@ -111,6 +114,8 @@ public class DoubleByte {
public static class Decoder extends CharsetDecoder
implements DelegatableDecoder, ArrayDecoder
{
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
final char[][] b2c;
final char[] b2cSB;
final int b2Min;
@ -154,14 +159,21 @@ public class DoubleByte {
protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
int soff = src.arrayOffset();
int sp = soff + src.position();
int sl = soff + src.limit();
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
int doff = dst.arrayOffset();
int dp = doff + dst.position();
int dl = doff + dst.limit();
try {
if (isASCIICompatible) {
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));
dp += n;
sp += n;
}
while (sp < sl && dp < dl) {
// inline the decodeSingle/Double() for better performance
int inSize = 1;
@ -183,8 +195,8 @@ public class DoubleByte {
return (sp >= sl) ? CoderResult.UNDERFLOW
: CoderResult.OVERFLOW;
} finally {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
src.position(sp - soff);
dst.position(dp - doff);
}
}
@ -342,7 +354,7 @@ public class DoubleByte {
else
currentState = SBCS;
} else {
char c = UNMAPPABLE_DECODING;
char c;
if (currentState == SBCS) {
c = b2cSB[b1];
if (c == UNMAPPABLE_DECODING)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -33,6 +33,8 @@ import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.Objects;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import jdk.internal.vm.annotation.IntrinsicCandidate;
public class ISO_8859_1
@ -64,6 +66,8 @@ public class ISO_8859_1
private static class Decoder extends CharsetDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
// Passes 1.0f to CharsetDecoder for both averageCharsPerByte and
// maxCharsPerByte.
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
@ -72,29 +76,25 @@ public class ISO_8859_1
CharBuffer dst)
{
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int soff = src.arrayOffset();
int sp = soff + src.position();
int sl = soff + src.limit();
try {
while (sp < sl) {
byte b = sa[sp];
if (dp >= dl)
char[] da = dst.array();
int doff = dst.arrayOffset();
int dp = doff + dst.position();
int dl = doff + dst.limit();
int decodeLen = Math.min(sl - sp, dl - dp);
JLA.inflateBytesToChars(sa, sp, da, dp, decodeLen);
sp += decodeLen;
dp += decodeLen;
src.position(sp - soff);
dst.position(dp - doff);
if (sl - sp > dl - dp) {
return CoderResult.OVERFLOW;
da[dp++] = (char)(b & 0xff);
sp++;
}
return CoderResult.UNDERFLOW;
} finally {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
}
private CoderResult decodeBufferLoop(ByteBuffer src,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,9 @@
package sun.nio.cs;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
@ -48,6 +51,9 @@ public class SingleByte
public static final class Decoder extends CharsetDecoder
implements ArrayDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private final char[] b2c;
private final boolean isASCIICompatible;
private final boolean isLatin1Decodable;
@ -88,6 +94,11 @@ public class SingleByte
cr = CoderResult.OVERFLOW;
}
if (isASCIICompatible) {
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));
sp += n;
dp += n;
}
while (sp < sl) {
char c = decode(sa[sp]);
if (c == UNMAPPABLE_DECODING) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -28,7 +28,6 @@
package sun.nio.cs;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
@ -42,9 +41,9 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
public class StreamDecoder extends Reader
{
public class StreamDecoder extends Reader {
private static final int MIN_BYTE_BUFFER_SIZE = 32;
private static final int DEFAULT_BYTE_BUFFER_SIZE = 8192;
@ -72,14 +71,15 @@ public class StreamDecoder extends Reader
throws UnsupportedEncodingException
{
String csn = charsetName;
if (csn == null)
if (csn == null) {
csn = Charset.defaultCharset().name();
}
try {
if (Charset.isSupported(csn))
return new StreamDecoder(in, lock, Charset.forName(csn));
} catch (IllegalCharsetNameException x) { }
} catch (IllegalCharsetNameException | UnsupportedCharsetException x) {
throw new UnsupportedEncodingException (csn);
}
}
public static StreamDecoder forInputStreamReader(InputStream in,
Object lock,
@ -133,7 +133,7 @@ public class StreamDecoder extends Reader
}
// Convert more bytes
char cb[] = new char[2];
char[] cb = new char[2];
int n = read(cb, 0, 2);
switch (n) {
case -1:
@ -151,7 +151,7 @@ public class StreamDecoder extends Reader
}
}
public int read(char cbuf[], int offset, int length) throws IOException {
public int read(char[] cbuf, int offset, int length) throws IOException {
int off = offset;
int len = length;
synchronized (lock) {
@ -215,30 +215,13 @@ public class StreamDecoder extends Reader
// -- Charset-based stream decoder impl --
// In the early stages of the build we haven't yet built the NIO native
// code, so guard against that by catching the first UnsatisfiedLinkError
// and setting this flag so that later attempts fail quickly.
//
private static volatile boolean channelsAvailable = true;
// Returns the channel of the given stream, or null when channels are
// unavailable. The first UnsatisfiedLinkError clears channelsAvailable so
// that every later call returns null without trying again.
private static FileChannel getChannel(FileInputStream in) {
    if (channelsAvailable) {
        try {
            return in.getChannel();
        } catch (UnsatisfiedLinkError x) {
            channelsAvailable = false;
        }
    }
    return null;
}
private Charset cs;
private CharsetDecoder decoder;
private ByteBuffer bb;
private final Charset cs;
private final CharsetDecoder decoder;
private final ByteBuffer bb;
// Exactly one of these is non-null
private InputStream in;
private ReadableByteChannel ch;
private final InputStream in;
private final ReadableByteChannel ch;
StreamDecoder(InputStream in, Object lock, Charset cs) {
this(in, lock,
@ -251,18 +234,9 @@ public class StreamDecoder extends Reader
super(lock);
this.cs = dec.charset();
this.decoder = dec;
// This path disabled until direct buffers are faster
if (false && in instanceof FileInputStream) {
ch = getChannel((FileInputStream)in);
if (ch != null)
bb = ByteBuffer.allocateDirect(DEFAULT_BYTE_BUFFER_SIZE);
}
if (ch == null) {
this.in = in;
this.ch = null;
bb = ByteBuffer.allocate(DEFAULT_BYTE_BUFFER_SIZE);
}
bb.flip(); // So that bb is initially empty
}
@ -293,7 +267,6 @@ public class StreamDecoder extends Reader
int pos = bb.position();
assert (pos <= lim);
int rem = (pos <= lim ? lim - pos : 0);
assert rem > 0;
int n = in.read(bb.array(), bb.arrayOffset() + pos, rem);
if (n < 0)
return n;
@ -322,9 +295,10 @@ public class StreamDecoder extends Reader
assert (end - off > 1);
CharBuffer cb = CharBuffer.wrap(cbuf, off, end - off);
if (cb.position() != 0)
if (cb.position() != 0) {
// Ensure that cb[0] == cbuf[off]
cb = cb.slice();
}
boolean eof = false;
for (;;) {
@ -358,8 +332,9 @@ public class StreamDecoder extends Reader
}
if (cb.position() == 0) {
if (eof)
if (eof) {
return -1;
}
assert false;
}
return cb.position();
@ -385,10 +360,10 @@ public class StreamDecoder extends Reader
}
void implClose() throws IOException {
if (ch != null)
if (ch != null) {
ch.close();
else
} else {
in.close();
}
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,7 +25,6 @@
package sun.nio.cs;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
@ -38,6 +37,7 @@ import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
public class StreamEncoder extends Writer
{
@ -58,14 +58,15 @@ public class StreamEncoder extends Writer
throws UnsupportedEncodingException
{
String csn = charsetName;
if (csn == null)
if (csn == null) {
csn = Charset.defaultCharset().name();
}
try {
if (Charset.isSupported(csn))
return new StreamEncoder(out, lock, Charset.forName(csn));
} catch (IllegalCharsetNameException x) { }
} catch (IllegalCharsetNameException | UnsupportedCharsetException x) {
throw new UnsupportedEncodingException (csn);
}
}
public static StreamEncoder forOutputStreamWriter(OutputStream out,
Object lock,
@ -114,12 +115,12 @@ public class StreamEncoder extends Writer
}
public void write(int c) throws IOException {
char cbuf[] = new char[1];
char[] cbuf = new char[1];
cbuf[0] = (char) c;
write(cbuf, 0, 1);
}
public void write(char cbuf[], int off, int len) throws IOException {
public void write(char[] cbuf, int off, int len) throws IOException {
synchronized (lock) {
ensureOpen();
if ((off < 0) || (off > cbuf.length) || (len < 0) ||
@ -136,7 +137,7 @@ public class StreamEncoder extends Writer
/* Check the len before creating a char buffer */
if (len < 0)
throw new IndexOutOfBoundsException();
char cbuf[] = new char[len];
char[] cbuf = new char[len];
str.getChars(off, off + len, cbuf, 0);
write(cbuf, 0, len);
}
@ -179,13 +180,13 @@ public class StreamEncoder extends Writer
// -- Charset-based stream encoder impl --
private Charset cs;
private CharsetEncoder encoder;
private ByteBuffer bb;
private final Charset cs;
private final CharsetEncoder encoder;
private final ByteBuffer bb;
// Exactly one of these is non-null
private final OutputStream out;
private WritableByteChannel ch;
private final WritableByteChannel ch;
// Leftover first char in a surrogate pair
private boolean haveLeftoverChar = false;
@ -205,16 +206,7 @@ public class StreamEncoder extends Writer
this.ch = null;
this.cs = enc.charset();
this.encoder = enc;
// This path disabled until direct buffers are faster
if (false && out instanceof FileOutputStream) {
ch = ((FileOutputStream)out).getChannel();
if (ch != null)
bb = ByteBuffer.allocateDirect(DEFAULT_BYTE_BUFFER_SIZE);
}
if (ch == null) {
bb = ByteBuffer.allocate(DEFAULT_BYTE_BUFFER_SIZE);
}
this.bb = ByteBuffer.allocate(DEFAULT_BYTE_BUFFER_SIZE);
}
private StreamEncoder(WritableByteChannel ch, CharsetEncoder enc, int mbc) {
@ -236,8 +228,8 @@ public class StreamEncoder extends Writer
if (rem > 0) {
if (ch != null) {
if (ch.write(bb) != rem)
assert false : rem;
int wc = ch.write(bb);
assert wc == rem : rem;
} else {
out.write(bb.array(), bb.arrayOffset() + pos, rem);
}
@ -283,7 +275,7 @@ public class StreamEncoder extends Writer
haveLeftoverChar = false;
}
void implWrite(char cbuf[], int off, int len)
void implWrite(char[] cbuf, int off, int len)
throws IOException
{
CharBuffer cb = CharBuffer.wrap(cbuf, off, len);
@ -317,15 +309,17 @@ public class StreamEncoder extends Writer
}
void implFlushBuffer() throws IOException {
if (bb.position() > 0)
if (bb.position() > 0) {
writeBytes();
}
}
void implFlush() throws IOException {
implFlushBuffer();
if (out != null)
if (out != null) {
out.flush();
}
}
void implClose() throws IOException {
flushLeftoverChar(null, true);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,9 @@
package sun.nio.cs;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
@ -60,6 +63,8 @@ public class US_ASCII
private static class Decoder extends CharsetDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
// Passes 1.0f to CharsetDecoder for both averageCharsPerByte and
// maxCharsPerByte.
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
@ -68,33 +73,28 @@ public class US_ASCII
CharBuffer dst)
{
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int soff = src.arrayOffset();
int sp = soff + src.position();
int sl = soff + src.limit();
try {
while (sp < sl) {
byte b = sa[sp];
if (b >= 0) {
if (dp >= dl)
char[] da = dst.array();
int doff = dst.arrayOffset();
int dp = doff + dst.position();
int dl = doff + dst.limit();
// ASCII only loop
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp));
sp += n;
dp += n;
src.position(sp - soff);
dst.position(dp - doff);
if (sp < sl) {
if (dp >= dl) {
return CoderResult.OVERFLOW;
da[dp++] = (char)b;
sp++;
continue;
}
return CoderResult.malformedForLength(1);
}
return CoderResult.UNDERFLOW;
} finally {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
}
private CoderResult decodeBufferLoop(ByteBuffer src,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,9 @@
package sun.nio.cs;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
@ -82,6 +85,8 @@ public final class UTF_8 extends Unicode {
private static class Decoder extends CharsetDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
// Passes 1.0f to CharsetDecoder for both averageCharsPerByte and
// maxCharsPerByte.
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
@ -129,15 +134,6 @@ public final class UTF_8 extends Unicode {
return (b3 & 0xc0) != 0x80;
}
// Reads up to n - 1 bytes from src (advancing its position). Reports a
// malformed length of i at the first byte that is not a continuation
// byte, or of n when all of them are continuation bytes.
private static CoderResult lookupN(ByteBuffer src, int n) {
    int i = 1;
    while (i < n) {
        if (isNotContinuation(src.get())) {
            return CoderResult.malformedForLength(i);
        }
        i++;
    }
    return CoderResult.malformedForLength(n);
}
private static CoderResult malformedN(ByteBuffer src, int nb) {
switch (nb) {
case 1:
@ -223,17 +219,19 @@ public final class UTF_8 extends Unicode {
{
// This method is optimized for ASCII input.
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
int soff = src.arrayOffset();
int sp = soff + src.position();
int sl = soff + src.limit();
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
int dlASCII = dp + Math.min(sl - sp, dl - dp);
int doff = dst.arrayOffset();
int dp = doff + dst.position();
int dl = doff + dst.limit();
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp));
sp += n;
dp += n;
// ASCII only loop
while (dp < dlASCII && sa[sp] >= 0)
da[dp++] = (char) sa[sp++];
while (sp < sl) {
int b1 = sa[sp];
if (b1 >= 0) {
@ -415,14 +413,6 @@ public final class UTF_8 extends Unicode {
else
return decodeBufferLoop(src, dst);
}
// Returns a buffer positioned at sp: bb itself when it is non-null,
// otherwise a new buffer wrapping ba.
private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp) {
    ByteBuffer target = (bb != null) ? bb : ByteBuffer.wrap(ba);
    target.position(sp);
    return target;
}
}
private static final class Encoder extends CharsetEncoder {

View File

@ -0,0 +1,176 @@
/*
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.io;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.concurrent.TimeUnit;
/**
* Tests the overheads of reading encoded byte arrays via StreamDecoder
* <p>
* Each input is decoded two ways: through an InputStreamReader over a
* ByteArrayInputStream (the "Reader" benchmarks) and through
* {@code new String(byte[], Charset)} (the "Direct" benchmarks).
*/
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@Warmup(time=2, iterations=5)
@Measurement(time=3, iterations=5)
@Fork(value=2, jvmArgs="-Xmx1g")
public class ByteStreamDecoder {
// Name of the charset under test; resolved into cs by setup().
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "ISO-8859-6", "MS932"})
private String charsetName;
// Number of chars of sample text that setup() builds and encodes.
@Param({"256", "4096", "25000"})
private int length;
// The sample text encoded with cs.
private byte[] bytes;
// The sample text followed by a single U+00E5, encoded with cs.
private byte[] nonASCIIBytesEnd;
// A single U+00E5 followed by the sample text, encoded with cs.
private byte[] nonASCIIBytesStart;
// The sample text with every odd-indexed char replaced by U+00E5, encoded with cs.
private byte[] nonASCIIBytesEveryOther;
// Destination buffer reused by all reader benchmarks.
private char[] chars;
private Charset cs;
/**
* Builds the sample text of {@code length} chars, encodes the four input
* variants with the selected charset, and compares the reader-based and
* direct decoding results for each variant.
*/
@Setup
public void setup() throws IOException {
var s = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed non
magna augue. Sed tristique ante id maximus interdum. Suspendisse
potenti. Aliquam molestie metus vitae magna gravida egestas.
Phasellus eleifend tortor sit amet neque euismod, vitae luctus
ante viverra. Sed quis justo ultrices, eleifend dui sed, egestas
lorem. Mauris ipsum ex, interdum eu turpis sed, fermentum efficitur
lorem. Sed vel imperdiet libero, eget ullamcorper sem. Praesent
gravida arcu quis ipsum viverra tristique. Quisque maximus
elit nec nisi vulputate tempor. Integer aliquet tortor vel
vehicula efficitur. Sed neque felis, ultricies eu leo ultricies,
egestas placerat dolor. Etiam iaculis magna quis lacinia
tincidunt. Donec in tellus volutpat, semper nunc ornare,
tempus erat. Donec volutpat mauris in arcu mattis sollicitudin.
Morbi vestibulum ipsum sed erat porta, mollis commodo nisi
gravida.
""";
// Repeat the paragraph as many whole times as fit into length ...
int n = length / s.length();
String full = "";
if (n > 0) {
full = s.repeat(n);
}
// ... then top up with a prefix of it so that full has exactly length chars.
n = length % s.length();
if (n > 0) {
full += s.substring(0, n);
}
cs = Charset.forName(charsetName);
bytes = full.getBytes(cs);
nonASCIIBytesEnd = (full + "\u00e5").getBytes(cs);
nonASCIIBytesStart = ("\u00e5" + full).getBytes(cs);
// string hostile to ASCII fast-path optimizations: every other char is ASCII
StringBuilder sb = new StringBuilder();
for (int i = 0; i < full.length(); i++) {
sb.append(i % 2 == 0 ? full.charAt(i) : '\u00e5');
}
nonASCIIBytesEveryOther = sb.toString().getBytes(cs);
// Two slots more than the sample text; presumably headroom for the
// extra non-ASCII char in the Start/End variants - TODO confirm.
chars = new char[full.length() + 2];
// Sanity check: both decoding routes must yield equal strings for every variant.
// NOTE(review): the RuntimeExceptions thrown below are caught by the
// catch clause at the end and only printed, so a mismatch does not
// abort the benchmark - confirm that this is intended.
try {
if (!readStringDirect().equals(readStringReader())) {
System.out.println("direct: " + readStringDirect());
System.out.println("reader: " + readStringReader());
throw new RuntimeException("Unexpectedly different");
}
if (!readStringDirect_NonASCIIEnd().equals(readStringReader_NonASCIIEnd())) {
throw new RuntimeException("Unexpectedly different");
}
if (!readStringDirect_NonASCIIStart().equals(readStringReader_NonASCIIStart())) {
throw new RuntimeException("Unexpectedly different");
}
if (!readStringDirect_NonASCIIEveryOther().equals(readStringReader_NonASCIIEveryOther())) {
throw new RuntimeException("Unexpectedly different");
}
} catch (Exception e) {
e.printStackTrace();
}
}
// Reader route over the plain sample bytes: a single read(char[]) call into chars.
@Benchmark
public String readStringReader() throws Exception {
int len = new InputStreamReader(new ByteArrayInputStream(bytes), cs).read(chars);
return new String(chars, 0, len);
}
// Reader route; the input ends with the extra U+00E5.
@Benchmark
public String readStringReader_NonASCIIEnd() throws Exception {
int len = new InputStreamReader(new ByteArrayInputStream(nonASCIIBytesEnd), cs).read(chars);
return new String(chars, 0, len);
}
// Reader route; the input starts with the extra U+00E5.
@Benchmark
public String readStringReader_NonASCIIStart() throws Exception {
int len = new InputStreamReader(new ByteArrayInputStream(nonASCIIBytesStart), cs).read(chars);
return new String(chars, 0, len);
}
// Reader route; every other char of the input is U+00E5.
@Benchmark
public String readStringReader_NonASCIIEveryOther() throws Exception {
int len = new InputStreamReader(new ByteArrayInputStream(nonASCIIBytesEveryOther), cs).read(chars);
return new String(chars, 0, len);
}
// Direct route: new String(byte[], Charset) over the plain sample bytes.
@Benchmark
public String readStringDirect() throws Exception {
return new String(bytes, cs);
}
// Direct route; the input ends with the extra U+00E5.
@Benchmark
public String readStringDirect_NonASCIIEnd() throws Exception {
return new String(nonASCIIBytesEnd, cs);
}
// Direct route; the input starts with the extra U+00E5.
@Benchmark
public String readStringDirect_NonASCIIStart() throws Exception {
return new String(nonASCIIBytesStart, cs);
}
// Direct route; every other char of the input is U+00E5.
@Benchmark
public String readStringDirect_NonASCIIEveryOther() throws Exception {
return new String(nonASCIIBytesEveryOther, cs);
}
}