8261744: Implement CharsetDecoder ASCII and latin-1 fast-paths

Reviewed-by: naoto, alanb
This commit is contained in:
Claes Redestad 2021-02-19 15:05:25 +00:00
parent efbaedeb81
commit 433096a45e
13 changed files with 453 additions and 263 deletions

View File

@ -1008,6 +1008,27 @@ public final class String
//////////////////////////////// utf8 //////////////////////////////////// //////////////////////////////// utf8 ////////////////////////////////////
/**
 * ASCII fast-path helper: copies the leading run of ASCII bytes from the
 * source byte array into the destination char array. Used via
 * JavaLangAccess from UTF_8 and other charset decoders.
 *
 * @param sa  the source byte array
 * @param sp  index in {@code sa} of the first byte to decode
 * @param da  the destination char array
 * @param dp  index in {@code da} at which the first decoded char is stored
 * @param len the maximum number of bytes to decode
 * @return the number of bytes successfully decoded, at most len
 */
/* package-private */
static int decodeASCII(byte[] sa, int sp, char[] da, int dp, int len) {
    if (StringCoding.hasNegatives(sa, sp, len)) {
        // Slow path: copy byte by byte, stopping before the first
        // negative (non-ASCII) byte or at the end of the range.
        final int limit = sp + len;
        int i = sp;
        for (; i < limit && sa[i] >= 0; i++) {
            da[dp++] = (char) sa[i];
        }
        return i - sp;
    }
    // hasNegatives returned false: inflate the whole range in one call.
    StringLatin1.inflate(sa, sp, da, dp, len);
    return len;
}
private static boolean isNotContinuation(int b) { private static boolean isNotContinuation(int b) {
return (b & 0xc0) != 0x80; return (b & 0xc0) != 0x80;

View File

@ -1981,6 +1981,11 @@ public final class System {
* Initialize the system class. Called after thread initialization. * Initialize the system class. Called after thread initialization.
*/ */
private static void initPhase1() { private static void initPhase1() {
// register the shared secrets - do this first, since SystemProps.initProperties
// might initialize CharsetDecoders that rely on it
setJavaLangAccess();
// VM might invoke JNU_NewStringPlatform() to set those encoding // VM might invoke JNU_NewStringPlatform() to set those encoding
// sensitive properties (user.home, user.name, boot.class.path, etc.) // sensitive properties (user.home, user.name, boot.class.path, etc.)
// during "props" initialization. // during "props" initialization.
@ -2026,8 +2031,6 @@ public final class System {
Thread current = Thread.currentThread(); Thread current = Thread.currentThread();
current.getThreadGroup().add(current); current.getThreadGroup().add(current);
// register shared secrets
setJavaLangAccess();
// Subsystems that are invoked during initialization can invoke // Subsystems that are invoked during initialization can invoke
// VM.isBooted() in order to avoid doing things that should // VM.isBooted() in order to avoid doing things that should
@ -2277,6 +2280,14 @@ public final class System {
return String.getBytesUTF8NoRepl(s); return String.getBytesUTF8NoRepl(s);
} }
// JavaLangAccess hook: forwards the inflating byte[] -> char[] copy
// straight to StringLatin1.inflate with the same arguments.
public void inflateBytesToChars(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
StringLatin1.inflate(src, srcOff, dst, dstOff, len);
}
// JavaLangAccess hook: forwards to String.decodeASCII and returns its
// result (the number of bytes decoded).
public int decodeASCII(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
return String.decodeASCII(src, srcOff, dst, dstOff, len);
}
public void setCause(Throwable t, Throwable cause) { public void setCause(Throwable t, Throwable cause) {
t.setCause(cause); t.setCause(cause);
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -328,6 +328,19 @@ public interface JavaLangAccess {
*/ */
byte[] getBytesUTF8NoRepl(String s); byte[] getBytesUTF8NoRepl(String s);
/**
 * Inflated copy from byte[] to char[], as defined by StringLatin1.inflate.
 *
 * @param src    the source byte array
 * @param srcOff index in {@code src} of the first byte to copy
 * @param dst    the destination char array
 * @param dstOff index in {@code dst} at which the first char is stored
 * @param len    the number of elements to copy
 */
void inflateBytesToChars(byte[] src, int srcOff, char[] dst, int dstOff, int len);
/**
 * Decodes ASCII from the source byte array into the destination
 * char array. Decoding stops before the first negative (non-ASCII)
 * byte, or after {@code len} bytes, whichever comes first.
 *
 * @param src    the source byte array
 * @param srcOff index in {@code src} of the first byte to decode
 * @param dst    the destination char array
 * @param dstOff index in {@code dst} at which the first decoded char is stored
 * @param len    the maximum number of bytes to decode
 * @return the number of bytes successfully decoded, at most len
 */
int decodeASCII(byte[] src, int srcOff, char[] dst, int dstOff, int len);
/** /**
* Set the cause of Throwable * Set the cause of Throwable
* @param cause set t's cause to new value * @param cause set t's cause to new value

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -88,7 +88,7 @@ public class SharedSecrets {
try { try {
Class.forName("java.util.ImmutableCollections$Access", true, null); Class.forName("java.util.ImmutableCollections$Access", true, null);
access = javaUtilCollectionAccess; access = javaUtilCollectionAccess;
} catch (ClassNotFoundException e) {}; } catch (ClassNotFoundException e) {}
} }
return access; return access;
} }
@ -126,7 +126,7 @@ public class SharedSecrets {
try { try {
Class.forName("java.lang.invoke.MethodHandleImpl", true, null); Class.forName("java.lang.invoke.MethodHandleImpl", true, null);
access = javaLangInvokeAccess; access = javaLangInvokeAccess;
} catch (ClassNotFoundException e) {}; } catch (ClassNotFoundException e) {}
} }
return access; return access;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,9 @@
package sun.nio.cs; package sun.nio.cs;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import java.nio.Buffer; import java.nio.Buffer;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.CharBuffer; import java.nio.CharBuffer;
@ -75,6 +78,9 @@ class CESU_8 extends Unicode
private static class Decoder extends CharsetDecoder private static class Decoder extends CharsetDecoder
implements ArrayDecoder { implements ArrayDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private Decoder(Charset cs) { private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f); super(cs, 1.0f, 1.0f);
} }
@ -96,27 +102,6 @@ class CESU_8 extends Unicode
(b2 & 0xc0) != 0x80; (b2 & 0xc0) != 0x80;
} }
// Returns true when any of the three trailing bytes of a 4-byte sequence
// is not a continuation byte of the form 10xxxxxx.
// Well-formed 4-byte sequences:
// [F0] [90..BF] [80..BF] [80..BF]
// [F1..F3] [80..BF] [80..BF] [80..BF]
// [F4] [80..8F] [80..BF] [80..BF]
// only the 80..BF range is checked here, the [0xf0,0x80...] and [0xf4,0x90-...]
// will be checked by Character.isSupplementaryCodePoint(uc)
private static boolean isMalformed4(int b2, int b3, int b4) {
return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
(b4 & 0xc0) != 0x80;
}
// only used when there is less than 4 bytes left in src buffer
// Returns true when b1 == 0xf0 and b2 == 0x90, or when b2 is not a
// continuation byte of the form 10xxxxxx.
// NOTE(review): the 4-byte range table lists [F0] [90..BF] as the valid
// second-byte range, yet (0xf0, 0x90) is reported as malformed here; a range
// test such as (b2 < 0x90 || b2 > 0xbf) may have been intended - confirm.
private static boolean isMalformed4_2(int b1, int b2) {
return (b1 == 0xf0 && b2 == 0x90) ||
(b2 & 0xc0) != 0x80;
}
// Returns true when b3 is not a continuation byte of the form 10xxxxxx
// (its top two bits are not 10).
private static boolean isMalformed4_3(int b3) {
return (b3 & 0xc0) != 0x80;
}
private static CoderResult malformedN(ByteBuffer src, int nb) { private static CoderResult malformedN(ByteBuffer src, int nb) {
switch (nb) { switch (nb) {
case 1: case 1:
@ -202,17 +187,19 @@ class CESU_8 extends Unicode
{ {
// This method is optimized for ASCII input. // This method is optimized for ASCII input.
byte[] sa = src.array(); byte[] sa = src.array();
int sp = src.arrayOffset() + src.position(); int soff = src.arrayOffset();
int sl = src.arrayOffset() + src.limit(); int sp = soff + src.position();
int sl = soff + src.limit();
char[] da = dst.array(); char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position(); int doff = dst.arrayOffset();
int dl = dst.arrayOffset() + dst.limit(); int dp = doff + dst.position();
int dlASCII = dp + Math.min(sl - sp, dl - dp); int dl = doff + dst.limit();
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp));
sp += n;
dp += n;
// ASCII only loop
while (dp < dlASCII && sa[sp] >= 0)
da[dp++] = (char) sa[sp++];
while (sp < sl) { while (sp < sl) {
int b1 = sa[sp]; int b1 = sa[sp];
if (b1 >= 0) { if (b1 >= 0) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -32,6 +32,9 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder; import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult; import java.nio.charset.CoderResult;
import java.util.Arrays; import java.util.Arrays;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import sun.nio.cs.Surrogate; import sun.nio.cs.Surrogate;
import sun.nio.cs.ArrayDecoder; import sun.nio.cs.ArrayDecoder;
import sun.nio.cs.ArrayEncoder; import sun.nio.cs.ArrayEncoder;
@ -111,6 +114,8 @@ public class DoubleByte {
public static class Decoder extends CharsetDecoder public static class Decoder extends CharsetDecoder
implements DelegatableDecoder, ArrayDecoder implements DelegatableDecoder, ArrayDecoder
{ {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
final char[][] b2c; final char[][] b2c;
final char[] b2cSB; final char[] b2cSB;
final int b2Min; final int b2Min;
@ -154,14 +159,21 @@ public class DoubleByte {
protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
byte[] sa = src.array(); byte[] sa = src.array();
int sp = src.arrayOffset() + src.position(); int soff = src.arrayOffset();
int sl = src.arrayOffset() + src.limit(); int sp = soff + src.position();
int sl = soff + src.limit();
char[] da = dst.array(); char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position(); int doff = dst.arrayOffset();
int dl = dst.arrayOffset() + dst.limit(); int dp = doff + dst.position();
int dl = doff + dst.limit();
try { try {
if (isASCIICompatible) {
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));
dp += n;
sp += n;
}
while (sp < sl && dp < dl) { while (sp < sl && dp < dl) {
// inline the decodeSingle/Double() for better performance // inline the decodeSingle/Double() for better performance
int inSize = 1; int inSize = 1;
@ -183,8 +195,8 @@ public class DoubleByte {
return (sp >= sl) ? CoderResult.UNDERFLOW return (sp >= sl) ? CoderResult.UNDERFLOW
: CoderResult.OVERFLOW; : CoderResult.OVERFLOW;
} finally { } finally {
src.position(sp - src.arrayOffset()); src.position(sp - soff);
dst.position(dp - dst.arrayOffset()); dst.position(dp - doff);
} }
} }
@ -342,7 +354,7 @@ public class DoubleByte {
else else
currentState = SBCS; currentState = SBCS;
} else { } else {
char c = UNMAPPABLE_DECODING; char c;
if (currentState == SBCS) { if (currentState == SBCS) {
c = b2cSB[b1]; c = b2cSB[b1];
if (c == UNMAPPABLE_DECODING) if (c == UNMAPPABLE_DECODING)

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -33,6 +33,8 @@ import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult; import java.nio.charset.CoderResult;
import java.util.Objects; import java.util.Objects;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import jdk.internal.vm.annotation.IntrinsicCandidate; import jdk.internal.vm.annotation.IntrinsicCandidate;
public class ISO_8859_1 public class ISO_8859_1
@ -64,6 +66,8 @@ public class ISO_8859_1
private static class Decoder extends CharsetDecoder { private static class Decoder extends CharsetDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private Decoder(Charset cs) { private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f); super(cs, 1.0f, 1.0f);
} }
@ -72,29 +76,25 @@ public class ISO_8859_1
CharBuffer dst) CharBuffer dst)
{ {
byte[] sa = src.array(); byte[] sa = src.array();
int sp = src.arrayOffset() + src.position(); int soff = src.arrayOffset();
int sl = src.arrayOffset() + src.limit(); int sp = soff + src.position();
assert (sp <= sl); int sl = soff + src.limit();
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try { char[] da = dst.array();
while (sp < sl) { int doff = dst.arrayOffset();
byte b = sa[sp]; int dp = doff + dst.position();
if (dp >= dl) int dl = doff + dst.limit();
return CoderResult.OVERFLOW;
da[dp++] = (char)(b & 0xff); int decodeLen = Math.min(sl - sp, dl - dp);
sp++; JLA.inflateBytesToChars(sa, sp, da, dp, decodeLen);
} sp += decodeLen;
return CoderResult.UNDERFLOW; dp += decodeLen;
} finally { src.position(sp - soff);
src.position(sp - src.arrayOffset()); dst.position(dp - doff);
dst.position(dp - dst.arrayOffset()); if (sl - sp > dl - dp) {
return CoderResult.OVERFLOW;
} }
return CoderResult.UNDERFLOW;
} }
private CoderResult decodeBufferLoop(ByteBuffer src, private CoderResult decodeBufferLoop(ByteBuffer src,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,9 @@
package sun.nio.cs; package sun.nio.cs;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import java.nio.Buffer; import java.nio.Buffer;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.CharBuffer; import java.nio.CharBuffer;
@ -48,6 +51,9 @@ public class SingleByte
public static final class Decoder extends CharsetDecoder public static final class Decoder extends CharsetDecoder
implements ArrayDecoder { implements ArrayDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private final char[] b2c; private final char[] b2c;
private final boolean isASCIICompatible; private final boolean isASCIICompatible;
private final boolean isLatin1Decodable; private final boolean isLatin1Decodable;
@ -88,6 +94,11 @@ public class SingleByte
cr = CoderResult.OVERFLOW; cr = CoderResult.OVERFLOW;
} }
if (isASCIICompatible) {
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));
sp += n;
dp += n;
}
while (sp < sl) { while (sp < sl) {
char c = decode(sa[sp]); char c = decode(sa[sp]);
if (c == UNMAPPABLE_DECODING) { if (c == UNMAPPABLE_DECODING) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2001, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -28,7 +28,6 @@
package sun.nio.cs; package sun.nio.cs;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
@ -42,9 +41,9 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult; import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction; import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
public class StreamDecoder extends Reader public class StreamDecoder extends Reader {
{
private static final int MIN_BYTE_BUFFER_SIZE = 32; private static final int MIN_BYTE_BUFFER_SIZE = 32;
private static final int DEFAULT_BYTE_BUFFER_SIZE = 8192; private static final int DEFAULT_BYTE_BUFFER_SIZE = 8192;
@ -72,13 +71,14 @@ public class StreamDecoder extends Reader
throws UnsupportedEncodingException throws UnsupportedEncodingException
{ {
String csn = charsetName; String csn = charsetName;
if (csn == null) if (csn == null) {
csn = Charset.defaultCharset().name(); csn = Charset.defaultCharset().name();
}
try { try {
if (Charset.isSupported(csn)) return new StreamDecoder(in, lock, Charset.forName(csn));
return new StreamDecoder(in, lock, Charset.forName(csn)); } catch (IllegalCharsetNameException | UnsupportedCharsetException x) {
} catch (IllegalCharsetNameException x) { } throw new UnsupportedEncodingException (csn);
throw new UnsupportedEncodingException (csn); }
} }
public static StreamDecoder forInputStreamReader(InputStream in, public static StreamDecoder forInputStreamReader(InputStream in,
@ -133,7 +133,7 @@ public class StreamDecoder extends Reader
} }
// Convert more bytes // Convert more bytes
char cb[] = new char[2]; char[] cb = new char[2];
int n = read(cb, 0, 2); int n = read(cb, 0, 2);
switch (n) { switch (n) {
case -1: case -1:
@ -151,7 +151,7 @@ public class StreamDecoder extends Reader
} }
} }
public int read(char cbuf[], int offset, int length) throws IOException { public int read(char[] cbuf, int offset, int length) throws IOException {
int off = offset; int off = offset;
int len = length; int len = length;
synchronized (lock) { synchronized (lock) {
@ -215,54 +215,28 @@ public class StreamDecoder extends Reader
// -- Charset-based stream decoder impl -- // -- Charset-based stream decoder impl --
// In the early stages of the build we haven't yet built the NIO native private final Charset cs;
// code, so guard against that by catching the first UnsatisfiedLinkError private final CharsetDecoder decoder;
// and setting this flag so that later attempts fail quickly. private final ByteBuffer bb;
//
private static volatile boolean channelsAvailable = true;
private static FileChannel getChannel(FileInputStream in) {
if (!channelsAvailable)
return null;
try {
return in.getChannel();
} catch (UnsatisfiedLinkError x) {
channelsAvailable = false;
return null;
}
}
private Charset cs;
private CharsetDecoder decoder;
private ByteBuffer bb;
// Exactly one of these is non-null // Exactly one of these is non-null
private InputStream in; private final InputStream in;
private ReadableByteChannel ch; private final ReadableByteChannel ch;
StreamDecoder(InputStream in, Object lock, Charset cs) { StreamDecoder(InputStream in, Object lock, Charset cs) {
this(in, lock, this(in, lock,
cs.newDecoder() cs.newDecoder()
.onMalformedInput(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE)); .onUnmappableCharacter(CodingErrorAction.REPLACE));
} }
StreamDecoder(InputStream in, Object lock, CharsetDecoder dec) { StreamDecoder(InputStream in, Object lock, CharsetDecoder dec) {
super(lock); super(lock);
this.cs = dec.charset(); this.cs = dec.charset();
this.decoder = dec; this.decoder = dec;
// This path disabled until direct buffers are faster
if (false && in instanceof FileInputStream) {
ch = getChannel((FileInputStream)in);
if (ch != null)
bb = ByteBuffer.allocateDirect(DEFAULT_BYTE_BUFFER_SIZE);
}
if (ch == null) {
this.in = in; this.in = in;
this.ch = null; this.ch = null;
bb = ByteBuffer.allocate(DEFAULT_BYTE_BUFFER_SIZE); bb = ByteBuffer.allocate(DEFAULT_BYTE_BUFFER_SIZE);
}
bb.flip(); // So that bb is initially empty bb.flip(); // So that bb is initially empty
} }
@ -282,35 +256,34 @@ public class StreamDecoder extends Reader
private int readBytes() throws IOException { private int readBytes() throws IOException {
bb.compact(); bb.compact();
try { try {
if (ch != null) { if (ch != null) {
// Read from the channel // Read from the channel
int n = ch.read(bb); int n = ch.read(bb);
if (n < 0) if (n < 0)
return n; return n;
} else { } else {
// Read from the input stream, and then update the buffer // Read from the input stream, and then update the buffer
int lim = bb.limit(); int lim = bb.limit();
int pos = bb.position(); int pos = bb.position();
assert (pos <= lim); assert (pos <= lim);
int rem = (pos <= lim ? lim - pos : 0); int rem = (pos <= lim ? lim - pos : 0);
assert rem > 0; int n = in.read(bb.array(), bb.arrayOffset() + pos, rem);
int n = in.read(bb.array(), bb.arrayOffset() + pos, rem); if (n < 0)
if (n < 0) return n;
return n; if (n == 0)
if (n == 0) throw new IOException("Underlying input stream returned zero bytes");
throw new IOException("Underlying input stream returned zero bytes"); assert (n <= rem) : "n = " + n + ", rem = " + rem;
assert (n <= rem) : "n = " + n + ", rem = " + rem; bb.position(pos + n);
bb.position(pos + n); }
}
} finally { } finally {
// Flip even when an IOException is thrown, // Flip even when an IOException is thrown,
// otherwise the stream will stutter // otherwise the stream will stutter
bb.flip(); bb.flip();
} }
int rem = bb.remaining(); int rem = bb.remaining();
assert (rem != 0) : rem; assert (rem != 0) : rem;
return rem; return rem;
} }
int implRead(char[] cbuf, int off, int end) throws IOException { int implRead(char[] cbuf, int off, int end) throws IOException {
@ -322,44 +295,46 @@ public class StreamDecoder extends Reader
assert (end - off > 1); assert (end - off > 1);
CharBuffer cb = CharBuffer.wrap(cbuf, off, end - off); CharBuffer cb = CharBuffer.wrap(cbuf, off, end - off);
if (cb.position() != 0) if (cb.position() != 0) {
// Ensure that cb[0] == cbuf[off] // Ensure that cb[0] == cbuf[off]
cb = cb.slice(); cb = cb.slice();
}
boolean eof = false; boolean eof = false;
for (;;) { for (;;) {
CoderResult cr = decoder.decode(bb, cb, eof); CoderResult cr = decoder.decode(bb, cb, eof);
if (cr.isUnderflow()) { if (cr.isUnderflow()) {
if (eof) if (eof)
break;
if (!cb.hasRemaining())
break;
if ((cb.position() > 0) && !inReady())
break; // Block at most once
int n = readBytes();
if (n < 0) {
eof = true;
if ((cb.position() == 0) && (!bb.hasRemaining()))
break; break;
decoder.reset(); if (!cb.hasRemaining())
break;
if ((cb.position() > 0) && !inReady())
break; // Block at most once
int n = readBytes();
if (n < 0) {
eof = true;
if ((cb.position() == 0) && (!bb.hasRemaining()))
break;
decoder.reset();
}
continue;
} }
continue; if (cr.isOverflow()) {
} assert cb.position() > 0;
if (cr.isOverflow()) { break;
assert cb.position() > 0; }
break; cr.throwException();
}
cr.throwException();
} }
if (eof) { if (eof) {
// ## Need to flush decoder // ## Need to flush decoder
decoder.reset(); decoder.reset();
} }
if (cb.position() == 0) { if (cb.position() == 0) {
if (eof) if (eof) {
return -1; return -1;
}
assert false; assert false;
} }
return cb.position(); return cb.position();
@ -373,22 +348,22 @@ public class StreamDecoder extends Reader
private boolean inReady() { private boolean inReady() {
try { try {
return (((in != null) && (in.available() > 0)) return (((in != null) && (in.available() > 0))
|| (ch instanceof FileChannel)); // ## RBC.available()? || (ch instanceof FileChannel)); // ## RBC.available()?
} catch (IOException x) { } catch (IOException x) {
return false; return false;
} }
} }
boolean implReady() { boolean implReady() {
return bb.hasRemaining() || inReady(); return bb.hasRemaining() || inReady();
} }
void implClose() throws IOException { void implClose() throws IOException {
if (ch != null) if (ch != null) {
ch.close(); ch.close();
else } else {
in.close(); in.close();
}
} }
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2001, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -25,7 +25,6 @@
package sun.nio.cs; package sun.nio.cs;
import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
@ -38,6 +37,7 @@ import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult; import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction; import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
public class StreamEncoder extends Writer public class StreamEncoder extends Writer
{ {
@ -58,13 +58,14 @@ public class StreamEncoder extends Writer
throws UnsupportedEncodingException throws UnsupportedEncodingException
{ {
String csn = charsetName; String csn = charsetName;
if (csn == null) if (csn == null) {
csn = Charset.defaultCharset().name(); csn = Charset.defaultCharset().name();
}
try { try {
if (Charset.isSupported(csn)) return new StreamEncoder(out, lock, Charset.forName(csn));
return new StreamEncoder(out, lock, Charset.forName(csn)); } catch (IllegalCharsetNameException | UnsupportedCharsetException x) {
} catch (IllegalCharsetNameException x) { } throw new UnsupportedEncodingException (csn);
throw new UnsupportedEncodingException (csn); }
} }
public static StreamEncoder forOutputStreamWriter(OutputStream out, public static StreamEncoder forOutputStreamWriter(OutputStream out,
@ -114,12 +115,12 @@ public class StreamEncoder extends Writer
} }
public void write(int c) throws IOException { public void write(int c) throws IOException {
char cbuf[] = new char[1]; char[] cbuf = new char[1];
cbuf[0] = (char) c; cbuf[0] = (char) c;
write(cbuf, 0, 1); write(cbuf, 0, 1);
} }
public void write(char cbuf[], int off, int len) throws IOException { public void write(char[] cbuf, int off, int len) throws IOException {
synchronized (lock) { synchronized (lock) {
ensureOpen(); ensureOpen();
if ((off < 0) || (off > cbuf.length) || (len < 0) || if ((off < 0) || (off > cbuf.length) || (len < 0) ||
@ -136,7 +137,7 @@ public class StreamEncoder extends Writer
/* Check the len before creating a char buffer */ /* Check the len before creating a char buffer */
if (len < 0) if (len < 0)
throw new IndexOutOfBoundsException(); throw new IndexOutOfBoundsException();
char cbuf[] = new char[len]; char[] cbuf = new char[len];
str.getChars(off, off + len, cbuf, 0); str.getChars(off, off + len, cbuf, 0);
write(cbuf, 0, len); write(cbuf, 0, len);
} }
@ -179,13 +180,13 @@ public class StreamEncoder extends Writer
// -- Charset-based stream encoder impl -- // -- Charset-based stream encoder impl --
private Charset cs; private final Charset cs;
private CharsetEncoder encoder; private final CharsetEncoder encoder;
private ByteBuffer bb; private final ByteBuffer bb;
// Exactly one of these is non-null // Exactly one of these is non-null
private final OutputStream out; private final OutputStream out;
private WritableByteChannel ch; private final WritableByteChannel ch;
// Leftover first char in a surrogate pair // Leftover first char in a surrogate pair
private boolean haveLeftoverChar = false; private boolean haveLeftoverChar = false;
@ -194,9 +195,9 @@ public class StreamEncoder extends Writer
private StreamEncoder(OutputStream out, Object lock, Charset cs) { private StreamEncoder(OutputStream out, Object lock, Charset cs) {
this(out, lock, this(out, lock,
cs.newEncoder() cs.newEncoder()
.onMalformedInput(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE)); .onUnmappableCharacter(CodingErrorAction.REPLACE));
} }
private StreamEncoder(OutputStream out, Object lock, CharsetEncoder enc) { private StreamEncoder(OutputStream out, Object lock, CharsetEncoder enc) {
@ -205,16 +206,7 @@ public class StreamEncoder extends Writer
this.ch = null; this.ch = null;
this.cs = enc.charset(); this.cs = enc.charset();
this.encoder = enc; this.encoder = enc;
this.bb = ByteBuffer.allocate(DEFAULT_BYTE_BUFFER_SIZE);
// This path disabled until direct buffers are faster
if (false && out instanceof FileOutputStream) {
ch = ((FileOutputStream)out).getChannel();
if (ch != null)
bb = ByteBuffer.allocateDirect(DEFAULT_BYTE_BUFFER_SIZE);
}
if (ch == null) {
bb = ByteBuffer.allocate(DEFAULT_BYTE_BUFFER_SIZE);
}
} }
private StreamEncoder(WritableByteChannel ch, CharsetEncoder enc, int mbc) { private StreamEncoder(WritableByteChannel ch, CharsetEncoder enc, int mbc) {
@ -234,16 +226,16 @@ public class StreamEncoder extends Writer
assert (pos <= lim); assert (pos <= lim);
int rem = (pos <= lim ? lim - pos : 0); int rem = (pos <= lim ? lim - pos : 0);
if (rem > 0) { if (rem > 0) {
if (ch != null) { if (ch != null) {
if (ch.write(bb) != rem) int wc = ch.write(bb);
assert false : rem; assert wc == rem : rem;
} else { } else {
out.write(bb.array(), bb.arrayOffset() + pos, rem); out.write(bb.array(), bb.arrayOffset() + pos, rem);
} }
} }
bb.clear(); bb.clear();
} }
private void flushLeftoverChar(CharBuffer cb, boolean endOfInput) private void flushLeftoverChar(CharBuffer cb, boolean endOfInput)
throws IOException throws IOException
@ -283,7 +275,7 @@ public class StreamEncoder extends Writer
haveLeftoverChar = false; haveLeftoverChar = false;
} }
void implWrite(char cbuf[], int off, int len) void implWrite(char[] cbuf, int off, int len)
throws IOException throws IOException
{ {
CharBuffer cb = CharBuffer.wrap(cbuf, off, len); CharBuffer cb = CharBuffer.wrap(cbuf, off, len);
@ -317,14 +309,16 @@ public class StreamEncoder extends Writer
} }
void implFlushBuffer() throws IOException { void implFlushBuffer() throws IOException {
if (bb.position() > 0) if (bb.position() > 0) {
writeBytes(); writeBytes();
}
} }
void implFlush() throws IOException { void implFlush() throws IOException {
implFlushBuffer(); implFlushBuffer();
if (out != null) if (out != null) {
out.flush(); out.flush();
}
} }
void implClose() throws IOException { void implClose() throws IOException {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,9 @@
package sun.nio.cs; package sun.nio.cs;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.CharBuffer; import java.nio.CharBuffer;
import java.nio.charset.Charset; import java.nio.charset.Charset;
@ -60,6 +63,8 @@ public class US_ASCII
private static class Decoder extends CharsetDecoder { private static class Decoder extends CharsetDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private Decoder(Charset cs) { private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f); super(cs, 1.0f, 1.0f);
} }
@ -68,33 +73,28 @@ public class US_ASCII
CharBuffer dst) CharBuffer dst)
{ {
byte[] sa = src.array(); byte[] sa = src.array();
int sp = src.arrayOffset() + src.position(); int soff = src.arrayOffset();
int sl = src.arrayOffset() + src.limit(); int sp = soff + src.position();
assert (sp <= sl); int sl = soff + src.limit();
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try { char[] da = dst.array();
while (sp < sl) { int doff = dst.arrayOffset();
byte b = sa[sp]; int dp = doff + dst.position();
if (b >= 0) { int dl = doff + dst.limit();
if (dp >= dl)
return CoderResult.OVERFLOW; // ASCII only loop
da[dp++] = (char)b; int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp));
sp++; sp += n;
continue; dp += n;
} src.position(sp - soff);
return CoderResult.malformedForLength(1); dst.position(dp - doff);
if (sp < sl) {
if (dp >= dl) {
return CoderResult.OVERFLOW;
} }
return CoderResult.UNDERFLOW; return CoderResult.malformedForLength(1);
} finally {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
} }
return CoderResult.UNDERFLOW;
} }
private CoderResult decodeBufferLoop(ByteBuffer src, private CoderResult decodeBufferLoop(ByteBuffer src,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,9 @@
package sun.nio.cs; package sun.nio.cs;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import java.nio.Buffer; import java.nio.Buffer;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.CharBuffer; import java.nio.CharBuffer;
@ -82,6 +85,8 @@ public final class UTF_8 extends Unicode {
private static class Decoder extends CharsetDecoder { private static class Decoder extends CharsetDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private Decoder(Charset cs) { private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f); super(cs, 1.0f, 1.0f);
} }
@ -129,15 +134,6 @@ public final class UTF_8 extends Unicode {
return (b3 & 0xc0) != 0x80; return (b3 & 0xc0) != 0x80;
} }
private static CoderResult lookupN(ByteBuffer src, int n)
{
for (int i = 1; i < n; i++) {
if (isNotContinuation(src.get()))
return CoderResult.malformedForLength(i);
}
return CoderResult.malformedForLength(n);
}
private static CoderResult malformedN(ByteBuffer src, int nb) { private static CoderResult malformedN(ByteBuffer src, int nb) {
switch (nb) { switch (nb) {
case 1: case 1:
@ -223,17 +219,19 @@ public final class UTF_8 extends Unicode {
{ {
// This method is optimized for ASCII input. // This method is optimized for ASCII input.
byte[] sa = src.array(); byte[] sa = src.array();
int sp = src.arrayOffset() + src.position(); int soff = src.arrayOffset();
int sl = src.arrayOffset() + src.limit(); int sp = soff + src.position();
int sl = soff + src.limit();
char[] da = dst.array(); char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position(); int doff = dst.arrayOffset();
int dl = dst.arrayOffset() + dst.limit(); int dp = doff + dst.position();
int dlASCII = dp + Math.min(sl - sp, dl - dp); int dl = doff + dst.limit();
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp));
sp += n;
dp += n;
// ASCII only loop
while (dp < dlASCII && sa[sp] >= 0)
da[dp++] = (char) sa[sp++];
while (sp < sl) { while (sp < sl) {
int b1 = sa[sp]; int b1 = sa[sp];
if (b1 >= 0) { if (b1 >= 0) {
@ -415,14 +413,6 @@ public final class UTF_8 extends Unicode {
else else
return decodeBufferLoop(src, dst); return decodeBufferLoop(src, dst);
} }
private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
{
if (bb == null)
bb = ByteBuffer.wrap(ba);
bb.position(sp);
return bb;
}
} }
private static final class Encoder extends CharsetEncoder { private static final class Encoder extends CharsetEncoder {

View File

@ -0,0 +1,176 @@
/*
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.io;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.concurrent.TimeUnit;
/**
 * Tests the overheads of reading encoded byte arrays via StreamDecoder
 * (reached through {@link InputStreamReader}) and compares them with decoding
 * the same bytes directly through {@code new String(byte[], Charset)}.
 *
 * <p>Each variant is run against four inputs built in {@link #setup()}:
 * pure ASCII text, the same text with one non-ASCII character appended,
 * with one prepended, and with every other character replaced by a
 * non-ASCII character.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@Warmup(time=2, iterations=5)
@Measurement(time=3, iterations=5)
@Fork(value=2, jvmArgs="-Xmx1g")
public class ByteStreamDecoder {

    /** Name of the charset used both to encode the inputs and to decode them. */
    @Param({"US-ASCII", "ISO-8859-1", "UTF-8", "ISO-8859-6", "MS932"})
    private String charsetName;

    /** Length, in chars, of the base text the inputs are derived from. */
    @Param({"256", "4096", "25000"})
    private int length;

    /** The base text encoded with {@link #cs}. */
    private byte[] bytes;

    /** The base text followed by U+00E5, encoded with {@link #cs}. */
    private byte[] nonASCIIBytesEnd;

    /** U+00E5 followed by the base text, encoded with {@link #cs}. */
    private byte[] nonASCIIBytesStart;

    /** The base text with every odd-indexed char replaced by U+00E5, encoded with {@link #cs}. */
    private byte[] nonASCIIBytesEveryOther;

    /** Destination buffer for the reader benchmarks; sized base length + 2. */
    private char[] chars;

    /** Charset resolved from {@link #charsetName}. */
    private Charset cs;

    /**
     * Builds the encoded inputs for the current parameter combination and
     * verifies that the reader-based and direct decoding paths agree on
     * every input.
     *
     * @throws IOException if reading through the {@code InputStreamReader}
     *         fails during verification
     * @throws RuntimeException if the two decoding paths produce different
     *         strings for any input
     */
    @Setup
    public void setup() throws IOException {
        var s = """
            Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed non
            magna augue. Sed tristique ante id maximus interdum. Suspendisse
            potenti. Aliquam molestie metus vitae magna gravida egestas.
            Phasellus eleifend tortor sit amet neque euismod, vitae luctus
            ante viverra. Sed quis justo ultrices, eleifend dui sed, egestas
            lorem. Mauris ipsum ex, interdum eu turpis sed, fermentum efficitur
            lorem. Sed vel imperdiet libero, eget ullamcorper sem. Praesent
            gravida arcu quis ipsum viverra tristique. Quisque maximus
            elit nec nisi vulputate tempor. Integer aliquet tortor vel
            vehicula efficitur. Sed neque felis, ultricies eu leo ultricies,
            egestas placerat dolor. Etiam iaculis magna quis lacinia
            tincidunt. Donec in tellus volutpat, semper nunc ornare,
            tempus erat. Donec volutpat mauris in arcu mattis sollicitudin.
            Morbi vestibulum ipsum sed erat porta, mollis commodo nisi
            gravida.
            """;

        // Whole copies of the sample followed by a prefix of it, giving exactly
        // 'length' chars. repeat(0) and substring(0, 0) both yield "", so the
        // zero cases need no special handling.
        String full = s.repeat(length / s.length()) + s.substring(0, length % s.length());

        cs = Charset.forName(charsetName);
        bytes = full.getBytes(cs);
        nonASCIIBytesEnd = (full + "\u00e5").getBytes(cs);
        nonASCIIBytesStart = ("\u00e5" + full).getBytes(cs);

        // string hostile to ASCII fast-path optimizations: every other char is ASCII
        StringBuilder sb = new StringBuilder(full.length());
        for (int i = 0; i < full.length(); i++) {
            sb.append(i % 2 == 0 ? full.charAt(i) : '\u00e5');
        }
        nonASCIIBytesEveryOther = sb.toString().getBytes(cs);

        // Two spare slots: the longest decoded input is the base text plus one char.
        chars = new char[full.length() + 2];

        verifyReaderMatchesDirect();
    }

    /**
     * Decodes every input through both paths and fails if any pair differs.
     *
     * <p>Failures are propagated rather than logged: a trial whose decoders
     * disagree would otherwise be measured and reported as if it were valid.
     */
    private void verifyReaderMatchesDirect() throws IOException {
        try {
            String direct = readStringDirect();
            String reader = readStringReader();
            if (!direct.equals(reader)) {
                System.out.println("direct: " + direct);
                System.out.println("reader: " + reader);
                throw new RuntimeException("Unexpectedly different");
            }
            requireSame(readStringDirect_NonASCIIEnd(), readStringReader_NonASCIIEnd());
            requireSame(readStringDirect_NonASCIIStart(), readStringReader_NonASCIIStart());
            requireSame(readStringDirect_NonASCIIEveryOther(), readStringReader_NonASCIIEveryOther());
        } catch (IOException | RuntimeException e) {
            throw e;
        } catch (Exception e) {
            // The benchmark methods are declared 'throws Exception'; wrap anything
            // that is neither an I/O failure nor already unchecked, keeping the cause.
            throw new IllegalStateException("Decoder sanity check failed", e);
        }
    }

    /** Throws if the directly decoded and reader-decoded strings differ. */
    private static void requireSame(String direct, String reader) {
        if (!direct.equals(reader)) {
            throw new RuntimeException("Unexpectedly different");
        }
    }

    // NOTE(review): the reader benchmarks below never close the InputStreamReader.
    // It wraps an in-memory ByteArrayInputStream, so presumably nothing leaks;
    // left as-is so the measured work stays unchanged.

    /** Decodes the ASCII-only input with a single {@code read} into {@link #chars}. */
    @Benchmark
    public String readStringReader() throws Exception {
        int len = new InputStreamReader(new ByteArrayInputStream(bytes), cs).read(chars);
        return new String(chars, 0, len);
    }

    /** Reader variant for the input ending in a non-ASCII character. */
    @Benchmark
    public String readStringReader_NonASCIIEnd() throws Exception {
        int len = new InputStreamReader(new ByteArrayInputStream(nonASCIIBytesEnd), cs).read(chars);
        return new String(chars, 0, len);
    }

    /** Reader variant for the input starting with a non-ASCII character. */
    @Benchmark
    public String readStringReader_NonASCIIStart() throws Exception {
        int len = new InputStreamReader(new ByteArrayInputStream(nonASCIIBytesStart), cs).read(chars);
        return new String(chars, 0, len);
    }

    /** Reader variant for the input where every other character is non-ASCII. */
    @Benchmark
    public String readStringReader_NonASCIIEveryOther() throws Exception {
        int len = new InputStreamReader(new ByteArrayInputStream(nonASCIIBytesEveryOther), cs).read(chars);
        return new String(chars, 0, len);
    }

    /** Decodes the ASCII-only input directly via the {@code String} constructor. */
    @Benchmark
    public String readStringDirect() throws Exception {
        return new String(bytes, cs);
    }

    /** Direct variant for the input ending in a non-ASCII character. */
    @Benchmark
    public String readStringDirect_NonASCIIEnd() throws Exception {
        return new String(nonASCIIBytesEnd, cs);
    }

    /** Direct variant for the input starting with a non-ASCII character. */
    @Benchmark
    public String readStringDirect_NonASCIIStart() throws Exception {
        return new String(nonASCIIBytesStart, cs);
    }

    /** Direct variant for the input where every other character is non-ASCII. */
    @Benchmark
    public String readStringDirect_NonASCIIEveryOther() throws Exception {
        return new String(nonASCIIBytesEveryOther, cs);
    }
}