8201276: (fs) Add methods to Files for reading/writing a string from/to a file

Reviewed-by: rriggs, smarks, sherman, forax, alanb, mli
This commit is contained in:
Joe Wang 2018-06-13 12:50:45 -07:00
parent b5eadc5721
commit ca487166f4
5 changed files with 635 additions and 3 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -589,6 +589,10 @@ class StringCoding {
}
// Convenience overload: encodes with replacement enabled by delegating to the
// three-argument form with doReplace set to true.
private static byte[] encode8859_1(byte coder, byte[] val) {
    final boolean doReplace = true;
    return encode8859_1(coder, val, doReplace);
}
private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
if (coder == LATIN1) {
return Arrays.copyOf(val, val.length);
}
@ -602,6 +606,9 @@ class StringCoding {
sp = sp + ret;
dp = dp + ret;
if (ret != len) {
if (!doReplace) {
throwMalformed(sp, 1);
}
char c = StringUTF16.getChar(val, sp++);
if (Character.isHighSurrogate(c) && sp < sl &&
Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
@ -676,6 +683,12 @@ class StringCoding {
", length : " + nb);
}
// Reports malformed input at the index of the first negative byte in val
// (or at val.length when no byte is negative), with a length of 1.
private static void throwMalformed(byte[] val) {
    int firstNegative;
    for (firstNegative = 0; firstNegative < val.length; firstNegative++) {
        if (val[firstNegative] < 0) {
            break;
        }
    }
    throwMalformed(firstNegative, 1);
}
private static char repl = '\ufffd';
private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
@ -931,7 +944,7 @@ class StringCoding {
////////////////////// for j.u.z.ZipCoder //////////////////////////
/*
* Throws iae, instead of replacing, if malformed or unmappble.
* Throws iae, instead of replacing, if malformed or unmappable.
*/
static String newStringUTF8NoRepl(byte[] src, int off, int len) {
if (COMPACT_STRINGS && !hasNegatives(src, off, len))
@ -941,9 +954,137 @@ class StringCoding {
}
/*
* Throws iae, instead of replacing, if unmappble.
* Throws iae, instead of replacing, if unmappable.
*/
// Encodes s as UTF-8 via encodeUTF8 with its replacement flag set to false.
static byte[] getBytesUTF8NoRepl(String s) {
    final byte coder = s.coder();
    final byte[] value = s.value();
    return encodeUTF8(coder, value, false);
}
////////////////////// for j.n.f.Files //////////////////////////
// True when no byte of src is negative, i.e. every byte is in 0x00..0x7F.
private static boolean isASCII(byte[] src) {
    final boolean anyNegative = hasNegatives(src, 0, src.length);
    return !anyNegative;
}
// Builds a String from Latin-1 bytes: src is handed to the String directly
// when compact strings are enabled, otherwise it is inflated and tagged UTF16.
private static String newStringLatin1(byte[] src) {
    if (!COMPACT_STRINGS) {
        byte[] inflated = StringLatin1.inflate(src, 0, src.length);
        return new String(inflated, UTF16);
    }
    return new String(src, LATIN1);
}
/*
 * Decodes src using cs. A CharacterCodingException raised by the generic
 * decoder path is rethrown as iae instead of being replaced.
 */
static String newStringNoRepl(byte[] src, Charset cs) {
    if (cs == UTF_8) {
        if (COMPACT_STRINGS && isASCII(src)) {
            return new String(src, LATIN1);
        }
        Result utf8 = decodeUTF8_0(src, 0, src.length, false);
        return new String(utf8.value, utf8.coder);
    }
    if (cs == ISO_8859_1) {
        return newStringLatin1(src);
    }
    if (cs == US_ASCII) {
        if (!isASCII(src)) {
            throwMalformed(src);
        } else {
            return newStringLatin1(src);
        }
    }

    CharsetDecoder decoder = cs.newDecoder();
    // ascii fastpath
    boolean asciiCompatible = (decoder instanceof ArrayDecoder)
            && ((ArrayDecoder) decoder).isASCIICompatible();
    if (asciiCompatible && isASCII(src)) {
        return newStringLatin1(src);
    }

    final int byteCount = src.length;
    if (byteCount == 0) {
        return "";
    }
    char[] chars = new char[scale(byteCount, decoder.maxCharsPerByte())];

    // Hand the decoder a private copy when the charset class has a non-null
    // class loader and a security manager is installed.
    boolean copyInput = cs.getClass().getClassLoader0() != null
            && System.getSecurityManager() != null;
    if (copyInput) {
        src = Arrays.copyOf(src, byteCount);
    }

    ByteBuffer in = ByteBuffer.wrap(src);
    CharBuffer out = CharBuffer.wrap(chars);
    try {
        CoderResult status = decoder.decode(in, out, true);
        if (!status.isUnderflow()) {
            status.throwException();
        }
        status = decoder.flush(out);
        if (!status.isUnderflow()) {
            status.throwException();
        }
    } catch (CharacterCodingException x) {
        throw new IllegalArgumentException(x); // todo
    }

    Result decoded = resultCached.get().with(chars, 0, out.position());
    return new String(decoded.value, decoded.coder);
}
/*
* Throws iae, instead of replacing, if unmappable.
*/
// Encodes s into bytes using cs. On the ASCII fast paths the String's own
// value array is returned without copying, so the result must be treated as
// read-only by the caller. Encoding failures surface as
// IllegalArgumentException so that callers catching iae see them.
static byte[] getBytesNoRepl(String s, Charset cs) {
    byte[] val = s.value();
    byte coder = s.coder();
    if (cs == UTF_8) {
        // isASCII only looks at sign bits. For a UTF16-coded value the array
        // holds char halves, so "no negative byte" does not mean ASCII text;
        // the no-copy fastpath is therefore valid for LATIN1 values only.
        if (coder == LATIN1 && isASCII(val)) {
            return val;
        }
        return encodeUTF8(coder, val, false);
    }
    if (cs == ISO_8859_1) {
        if (coder == LATIN1) {
            return val;
        }
        return encode8859_1(coder, val, false);
    }
    if (cs == US_ASCII) {
        if (coder == LATIN1) {
            if (isASCII(val)) {
                return val;
            } else {
                throwMalformed(val);
            }
        }
    }
    CharsetEncoder ce = cs.newEncoder();
    // fastpath for ascii compatible
    if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
            ((ArrayEncoder)ce).isASCIICompatible() &&
            isASCII(val)))) {
        return val;
    }
    int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
    int en = scale(len, ce.maxBytesPerChar());
    byte[] ba = new byte[en];
    if (len == 0) {
        return ba;
    }
    if (ce instanceof ArrayEncoder) {
        int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
                                      : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
        if (blen != -1) {
            return safeTrim(ba, blen, true);
        }
    }
    boolean isTrusted = cs.getClass().getClassLoader0() == null ||
                        System.getSecurityManager() == null;
    char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
                                   : StringUTF16.toChars(val);
    ByteBuffer bb = ByteBuffer.wrap(ba);
    CharBuffer cb = CharBuffer.wrap(ca, 0, len);
    try {
        CoderResult cr = ce.encode(cb, bb, true);
        if (!cr.isUnderflow())
            cr.throwException();
        cr = ce.flush(bb);
        if (!cr.isUnderflow())
            cr.throwException();
    } catch (CharacterCodingException x) {
        // Report as iae (not Error): this is the documented failure mode and
        // the one Files.writeString translates into an IOException.
        throw new IllegalArgumentException(x);
    }
    return safeTrim(ba, bb.position(), isTrusted);
}
}

View File

@ -47,6 +47,7 @@ import java.security.AccessController;
import java.security.PrivilegedAction;
import java.nio.channels.Channel;
import java.nio.channels.spi.SelectorProvider;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@ -2152,6 +2153,14 @@ public final class System {
return ModuleLayer.layers(loader);
}
// Bridges to StringCoding.newStringNoRepl with the same arguments.
public String newStringNoRepl(byte[] bytes, Charset cs) {
    final String decoded = StringCoding.newStringNoRepl(bytes, cs);
    return decoded;
}
// Bridges to StringCoding.getBytesNoRepl with the same arguments.
public byte[] getBytesNoRepl(String s, Charset cs) {
    final byte[] encoded = StringCoding.getBytesNoRepl(s, cs);
    return encoded;
}
// Bridges to StringCoding.newStringUTF8NoRepl with the same arguments.
public String newStringUTF8NoRepl(byte[] bytes, int off, int len) {
    final String decoded = StringCoding.newStringUTF8NoRepl(bytes, off, len);
    return decoded;
}

View File

@ -3121,6 +3121,9 @@ public final class Files {
*/
private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;
// Shared-secrets access object for java.lang; readString and writeString call
// its newStringNoRepl / getBytesNoRepl methods.
private static final jdk.internal.misc.JavaLangAccess JLA =
jdk.internal.misc.SharedSecrets.getJavaLangAccess();
/**
* Reads all the bytes from an input stream. Uses {@code initialSize} as a hint
* about how many bytes the stream will have.
@ -3202,6 +3205,81 @@ public final class Files {
}
}
/**
* Reads all content from a file into a string, decoding from bytes to characters
* using the {@link StandardCharsets#UTF_8 UTF-8} {@link Charset charset}.
* The method ensures that the file is closed when all content has been read
* or an I/O error, or other runtime exception, is thrown.
*
* <p> This method is equivalent to:
* {@code readString(path, StandardCharsets.UTF_8) }
*
* @param path the path to the file
*
* @return a String containing the content read from the file
*
* @throws IOException
* if an I/O error occurs reading from the file or a malformed or
* unmappable byte sequence is read
* @throws OutOfMemoryError
* if the file is extremely large, for example larger than {@code 2GB}
* @throws SecurityException
* In the case of the default provider, and a security manager is
* installed, the {@link SecurityManager#checkRead(String) checkRead}
* method is invoked to check read access to the file.
*
* @since 11
*/
public static String readString(Path path) throws IOException {
    // UTF-8 is the charset used when the caller does not name one.
    final Charset utf8 = StandardCharsets.UTF_8;
    return readString(path, utf8);
}
/**
* Reads all characters from a file into a string, decoding from bytes to characters
* using the specified {@linkplain Charset charset}.
* The method ensures that the file is closed when all content has been read
* or an I/O error, or other runtime exception, is thrown.
*
* <p> This method reads all content including the line separators in the middle
* and/or at the end. The resulting string will contain line separators as they
* appear in the file.
*
* @apiNote
* This method is intended for simple cases where it is appropriate and convenient
* to read the content of a file into a String. It is not intended for reading
* very large files.
*
*
*
* @param path the path to the file
* @param cs the charset to use for decoding
*
* @return a String containing the content read from the file
*
* @throws IOException
* if an I/O error occurs reading from the file or a malformed or
* unmappable byte sequence is read
* @throws OutOfMemoryError
* if the file is extremely large, for example larger than {@code 2GB}
* @throws SecurityException
* In the case of the default provider, and a security manager is
* installed, the {@link SecurityManager#checkRead(String) checkRead}
* method is invoked to check read access to the file.
*
* @since 11
*/
public static String readString(Path path, Charset cs) throws IOException {
    Objects.requireNonNull(path);
    Objects.requireNonNull(cs);

    final byte[] content = readAllBytes(path);
    final String decoded;
    try {
        decoded = JLA.newStringNoRepl(content, cs);
    } catch (IllegalArgumentException iae) {
        // newStringNoRepl signals undecodable bytes with iae; surface it as
        // the IOException this method documents.
        throw new IOException(iae);
    }
    return decoded;
}
/**
* Read all lines from a file. This method ensures that the file is
* closed when all bytes have been read or an I/O error, or other runtime
@ -3456,6 +3534,110 @@ public final class Files {
return write(path, lines, StandardCharsets.UTF_8, options);
}
/**
* Write a {@linkplain java.lang.CharSequence CharSequence} to a file.
* Characters are encoded into bytes using the
* {@link StandardCharsets#UTF_8 UTF-8} {@link Charset charset}.
*
* <p> This method is equivalent to:
* {@code writeString(path, csq, StandardCharsets.UTF_8, options) }
*
* @param path
* the path to the file
* @param csq
* the CharSequence to be written
* @param options
* options specifying how the file is opened
*
* @return the path
*
* @throws IllegalArgumentException
* if {@code options} contains an invalid combination of options
* @throws IOException
* if an I/O error occurs writing to or creating the file, or the
* text cannot be encoded using the specified charset
* @throws UnsupportedOperationException
* if an unsupported option is specified
* @throws SecurityException
* In the case of the default provider, and a security manager is
* installed, the {@link SecurityManager#checkWrite(String) checkWrite}
* method is invoked to check write access to the file. The {@link
* SecurityManager#checkDelete(String) checkDelete} method is
* invoked to check delete access if the file is opened with the
* {@code DELETE_ON_CLOSE} option.
*
* @since 11
*/
public static Path writeString(Path path, CharSequence csq, OpenOption... options)
    throws IOException
{
    // UTF-8 is the charset used when the caller does not name one.
    final Charset utf8 = StandardCharsets.UTF_8;
    return writeString(path, csq, utf8, options);
}
/**
* Write a {@linkplain java.lang.CharSequence CharSequence} to a file.
* Characters are encoded into bytes using the specified
* {@linkplain java.nio.charset.Charset charset}.
*
* <p> All characters are written as they are, including the line separators in
* the char sequence. No extra characters are added.
*
* <p> The {@code options} parameter specifies how the file is created
* or opened. If no options are present then this method works as if the
* {@link StandardOpenOption#CREATE CREATE}, {@link
* StandardOpenOption#TRUNCATE_EXISTING TRUNCATE_EXISTING}, and {@link
* StandardOpenOption#WRITE WRITE} options are present. In other words, it
* opens the file for writing, creating the file if it doesn't exist, or
* initially truncating an existing {@link #isRegularFile regular-file} to
* a size of {@code 0}.
*
*
* @param path
* the path to the file
* @param csq
* the CharSequence to be written
* @param cs
* the charset to use for encoding
* @param options
* options specifying how the file is opened
*
* @return the path
*
* @throws IllegalArgumentException
* if {@code options} contains an invalid combination of options
* @throws IOException
* if an I/O error occurs writing to or creating the file, or the
* text cannot be encoded using the specified charset
* @throws UnsupportedOperationException
* if an unsupported option is specified
* @throws SecurityException
* In the case of the default provider, and a security manager is
* installed, the {@link SecurityManager#checkWrite(String) checkWrite}
* method is invoked to check write access to the file. The {@link
* SecurityManager#checkDelete(String) checkDelete} method is
* invoked to check delete access if the file is opened with the
* {@code DELETE_ON_CLOSE} option.
*
* @since 11
*/
public static Path writeString(Path path, CharSequence csq, Charset cs, OpenOption... options)
    throws IOException
{
    // ensure the text is not null before opening file
    Objects.requireNonNull(path);
    Objects.requireNonNull(csq);
    Objects.requireNonNull(cs);

    // Only the encoding step is guarded: an IllegalArgumentException from
    // getBytesNoRepl means the text cannot be encoded with cs and is reported
    // as an IOException. The write call is deliberately outside the try so
    // that an IllegalArgumentException raised for an invalid combination of
    // options reaches the caller unchanged, as documented for this method.
    byte[] bytes;
    try {
        bytes = JLA.getBytesNoRepl(String.valueOf(csq), cs);
    } catch (IllegalArgumentException e) {
        throw new IOException(e);
    }
    write(path, bytes, options);
    return path;
}
// -- Stream APIs --
/**

View File

@ -30,6 +30,7 @@ import java.lang.module.ModuleDescriptor;
import java.lang.reflect.Executable;
import java.lang.reflect.Method;
import java.net.URI;
import java.nio.charset.Charset;
import java.security.AccessControlContext;
import java.security.ProtectionDomain;
import java.util.Iterator;
@ -255,6 +256,36 @@ public interface JavaLangAccess {
*/
Stream<ModuleLayer> layers(ClassLoader loader);
/**
* Constructs a new {@code String} by decoding the specified byte array
* using the specified {@linkplain java.nio.charset.Charset charset}.
*
* The caller of this method shall relinquish and transfer the ownership of
* the byte array to the callee, since the latter may use the array without
* making a copy.
*
* @param bytes the byte array source
* @param cs the Charset
* @return the newly created string
* @throws IllegalArgumentException for malformed or unmappable bytes
*/
String newStringNoRepl(byte[] bytes, Charset cs);
/**
* Encodes the given string into a sequence of bytes using the specified Charset.
*
* This method avoids copying the String's internal representation if the input
* is ASCII. In that case the returned array may be the String's own internal
* array, so the caller must not modify it.
*
* This method throws IllegalArgumentException instead of replacing when
* malformed input or unmappable characters are encountered.
*
* @param s the string to encode
* @param cs the charset
* @return the encoded bytes
* @throws IllegalArgumentException for malformed input or unmappable characters
*/
byte[] getBytesNoRepl(String s, Charset cs);
/**
* Returns a new string by decoding from the given utf8 bytes array.
*

View File

@ -0,0 +1,269 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import static java.nio.charset.StandardCharsets.US_ASCII;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import static java.nio.file.StandardOpenOption.APPEND;
import java.util.Random;
import java.util.concurrent.Callable;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
/* @test
* @bug 8201276
* @build ReadWriteString PassThroughFileSystem
* @run testng ReadWriteString
* @summary Unit test for the Files readString and writeString methods.
* @key randomness
*/
/**
 * Tests for Files.readString and Files.writeString: null handling, round
 * trips of generated text (with and without an explicit charset, with and
 * without appending), and the IOException raised for unencodable text or
 * undecodable file content.
 */
@Test(groups = "readwrite")
public class ReadWriteString {

    private static final OpenOption OPTION_CREATE = StandardOpenOption.CREATE;

    // data for text files
    private static final String EN_STRING = "The quick brown fox jumps over the lazy dog";
    private static final String JA_STRING = "\u65e5\u672c\u8a9e\u6587\u5b57\u5217";

    // malformed input: a high surrogate without the low surrogate
    // (not referenced by the tests in this class)
    static char[] illChars = {
        '\u00fa', '\ud800'
    };

    // ASCII text with a single 0xFA byte in the middle; not valid UTF-8
    static byte[] data = getData();

    /**
     * Builds the byte sequence used by the malformed-read test. The text
     * parts are encoded explicitly as US-ASCII so the bytes do not depend on
     * the platform default charset, and the array-range write overload is
     * used because it declares no checked exception.
     */
    static byte[] getData() {
        byte[] head = "A string that contains ".getBytes(US_ASCII);
        byte[] tail = " , an invalid character for UTF-8.".getBytes(US_ASCII);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        baos.write(head, 0, head.length);
        baos.write(0xFA);
        baos.write(tail, 0, tail.length);
        return baos.toByteArray();
    }

    // file used by most tests
    private Path tmpfile;

    /*
     * DataProvider for malformed write test. Provides the following fields:
     * file path, malformed input string, charset
     */
    @DataProvider(name = "malformedWrite")
    public Object[][] getMalformedWrite() throws IOException {
        Path path = Files.createTempFile("malformedWrite", null);
        return new Object[][]{
            {path, "\ud800", null}, //the default Charset is UTF_8
            {path, "\u00A0\u00A1", US_ASCII},
            {path, "\ud800", UTF_8},
            {path, JA_STRING, ISO_8859_1},
        };
    }

    /*
     * DataProvider for illegal input test
     * Writes the data in ISO8859 and reads with UTF_8, expects IOException
     */
    @DataProvider(name = "illegalInput")
    public Object[][] getIllegalInput() throws IOException {
        Path path = Files.createTempFile("illegalInput", null);
        return new Object[][]{
            {path, data, ISO_8859_1, null},
            {path, data, ISO_8859_1, UTF_8}
        };
    }

    @BeforeClass
    void setup() throws IOException {
        tmpfile = Files.createTempFile("readWriteString", null);
    }

    @AfterClass
    void cleanup() throws IOException {
        Files.deleteIfExists(tmpfile);
    }

    /**
     * Verifies that NPE is thrown when one of the parameters is null.
     */
    @Test
    public void testNulls() {
        Path path = Paths.get(".");
        String s = "abc";

        checkNullPointerException(() -> Files.readString((Path) null));
        checkNullPointerException(() -> Files.readString((Path) null, UTF_8));
        checkNullPointerException(() -> Files.readString(path, (Charset) null));

        checkNullPointerException(() -> Files.writeString((Path) null, s, OPTION_CREATE));
        checkNullPointerException(() -> Files.writeString(path, (CharSequence) null, OPTION_CREATE));
        checkNullPointerException(() -> Files.writeString(path, s, (OpenOption[]) null));

        checkNullPointerException(() -> Files.writeString((Path) null, s, UTF_8, OPTION_CREATE));
        checkNullPointerException(() -> Files.writeString(path, (CharSequence) null, UTF_8, OPTION_CREATE));
        checkNullPointerException(() -> Files.writeString(path, s, (Charset) null, OPTION_CREATE));
        checkNullPointerException(() -> Files.writeString(path, s, UTF_8, (OpenOption[]) null));
    }

    /**
     * Verifies the readString and write String methods. Writes to files Strings
     * of various sizes, with/without specifying the Charset, and then compares
     * the result of reading the files.
     */
    @Test
    public void testReadWrite() throws IOException {
        for (int size = 0; size < 16 * 1024; size += 1024) {
            testReadWrite(size, null, false);
            testReadWrite(size, null, true);
            testReadWrite(size, UTF_8, false);
            testReadWrite(size, UTF_8, true);
        }
    }

    /**
     * Verifies that IOException is thrown (as specified) when giving a malformed
     * string input.
     *
     * @param path the path to write
     * @param s the string
     * @param cs the Charset
     * @throws IOException if the input is malformed
     */
    @Test(dataProvider = "malformedWrite", expectedExceptions = IOException.class)
    public void testMalformedWrite(Path path, String s, Charset cs) throws IOException {
        path.toFile().deleteOnExit();
        if (cs == null) {
            Files.writeString(path, s, OPTION_CREATE);
        } else {
            Files.writeString(path, s, cs, OPTION_CREATE);
        }
    }

    /**
     * Verifies that IOException is thrown when reading a file using the wrong
     * Charset.
     *
     * @param path the path to write and read
     * @param data the data used for the test
     * @param csWrite the Charset to use for writing the test file
     * @param csRead the Charset to use for reading the file
     * @throws IOException when the Charset used for reading the file is incorrect
     */
    @Test(dataProvider = "illegalInput", expectedExceptions = IOException.class)
    public void testMalformedRead(Path path, byte[] data, Charset csWrite, Charset csRead) throws IOException {
        path.toFile().deleteOnExit();
        String temp = new String(data, csWrite);
        Files.writeString(path, temp, csWrite, OPTION_CREATE);
        // The returned string is irrelevant: the read itself must throw.
        if (csRead == null) {
            Files.readString(path);
        } else {
            Files.readString(path, csRead);
        }
    }

    // Runs c and fails the test unless it throws NullPointerException.
    private void checkNullPointerException(Callable<?> c) {
        try {
            c.call();
            fail("NullPointerException expected");
        } catch (NullPointerException ignore) {
        } catch (Exception e) {
            fail(e + " not expected");
        }
    }

    /*
     * Writes a generated string of the given size to tmpfile (optionally
     * appending it a second time), reads the file back and checks that the
     * content matches. A null cs selects the overloads without a Charset.
     */
    private void testReadWrite(int size, Charset cs, boolean append) throws IOException {
        String str = generateString(size);
        Path result;
        if (cs == null) {
            result = Files.writeString(tmpfile, str);
        } else {
            result = Files.writeString(tmpfile, str, cs);
        }

        //System.out.println(result.toUri().toASCIIString());
        assertTrue(result == tmpfile);
        if (append) {
            if (cs == null) {
                Files.writeString(tmpfile, str, APPEND);
            } else {
                Files.writeString(tmpfile, str, cs, APPEND);
            }
            // generated text is single-byte characters only, so the file
            // size in bytes equals the number of characters written
            assertTrue(Files.size(tmpfile) == size * 2,
                    "Unexpected file size after append");
        }

        String expected = append ? str + str : str;

        String read;
        if (cs == null) {
            read = Files.readString(result);
        } else {
            read = Files.readString(result, cs);
        }
        //System.out.println("chars read: " + read.length());
        //System.out.println(read);
        //System.out.println("---end---");
        assertTrue(read.equals(expected), "String read not the same as written");
    }

    static final char[] CHARS = "abcdefghijklmnopqrstuvwxyz \r\n".toCharArray();
    StringBuilder sb = new StringBuilder(512);
    Random random = new Random();

    // Returns a string of the given length drawn at random from CHARS,
    // reusing the shared builder.
    private String generateString(int size) {
        sb.setLength(0);
        for (int i = 0; i < size; i++) {
            char c = CHARS[random.nextInt(CHARS.length)];
            sb.append(c);
        }
        return sb.toString();
    }
}