8181147: JNU_GetStringPlatformChars should have a fast path for UTF-8

8182451: Inconsistency between String.getBytes("Cp1252") and JNU_GetStringPlatformChars

Reviewed-by: martin, erikj, sherman, chegar, mikael
This commit is contained in:
Claes Redestad 2017-06-19 09:29:11 +02:00
parent 06a254a8f5
commit 81a4918f16
6 changed files with 384 additions and 86 deletions

View File

@ -42,12 +42,22 @@ include TestFilesCompilation.gmk
# Add more directories here when needed.
BUILD_JDK_JTREG_NATIVE_SRC := \
$(JDK_TOPDIR)/test/native_sanity \
$(JDK_TOPDIR)/test/java/lang/String/nativeEncoding \
#
BUILD_JDK_JTREG_OUTPUT_DIR := $(BUILD_OUTPUT)/support/test/jdk/jtreg/native
BUILD_JDK_JTREG_IMAGE_DIR := $(TEST_IMAGE_DIR)/jdk/jtreg
ifeq ($(OPENJDK_TARGET_OS), windows)
WIN_LIB_JAVA := $(SUPPORT_OUTPUTDIR)/native/java.base/libjava/java.lib
BUILD_JDK_JTREG_LIBRARIES_LIBS_libstringPlatformChars := $(WIN_LIB_JAVA)
else ifeq ($(OPENJDK_TARGET_OS), solaris)
BUILD_JDK_JTREG_LIBRARIES_LIBS_libstringPlatformChars := -ljava -lc
else
BUILD_JDK_JTREG_LIBRARIES_LIBS_libstringPlatformChars := -ljava
endif
$(eval $(call SetupTestFilesCompilation, BUILD_JDK_JTREG_LIBRARIES, \
TYPE := LIBRARY, \
SOURCE_DIRS := $(BUILD_JDK_JTREG_NATIVE_SRC), \

View File

@ -27,6 +27,7 @@ package java.lang;
import java.io.ObjectStreamField;
import java.io.UnsupportedEncodingException;
import java.lang.annotation.Native;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
@ -3063,8 +3064,8 @@ public final class String
return COMPACT_STRINGS && coder == LATIN1;
}
static final byte LATIN1 = 0;
static final byte UTF16 = 1;
@Native static final byte LATIN1 = 0;
@Native static final byte UTF16 = 1;
/*
* StringIndexOutOfBoundsException if {@code index} is

View File

@ -29,6 +29,7 @@
#include "jvm.h"
#include "jni.h"
#include "jni_util.h"
#include "java_lang_String.h"
/* Due to a bug in the win32 C runtime library strings
* such as "z:" need to be appended with a "." so we
@ -442,16 +443,18 @@ JNU_NewObjectByName(JNIEnv *env, const char *class_name,
return obj;
}
/* Optimized for char set ISO_8859_1 */
/* Optimized for charset ISO_8859_1 */
static jstring
newString8859_1(JNIEnv *env, const char *str)
newSizedString8859_1(JNIEnv *env, const char *str, const int len)
{
int len = (int)strlen(str);
jchar buf[512];
jchar *str1;
jstring result;
int i;
if ((*env)->EnsureLocalCapacity(env, 1) < 0)
return NULL;
if (len > 512) {
str1 = (jchar *)malloc(len * sizeof(jchar));
if (str1 == 0) {
@ -469,6 +472,13 @@ newString8859_1(JNIEnv *env, const char *str)
return result;
}
/* Convenience wrapper for NUL-terminated input: measures str with strlen
 * and hands the work to newSizedString8859_1.
 */
static jstring
newString8859_1(JNIEnv *env, const char *str)
{
    return newSizedString8859_1(env, str, (int)strlen(str));
}
static const char*
getString8859_1Chars(JNIEnv *env, jstring jstr)
{
@ -501,7 +511,7 @@ getString8859_1Chars(JNIEnv *env, jstring jstr)
}
/* Optimized for char set ISO646-US (us-ascii) */
/* Optimized for charset ISO646-US (us-ascii) */
static jstring
newString646_US(JNIEnv *env, const char *str)
{
@ -573,7 +583,7 @@ static int cp1252c1chars[32] = {
0x02Dc,0x2122,0x0161,0x203A,0x0153,0xFFFD,0x017E,0x0178
};
/* Optimized for char set Cp1252 */
/* Optimized for charset Cp1252 */
static jstring
newStringCp1252(JNIEnv *env, const char *str)
{
@ -582,6 +592,10 @@ newStringCp1252(JNIEnv *env, const char *str)
jchar *str1;
jstring result;
int i;
if ((*env)->EnsureLocalCapacity(env, 1) < 0)
return NULL;
if (len > 512) {
str1 = (jchar *)malloc(len * sizeof(jchar));
if (str1 == 0) {
@ -625,9 +639,13 @@ getStringCp1252Chars(JNIEnv *env, jstring jstr)
for (i=0; i<len; i++) {
jchar c = str[i];
if (c < 256)
result[i] = (char)c;
else switch(c) {
if (c < 256) {
if ((c >= 0x80) && (c <= 0x9f)) {
result[i] = '?';
} else {
result[i] = (char)c;
}
} else switch(c) {
case 0x20AC: result[i] = (char)0x80; break;
case 0x201A: result[i] = (char)0x82; break;
case 0x0192: result[i] = (char)0x83; break;
@ -671,8 +689,89 @@ static jstring jnuEncoding = NULL;
static jmethodID String_init_ID; /* String(byte[], enc) */
static jmethodID String_getBytes_ID; /* String.getBytes(enc) */
int getFastEncoding() {
return fastEncoding;
/* Cached field IDs */
static jfieldID String_coder_ID; /* String.coder */
static jfieldID String_value_ID; /* String.value */
/* Remembers a positive answer from Charset.isSupported(jnuEncoding).
 * A negative answer is not cached, so the query is repeated on the next call.
 */
static jboolean isJNUEncodingSupported = JNI_FALSE;

/* Returns JNI_TRUE when java.nio.charset.Charset.isSupported reports the
 * charset named by jnuEncoding as supported. The exception flag filled in by
 * JNU_CallStaticMethodByName is not inspected here.
 */
static jboolean jnuEncodingSupported(JNIEnv *env) {
    if (isJNUEncodingSupported != JNI_TRUE) {
        jboolean hasException;
        isJNUEncodingSupported = (jboolean) JNU_CallStaticMethodByName(
                                     env, &hasException,
                                     "java/nio/charset/Charset",
                                     "isSupported",
                                     "(Ljava/lang/String;)Z",
                                     jnuEncoding).z;
    }
    return isJNUEncodingSupported;
}
/* Create a new string by copying the first len bytes of str into a
 * heap-allocated Java byte array and calling the appropriate String
 * constructor.
 *
 * Returns the new string, or NULL if local reference capacity could not be
 * ensured, the byte array could not be allocated, the String class could not
 * be obtained, or the constructor did not produce a result.
 *
 * The temporary byte array's local reference is deleted on every path once
 * it has been created, so repeated calls from a long-running native frame do
 * not accumulate local references.
 */
static jstring
newSizedStringJava(JNIEnv *env, const char *str, const int len)
{
    jstring result = NULL;
    jbyteArray bytes;
    jclass strClazz;

    /* One local reference for the byte array, one for the resulting string. */
    if ((*env)->EnsureLocalCapacity(env, 2) < 0)
        return NULL;

    bytes = (*env)->NewByteArray(env, len);
    if (bytes == NULL)
        return NULL;

    strClazz = JNU_ClassString(env);
    if (strClazz == NULL) {
        (*env)->DeleteLocalRef(env, bytes);
        return NULL;
    }

    (*env)->SetByteArrayRegion(env, bytes, 0, len, (jbyte *)str);
    if (jnuEncodingSupported(env)) {
        /* String(byte[], enc) with the platform encoding name. */
        result = (*env)->NewObject(env, strClazz,
                                   String_init_ID, bytes, jnuEncoding);
    } else {
        /* If the encoding named by sun.jnu.encoding is not accepted by
           Charset.isSupported, fall back to String(byte[]) without an
           explicit encoding name; the StringCoding class then picks up
           iso-8859-1 as the fallback converter for us.
        */
        jmethodID mid = (*env)->GetMethodID(env, strClazz,
                                            "<init>", "([B)V");
        if (mid != NULL) {
            result = (*env)->NewObject(env, strClazz, mid, bytes);
        }
    }
    (*env)->DeleteLocalRef(env, bytes);
    return result;
}
/* Convenience wrapper for NUL-terminated input: measures str with strlen
 * and hands the work to newSizedStringJava.
 */
static jstring
newStringJava(JNIEnv *env, const char *str)
{
    return newSizedStringJava(env, str, (int)strlen(str));
}
/* Optimized for charset UTF-8.
 *
 * Makes a single pass over str that both finds its length and ORs all bytes
 * together. If no byte has its high bit set the input is pure ASCII and is
 * built through newSizedString8859_1; otherwise it is decoded through
 * newSizedStringJava.
 */
static jstring
newStringUTF8(JNIEnv *env, const char *str)
{
    const unsigned char *cursor = (const unsigned char *)str;
    unsigned char seenBits = 0;
    int len;

    while (*cursor != '\0') {
        seenBits |= *cursor;
        cursor++;
    }
    len = (int)((const char *)cursor - str);

    if ((seenBits & 0x80) == 0) {
        /* ascii fast-path */
        return newSizedString8859_1(env, str, len);
    }
    return newSizedStringJava(env, str, len);
}
/* Initialize the fast encoding. If the "sun.jnu.encoding" property
@ -718,17 +817,20 @@ initializeEncoding(JNIEnv *env)
if ((strcmp(encname, "8859_1") == 0) ||
(strcmp(encname, "ISO8859-1") == 0) ||
(strcmp(encname, "ISO8859_1") == 0) ||
(strcmp(encname, "ISO-8859-1") == 0))
(strcmp(encname, "ISO-8859-1") == 0)) {
fastEncoding = FAST_8859_1;
else if (strcmp(encname, "ISO646-US") == 0)
} else if (strcmp(encname, "UTF-8") == 0) {
fastEncoding = FAST_UTF_8;
jnuEncoding = (jstring)(*env)->NewGlobalRef(env, enc);
} else if (strcmp(encname, "ISO646-US") == 0) {
fastEncoding = FAST_646_US;
else if (strcmp(encname, "Cp1252") == 0 ||
} else if (strcmp(encname, "Cp1252") == 0 ||
/* This is a temporary fix until we move */
/* to wide character versions of all Windows */
/* calls. */
strcmp(encname, "utf-16le") == 0)
strcmp(encname, "utf-16le") == 0) {
fastEncoding = FAST_CP1252;
else {
} else {
fastEncoding = NO_FAST_ENCODING;
jnuEncoding = (jstring)(*env)->NewGlobalRef(env, enc);
}
@ -750,24 +852,10 @@ initializeEncoding(JNIEnv *env)
CHECK_NULL(String_getBytes_ID);
String_init_ID = (*env)->GetMethodID(env, strClazz,
"<init>", "([BLjava/lang/String;)V");
String_coder_ID = (*env)->GetFieldID(env, strClazz, "coder", "B");
String_value_ID = (*env)->GetFieldID(env, strClazz, "value", "[B");
}
/* Caches a positive result of Charset.isSupported(jnuEncoding); a negative
 * result is not cached, so the query is repeated on the next call.
 */
static jboolean isJNUEncodingSupported = JNI_FALSE;
/* Returns JNI_TRUE when java.nio.charset.Charset.isSupported accepts the
 * charset named by jnuEncoding. The exception flag written to exe is not
 * inspected here.
 */
static jboolean jnuEncodingSupported(JNIEnv *env) {
jboolean exe;
if (isJNUEncodingSupported == JNI_TRUE) {
return JNI_TRUE;
}
isJNUEncodingSupported = (jboolean) JNU_CallStaticMethodByName (
env, &exe,
"java/nio/charset/Charset",
"isSupported",
"(Ljava/lang/String;)Z",
jnuEncoding).z;
return isJNUEncodingSupported;
}
JNIEXPORT jstring
NewStringPlatform(JNIEnv *env, const char *str)
{
@ -777,10 +865,6 @@ NewStringPlatform(JNIEnv *env, const char *str)
JNIEXPORT jstring JNICALL
JNU_NewStringPlatform(JNIEnv *env, const char *str)
{
jstring result = NULL;
jbyteArray hab = 0;
int len;
if (fastEncoding == NO_ENCODING_YET) {
initializeEncoding(env);
JNU_CHECK_EXCEPTION_RETURN(env, NULL);
@ -792,36 +876,9 @@ JNU_NewStringPlatform(JNIEnv *env, const char *str)
return newString646_US(env, str);
if (fastEncoding == FAST_CP1252)
return newStringCp1252(env, str);
if ((*env)->EnsureLocalCapacity(env, 2) < 0)
return NULL;
len = (int)strlen(str);
hab = (*env)->NewByteArray(env, len);
if (hab != 0) {
jclass strClazz = JNU_ClassString(env);
CHECK_NULL_RETURN(strClazz, 0);
(*env)->SetByteArrayRegion(env, hab, 0, len, (jbyte *)str);
if (jnuEncodingSupported(env)) {
result = (*env)->NewObject(env, strClazz,
String_init_ID, hab, jnuEncoding);
} else {
/*If the encoding specified in sun.jnu.encoding is not endorsed
by "Charset.isSupported" we have to fall back to use String(byte[])
explicitly here without specifying the encoding name, in which the
StringCoding class will pickup the iso-8859-1 as the fallback
converter for us.
*/
jmethodID mid = (*env)->GetMethodID(env, strClazz,
"<init>", "([B)V");
if (mid != NULL) {
result = (*env)->NewObject(env, strClazz, mid, hab);
}
}
(*env)->DeleteLocalRef(env, hab);
return result;
}
return NULL;
if (fastEncoding == FAST_UTF_8)
return newStringUTF8(env, str);
return newStringJava(env, str);
}
JNIEXPORT const char *
@ -830,27 +887,10 @@ GetStringPlatformChars(JNIEnv *env, jstring jstr, jboolean *isCopy)
return JNU_GetStringPlatformChars(env, jstr, isCopy);
}
JNIEXPORT const char * JNICALL
JNU_GetStringPlatformChars(JNIEnv *env, jstring jstr, jboolean *isCopy)
{
static const char* getStringBytes(JNIEnv *env, jstring jstr) {
char *result = NULL;
jbyteArray hab = 0;
if (isCopy)
*isCopy = JNI_TRUE;
if (fastEncoding == NO_ENCODING_YET) {
initializeEncoding(env);
JNU_CHECK_EXCEPTION_RETURN(env, 0);
}
if ((fastEncoding == FAST_8859_1) || (fastEncoding == NO_ENCODING_YET))
return getString8859_1Chars(env, jstr);
if (fastEncoding == FAST_646_US)
return getString646_USChars(env, jstr);
if (fastEncoding == FAST_CP1252)
return getStringCp1252Chars(env, jstr);
if ((*env)->EnsureLocalCapacity(env, 2) < 0)
return 0;
@ -883,6 +923,85 @@ JNU_GetStringPlatformChars(JNIEnv *env, jstring jstr, jboolean *isCopy)
return result;
}
/* Optimized for charset UTF-8.
 *
 * Strings whose coder is not LATIN1 are delegated to getStringBytes. For
 * LATIN1 strings the backing byte array is read directly and transcoded:
 * bytes 0x00-0x7f are copied as is, bytes 0x80-0xff become the two-byte
 * sequence (0xc0 | c >> 6), (0x80 | c & 0x3f).
 *
 * Returns a malloc'ed, NUL-terminated buffer, or NULL on failure. An
 * OutOfMemoryError is thrown when the result buffer cannot be allocated or
 * when its size would not fit in a jint.
 */
static const char*
getStringUTF8(JNIEnv *env, jstring jstr)
{
    int i;
    char *result;
    jbyteArray value;
    jint len;
    jbyte *str;
    jint extra;
    jint rlen;
    int ri;
    jbyte coder = (*env)->GetByteField(env, jstr, String_coder_ID);

    if (coder != java_lang_String_LATIN1) {
        return getStringBytes(env, jstr);
    }
    if ((*env)->EnsureLocalCapacity(env, 2) < 0) {
        return NULL;
    }
    value = (*env)->GetObjectField(env, jstr, String_value_ID);
    if (value == NULL)
        return NULL;
    len = (*env)->GetArrayLength(env, value);
    str = (*env)->GetPrimitiveArrayCritical(env, value, NULL);
    if (str == NULL) {
        (*env)->DeleteLocalRef(env, value);
        return NULL;
    }

    /* we need two bytes for each latin-1 char above 127 (negative jbytes) */
    extra = 0;
    for (i = 0; i < len; i++) {
        if (str[i] < 0) {
            extra++;
        }
    }

    /* len + extra + 1 (terminator) must not overflow a 32-bit jint. The
     * comparison is arranged so that it cannot overflow itself. */
    if (extra > 0x7fffffff - 1 - len) {
        (*env)->ReleasePrimitiveArrayCritical(env, value, str, 0);
        (*env)->DeleteLocalRef(env, value);
        JNU_ThrowOutOfMemoryError(env, "requested array size exceeds VM limit");
        return NULL;
    }
    rlen = len + extra;

    /* No JNI calls are made between acquiring and releasing the critical
     * array; malloc is a plain C library call. */
    result = MALLOC_MIN4(rlen);
    if (result == NULL) {
        (*env)->ReleasePrimitiveArrayCritical(env, value, str, 0);
        (*env)->DeleteLocalRef(env, value);
        JNU_ThrowOutOfMemoryError(env, 0);
        return NULL;
    }

    for (ri = 0, i = 0; i < len; i++) {
        jbyte c = str[i];
        if (c < 0) {
            result[ri++] = (char)(0xc0 | ((c & 0xff) >> 6));
            result[ri++] = (char)(0x80 | (c & 0x3f));
        } else {
            result[ri++] = c;
        }
    }
    (*env)->ReleasePrimitiveArrayCritical(env, value, str, 0);
    /* Drop the local reference to the backing array so that callers looping
     * in a single native frame do not accumulate references. */
    (*env)->DeleteLocalRef(env, value);
    result[rlen] = '\0';
    return result;
}
/* Converts jstr to a char buffer in the platform encoding.
 *
 * If isCopy is non-NULL it is always set to JNI_TRUE. The encoding is
 * initialized on first use; if that leaves an exception pending, 0 is
 * returned. The conversion is then dispatched on fastEncoding, with
 * getStringBytes handling every encoding that has no dedicated fast path.
 */
JNIEXPORT const char * JNICALL
JNU_GetStringPlatformChars(JNIEnv *env, jstring jstr, jboolean *isCopy)
{
    if (isCopy != NULL) {
        *isCopy = JNI_TRUE;
    }

    if (fastEncoding == NO_ENCODING_YET) {
        initializeEncoding(env);
        JNU_CHECK_EXCEPTION_RETURN(env, 0);
    }

    switch (fastEncoding) {
    case FAST_8859_1:
    case NO_ENCODING_YET:
        return getString8859_1Chars(env, jstr);
    case FAST_646_US:
        return getString646_USChars(env, jstr);
    case FAST_CP1252:
        return getStringCp1252Chars(env, jstr);
    case FAST_UTF_8:
        return getStringUTF8(env, jstr);
    default:
        return getStringBytes(env, jstr);
    }
}
JNIEXPORT void JNICALL
JNU_ReleaseStringPlatformChars(JNIEnv *env, jstring jstr, const char *str)
{

View File

@ -382,7 +382,8 @@ enum {
NO_FAST_ENCODING, /* Platform encoding is not fast */
FAST_8859_1, /* ISO-8859-1 */
FAST_CP1252, /* MS-DOS Cp1252 */
FAST_646_US /* US-ASCII : ISO646-US */
FAST_646_US, /* US-ASCII : ISO646-US */
FAST_UTF_8
};
int getFastEncoding();

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @run main/othervm/native -Xcheck:jni StringPlatformChars
*/
import java.util.Arrays;
/**
 * Compares the native getBytes/newString conversions from the
 * "stringPlatformChars" library with String.getBytes(enc) and
 * new String(bytes, enc), where enc is the value of the sun.jnu.encoding
 * system property.
 */
public class StringPlatformChars {

    /** Name of the platform encoding, as reported by sun.jnu.encoding. */
    private static final String JNU_ENCODING = System.getProperty("sun.jnu.encoding");

    public static void main(String... args) throws Exception {
        System.out.println("sun.jnu.encoding: " + JNU_ENCODING);
        System.loadLibrary("stringPlatformChars");

        // Test varying lengths, provoking different allocation paths
        StringBuilder unicodeSb = new StringBuilder();
        StringBuilder asciiSb = new StringBuilder();
        StringBuilder latinSb = new StringBuilder();

        for (int i = 0; i < 2000; i++) {
            unicodeSb.append('\uFEFE');
            testString(unicodeSb.toString());

            asciiSb.append('x');
            testString(asciiSb.toString());

            latinSb.append('\u00FE');
            testString(latinSb.toString());

            testString(latinSb.toString() + asciiSb.toString() + unicodeSb.toString());
        }

        // Exhaustively test single-char Strings for every char value except
        // U+0000 (covered separately below). An int counter is used so that
        // the upper bound Character.MAX_VALUE itself is included; a char
        // counter cannot express "<= MAX_VALUE" without wrapping around.
        for (int c = 1; c <= Character.MAX_VALUE; c++) {
            testString(String.valueOf((char) c));
        }

        // Special case: U+0000 is treated as end-of-string in the native code,
        // so strings with it should be truncated:
        if (getBytes("\u0000abcdef").length != 0 ||
            getBytes("a\u0000bcdef").length != 1) {
            System.out.println("Mismatching values for strings including \\u0000");
            throw new AssertionError("Mismatching values for strings including \\u0000");
        }
    }

    /**
     * Checks one string in both directions: the bytes returned by the native
     * getBytes must equal s.getBytes(JNU_ENCODING), and the String built by
     * the native newString from those bytes must equal
     * new String(bytes, JNU_ENCODING).
     *
     * @throws AssertionError on the first mismatch
     */
    private static void testString(String s) throws Exception {
        byte[] nativeBytes = getBytes(s);
        byte[] stringBytes = s.getBytes(JNU_ENCODING);

        if (!Arrays.equals(nativeBytes, stringBytes)) {
            System.out.println("Mismatching values for: '" + s + "' " + Arrays.toString(s.chars().toArray()));
            System.out.println("Native: " + Arrays.toString(nativeBytes));
            System.out.println("String: " + Arrays.toString(stringBytes));
            throw new AssertionError(s);
        }

        String javaNewS = new String(nativeBytes, JNU_ENCODING);
        String nativeNewS = newString(nativeBytes);
        if (!javaNewS.equals(nativeNewS)) {
            System.out.println("New string via native doesn't match via java: '" + javaNewS + "' and '" + nativeNewS + "'");
            throw new AssertionError(s);
        }
    }

    /** Native: encodes the string in the platform encoding. */
    static native byte[] getBytes(String string);

    /** Native: decodes platform-encoded bytes into a String. */
    static native String newString(byte[] bytes);
}

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include <stdlib.h>
#include <string.h>
#include "jni.h"
#include "jni_util.h"
/* Native half of StringPlatformChars.getBytes: converts value with
 * JNU_GetStringPlatformChars and returns the resulting bytes (up to, but not
 * including, the first NUL) as a new Java byte array.
 *
 * Returns NULL if the conversion or the array allocation fails. The
 * converted buffer is handed back through JNU_ReleaseStringPlatformChars on
 * every path so that the many calls made by the test do not leak it.
 */
JNIEXPORT jbyteArray JNICALL
Java_StringPlatformChars_getBytes(JNIEnv *env, jclass unused, jstring value)
{
    const char *str;
    int len;
    jbyteArray bytes;

    str = JNU_GetStringPlatformChars(env, value, NULL);
    if (str == NULL) {
        return NULL;
    }

    len = (int)strlen(str);
    bytes = (*env)->NewByteArray(env, len);
    if (bytes != NULL) {
        (*env)->SetByteArrayRegion(env, bytes, 0, len, (const jbyte *)str);
    }

    JNU_ReleaseStringPlatformChars(env, value, str);
    return bytes;
}
/* Native half of StringPlatformChars.newString: copies bytes into a
 * NUL-terminated C buffer and converts it with JNU_NewStringPlatform.
 *
 * Returns the new string, or NULL on failure. An OutOfMemoryError is thrown
 * if the temporary buffer cannot be allocated. The temporary buffer is freed
 * on every path.
 */
JNIEXPORT jstring JNICALL
Java_StringPlatformChars_newString(JNIEnv *env, jclass unused, jbyteArray bytes)
{
    char *str;
    jbyte *jbytes;
    jstring result;
    int len = (*env)->GetArrayLength(env, bytes);

    str = (char *)malloc((size_t)len + 1);
    if (str == NULL) {
        JNU_ThrowOutOfMemoryError(env, NULL);
        return NULL;
    }

    jbytes = (*env)->GetPrimitiveArrayCritical(env, bytes, NULL);
    if (jbytes == NULL) {
        free(str);
        return NULL;
    }
    /* Only a plain memory copy happens while the array is held critically. */
    memcpy(str, jbytes, (size_t)len);
    str[len] = '\0';
    (*env)->ReleasePrimitiveArrayCritical(env, bytes, (void *)jbytes, 0);

    result = JNU_NewStringPlatform(env, str);
    free(str);
    return result;
}