8327156: Avoid copying in StringTable::intern(oop, TRAPS)

8326865: Avoid copying in StringTable::intern(Symbol*, TRAPS)
8327825: StringTable::intern is slow

Reviewed-by: dholmes, coleenp, jsjolen
This commit is contained in:
Casper Norrbin 2024-11-15 11:28:02 +00:00
parent 3eece6e941
commit 75c651f859
6 changed files with 550 additions and 87 deletions

View File

@ -347,7 +347,7 @@ Handle java_lang_String::create_from_str(const char* utf8_str, TRAPS) {
#ifdef ASSERT
// This check is too strict when the input string is not a valid UTF8.
// For example, it may be created with arbitrary content via jni_NewStringUTF.
if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, strlen(utf8_str), false)) {
if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, strlen(utf8_str), /*version_leq_47*/false)) {
ResourceMark rm;
const char* expected = utf8_str;
char* actual = as_utf8_string(h_obj());
@ -365,7 +365,7 @@ oop java_lang_String::create_oop_from_str(const char* utf8_str, TRAPS) {
return h_obj();
}
Handle java_lang_String::create_from_symbol(Symbol* symbol, TRAPS) {
Handle java_lang_String::create_from_symbol(const Symbol* symbol, TRAPS) {
const char* utf8_str = (char*)symbol->bytes();
int utf8_len = symbol->utf8_length();
@ -389,6 +389,8 @@ Handle java_lang_String::create_from_symbol(Symbol* symbol, TRAPS) {
}
#ifdef ASSERT
// This check is too strict on older classfile versions
if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, utf8_len, /*version_leq_47*/false))
{
ResourceMark rm;
const char* expected = symbol->as_utf8();
@ -755,6 +757,35 @@ bool java_lang_String::equals(oop java_string, const jchar* chars, int len) {
return true;
}
// Compares a java.lang.String against a UTF-8 encoded byte sequence.
//   java_string - must be an instance of java.lang.String (asserted)
//   utf8_string - start of the UTF-8 data
//   utf8_len    - length of the UTF-8 data in bytes
// Returns true when both have the same number of jchars and every decoded
// jchar matches the corresponding element of the string's value array.
bool java_lang_String::equals(oop java_string, const char* utf8_string, size_t utf8_len) {
  assert(java_string->klass() == vmClasses::String_klass(),
         "must be java_string");
  typeArrayOop value = java_lang_String::value_no_keepalive(java_string);
  const int length = java_lang_String::length(java_string, value);
  const int unicode_length = UTF8::unicode_length(utf8_string, utf8_len);
  if (length != unicode_length) {
    return false;
  }

  const bool is_latin1 = java_lang_String::is_latin1(java_string);
  const char* cursor = utf8_string;
  for (int index = 0; index < unicode_length; index++) {
    jchar decoded;
    cursor = UTF8::next(cursor, &decoded);
    // Latin-1 strings store one byte per character; widen it without sign extension.
    const jchar stored = is_latin1
        ? (jchar)(((jchar) value->byte_at(index)) & 0xff)
        : value->char_at(index);
    if (stored != decoded) {
      return false;
    }
  }
  return true;
}
bool java_lang_String::equals(oop str1, oop str2) {
assert(str1->klass() == vmClasses::String_klass(),
"must be java String");

View File

@ -32,6 +32,7 @@
#include "runtime/handles.hpp"
#include "runtime/os.hpp"
#include "utilities/macros.hpp"
#include "utilities/utf8.hpp"
#include "utilities/vmEnums.hpp"
class JvmtiThreadState;
@ -99,7 +100,7 @@ class java_lang_String : AllStatic {
static oop create_oop_from_unicode(const jchar* unicode, int len, TRAPS);
static Handle create_from_str(const char* utf8_str, TRAPS);
static oop create_oop_from_str(const char* utf8_str, TRAPS);
static Handle create_from_symbol(Symbol* symbol, TRAPS);
static Handle create_from_symbol(const Symbol* symbol, TRAPS);
static Handle create_from_platform_dependent_str(const char* str, TRAPS);
static void set_compact_strings(bool value);
@ -180,10 +181,24 @@ class java_lang_String : AllStatic {
return h;
}
static unsigned int hash_code(const char* utf8_str, size_t utf8_len) {
unsigned int h = 0;
int unicode_length = UTF8::unicode_length(utf8_str, utf8_len);
jchar c;
while (unicode_length-- > 0) {
utf8_str = UTF8::next(utf8_str, &c);
h = 31 * h + ((unsigned int)c);
}
return h;
}
static unsigned int hash_code(oop java_string);
static unsigned int hash_code_noupdate(oop java_string);
// Compare strings (of different types/encodings), length is the string (array) length
static bool equals(oop java_string, const jchar* chars, int len);
static bool equals(oop java_string, const char* utf8_str, size_t utf8_len);
static bool equals(oop str1, oop str2);
static inline bool value_equals(typeArrayOop str_value1, typeArrayOop str_value2);

View File

@ -99,9 +99,9 @@ inline oop StringTable::read_string_from_compact_hashtable(address base_address,
}
typedef CompactHashtable<
const jchar*, oop,
const StringTable::StringWrapper&, oop,
StringTable::read_string_from_compact_hashtable,
java_lang_String::equals> SharedStringTable;
StringTable::wrapped_string_equals> SharedStringTable;
static SharedStringTable _shared_table;
#endif
@ -123,12 +123,69 @@ volatile bool _alt_hash = false;
static bool _rehashed = false;
static uint64_t _alt_hash_seed = 0;
// Discriminator for the representation held by a StringWrapperInternal.
enum class StringType {
  OopStr,      // a Handle to a java.lang.String
  UnicodeStr,  // a jchar array
  SymbolStr,   // a Symbol
  UTF8Str      // a UTF-8 char array
};
// Tagged wrapper around one of the string representations the StringTable can
// look up or intern. It stores only the handle/pointer it is given plus a
// length; `type` says which union member is active.
struct StringWrapperInternal {
  union {
    const Handle oop_str;
    const jchar* unicode_str;
    const Symbol* symbol_str;
    const char* utf8_str;
  };
  const StringType type;
  // Length as supplied by the creator of the wrapper; its unit depends on `type`.
  const size_t length;

  StringWrapperInternal(const Handle str, const size_t len)
    : oop_str(str), type(StringType::OopStr), length(len) {}

  StringWrapperInternal(const jchar* str, const size_t len)
    : unicode_str(str), type(StringType::UnicodeStr), length(len) {}

  StringWrapperInternal(const Symbol* str, const size_t len)
    : symbol_str(str), type(StringType::SymbolStr), length(len) {}

  StringWrapperInternal(const char* str, const size_t len)
    : utf8_str(str), type(StringType::UTF8Str), length(len) {}
};
// Hashes len jchars starting at s. With useAlt set, the seeded alternative
// hash is used; otherwise the regular java.lang.String hash code.
static unsigned int hash_string(const jchar* s, int len, bool useAlt) {
  if (useAlt) {
    return AltHashing::halfsiphash_32(_alt_hash_seed, s, len);
  }
  return java_lang_String::hash_code(s, len);
}
const char* StringTable::get_symbol_utf8(const StringWrapper& symbol) {
return reinterpret_cast<const char*>(symbol.symbol_str->bytes());
}
// Computes the java.lang.String hash code of a wrapped string by dispatching
// to the hash_code overload that matches the wrapped representation.
unsigned int StringTable::hash_wrapped_string(const StringWrapper& wrapped_str) {
  unsigned int hash = 0;
  switch (wrapped_str.type) {
    case StringType::OopStr: {
      hash = java_lang_String::hash_code(wrapped_str.oop_str());
      break;
    }
    case StringType::UnicodeStr: {
      const int num_chars = static_cast<int>(wrapped_str.length);
      hash = java_lang_String::hash_code(wrapped_str.unicode_str, num_chars);
      break;
    }
    case StringType::SymbolStr: {
      hash = java_lang_String::hash_code(get_symbol_utf8(wrapped_str), wrapped_str.length);
      break;
    }
    case StringType::UTF8Str: {
      hash = java_lang_String::hash_code(wrapped_str.utf8_str, wrapped_str.length);
      break;
    }
    default:
      ShouldNotReachHere();
  }
  return hash;
}
// Compares a java.lang.String oop with a wrapped string by dispatching to the
// java_lang_String::equals overload that matches the wrapped representation.
// The trailing unnamed int exists only so the function fits CompactHashtable's
// equals type signature; it is ignored.
bool StringTable::wrapped_string_equals(oop java_string, const StringWrapper& wrapped_str, int) {
  bool same = false;
  switch (wrapped_str.type) {
    case StringType::OopStr: {
      same = java_lang_String::equals(java_string, wrapped_str.oop_str());
      break;
    }
    case StringType::UnicodeStr: {
      const int num_chars = static_cast<int>(wrapped_str.length);
      same = java_lang_String::equals(java_string, wrapped_str.unicode_str, num_chars);
      break;
    }
    case StringType::SymbolStr: {
      same = java_lang_String::equals(java_string, get_symbol_utf8(wrapped_str), wrapped_str.length);
      break;
    }
    case StringType::UTF8Str: {
      same = java_lang_String::equals(java_string, wrapped_str.utf8_str, wrapped_str.length);
      break;
    }
    default:
      ShouldNotReachHere();
  }
  return same;
}
class StringTableConfig : public StackObj {
private:
public:
@ -163,22 +220,33 @@ class StringTableConfig : public StackObj {
}
};
class StringTableLookupJchar : StackObj {
private:
Thread* _thread;
class StringTableLookup : StackObj {
uintx _hash;
int _len;
const jchar* _str;
protected:
Thread* _thread;
Handle _found;
public:
StringTableLookupJchar(Thread* thread, uintx hash, const jchar* key, int len)
: _thread(thread), _hash(hash), _len(len), _str(key) {
public:
StringTableLookup(Thread* thread, uintx hash)
: _hash(hash), _thread(thread) {}
uintx get_hash() const { return _hash; }
bool is_dead(WeakHandle* value) {
oop val_oop = value->peek();
return val_oop == nullptr;
}
uintx get_hash() const {
return _hash;
}
bool equals(WeakHandle* value) {
};
class StringTableLookupUnicode : public StringTableLookup {
private:
const jchar* _str;
int _len;
public:
StringTableLookupUnicode(Thread* thread, uintx hash, const jchar* key, int len)
: StringTableLookup(thread, hash), _str(key), _len(len) {}
bool equals(const WeakHandle* value) {
oop val_oop = value->peek();
if (val_oop == nullptr) {
return false;
@ -188,29 +256,42 @@ class StringTableLookupJchar : StackObj {
return false;
}
// Need to resolve weak handle and Handleize through possible safepoint.
_found = Handle(_thread, value->resolve());
_found = Handle(_thread, value->resolve());
return true;
}
bool is_dead(WeakHandle* value) {
oop val_oop = value->peek();
return val_oop == nullptr;
}
};
class StringTableLookupOop : public StackObj {
private:
Thread* _thread;
uintx _hash;
Handle _find;
Handle _found; // Might be a different oop with the same value that's already
// in the table, which is the point.
public:
StringTableLookupOop(Thread* thread, uintx hash, Handle handle)
: _thread(thread), _hash(hash), _find(handle) { }
class StringTableLookupUTF8 : public StringTableLookup {
private:
const char* _str;
size_t _utf8_len;
uintx get_hash() const {
return _hash;
public:
StringTableLookupUTF8(Thread* thread, uintx hash, const char* key, size_t utf8_len)
: StringTableLookup(thread, hash), _str(key), _utf8_len(utf8_len) {}
bool equals(const WeakHandle* value) {
oop val_oop = value->peek();
if (val_oop == nullptr) {
return false;
}
bool equals = java_lang_String::equals(val_oop, _str, _utf8_len);
if (!equals) {
return false;
}
// Need to resolve weak handle and Handleize through possible safepoint.
_found = Handle(_thread, value->resolve());
return true;
}
};
class StringTableLookupOop : public StringTableLookup {
private:
Handle _find;
public:
StringTableLookupOop(Thread* thread, uintx hash, Handle handle)
: StringTableLookup(thread, hash), _find(handle) {}
bool equals(WeakHandle* value) {
oop val_oop = value->peek();
@ -225,11 +306,6 @@ class StringTableLookupOop : public StackObj {
_found = Handle(_thread, value->resolve());
return true;
}
bool is_dead(WeakHandle* value) {
oop val_oop = value->peek();
return val_oop == nullptr;
}
};
void StringTable::create_table() {
@ -291,14 +367,15 @@ oop StringTable::lookup(Symbol* symbol) {
oop StringTable::lookup(const jchar* name, int len) {
unsigned int hash = java_lang_String::hash_code(name, len);
oop string = lookup_shared(name, len, hash);
StringWrapper wrapped_name(name, len);
oop string = lookup_shared(wrapped_name, hash);
if (string != nullptr) {
return string;
}
if (_alt_hash) {
hash = hash_string(name, len, true);
}
return do_lookup(name, len, hash);
return do_lookup(wrapped_name, hash);
}
class StringTableGet : public StackObj {
@ -323,80 +400,140 @@ void StringTable::update_needs_rehash(bool rehash) {
}
}
oop StringTable::do_lookup(const jchar* name, int len, uintx hash) {
oop StringTable::do_lookup(const StringWrapper& name, uintx hash) {
Thread* thread = Thread::current();
StringTableLookupJchar lookup(thread, hash, name, len);
StringTableGet stg(thread);
bool rehash_warning;
_local_table->get(thread, lookup, stg, &rehash_warning);
switch (name.type) {
case StringType::OopStr: {
StringTableLookupOop lookup(thread, hash, name.oop_str);
_local_table->get(thread, lookup, stg, &rehash_warning);
break;
}
case StringType::UnicodeStr: {
StringTableLookupUnicode lookup(thread, hash, name.unicode_str, static_cast<int>(name.length));
_local_table->get(thread, lookup, stg, &rehash_warning);
break;
}
case StringType::SymbolStr: {
StringTableLookupUTF8 lookup(thread, hash, get_symbol_utf8(name), name.length);
_local_table->get(thread, lookup, stg, &rehash_warning);
break;
}
case StringType::UTF8Str: {
StringTableLookupUTF8 lookup(thread, hash, name.utf8_str, name.length);
_local_table->get(thread, lookup, stg, &rehash_warning);
break;
}
default:
ShouldNotReachHere();
}
update_needs_rehash(rehash_warning);
return stg.get_res_oop();
}
// Decodes utf8_len bytes of UTF-8 starting at utf8_str into a jchar array
// obtained with NEW_RESOURCE_ARRAY and stores the number of jchars in len.
static const jchar* decode_utf8_to_unicode(const char* utf8_str, size_t utf8_len, int& len) {
  const int unicode_length = UTF8::unicode_length(utf8_str, utf8_len);
  jchar* chars = NEW_RESOURCE_ARRAY(jchar, unicode_length);
  UTF8::convert_to_unicode(utf8_str, chars, unicode_length);
  len = unicode_length;
  return chars;
}

// Returns wrapped_str as a jchar array and stores its length, in jchars, in len.
//  - UnicodeStr: returns the wrapped pointer itself; nothing is allocated.
//  - OopStr:     delegates to java_lang_String::as_unicode_string; returns
//                nullptr if that call leaves a pending exception.
//  - SymbolStr / UTF8Str: decodes the UTF-8 bytes into a resource-allocated array.
// Both UTF-8 cases are decoded by explicit byte length, like the hash_code and
// equals paths, rather than by scanning for a terminating NUL.
const jchar* StringTable::to_unicode(const StringWrapper& wrapped_str, int &len, TRAPS) {
  switch (wrapped_str.type) {
    case StringType::UnicodeStr:
      len = static_cast<int>(wrapped_str.length);
      return wrapped_str.unicode_str;
    case StringType::OopStr:
      return java_lang_String::as_unicode_string(wrapped_str.oop_str(), len, CHECK_NULL);
    case StringType::SymbolStr:
      return decode_utf8_to_unicode(get_symbol_utf8(wrapped_str),
                                    static_cast<size_t>(wrapped_str.symbol_str->utf8_length()),
                                    len);
    case StringType::UTF8Str:
      return decode_utf8_to_unicode(wrapped_str.utf8_str, wrapped_str.length, len);
    default:
      ShouldNotReachHere();
  }
  return nullptr;
}
// Produces a Handle to a java.lang.String for wrapped_str. An OopStr wrapper
// already holds a Handle, which is returned as is; for every other
// representation the matching java_lang_String::create_from_* function is called.
Handle StringTable::handle_from_wrapped_string(const StringWrapper& wrapped_str, TRAPS) {
  Handle result;
  switch (wrapped_str.type) {
    case StringType::OopStr: {
      result = wrapped_str.oop_str;
      break;
    }
    case StringType::UnicodeStr: {
      const int num_chars = static_cast<int>(wrapped_str.length);
      result = java_lang_String::create_from_unicode(wrapped_str.unicode_str, num_chars, THREAD);
      break;
    }
    case StringType::SymbolStr: {
      result = java_lang_String::create_from_symbol(wrapped_str.symbol_str, THREAD);
      break;
    }
    case StringType::UTF8Str: {
      result = java_lang_String::create_from_str(wrapped_str.utf8_str, THREAD);
      break;
    }
    default:
      ShouldNotReachHere();
  }
  return result;
}
// Interning
oop StringTable::intern(Symbol* symbol, TRAPS) {
if (symbol == nullptr) return nullptr;
ResourceMark rm(THREAD);
int length;
jchar* chars = symbol->as_unicode(length);
Handle string;
oop result = intern(string, chars, length, CHECK_NULL);
int length = symbol->utf8_length();
StringWrapper name(symbol, length);
oop result = intern(name, CHECK_NULL);
return result;
}
oop StringTable::intern(oop string, TRAPS) {
if (string == nullptr) return nullptr;
ResourceMark rm(THREAD);
int length;
int length = java_lang_String::length(string);
Handle h_string (THREAD, string);
jchar* chars = java_lang_String::as_unicode_string(string, length,
CHECK_NULL);
oop result = intern(h_string, chars, length, CHECK_NULL);
StringWrapper name(h_string, length);
oop result = intern(name, CHECK_NULL);
return result;
}
oop StringTable::intern(const char* utf8_string, TRAPS) {
if (utf8_string == nullptr) return nullptr;
ResourceMark rm(THREAD);
int length = UTF8::unicode_length(utf8_string);
jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
UTF8::convert_to_unicode(utf8_string, chars, length);
Handle string;
oop result = intern(string, chars, length, CHECK_NULL);
size_t length = strlen(utf8_string);
StringWrapper name(utf8_string, length);
oop result = intern(name, CHECK_NULL);
return result;
}
oop StringTable::intern(Handle string_or_null_h, const jchar* name, int len, TRAPS) {
oop StringTable::intern(const StringWrapper& name, TRAPS) {
// shared table always uses java_lang_String::hash_code
unsigned int hash = java_lang_String::hash_code(name, len);
oop found_string = lookup_shared(name, len, hash);
unsigned int hash = hash_wrapped_string(name);
oop found_string = lookup_shared(name, hash);
if (found_string != nullptr) {
return found_string;
}
if (_alt_hash) {
hash = hash_string(name, len, true);
ResourceMark rm(THREAD);
// Convert to unicode for alt hashing
int unicode_length;
const jchar* chars = to_unicode(name, unicode_length, CHECK_NULL);
hash = hash_string(chars, unicode_length, true);
}
found_string = do_lookup(name, len, hash);
found_string = do_lookup(name, hash);
if (found_string != nullptr) {
return found_string;
}
return do_intern(string_or_null_h, name, len, hash, THREAD);
return do_intern(name, hash, THREAD);
}
oop StringTable::do_intern(Handle string_or_null_h, const jchar* name,
int len, uintx hash, TRAPS) {
oop StringTable::do_intern(const StringWrapper& name, uintx hash, TRAPS) {
HandleMark hm(THREAD); // cleanup strings created
Handle string_h;
Handle string_h = handle_from_wrapped_string(name, CHECK_NULL);
if (!string_or_null_h.is_null()) {
string_h = string_or_null_h;
} else {
string_h = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
}
assert(java_lang_String::equals(string_h(), name, len),
assert(StringTable::wrapped_string_equals(string_h(), name),
"string must be properly initialized");
assert(len == java_lang_String::length(string_h()), "Must be same length");
// Notify deduplication support that the string is being interned. A string
// must never be deduplicated after it has been interned. Doing so interferes
@ -410,7 +547,7 @@ oop StringTable::do_intern(Handle string_or_null_h, const jchar* name,
bool rehash_warning;
do {
// Callers have already looked up the String using the jchar* name, so just go to add.
// Callers have already looked up the String, so just go to add.
WeakHandle wh(_oop_storage, string_h);
// The hash table takes ownership of the WeakHandle, even if it's not inserted.
if (_local_table->insert(THREAD, lookup, wh, &rehash_warning)) {
@ -775,14 +912,17 @@ size_t StringTable::shared_entry_count() {
return _shared_table.entry_count();
}
oop StringTable::lookup_shared(const jchar* name, int len, unsigned int hash) {
assert(hash == java_lang_String::hash_code(name, len),
oop StringTable::lookup_shared(const StringWrapper& name, unsigned int hash) {
assert(hash == hash_wrapped_string(name),
"hash must be computed using java_lang_String::hash_code");
return _shared_table.lookup(name, hash, len);
// len is required but is already part of StringWrapper, so 0 is used
return _shared_table.lookup(name, hash, 0);
}
oop StringTable::lookup_shared(const jchar* name, int len) {
return _shared_table.lookup(name, java_lang_String::hash_code(name, len), len);
StringWrapper wrapped_name(name, len);
// len is required but is already part of StringWrapper, so 0 is used
return _shared_table.lookup(wrapped_name, java_lang_String::hash_code(name, len), 0);
}
// This is called BEFORE we enter the CDS safepoint. We can allocate heap objects.

View File

@ -56,6 +56,18 @@ class StringTable : AllStatic {
static double get_load_factor();
static double get_dead_factor(size_t num_dead);
public:
typedef struct StringWrapperInternal StringWrapper;
// Unnamed int needed to fit CompactHashtable's equals type signature
static bool wrapped_string_equals(oop java_string, const StringWrapper& wrapped_str, int = 0);
private:
static const char* get_symbol_utf8(const StringWrapper& symbol_str);
static unsigned int hash_wrapped_string(const StringWrapper& wrapped_str);
static const jchar* to_unicode(const StringWrapper& wrapped_str, int &len, TRAPS);
static Handle handle_from_wrapped_string(const StringWrapper& wrapped_str, TRAPS);
// GC support
// Callback for GC to notify of changes that might require cleaning or resize.
@ -65,9 +77,9 @@ class StringTable : AllStatic {
static void item_added();
static void item_removed();
static oop intern(Handle string_or_null_h, const jchar* name, int len, TRAPS);
static oop do_intern(Handle string_or_null, const jchar* name, int len, uintx hash, TRAPS);
static oop do_lookup(const jchar* name, int len, uintx hash);
static oop intern(const StringWrapper& name, TRAPS);
static oop do_intern(const StringWrapper& name, uintx hash, TRAPS);
static oop do_lookup(const StringWrapper& name, uintx hash);
static void print_table_statistics(outputStream* st);
@ -87,7 +99,7 @@ class StringTable : AllStatic {
// Interning
static oop intern(Symbol* symbol, TRAPS);
static oop intern(oop string, TRAPS);
static oop intern(const char *utf8_string, TRAPS);
static oop intern(const char* utf8_string, TRAPS);
// Rehash the string table if it gets out of balance
private:
@ -131,7 +143,7 @@ private:
#endif // INCLUDE_CDS_JAVA_HEAP
private:
static oop lookup_shared(const jchar* name, int len, unsigned int hash) NOT_CDS_JAVA_HEAP_RETURN_(nullptr);
static oop lookup_shared(const StringWrapper& name, unsigned int hash) NOT_CDS_JAVA_HEAP_RETURN_(nullptr);
public:
static oop lookup_shared(const jchar* name, int len) NOT_CDS_JAVA_HEAP_RETURN_(nullptr);
static size_t shared_entry_count() NOT_CDS_JAVA_HEAP_RETURN_(0);

View File

@ -0,0 +1,194 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "precompiled.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "unittest.hpp"
// Tests that string functions (hash code/equals) stay consistent when comparing equal strings and converting between string types
// Simple ASCII string "Java(R)!!"
// Same length in both UTF8 and Unicode
// Note: the byte arrays below are not NUL-terminated; the tests copy them into zero-initialised buffers that are one element longer.
static const char static_ascii_utf8_str[] = {0x4A, 0x61, 0x76, 0x61, 0x28, 0x52, 0x29, 0x21, 0x21};
static const jchar static_ascii_unicode_str[] = {0x004A, 0x0061, 0x0076, 0x0061, 0x0028, 0x0052, 0x0029, 0x0021, 0x0021};
// Complex string "Jāvá®!☺☻"
// UTF-8 byte lengths per displayed character: 1,3,1,2,2,1,3,3 = 16 bytes.
// The "ā" is encoded as 'a' (0x61) followed by the combining macron U+0304 (0xCC 0x84), so the string is 9 jchars long.
static const unsigned char static_utf8_str[] = {0x4A, 0x61, 0xCC, 0x84, 0x76, 0xC3, 0xA1, 0xC2, 0xAE, 0x21, 0xE2, 0x98, 0xBA, 0xE2, 0x98, 0xBB};
static const jchar static_unicode_str[] = { 0x004A, 0x0061, 0x0304, 0x0076, 0x00E1, 0x00AE, 0x0021, 0x263A, 0x263B};
// Lengths of the arrays above: ASCII_LENGTH applies to both ASCII arrays, UTF8_LENGTH to static_utf8_str (bytes), UNICODE_LENGTH to static_unicode_str (jchars).
static const int ASCII_LENGTH = 9;
static const size_t UTF8_LENGTH = 16;
static const int UNICODE_LENGTH = 9;
// Expects two UTF-8 strings to hash identically over utf8_len bytes and to be equal as C strings.
// EXPECT_STREQ compares up to the terminating NUL, so both arguments must be NUL-terminated.
void compare_utf8_utf8(const char* utf8_str1, const char* utf8_str2, size_t utf8_len) {
EXPECT_EQ(java_lang_String::hash_code(utf8_str1, utf8_len), java_lang_String::hash_code(utf8_str2, utf8_len));
EXPECT_STREQ(utf8_str1, utf8_str2);
}
// Expects the UTF-8 hash (over utf8_len bytes) to equal the jchar hash (over unicode_len jchars).
// Only the hash codes are compared; the contents are not compared element by element here.
void compare_utf8_unicode(const char* utf8_str, const jchar* unicode_str, size_t utf8_len, int unicode_len) {
EXPECT_EQ(java_lang_String::hash_code(utf8_str, utf8_len), java_lang_String::hash_code(unicode_str, unicode_len));
}
// Expects a UTF-8 string and a java.lang.String to have the same hash code and to compare equal
// via the UTF-8 overload of java_lang_String::equals.
// Note: unicode_len is accepted but not used by this helper.
void compare_utf8_oop(const char* utf8_str, Handle oop_str, size_t utf8_len, int unicode_len) {
EXPECT_EQ(java_lang_String::hash_code(utf8_str, utf8_len), java_lang_String::hash_code(oop_str()));
EXPECT_TRUE(java_lang_String::equals(oop_str(), utf8_str, utf8_len));
}
// Expects two jchar arrays of unicode_len elements to hash identically and to match element by element.
// Both arrays must hold at least unicode_len elements.
void compare_unicode_unicode(const jchar* unicode_str1, const jchar* unicode_str2, int unicode_len) {
EXPECT_EQ(java_lang_String::hash_code(unicode_str1, unicode_len), java_lang_String::hash_code(unicode_str2, unicode_len));
for (int i = 0; i < unicode_len; i++) {
EXPECT_EQ(unicode_str1[i], unicode_str2[i]);
}
}
// Expects a jchar array and a java.lang.String to have the same hash code and to compare equal
// via the jchar overload of java_lang_String::equals.
void compare_unicode_oop(const jchar* unicode_str, Handle oop_str, int unicode_len) {
EXPECT_EQ(java_lang_String::hash_code(unicode_str, unicode_len), java_lang_String::hash_code(oop_str()));
EXPECT_TRUE(java_lang_String::equals(oop_str(), unicode_str, unicode_len));
}
// Expects two java.lang.String objects to have the same hash code and to compare equal.
void compare_oop_oop(Handle oop_str1, Handle oop_str2) {
EXPECT_EQ(java_lang_String::hash_code(oop_str1()), java_lang_String::hash_code(oop_str2()));
EXPECT_TRUE(java_lang_String::equals(oop_str1(), oop_str2()));
}
// Starting from a NUL-terminated UTF-8 string, converts it to a jchar array and to a
// java.lang.String, checks that hash codes/equality agree across the representations,
// then converts both back to UTF-8 and checks they match the input again.
//   utf8_str    - NUL-terminated UTF-8 input
//   utf8_len    - length of utf8_str in bytes
//   unicode_len - expected length of the string in jchars
void test_utf8_convert(const char* utf8_str, size_t utf8_len, int unicode_len) {
  EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str, strlen(utf8_str), false));

  JavaThread* THREAD = JavaThread::current();
  // The guard gets its own name instead of reusing (and shadowing) its type name.
  ThreadInVMfromNative invm(THREAD);
  ResourceMark rm(THREAD);
  HandleMark hm(THREAD);

  // UTF-8 -> jchar array and UTF-8 -> java.lang.String
  jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len);
  UTF8::convert_to_unicode(utf8_str, unicode_str_from_utf8, unicode_len);
  Handle oop_str_from_utf8 = java_lang_String::create_from_str(utf8_str, THREAD);
  compare_utf8_unicode(utf8_str, unicode_str_from_utf8, utf8_len, unicode_len);
  compare_utf8_oop(utf8_str, oop_str_from_utf8, utf8_len, unicode_len);

  // ... and back to UTF-8 again
  size_t length = unicode_len;
  const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str_from_utf8, length);
  const char* utf8_str_from_oop = java_lang_String::as_utf8_string(oop_str_from_utf8());
  EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_unicode, strlen(utf8_str_from_unicode), false));
  EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_oop, strlen(utf8_str_from_oop), false));
  compare_utf8_utf8(utf8_str, utf8_str_from_unicode, utf8_len);
  compare_utf8_utf8(utf8_str, utf8_str_from_oop, utf8_len);
}
void test_unicode_convert(const jchar* unicode_str, size_t utf8_len, int unicode_len) {
JavaThread* THREAD = JavaThread::current();
ThreadInVMfromNative ThreadInVMfromNative(THREAD);
ResourceMark rm(THREAD);
HandleMark hm(THREAD);
size_t length = unicode_len;
const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str, length);
Handle oop_str_from_unicode = java_lang_String::create_from_unicode(unicode_str, unicode_len, THREAD);
EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_unicode, strlen(utf8_str_from_unicode), false));
compare_utf8_unicode(utf8_str_from_unicode, unicode_str, utf8_len, unicode_len);
compare_unicode_oop(unicode_str, oop_str_from_unicode, unicode_len);
int _;
jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len);
UTF8::convert_to_unicode(utf8_str_from_unicode, unicode_str_from_utf8, unicode_len);
const jchar* unicode_str_from_oop = java_lang_String::as_unicode_string(oop_str_from_unicode(), _, THREAD);
compare_unicode_unicode(unicode_str, unicode_str_from_utf8, unicode_len);
compare_unicode_unicode(unicode_str, unicode_str_from_oop, unicode_len);
}
// Given the same string as both UTF-8 and jchar input, derives the other
// representations from each (UTF-8, jchar array, java.lang.String) and checks that
// every pairing agrees on hash code and, where an equals/compare exists, on content.
//   utf8_str    - NUL-terminated UTF-8 input of utf8_len bytes
//   unicode_str - the same string as unicode_len jchars
void test_utf8_unicode_cross(const char* utf8_str, const jchar* unicode_str, size_t utf8_len, int unicode_len) {
  compare_utf8_unicode(utf8_str, unicode_str, utf8_len, unicode_len);

  JavaThread* THREAD = JavaThread::current();
  // The guard gets its own name instead of reusing (and shadowing) its type name.
  ThreadInVMfromNative invm(THREAD);
  ResourceMark rm(THREAD);
  HandleMark hm(THREAD);

  // Derived representations
  size_t length = unicode_len;
  const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str, length);
  jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len);
  UTF8::convert_to_unicode(utf8_str, unicode_str_from_utf8, unicode_len);
  Handle oop_str_from_unicode = java_lang_String::create_from_unicode(unicode_str, unicode_len, THREAD);
  Handle oop_str_from_utf8 = java_lang_String::create_from_str(utf8_str, THREAD);

  // Inputs against representations derived from the other input
  compare_utf8_utf8(utf8_str, utf8_str_from_unicode, utf8_len);
  compare_utf8_oop(utf8_str, oop_str_from_unicode, utf8_len, unicode_len);
  compare_unicode_unicode(unicode_str, unicode_str_from_utf8, unicode_len);
  compare_unicode_oop(unicode_str, oop_str_from_utf8, unicode_len);

  // Derived representations against each other
  compare_utf8_oop(utf8_str_from_unicode, oop_str_from_utf8, utf8_len, unicode_len);
  compare_unicode_oop(unicode_str_from_utf8, oop_str_from_unicode, unicode_len);
  compare_utf8_unicode(utf8_str_from_unicode, unicode_str_from_utf8, utf8_len, unicode_len);
  compare_oop_oop(oop_str_from_utf8, oop_str_from_unicode);
}
// UTF-8 -> other representations, ASCII-only input.
TEST_VM(StringConversion, fromUTF8_ascii) {
  // Writable, zero-initialised buffer one byte longer than the payload, so the
  // copy is NUL-terminated. It must not be const: memcpy writes into it.
  char utf8_str[ASCII_LENGTH + 1] = { };
  memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH);
  test_utf8_convert(utf8_str, ASCII_LENGTH, ASCII_LENGTH);
}
// UTF-8 -> other representations, input with multi-byte characters.
TEST_VM(StringConversion, fromUTF8_varlen) {
  // Writable, zero-initialised buffer one byte longer than the payload, so the
  // copy is NUL-terminated. It must not be const: memcpy writes into it.
  char utf8_str[UTF8_LENGTH + 1] = { };
  memcpy(utf8_str, static_utf8_str, UTF8_LENGTH);
  test_utf8_convert(utf8_str, UTF8_LENGTH, UNICODE_LENGTH);
}
// jchar array -> other representations, ASCII-only input.
TEST_VM(StringConversion, fromUnicode_ascii) {
  jchar unicode_str[ASCII_LENGTH] = { };
  // The local array has exactly ASCII_LENGTH jchars, so its size is the copy size.
  memcpy(unicode_str, static_ascii_unicode_str, sizeof(unicode_str));
  test_unicode_convert(unicode_str, ASCII_LENGTH, ASCII_LENGTH);
}
// jchar array -> other representations, input whose UTF-8 form has multi-byte characters.
TEST_VM(StringConversion, fromUnicode_varlen) {
  jchar unicode_str[UNICODE_LENGTH] = { };
  // The local array has exactly UNICODE_LENGTH jchars, so its size is the copy size.
  memcpy(unicode_str, static_unicode_str, sizeof(unicode_str));
  test_unicode_convert(unicode_str, UTF8_LENGTH, UNICODE_LENGTH);
}
// Cross-checks the UTF-8 and jchar forms of the ASCII-only string.
TEST_VM(StringConversion, cross_ascii) {
  // Writable, zero-initialised buffer one byte longer than the payload, so the
  // copy is NUL-terminated. It must not be const: memcpy writes into it.
  char utf8_str[ASCII_LENGTH + 1] = { };
  jchar unicode_str[ASCII_LENGTH] = { };
  memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH);
  memcpy(unicode_str, static_ascii_unicode_str, ASCII_LENGTH * sizeof(jchar));
  test_utf8_unicode_cross(utf8_str, unicode_str, ASCII_LENGTH, ASCII_LENGTH);
}
// Cross-checks the UTF-8 and jchar forms of the string with multi-byte characters.
TEST_VM(StringConversion, cross_varlen) {
  // Writable, zero-initialised buffer one byte longer than the payload, so the
  // copy is NUL-terminated. It must not be const: memcpy writes into it.
  char utf8_str[UTF8_LENGTH + 1] = { };
  jchar unicode_str[UNICODE_LENGTH] = { };
  memcpy(utf8_str, static_utf8_str, UTF8_LENGTH);
  memcpy(unicode_str, static_unicode_str, UNICODE_LENGTH * sizeof(jchar));
  test_utf8_unicode_cross(utf8_str, unicode_str, UTF8_LENGTH, UNICODE_LENGTH);
}

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "precompiled.hpp"
#include "classfile/stringTable.hpp"
#include "classfile/symbolTable.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "unittest.hpp"
// Tests that strings are interned and that the same string is returned when interning from different string types
// Simple ASCII string "Java(R)!!"
// Note: the byte arrays below are not NUL-terminated; the tests copy them into zero-initialised buffers that are one byte longer.
static const char static_ascii_utf8_str[] = {0x4A, 0x61, 0x76, 0x61, 0x28, 0x52, 0x29, 0x21, 0x21};
static const size_t ASCII_LENGTH = 9;
// Complex string "Jāvá®!☺☻"
// UTF-8 byte lengths per displayed character: 1,3,1,2,2,1,3,3 = 16 bytes
// (the "ā" is 'a' (0x61) followed by the combining macron U+0304, encoded as 0xCC 0x84).
static const unsigned char static_utf8_str[] = {0x4A, 0x61, 0xCC, 0x84, 0x76, 0xC3, 0xA1, 0xC2, 0xAE, 0x21, 0xE2, 0x98, 0xBA, 0xE2, 0x98, 0xBB};
static const size_t COMPLEX_LENGTH = 16;
void test_intern(const char* utf8_str, size_t utf8_length) {
JavaThread* THREAD = JavaThread::current();
ThreadInVMfromNative ThreadInVMfromNative(THREAD);
HandleMark hm(THREAD);
oop interned_string_from_utf8 = StringTable::intern(utf8_str, THREAD);
EXPECT_TRUE(java_lang_String::equals(interned_string_from_utf8, utf8_str, utf8_length));
EXPECT_EQ(java_lang_String::hash_code(utf8_str, utf8_length),java_lang_String::hash_code(interned_string_from_utf8));
Symbol* symbol_from_utf8 = SymbolTable::new_symbol(utf8_str, static_cast<int>(utf8_length));
oop interned_string_from_symbol = StringTable::intern(symbol_from_utf8, THREAD);
EXPECT_EQ(interned_string_from_utf8, interned_string_from_symbol);
oop interned_string_from_oop1 = StringTable::intern(interned_string_from_utf8, THREAD);
EXPECT_EQ(interned_string_from_utf8, interned_string_from_oop1);
}
// Interning of the ASCII-only string.
TEST_VM(StringIntern, intern_ascii) {
  // Writable, zero-initialised buffer one byte longer than the payload, so the
  // copy is NUL-terminated. It must not be const: memcpy writes into it.
  char utf8_str[ASCII_LENGTH + 1] = { };
  memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH);
  test_intern(utf8_str, ASCII_LENGTH);
}
// Interning of the string with multi-byte UTF-8 characters.
TEST_VM(StringIntern, intern_varlen) {
  // Writable, zero-initialised buffer one byte longer than the payload, so the
  // copy is NUL-terminated. It must not be const: memcpy writes into it.
  char utf8_str[COMPLEX_LENGTH + 1] = { };
  memcpy(utf8_str, static_utf8_str, COMPLEX_LENGTH);
  test_intern(utf8_str, COMPLEX_LENGTH);
}