8327156: Avoid copying in StringTable::intern(oop, TRAPS)
8326865: Avoid copying in StringTable::intern(Symbol*, TRAPS) 8327825: StringTable::intern is slow Reviewed-by: dholmes, coleenp, jsjolen
This commit is contained in:
parent
3eece6e941
commit
75c651f859
@ -347,7 +347,7 @@ Handle java_lang_String::create_from_str(const char* utf8_str, TRAPS) {
|
||||
#ifdef ASSERT
|
||||
// This check is too strict when the input string is not a valid UTF8.
|
||||
// For example, it may be created with arbitrary content via jni_NewStringUTF.
|
||||
if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, strlen(utf8_str), false)) {
|
||||
if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, strlen(utf8_str), /*version_leq_47*/false)) {
|
||||
ResourceMark rm;
|
||||
const char* expected = utf8_str;
|
||||
char* actual = as_utf8_string(h_obj());
|
||||
@ -365,7 +365,7 @@ oop java_lang_String::create_oop_from_str(const char* utf8_str, TRAPS) {
|
||||
return h_obj();
|
||||
}
|
||||
|
||||
Handle java_lang_String::create_from_symbol(Symbol* symbol, TRAPS) {
|
||||
Handle java_lang_String::create_from_symbol(const Symbol* symbol, TRAPS) {
|
||||
const char* utf8_str = (char*)symbol->bytes();
|
||||
int utf8_len = symbol->utf8_length();
|
||||
|
||||
@ -389,6 +389,8 @@ Handle java_lang_String::create_from_symbol(Symbol* symbol, TRAPS) {
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
// This check is too strict on older classfile versions
|
||||
if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, utf8_len, /*version_leq_47*/false))
|
||||
{
|
||||
ResourceMark rm;
|
||||
const char* expected = symbol->as_utf8();
|
||||
@ -755,6 +757,35 @@ bool java_lang_String::equals(oop java_string, const jchar* chars, int len) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Compares a java.lang.String against a UTF-8 encoded byte sequence without
// materialising an intermediate jchar array.
//   java_string : must be an instance of java.lang.String (asserted).
//   utf8_string : start of the UTF-8 bytes.
//   utf8_len    : number of bytes to consider at utf8_string.
// Returns false right away when the decoded length differs from the string's
// length; otherwise the bytes are decoded one jchar at a time and compared with
// the string's backing array (read as bytes for Latin-1 strings, as chars
// otherwise).
bool java_lang_String::equals(oop java_string, const char* utf8_string, size_t utf8_len) {
  assert(java_string->klass() == vmClasses::String_klass(),
         "must be java_string");
  typeArrayOop value = java_lang_String::value_no_keepalive(java_string);
  const int length = java_lang_String::length(java_string, value);
  const int unicode_length = UTF8::unicode_length(utf8_string, utf8_len);
  if (length != unicode_length) {
    return false;
  }

  const bool is_latin1 = java_lang_String::is_latin1(java_string);
  const char* cursor = utf8_string;  // advances as each jchar is decoded
  jchar decoded;

  if (is_latin1) {
    // Latin-1 backing store: one byte per character, widened and masked so a
    // negative byte does not sign-extend into the comparison.
    for (int idx = 0; idx < unicode_length; idx++) {
      cursor = UTF8::next(cursor, &decoded);
      if ((((jchar) value->byte_at(idx)) & 0xff) != decoded) {
        return false;
      }
    }
    return true;
  }

  // Non-Latin-1 backing store: one jchar per character.
  for (int idx = 0; idx < unicode_length; idx++) {
    cursor = UTF8::next(cursor, &decoded);
    if (value->char_at(idx) != decoded) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
bool java_lang_String::equals(oop str1, oop str2) {
|
||||
assert(str1->klass() == vmClasses::String_klass(),
|
||||
"must be java String");
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "runtime/handles.hpp"
|
||||
#include "runtime/os.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/utf8.hpp"
|
||||
#include "utilities/vmEnums.hpp"
|
||||
|
||||
class JvmtiThreadState;
|
||||
@ -99,7 +100,7 @@ class java_lang_String : AllStatic {
|
||||
static oop create_oop_from_unicode(const jchar* unicode, int len, TRAPS);
|
||||
static Handle create_from_str(const char* utf8_str, TRAPS);
|
||||
static oop create_oop_from_str(const char* utf8_str, TRAPS);
|
||||
static Handle create_from_symbol(Symbol* symbol, TRAPS);
|
||||
static Handle create_from_symbol(const Symbol* symbol, TRAPS);
|
||||
static Handle create_from_platform_dependent_str(const char* str, TRAPS);
|
||||
|
||||
static void set_compact_strings(bool value);
|
||||
@ -180,10 +181,24 @@ class java_lang_String : AllStatic {
|
||||
return h;
|
||||
}
|
||||
|
||||
static unsigned int hash_code(const char* utf8_str, size_t utf8_len) {
|
||||
unsigned int h = 0;
|
||||
int unicode_length = UTF8::unicode_length(utf8_str, utf8_len);
|
||||
|
||||
jchar c;
|
||||
while (unicode_length-- > 0) {
|
||||
utf8_str = UTF8::next(utf8_str, &c);
|
||||
h = 31 * h + ((unsigned int)c);
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
static unsigned int hash_code(oop java_string);
|
||||
static unsigned int hash_code_noupdate(oop java_string);
|
||||
|
||||
// Compare strings (of different types/encodings), length is the string (array) length
|
||||
static bool equals(oop java_string, const jchar* chars, int len);
|
||||
static bool equals(oop java_string, const char* utf8_str, size_t utf8_len);
|
||||
static bool equals(oop str1, oop str2);
|
||||
static inline bool value_equals(typeArrayOop str_value1, typeArrayOop str_value2);
|
||||
|
||||
|
@ -99,9 +99,9 @@ inline oop StringTable::read_string_from_compact_hashtable(address base_address,
|
||||
}
|
||||
|
||||
typedef CompactHashtable<
|
||||
const jchar*, oop,
|
||||
const StringTable::StringWrapper&, oop,
|
||||
StringTable::read_string_from_compact_hashtable,
|
||||
java_lang_String::equals> SharedStringTable;
|
||||
StringTable::wrapped_string_equals> SharedStringTable;
|
||||
|
||||
static SharedStringTable _shared_table;
|
||||
#endif
|
||||
@ -123,12 +123,69 @@ volatile bool _alt_hash = false;
|
||||
static bool _rehashed = false;
|
||||
static uint64_t _alt_hash_seed = 0;
|
||||
|
||||
// Tag naming the representation a wrapped string is stored in.
enum class StringType {
  OopStr,      // Handle to a string oop
  UnicodeStr,  // const jchar*
  SymbolStr,   // const Symbol*
  UTF8Str      // const char*
};
|
||||
|
||||
struct StringWrapperInternal {
|
||||
union {
|
||||
const Handle oop_str;
|
||||
const jchar* unicode_str;
|
||||
const Symbol* symbol_str;
|
||||
const char* utf8_str;
|
||||
};
|
||||
const StringType type;
|
||||
const size_t length;
|
||||
|
||||
StringWrapperInternal(const Handle oop_str, const size_t length) : oop_str(oop_str), type(StringType::OopStr), length(length) {}
|
||||
StringWrapperInternal(const jchar* unicode_str, const size_t length) : unicode_str(unicode_str), type(StringType::UnicodeStr), length(length) {}
|
||||
StringWrapperInternal(const Symbol* symbol_str, const size_t length) : symbol_str(symbol_str), type(StringType::SymbolStr), length(length) {}
|
||||
StringWrapperInternal(const char* utf8_str, const size_t length) : utf8_str(utf8_str), type(StringType::UTF8Str), length(length) {}
|
||||
};
|
||||
|
||||
static unsigned int hash_string(const jchar* s, int len, bool useAlt) {
|
||||
return useAlt ?
|
||||
AltHashing::halfsiphash_32(_alt_hash_seed, s, len) :
|
||||
java_lang_String::hash_code(s, len);
|
||||
}
|
||||
|
||||
const char* StringTable::get_symbol_utf8(const StringWrapper& symbol) {
|
||||
return reinterpret_cast<const char*>(symbol.symbol_str->bytes());
|
||||
}
|
||||
|
||||
unsigned int StringTable::hash_wrapped_string(const StringWrapper& wrapped_str) {
|
||||
switch (wrapped_str.type) {
|
||||
case StringType::OopStr:
|
||||
return java_lang_String::hash_code(wrapped_str.oop_str());
|
||||
case StringType::UnicodeStr:
|
||||
return java_lang_String::hash_code(wrapped_str.unicode_str, static_cast<int>(wrapped_str.length));
|
||||
case StringType::SymbolStr:
|
||||
return java_lang_String::hash_code(get_symbol_utf8(wrapped_str), wrapped_str.length);
|
||||
case StringType::UTF8Str:
|
||||
return java_lang_String::hash_code(wrapped_str.utf8_str, wrapped_str.length);
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Unnamed int needed to fit CompactHashtable's equals type signature
|
||||
bool StringTable::wrapped_string_equals(oop java_string, const StringWrapper& wrapped_str, int) {
|
||||
switch (wrapped_str.type) {
|
||||
case StringType::OopStr:
|
||||
return java_lang_String::equals(java_string, wrapped_str.oop_str());
|
||||
case StringType::UnicodeStr:
|
||||
return java_lang_String::equals(java_string, wrapped_str.unicode_str, static_cast<int>(wrapped_str.length));
|
||||
case StringType::SymbolStr:
|
||||
return java_lang_String::equals(java_string, get_symbol_utf8(wrapped_str), wrapped_str.length);
|
||||
case StringType::UTF8Str:
|
||||
return java_lang_String::equals(java_string, wrapped_str.utf8_str, wrapped_str.length);
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
class StringTableConfig : public StackObj {
|
||||
private:
|
||||
public:
|
||||
@ -163,22 +220,33 @@ class StringTableConfig : public StackObj {
|
||||
}
|
||||
};
|
||||
|
||||
class StringTableLookupJchar : StackObj {
|
||||
private:
|
||||
Thread* _thread;
|
||||
class StringTableLookup : StackObj {
|
||||
uintx _hash;
|
||||
int _len;
|
||||
const jchar* _str;
|
||||
|
||||
protected:
|
||||
Thread* _thread;
|
||||
Handle _found;
|
||||
|
||||
public:
|
||||
StringTableLookupJchar(Thread* thread, uintx hash, const jchar* key, int len)
|
||||
: _thread(thread), _hash(hash), _len(len), _str(key) {
|
||||
public:
|
||||
StringTableLookup(Thread* thread, uintx hash)
|
||||
: _hash(hash), _thread(thread) {}
|
||||
uintx get_hash() const { return _hash; }
|
||||
bool is_dead(WeakHandle* value) {
|
||||
oop val_oop = value->peek();
|
||||
return val_oop == nullptr;
|
||||
}
|
||||
uintx get_hash() const {
|
||||
return _hash;
|
||||
}
|
||||
bool equals(WeakHandle* value) {
|
||||
};
|
||||
|
||||
class StringTableLookupUnicode : public StringTableLookup {
|
||||
private:
|
||||
const jchar* _str;
|
||||
int _len;
|
||||
|
||||
public:
|
||||
StringTableLookupUnicode(Thread* thread, uintx hash, const jchar* key, int len)
|
||||
: StringTableLookup(thread, hash), _str(key), _len(len) {}
|
||||
|
||||
bool equals(const WeakHandle* value) {
|
||||
oop val_oop = value->peek();
|
||||
if (val_oop == nullptr) {
|
||||
return false;
|
||||
@ -188,29 +256,42 @@ class StringTableLookupJchar : StackObj {
|
||||
return false;
|
||||
}
|
||||
// Need to resolve weak handle and Handleize through possible safepoint.
|
||||
_found = Handle(_thread, value->resolve());
|
||||
_found = Handle(_thread, value->resolve());
|
||||
return true;
|
||||
}
|
||||
bool is_dead(WeakHandle* value) {
|
||||
oop val_oop = value->peek();
|
||||
return val_oop == nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
class StringTableLookupOop : public StackObj {
|
||||
private:
|
||||
Thread* _thread;
|
||||
uintx _hash;
|
||||
Handle _find;
|
||||
Handle _found; // Might be a different oop with the same value that's already
|
||||
// in the table, which is the point.
|
||||
public:
|
||||
StringTableLookupOop(Thread* thread, uintx hash, Handle handle)
|
||||
: _thread(thread), _hash(hash), _find(handle) { }
|
||||
class StringTableLookupUTF8 : public StringTableLookup {
|
||||
private:
|
||||
const char* _str;
|
||||
size_t _utf8_len;
|
||||
|
||||
uintx get_hash() const {
|
||||
return _hash;
|
||||
public:
|
||||
StringTableLookupUTF8(Thread* thread, uintx hash, const char* key, size_t utf8_len)
|
||||
: StringTableLookup(thread, hash), _str(key), _utf8_len(utf8_len) {}
|
||||
|
||||
bool equals(const WeakHandle* value) {
|
||||
oop val_oop = value->peek();
|
||||
if (val_oop == nullptr) {
|
||||
return false;
|
||||
}
|
||||
bool equals = java_lang_String::equals(val_oop, _str, _utf8_len);
|
||||
if (!equals) {
|
||||
return false;
|
||||
}
|
||||
// Need to resolve weak handle and Handleize through possible safepoint.
|
||||
_found = Handle(_thread, value->resolve());
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class StringTableLookupOop : public StringTableLookup {
|
||||
private:
|
||||
Handle _find;
|
||||
|
||||
public:
|
||||
StringTableLookupOop(Thread* thread, uintx hash, Handle handle)
|
||||
: StringTableLookup(thread, hash), _find(handle) {}
|
||||
|
||||
bool equals(WeakHandle* value) {
|
||||
oop val_oop = value->peek();
|
||||
@ -225,11 +306,6 @@ class StringTableLookupOop : public StackObj {
|
||||
_found = Handle(_thread, value->resolve());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_dead(WeakHandle* value) {
|
||||
oop val_oop = value->peek();
|
||||
return val_oop == nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
void StringTable::create_table() {
|
||||
@ -291,14 +367,15 @@ oop StringTable::lookup(Symbol* symbol) {
|
||||
|
||||
oop StringTable::lookup(const jchar* name, int len) {
|
||||
unsigned int hash = java_lang_String::hash_code(name, len);
|
||||
oop string = lookup_shared(name, len, hash);
|
||||
StringWrapper wrapped_name(name, len);
|
||||
oop string = lookup_shared(wrapped_name, hash);
|
||||
if (string != nullptr) {
|
||||
return string;
|
||||
}
|
||||
if (_alt_hash) {
|
||||
hash = hash_string(name, len, true);
|
||||
}
|
||||
return do_lookup(name, len, hash);
|
||||
return do_lookup(wrapped_name, hash);
|
||||
}
|
||||
|
||||
class StringTableGet : public StackObj {
|
||||
@ -323,80 +400,140 @@ void StringTable::update_needs_rehash(bool rehash) {
|
||||
}
|
||||
}
|
||||
|
||||
oop StringTable::do_lookup(const jchar* name, int len, uintx hash) {
|
||||
oop StringTable::do_lookup(const StringWrapper& name, uintx hash) {
|
||||
Thread* thread = Thread::current();
|
||||
StringTableLookupJchar lookup(thread, hash, name, len);
|
||||
StringTableGet stg(thread);
|
||||
bool rehash_warning;
|
||||
_local_table->get(thread, lookup, stg, &rehash_warning);
|
||||
|
||||
switch (name.type) {
|
||||
case StringType::OopStr: {
|
||||
StringTableLookupOop lookup(thread, hash, name.oop_str);
|
||||
_local_table->get(thread, lookup, stg, &rehash_warning);
|
||||
break;
|
||||
}
|
||||
case StringType::UnicodeStr: {
|
||||
StringTableLookupUnicode lookup(thread, hash, name.unicode_str, static_cast<int>(name.length));
|
||||
_local_table->get(thread, lookup, stg, &rehash_warning);
|
||||
break;
|
||||
}
|
||||
case StringType::SymbolStr: {
|
||||
StringTableLookupUTF8 lookup(thread, hash, get_symbol_utf8(name), name.length);
|
||||
_local_table->get(thread, lookup, stg, &rehash_warning);
|
||||
break;
|
||||
}
|
||||
case StringType::UTF8Str: {
|
||||
StringTableLookupUTF8 lookup(thread, hash, name.utf8_str, name.length);
|
||||
_local_table->get(thread, lookup, stg, &rehash_warning);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
update_needs_rehash(rehash_warning);
|
||||
return stg.get_res_oop();
|
||||
}
|
||||
|
||||
// Converts and allocates to a unicode string and stores the unicode length in len
|
||||
const jchar* StringTable::to_unicode(const StringWrapper& wrapped_str, int &len, TRAPS) {
|
||||
switch (wrapped_str.type) {
|
||||
case StringType::UnicodeStr:
|
||||
len = static_cast<int>(wrapped_str.length);
|
||||
return wrapped_str.unicode_str;
|
||||
case StringType::OopStr:
|
||||
return java_lang_String::as_unicode_string(wrapped_str.oop_str(), len, CHECK_NULL);
|
||||
case StringType::SymbolStr: {
|
||||
const char* utf8_str = get_symbol_utf8(wrapped_str);
|
||||
int unicode_length = UTF8::unicode_length(utf8_str, wrapped_str.symbol_str->utf8_length());
|
||||
jchar* chars = NEW_RESOURCE_ARRAY(jchar, unicode_length);
|
||||
UTF8::convert_to_unicode(utf8_str, chars, unicode_length);
|
||||
len = unicode_length;
|
||||
return chars;
|
||||
}
|
||||
case StringType::UTF8Str: {
|
||||
int unicode_length = UTF8::unicode_length(wrapped_str.utf8_str);
|
||||
jchar* chars = NEW_RESOURCE_ARRAY(jchar, unicode_length);
|
||||
UTF8::convert_to_unicode(wrapped_str.utf8_str, chars, unicode_length);
|
||||
len = unicode_length;
|
||||
return chars;
|
||||
}
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Produces a Handle for the wrapped string. An OopStr wrapper already carries
// a Handle, which is returned unchanged; every other representation goes
// through the matching java_lang_String::create_from_* factory. An unknown
// type hits ShouldNotReachHere(); the fall-through result is an empty Handle.
// THREAD is passed straight through, so the caller is expected to check for a
// pending exception.
Handle StringTable::handle_from_wrapped_string(const StringWrapper& wrapped_str, TRAPS) {
  Handle result;
  switch (wrapped_str.type) {
    case StringType::OopStr:
      result = wrapped_str.oop_str;
      break;
    case StringType::UnicodeStr:
      result = java_lang_String::create_from_unicode(wrapped_str.unicode_str, static_cast<int>(wrapped_str.length), THREAD);
      break;
    case StringType::SymbolStr:
      result = java_lang_String::create_from_symbol(wrapped_str.symbol_str, THREAD);
      break;
    case StringType::UTF8Str:
      result = java_lang_String::create_from_str(wrapped_str.utf8_str, THREAD);
      break;
    default:
      ShouldNotReachHere();
  }
  return result;
}
|
||||
|
||||
// Interning
|
||||
oop StringTable::intern(Symbol* symbol, TRAPS) {
|
||||
if (symbol == nullptr) return nullptr;
|
||||
ResourceMark rm(THREAD);
|
||||
int length;
|
||||
jchar* chars = symbol->as_unicode(length);
|
||||
Handle string;
|
||||
oop result = intern(string, chars, length, CHECK_NULL);
|
||||
int length = symbol->utf8_length();
|
||||
StringWrapper name(symbol, length);
|
||||
oop result = intern(name, CHECK_NULL);
|
||||
return result;
|
||||
}
|
||||
|
||||
oop StringTable::intern(oop string, TRAPS) {
|
||||
if (string == nullptr) return nullptr;
|
||||
ResourceMark rm(THREAD);
|
||||
int length;
|
||||
int length = java_lang_String::length(string);
|
||||
Handle h_string (THREAD, string);
|
||||
jchar* chars = java_lang_String::as_unicode_string(string, length,
|
||||
CHECK_NULL);
|
||||
oop result = intern(h_string, chars, length, CHECK_NULL);
|
||||
StringWrapper name(h_string, length);
|
||||
oop result = intern(name, CHECK_NULL);
|
||||
return result;
|
||||
}
|
||||
|
||||
oop StringTable::intern(const char* utf8_string, TRAPS) {
|
||||
if (utf8_string == nullptr) return nullptr;
|
||||
ResourceMark rm(THREAD);
|
||||
int length = UTF8::unicode_length(utf8_string);
|
||||
jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
|
||||
UTF8::convert_to_unicode(utf8_string, chars, length);
|
||||
Handle string;
|
||||
oop result = intern(string, chars, length, CHECK_NULL);
|
||||
size_t length = strlen(utf8_string);
|
||||
StringWrapper name(utf8_string, length);
|
||||
oop result = intern(name, CHECK_NULL);
|
||||
return result;
|
||||
}
|
||||
|
||||
oop StringTable::intern(Handle string_or_null_h, const jchar* name, int len, TRAPS) {
|
||||
oop StringTable::intern(const StringWrapper& name, TRAPS) {
|
||||
// shared table always uses java_lang_String::hash_code
|
||||
unsigned int hash = java_lang_String::hash_code(name, len);
|
||||
oop found_string = lookup_shared(name, len, hash);
|
||||
unsigned int hash = hash_wrapped_string(name);
|
||||
oop found_string = lookup_shared(name, hash);
|
||||
if (found_string != nullptr) {
|
||||
return found_string;
|
||||
}
|
||||
|
||||
if (_alt_hash) {
|
||||
hash = hash_string(name, len, true);
|
||||
ResourceMark rm(THREAD);
|
||||
// Convert to unicode for alt hashing
|
||||
int unicode_length;
|
||||
const jchar* chars = to_unicode(name, unicode_length, CHECK_NULL);
|
||||
hash = hash_string(chars, unicode_length, true);
|
||||
}
|
||||
found_string = do_lookup(name, len, hash);
|
||||
|
||||
found_string = do_lookup(name, hash);
|
||||
if (found_string != nullptr) {
|
||||
return found_string;
|
||||
}
|
||||
return do_intern(string_or_null_h, name, len, hash, THREAD);
|
||||
return do_intern(name, hash, THREAD);
|
||||
}
|
||||
|
||||
oop StringTable::do_intern(Handle string_or_null_h, const jchar* name,
|
||||
int len, uintx hash, TRAPS) {
|
||||
oop StringTable::do_intern(const StringWrapper& name, uintx hash, TRAPS) {
|
||||
HandleMark hm(THREAD); // cleanup strings created
|
||||
Handle string_h;
|
||||
Handle string_h = handle_from_wrapped_string(name, CHECK_NULL);
|
||||
|
||||
if (!string_or_null_h.is_null()) {
|
||||
string_h = string_or_null_h;
|
||||
} else {
|
||||
string_h = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
|
||||
}
|
||||
|
||||
assert(java_lang_String::equals(string_h(), name, len),
|
||||
assert(StringTable::wrapped_string_equals(string_h(), name),
|
||||
"string must be properly initialized");
|
||||
assert(len == java_lang_String::length(string_h()), "Must be same length");
|
||||
|
||||
// Notify deduplication support that the string is being interned. A string
|
||||
// must never be deduplicated after it has been interned. Doing so interferes
|
||||
@ -410,7 +547,7 @@ oop StringTable::do_intern(Handle string_or_null_h, const jchar* name,
|
||||
|
||||
bool rehash_warning;
|
||||
do {
|
||||
// Callers have already looked up the String using the jchar* name, so just go to add.
|
||||
// Callers have already looked up the String, so just go to add.
|
||||
WeakHandle wh(_oop_storage, string_h);
|
||||
// The hash table takes ownership of the WeakHandle, even if it's not inserted.
|
||||
if (_local_table->insert(THREAD, lookup, wh, &rehash_warning)) {
|
||||
@ -775,14 +912,17 @@ size_t StringTable::shared_entry_count() {
|
||||
return _shared_table.entry_count();
|
||||
}
|
||||
|
||||
oop StringTable::lookup_shared(const jchar* name, int len, unsigned int hash) {
|
||||
assert(hash == java_lang_String::hash_code(name, len),
|
||||
oop StringTable::lookup_shared(const StringWrapper& name, unsigned int hash) {
|
||||
assert(hash == hash_wrapped_string(name),
|
||||
"hash must be computed using java_lang_String::hash_code");
|
||||
return _shared_table.lookup(name, hash, len);
|
||||
// len is required but is already part of StringWrapper, so 0 is used
|
||||
return _shared_table.lookup(name, hash, 0);
|
||||
}
|
||||
|
||||
oop StringTable::lookup_shared(const jchar* name, int len) {
|
||||
return _shared_table.lookup(name, java_lang_String::hash_code(name, len), len);
|
||||
StringWrapper wrapped_name(name, len);
|
||||
// len is required but is already part of StringWrapper, so 0 is used
|
||||
return _shared_table.lookup(wrapped_name, java_lang_String::hash_code(name, len), 0);
|
||||
}
|
||||
|
||||
// This is called BEFORE we enter the CDS safepoint. We can allocate heap objects.
|
||||
|
@ -56,6 +56,18 @@ class StringTable : AllStatic {
|
||||
static double get_load_factor();
|
||||
static double get_dead_factor(size_t num_dead);
|
||||
|
||||
public:
|
||||
typedef struct StringWrapperInternal StringWrapper;
|
||||
|
||||
// Unnamed int needed to fit CompactHashtable's equals type signature
|
||||
static bool wrapped_string_equals(oop java_string, const StringWrapper& wrapped_str, int = 0);
|
||||
|
||||
private:
|
||||
static const char* get_symbol_utf8(const StringWrapper& symbol_str);
|
||||
static unsigned int hash_wrapped_string(const StringWrapper& wrapped_str);
|
||||
static const jchar* to_unicode(const StringWrapper& wrapped_str, int &len, TRAPS);
|
||||
static Handle handle_from_wrapped_string(const StringWrapper& wrapped_str, TRAPS);
|
||||
|
||||
// GC support
|
||||
|
||||
// Callback for GC to notify of changes that might require cleaning or resize.
|
||||
@ -65,9 +77,9 @@ class StringTable : AllStatic {
|
||||
static void item_added();
|
||||
static void item_removed();
|
||||
|
||||
static oop intern(Handle string_or_null_h, const jchar* name, int len, TRAPS);
|
||||
static oop do_intern(Handle string_or_null, const jchar* name, int len, uintx hash, TRAPS);
|
||||
static oop do_lookup(const jchar* name, int len, uintx hash);
|
||||
static oop intern(const StringWrapper& name, TRAPS);
|
||||
static oop do_intern(const StringWrapper& name, uintx hash, TRAPS);
|
||||
static oop do_lookup(const StringWrapper& name, uintx hash);
|
||||
|
||||
static void print_table_statistics(outputStream* st);
|
||||
|
||||
@ -87,7 +99,7 @@ class StringTable : AllStatic {
|
||||
// Interning
|
||||
static oop intern(Symbol* symbol, TRAPS);
|
||||
static oop intern(oop string, TRAPS);
|
||||
static oop intern(const char *utf8_string, TRAPS);
|
||||
static oop intern(const char* utf8_string, TRAPS);
|
||||
|
||||
// Rehash the string table if it gets out of balance
|
||||
private:
|
||||
@ -131,7 +143,7 @@ private:
|
||||
#endif // INCLUDE_CDS_JAVA_HEAP
|
||||
|
||||
private:
|
||||
static oop lookup_shared(const jchar* name, int len, unsigned int hash) NOT_CDS_JAVA_HEAP_RETURN_(nullptr);
|
||||
static oop lookup_shared(const StringWrapper& name, unsigned int hash) NOT_CDS_JAVA_HEAP_RETURN_(nullptr);
|
||||
public:
|
||||
static oop lookup_shared(const jchar* name, int len) NOT_CDS_JAVA_HEAP_RETURN_(nullptr);
|
||||
static size_t shared_entry_count() NOT_CDS_JAVA_HEAP_RETURN_(0);
|
||||
|
194
test/hotspot/gtest/classfile/test_stringConversion.cpp
Normal file
194
test/hotspot/gtest/classfile/test_stringConversion.cpp
Normal file
@ -0,0 +1,194 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "runtime/interfaceSupport.inline.hpp"
|
||||
#include "unittest.hpp"
|
||||
|
||||
// Tests that string functions (hash code/equals) stay consistent when comparing equal strings and converting between string types
|
||||
|
||||
// Simple ASCII string "Java(R)!!"
|
||||
// Same length in both UTF8 and Unicode
|
||||
static const char static_ascii_utf8_str[] = {0x4A, 0x61, 0x76, 0x61, 0x28, 0x52, 0x29, 0x21, 0x21};
|
||||
static const jchar static_ascii_unicode_str[] = {0x004A, 0x0061, 0x0076, 0x0061, 0x0028, 0x0052, 0x0029, 0x0021, 0x0021};
|
||||
|
||||
// Complex string "Jāvá®!☺☻", UTF8 byte lengths per visible character are 1+3+1+2+2+1+3+3 = 16 ("ā" is 'a' followed by a 2-byte combining macron)
|
||||
static const unsigned char static_utf8_str[] = {0x4A, 0x61, 0xCC, 0x84, 0x76, 0xC3, 0xA1, 0xC2, 0xAE, 0x21, 0xE2, 0x98, 0xBA, 0xE2, 0x98, 0xBB};
|
||||
static const jchar static_unicode_str[] = { 0x004A, 0x0061, 0x0304, 0x0076, 0x00E1, 0x00AE, 0x0021, 0x263A, 0x263B};
|
||||
|
||||
static const int ASCII_LENGTH = 9;
|
||||
static const size_t UTF8_LENGTH = 16;
|
||||
static const int UNICODE_LENGTH = 9;
|
||||
|
||||
// Expects two UTF-8 strings to hash identically over utf8_len bytes and to be
// equal as C strings.
void compare_utf8_utf8(const char* utf8_str1, const char* utf8_str2, size_t utf8_len) {
  EXPECT_EQ(
      java_lang_String::hash_code(utf8_str1, utf8_len),
      java_lang_String::hash_code(utf8_str2, utf8_len));
  EXPECT_STREQ(
      utf8_str1,
      utf8_str2);
}
|
||||
|
||||
// Expects the UTF-8 hash (over utf8_len bytes) to equal the jchar hash (over
// unicode_len jchars).
void compare_utf8_unicode(const char* utf8_str, const jchar* unicode_str, size_t utf8_len, int unicode_len) {
  EXPECT_EQ(
      java_lang_String::hash_code(utf8_str, utf8_len),
      java_lang_String::hash_code(unicode_str, unicode_len));
}
|
||||
|
||||
// Expects a UTF-8 string and a String oop to hash identically and to compare
// equal through java_lang_String::equals. unicode_len is not used here.
void compare_utf8_oop(const char* utf8_str, Handle oop_str, size_t utf8_len, int unicode_len) {
  EXPECT_EQ(
      java_lang_String::hash_code(utf8_str, utf8_len),
      java_lang_String::hash_code(oop_str()));
  EXPECT_TRUE(
      java_lang_String::equals(oop_str(), utf8_str, utf8_len));
}
|
||||
|
||||
// Expects two jchar arrays of unicode_len elements to hash identically and to
// match element by element.
void compare_unicode_unicode(const jchar* unicode_str1, const jchar* unicode_str2, int unicode_len) {
  EXPECT_EQ(
      java_lang_String::hash_code(unicode_str1, unicode_len),
      java_lang_String::hash_code(unicode_str2, unicode_len));
  int i = 0;
  while (i < unicode_len) {
    EXPECT_EQ(unicode_str1[i], unicode_str2[i]);
    i++;
  }
}
|
||||
|
||||
// Expects a jchar array and a String oop to hash identically and to compare
// equal through java_lang_String::equals.
void compare_unicode_oop(const jchar* unicode_str, Handle oop_str, int unicode_len) {
  EXPECT_EQ(
      java_lang_String::hash_code(unicode_str, unicode_len),
      java_lang_String::hash_code(oop_str()));
  EXPECT_TRUE(
      java_lang_String::equals(oop_str(), unicode_str, unicode_len));
}
|
||||
|
||||
// Expects two String oops to hash identically and to compare equal through
// java_lang_String::equals.
void compare_oop_oop(Handle oop_str1, Handle oop_str2) {
  EXPECT_EQ(
      java_lang_String::hash_code(oop_str1()),
      java_lang_String::hash_code(oop_str2()));
  EXPECT_TRUE(
      java_lang_String::equals(oop_str1(), oop_str2()));
}
|
||||
|
||||
void test_utf8_convert(const char* utf8_str, size_t utf8_len, int unicode_len) {
|
||||
EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str, strlen(utf8_str), false));
|
||||
|
||||
JavaThread* THREAD = JavaThread::current();
|
||||
ThreadInVMfromNative ThreadInVMfromNative(THREAD);
|
||||
ResourceMark rm(THREAD);
|
||||
HandleMark hm(THREAD);
|
||||
|
||||
jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len);
|
||||
UTF8::convert_to_unicode(utf8_str, unicode_str_from_utf8, unicode_len);
|
||||
Handle oop_str_from_utf8 = java_lang_String::create_from_str(utf8_str, THREAD);
|
||||
|
||||
compare_utf8_unicode(utf8_str, unicode_str_from_utf8, utf8_len, unicode_len);
|
||||
compare_utf8_oop(utf8_str, oop_str_from_utf8, utf8_len, unicode_len);
|
||||
|
||||
size_t length = unicode_len;
|
||||
const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str_from_utf8, length);
|
||||
const char* utf8_str_from_oop = java_lang_String::as_utf8_string(oop_str_from_utf8());
|
||||
|
||||
EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_unicode, strlen(utf8_str_from_unicode), false));
|
||||
EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_oop, strlen(utf8_str_from_oop), false));
|
||||
|
||||
compare_utf8_utf8(utf8_str, utf8_str_from_unicode, utf8_len);
|
||||
compare_utf8_utf8(utf8_str, utf8_str_from_oop, utf8_len);
|
||||
}
|
||||
|
||||
void test_unicode_convert(const jchar* unicode_str, size_t utf8_len, int unicode_len) {
|
||||
JavaThread* THREAD = JavaThread::current();
|
||||
ThreadInVMfromNative ThreadInVMfromNative(THREAD);
|
||||
ResourceMark rm(THREAD);
|
||||
HandleMark hm(THREAD);
|
||||
|
||||
size_t length = unicode_len;
|
||||
const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str, length);
|
||||
Handle oop_str_from_unicode = java_lang_String::create_from_unicode(unicode_str, unicode_len, THREAD);
|
||||
|
||||
EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_unicode, strlen(utf8_str_from_unicode), false));
|
||||
|
||||
compare_utf8_unicode(utf8_str_from_unicode, unicode_str, utf8_len, unicode_len);
|
||||
compare_unicode_oop(unicode_str, oop_str_from_unicode, unicode_len);
|
||||
|
||||
int _;
|
||||
jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len);
|
||||
UTF8::convert_to_unicode(utf8_str_from_unicode, unicode_str_from_utf8, unicode_len);
|
||||
const jchar* unicode_str_from_oop = java_lang_String::as_unicode_string(oop_str_from_unicode(), _, THREAD);
|
||||
|
||||
compare_unicode_unicode(unicode_str, unicode_str_from_utf8, unicode_len);
|
||||
compare_unicode_unicode(unicode_str, unicode_str_from_oop, unicode_len);
|
||||
}
|
||||
|
||||
void test_utf8_unicode_cross(const char* utf8_str, const jchar* unicode_str, size_t utf8_len, int unicode_len) {
|
||||
compare_utf8_unicode(utf8_str, unicode_str, utf8_len, unicode_len);
|
||||
|
||||
JavaThread* THREAD = JavaThread::current();
|
||||
ThreadInVMfromNative ThreadInVMfromNative(THREAD);
|
||||
ResourceMark rm(THREAD);
|
||||
HandleMark hm(THREAD);
|
||||
|
||||
size_t length = unicode_len;
|
||||
const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str, length);
|
||||
|
||||
jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len);
|
||||
UTF8::convert_to_unicode(utf8_str, unicode_str_from_utf8, unicode_len);
|
||||
|
||||
Handle oop_str_from_unicode = java_lang_String::create_from_unicode(unicode_str, unicode_len, THREAD);
|
||||
Handle oop_str_from_utf8 = java_lang_String::create_from_str(utf8_str, THREAD);
|
||||
|
||||
compare_utf8_utf8(utf8_str, utf8_str_from_unicode, utf8_len);
|
||||
compare_utf8_oop(utf8_str, oop_str_from_unicode, utf8_len, unicode_len);
|
||||
|
||||
compare_unicode_unicode(unicode_str, unicode_str_from_utf8, unicode_len);
|
||||
compare_unicode_oop(unicode_str, oop_str_from_utf8, unicode_len);
|
||||
|
||||
compare_utf8_oop(utf8_str_from_unicode, oop_str_from_utf8, utf8_len, unicode_len);
|
||||
compare_unicode_oop(unicode_str_from_utf8, oop_str_from_unicode, unicode_len);
|
||||
|
||||
compare_utf8_unicode(utf8_str_from_unicode, unicode_str_from_utf8, utf8_len, unicode_len);
|
||||
compare_oop_oop(oop_str_from_utf8, oop_str_from_unicode);
|
||||
}
|
||||
|
||||
// Round-trips the ASCII sample starting from its UTF-8 form.
TEST_VM(StringConversion, fromUTF8_ascii) {
  // Zero-initialised and one byte larger than the data, so the copy below
  // leaves a NUL terminator in place. The buffer must not be const: it is
  // written by memcpy, and modifying a const object is undefined behaviour.
  char utf8_str[ASCII_LENGTH + 1] = { };
  memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH);
  test_utf8_convert(utf8_str, ASCII_LENGTH, ASCII_LENGTH);
}
|
||||
|
||||
// Converts the multi-byte UTF-8 sample and checks all derived representations.
TEST_VM(StringConversion, fromUTF8_varlen) {
  // One extra, zero-initialized byte keeps the copy NUL-terminated.
  // The buffer must not be const: memcpy writes into it, and modifying a
  // const object (even through a cast) is undefined behavior.
  char utf8_str[UTF8_LENGTH + 1] = { };
  memcpy(utf8_str, static_utf8_str, UTF8_LENGTH);
  test_utf8_convert(utf8_str, UTF8_LENGTH, UNICODE_LENGTH);
}
|
||||
|
||||
// Converts the pure-ASCII UTF-16 sample and checks all derived representations.
TEST_VM(StringConversion, fromUnicode_ascii) {
  // Work on a local copy of the static sample; the array is exactly
  // ASCII_LENGTH jchars, so sizeof() covers the whole sample.
  jchar unicode_str[ASCII_LENGTH] = { };
  memcpy(unicode_str, static_ascii_unicode_str, sizeof(unicode_str));

  test_unicode_convert(unicode_str, ASCII_LENGTH, ASCII_LENGTH);
}
|
||||
|
||||
// Converts the UTF-16 sample whose UTF-8 form is multi-byte and checks all
// derived representations.
TEST_VM(StringConversion, fromUnicode_varlen) {
  // Work on a local copy of the static sample; the array is exactly
  // UNICODE_LENGTH jchars, so sizeof() covers the whole sample.
  jchar unicode_str[UNICODE_LENGTH] = { };
  memcpy(unicode_str, static_unicode_str, sizeof(unicode_str));

  test_unicode_convert(unicode_str, UTF8_LENGTH, UNICODE_LENGTH);
}
|
||||
|
||||
// Cross-checks the pure-ASCII sample in its UTF-8 and UTF-16 forms.
TEST_VM(StringConversion, cross_ascii) {
  // One extra, zero-initialized byte keeps the UTF-8 copy NUL-terminated.
  // The buffer must not be const: memcpy writes into it, and modifying a
  // const object (even through a cast) is undefined behavior.
  char utf8_str[ASCII_LENGTH + 1] = { };
  jchar unicode_str[ASCII_LENGTH] = { };
  memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH);
  memcpy(unicode_str, static_ascii_unicode_str, ASCII_LENGTH * sizeof(jchar));

  test_utf8_unicode_cross(utf8_str, unicode_str, ASCII_LENGTH, ASCII_LENGTH);
}
|
||||
|
||||
// Cross-checks the multi-byte sample in its UTF-8 and UTF-16 forms.
TEST_VM(StringConversion, cross_varlen) {
  // One extra, zero-initialized byte keeps the UTF-8 copy NUL-terminated.
  // The buffer must not be const: memcpy writes into it, and modifying a
  // const object (even through a cast) is undefined behavior.
  char utf8_str[UTF8_LENGTH + 1] = { };
  jchar unicode_str[UNICODE_LENGTH] = { };
  memcpy(utf8_str, static_utf8_str, UTF8_LENGTH);
  memcpy(unicode_str, static_unicode_str, UNICODE_LENGTH * sizeof(jchar));

  test_utf8_unicode_cross(utf8_str, unicode_str, UTF8_LENGTH, UNICODE_LENGTH);
}
|
71
test/hotspot/gtest/classfile/test_stringIntern.cpp
Normal file
71
test/hotspot/gtest/classfile/test_stringIntern.cpp
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "classfile/stringTable.hpp"
|
||||
#include "classfile/symbolTable.hpp"
|
||||
#include "runtime/interfaceSupport.inline.hpp"
|
||||
#include "unittest.hpp"
|
||||
|
||||
// Tests that strings are interned and that interning from different string types returns the same string
|
||||
|
||||
// Simple ASCII string "Java(R)!!"
// The array holds exactly ASCII_LENGTH bytes and has no NUL terminator.
|
||||
static const char static_ascii_utf8_str[] = {0x4A, 0x61, 0x76, 0x61, 0x28, 0x52, 0x29, 0x21, 0x21};
|
||||
// Number of bytes in static_ascii_utf8_str.
static const size_t ASCII_LENGTH = 9;
|
||||
|
||||
// Complex string "Jāvá®!☺☻", has character lengths 13122133 = 16
// (UTF-8 bytes per character; the array has no NUL terminator).
|
||||
static const unsigned char static_utf8_str[] = {0x4A, 0x61, 0xCC, 0x84, 0x76, 0xC3, 0xA1, 0xC2, 0xAE, 0x21, 0xE2, 0x98, 0xBA, 0xE2, 0x98, 0xBB};
|
||||
// Number of bytes in static_utf8_str.
static const size_t COMPLEX_LENGTH = 16;
|
||||
|
||||
void test_intern(const char* utf8_str, size_t utf8_length) {
|
||||
JavaThread* THREAD = JavaThread::current();
|
||||
ThreadInVMfromNative ThreadInVMfromNative(THREAD);
|
||||
HandleMark hm(THREAD);
|
||||
|
||||
oop interned_string_from_utf8 = StringTable::intern(utf8_str, THREAD);
|
||||
|
||||
EXPECT_TRUE(java_lang_String::equals(interned_string_from_utf8, utf8_str, utf8_length));
|
||||
EXPECT_EQ(java_lang_String::hash_code(utf8_str, utf8_length),java_lang_String::hash_code(interned_string_from_utf8));
|
||||
|
||||
Symbol* symbol_from_utf8 = SymbolTable::new_symbol(utf8_str, static_cast<int>(utf8_length));
|
||||
oop interned_string_from_symbol = StringTable::intern(symbol_from_utf8, THREAD);
|
||||
|
||||
EXPECT_EQ(interned_string_from_utf8, interned_string_from_symbol);
|
||||
|
||||
oop interned_string_from_oop1 = StringTable::intern(interned_string_from_utf8, THREAD);
|
||||
|
||||
EXPECT_EQ(interned_string_from_utf8, interned_string_from_oop1);
|
||||
|
||||
}
|
||||
|
||||
// Interns the pure-ASCII sample through every supported input type.
TEST_VM(StringIntern, intern_ascii) {
  // One extra, zero-initialized byte keeps the copy NUL-terminated.
  // The buffer must not be const: memcpy writes into it, and modifying a
  // const object (even through a cast) is undefined behavior.
  char utf8_str[ASCII_LENGTH + 1] = { };
  memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH);
  test_intern(utf8_str, ASCII_LENGTH);
}
|
||||
|
||||
// Interns the multi-byte UTF-8 sample through every supported input type.
TEST_VM(StringIntern, intern_varlen) {
  // One extra, zero-initialized byte keeps the copy NUL-terminated.
  // The buffer must not be const: memcpy writes into it, and modifying a
  // const object (even through a cast) is undefined behavior.
  char utf8_str[COMPLEX_LENGTH + 1] = { };
  memcpy(utf8_str, static_utf8_str, COMPLEX_LENGTH);
  test_intern(utf8_str, COMPLEX_LENGTH);
}
|
Loading…
Reference in New Issue
Block a user