From 75c651f859c1372175040a06c68a08298d4da0f1 Mon Sep 17 00:00:00 2001 From: Casper Norrbin Date: Fri, 15 Nov 2024 11:28:02 +0000 Subject: [PATCH] 8327156: Avoid copying in StringTable::intern(oop, TRAPS) 8326865: Avoid copying in StringTable::intern(Symbol*, TRAPS) 8327825: StringTable::intern is slow Reviewed-by: dholmes, coleenp, jsjolen --- src/hotspot/share/classfile/javaClasses.cpp | 35 +- src/hotspot/share/classfile/javaClasses.hpp | 17 +- src/hotspot/share/classfile/stringTable.cpp | 298 +++++++++++++----- src/hotspot/share/classfile/stringTable.hpp | 22 +- .../gtest/classfile/test_stringConversion.cpp | 194 ++++++++++++ .../gtest/classfile/test_stringIntern.cpp | 71 +++++ 6 files changed, 550 insertions(+), 87 deletions(-) create mode 100644 test/hotspot/gtest/classfile/test_stringConversion.cpp create mode 100644 test/hotspot/gtest/classfile/test_stringIntern.cpp diff --git a/src/hotspot/share/classfile/javaClasses.cpp b/src/hotspot/share/classfile/javaClasses.cpp index 0dd183e06d4..e17de0f2264 100644 --- a/src/hotspot/share/classfile/javaClasses.cpp +++ b/src/hotspot/share/classfile/javaClasses.cpp @@ -347,7 +347,7 @@ Handle java_lang_String::create_from_str(const char* utf8_str, TRAPS) { #ifdef ASSERT // This check is too strict when the input string is not a valid UTF8. // For example, it may be created with arbitrary content via jni_NewStringUTF. 
- if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, strlen(utf8_str), false)) { + if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, strlen(utf8_str), /*version_leq_47*/false)) { ResourceMark rm; const char* expected = utf8_str; char* actual = as_utf8_string(h_obj()); @@ -365,7 +365,7 @@ oop java_lang_String::create_oop_from_str(const char* utf8_str, TRAPS) { return h_obj(); } -Handle java_lang_String::create_from_symbol(Symbol* symbol, TRAPS) { +Handle java_lang_String::create_from_symbol(const Symbol* symbol, TRAPS) { const char* utf8_str = (char*)symbol->bytes(); int utf8_len = symbol->utf8_length(); @@ -389,6 +389,8 @@ Handle java_lang_String::create_from_symbol(Symbol* symbol, TRAPS) { } #ifdef ASSERT + // This check is too strict on older classfile versions + if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, utf8_len, /*version_leq_47*/false)) { ResourceMark rm; const char* expected = symbol->as_utf8(); @@ -755,6 +757,35 @@ bool java_lang_String::equals(oop java_string, const jchar* chars, int len) { return true; } +bool java_lang_String::equals(oop java_string, const char* utf8_string, size_t utf8_len) { + assert(java_string->klass() == vmClasses::String_klass(), + "must be java_string"); + typeArrayOop value = java_lang_String::value_no_keepalive(java_string); + int length = java_lang_String::length(java_string, value); + int unicode_length = UTF8::unicode_length(utf8_string, utf8_len); + if (length != unicode_length) { + return false; + } + bool is_latin1 = java_lang_String::is_latin1(java_string); + jchar c; + if (!is_latin1) { + for (int i = 0; i < unicode_length; i++) { + utf8_string = UTF8::next(utf8_string, &c); + if (value->char_at(i) != c) { + return false; + } + } + } else { + for (int i = 0; i < unicode_length; i++) { + utf8_string = UTF8::next(utf8_string, &c); + if ((((jchar) value->byte_at(i)) & 0xff) != c) { + return false; + } + } + } + return true; +} + bool java_lang_String::equals(oop str1, oop str2) { assert(str1->klass() 
== vmClasses::String_klass(), "must be java String"); diff --git a/src/hotspot/share/classfile/javaClasses.hpp b/src/hotspot/share/classfile/javaClasses.hpp index e0123595810..a2e6ad7349c 100644 --- a/src/hotspot/share/classfile/javaClasses.hpp +++ b/src/hotspot/share/classfile/javaClasses.hpp @@ -32,6 +32,7 @@ #include "runtime/handles.hpp" #include "runtime/os.hpp" #include "utilities/macros.hpp" +#include "utilities/utf8.hpp" #include "utilities/vmEnums.hpp" class JvmtiThreadState; @@ -99,7 +100,7 @@ class java_lang_String : AllStatic { static oop create_oop_from_unicode(const jchar* unicode, int len, TRAPS); static Handle create_from_str(const char* utf8_str, TRAPS); static oop create_oop_from_str(const char* utf8_str, TRAPS); - static Handle create_from_symbol(Symbol* symbol, TRAPS); + static Handle create_from_symbol(const Symbol* symbol, TRAPS); static Handle create_from_platform_dependent_str(const char* str, TRAPS); static void set_compact_strings(bool value); @@ -180,10 +181,24 @@ class java_lang_String : AllStatic { return h; } + static unsigned int hash_code(const char* utf8_str, size_t utf8_len) { + unsigned int h = 0; + int unicode_length = UTF8::unicode_length(utf8_str, utf8_len); + + jchar c; + while (unicode_length-- > 0) { + utf8_str = UTF8::next(utf8_str, &c); + h = 31 * h + ((unsigned int)c); + } + return h; + } + static unsigned int hash_code(oop java_string); static unsigned int hash_code_noupdate(oop java_string); + // Compare strings (of different types/encodings), length is the string (array) length static bool equals(oop java_string, const jchar* chars, int len); + static bool equals(oop java_string, const char* utf8_str, size_t utf8_len); static bool equals(oop str1, oop str2); static inline bool value_equals(typeArrayOop str_value1, typeArrayOop str_value2); diff --git a/src/hotspot/share/classfile/stringTable.cpp b/src/hotspot/share/classfile/stringTable.cpp index 3a6cf166ff5..4de4b8e333c 100644 --- 
a/src/hotspot/share/classfile/stringTable.cpp +++ b/src/hotspot/share/classfile/stringTable.cpp @@ -99,9 +99,9 @@ inline oop StringTable::read_string_from_compact_hashtable(address base_address, } typedef CompactHashtable< - const jchar*, oop, + const StringTable::StringWrapper&, oop, StringTable::read_string_from_compact_hashtable, - java_lang_String::equals> SharedStringTable; + StringTable::wrapped_string_equals> SharedStringTable; static SharedStringTable _shared_table; #endif @@ -123,12 +123,69 @@ volatile bool _alt_hash = false; static bool _rehashed = false; static uint64_t _alt_hash_seed = 0; +enum class StringType { + OopStr, UnicodeStr, SymbolStr, UTF8Str +}; + +struct StringWrapperInternal { + union { + const Handle oop_str; + const jchar* unicode_str; + const Symbol* symbol_str; + const char* utf8_str; + }; + const StringType type; + const size_t length; + + StringWrapperInternal(const Handle oop_str, const size_t length) : oop_str(oop_str), type(StringType::OopStr), length(length) {} + StringWrapperInternal(const jchar* unicode_str, const size_t length) : unicode_str(unicode_str), type(StringType::UnicodeStr), length(length) {} + StringWrapperInternal(const Symbol* symbol_str, const size_t length) : symbol_str(symbol_str), type(StringType::SymbolStr), length(length) {} + StringWrapperInternal(const char* utf8_str, const size_t length) : utf8_str(utf8_str), type(StringType::UTF8Str), length(length) {} +}; + static unsigned int hash_string(const jchar* s, int len, bool useAlt) { return useAlt ? 
AltHashing::halfsiphash_32(_alt_hash_seed, s, len) : java_lang_String::hash_code(s, len); } +const char* StringTable::get_symbol_utf8(const StringWrapper& symbol) { + return reinterpret_cast<const char*>(symbol.symbol_str->bytes()); // symbol bytes viewed as a UTF-8 char sequence +} + +unsigned int StringTable::hash_wrapped_string(const StringWrapper& wrapped_str) { + switch (wrapped_str.type) { + case StringType::OopStr: + return java_lang_String::hash_code(wrapped_str.oop_str()); + case StringType::UnicodeStr: + return java_lang_String::hash_code(wrapped_str.unicode_str, static_cast<int>(wrapped_str.length)); // length is size_t; the jchar overload takes int + case StringType::SymbolStr: + return java_lang_String::hash_code(get_symbol_utf8(wrapped_str), wrapped_str.length); + case StringType::UTF8Str: + return java_lang_String::hash_code(wrapped_str.utf8_str, wrapped_str.length); + default: + ShouldNotReachHere(); + } + return 0; +} + +// Unnamed int needed to fit CompactHashtable's equals type signature +bool StringTable::wrapped_string_equals(oop java_string, const StringWrapper& wrapped_str, int) { + switch (wrapped_str.type) { + case StringType::OopStr: + return java_lang_String::equals(java_string, wrapped_str.oop_str()); + case StringType::UnicodeStr: + return java_lang_String::equals(java_string, wrapped_str.unicode_str, static_cast<int>(wrapped_str.length)); + case StringType::SymbolStr: + return java_lang_String::equals(java_string, get_symbol_utf8(wrapped_str), wrapped_str.length); + case StringType::UTF8Str: + return java_lang_String::equals(java_string, wrapped_str.utf8_str, wrapped_str.length); + default: + ShouldNotReachHere(); + } + return false; +} + class StringTableConfig : public StackObj { private: public: @@ -163,22 +220,33 @@ class StringTableConfig : public StackObj { } }; -class StringTableLookupJchar : StackObj { - private: - Thread* _thread; +class StringTableLookup : StackObj { uintx _hash; - int _len; - const jchar* _str; + +protected: + Thread* _thread; Handle _found; - public: - StringTableLookupJchar(Thread* thread, uintx hash, const jchar* key, int 
len) - : _thread(thread), _hash(hash), _len(len), _str(key) { +public: + StringTableLookup(Thread* thread, uintx hash) + : _hash(hash), _thread(thread) {} + uintx get_hash() const { return _hash; } + bool is_dead(WeakHandle* value) { + oop val_oop = value->peek(); + return val_oop == nullptr; } - uintx get_hash() const { - return _hash; - } - bool equals(WeakHandle* value) { +}; + +class StringTableLookupUnicode : public StringTableLookup { +private: + const jchar* _str; + int _len; + +public: + StringTableLookupUnicode(Thread* thread, uintx hash, const jchar* key, int len) + : StringTableLookup(thread, hash), _str(key), _len(len) {} + + bool equals(const WeakHandle* value) { oop val_oop = value->peek(); if (val_oop == nullptr) { return false; @@ -188,29 +256,42 @@ class StringTableLookupJchar : StackObj { return false; } // Need to resolve weak handle and Handleize through possible safepoint. - _found = Handle(_thread, value->resolve()); + _found = Handle(_thread, value->resolve()); return true; } - bool is_dead(WeakHandle* value) { - oop val_oop = value->peek(); - return val_oop == nullptr; - } }; -class StringTableLookupOop : public StackObj { - private: - Thread* _thread; - uintx _hash; - Handle _find; - Handle _found; // Might be a different oop with the same value that's already - // in the table, which is the point. 
- public: - StringTableLookupOop(Thread* thread, uintx hash, Handle handle) - : _thread(thread), _hash(hash), _find(handle) { } +class StringTableLookupUTF8 : public StringTableLookup { +private: + const char* _str; + size_t _utf8_len; - uintx get_hash() const { - return _hash; +public: + StringTableLookupUTF8(Thread* thread, uintx hash, const char* key, size_t utf8_len) + : StringTableLookup(thread, hash), _str(key), _utf8_len(utf8_len) {} + + bool equals(const WeakHandle* value) { + oop val_oop = value->peek(); + if (val_oop == nullptr) { + return false; + } + bool equals = java_lang_String::equals(val_oop, _str, _utf8_len); + if (!equals) { + return false; + } + // Need to resolve weak handle and Handleize through possible safepoint. + _found = Handle(_thread, value->resolve()); + return true; } +}; + +class StringTableLookupOop : public StringTableLookup { +private: + Handle _find; + +public: + StringTableLookupOop(Thread* thread, uintx hash, Handle handle) + : StringTableLookup(thread, hash), _find(handle) {} bool equals(WeakHandle* value) { oop val_oop = value->peek(); @@ -225,11 +306,6 @@ class StringTableLookupOop : public StackObj { _found = Handle(_thread, value->resolve()); return true; } - - bool is_dead(WeakHandle* value) { - oop val_oop = value->peek(); - return val_oop == nullptr; - } }; void StringTable::create_table() { @@ -291,14 +367,15 @@ oop StringTable::lookup(Symbol* symbol) { oop StringTable::lookup(const jchar* name, int len) { unsigned int hash = java_lang_String::hash_code(name, len); - oop string = lookup_shared(name, len, hash); + StringWrapper wrapped_name(name, len); + oop string = lookup_shared(wrapped_name, hash); if (string != nullptr) { return string; } if (_alt_hash) { hash = hash_string(name, len, true); } - return do_lookup(name, len, hash); + return do_lookup(wrapped_name, hash); } class StringTableGet : public StackObj { @@ -323,80 +400,140 @@ void StringTable::update_needs_rehash(bool rehash) { } } -oop 
StringTable::do_lookup(const jchar* name, int len, uintx hash) { +oop StringTable::do_lookup(const StringWrapper& name, uintx hash) { Thread* thread = Thread::current(); - StringTableLookupJchar lookup(thread, hash, name, len); StringTableGet stg(thread); bool rehash_warning; - _local_table->get(thread, lookup, stg, &rehash_warning); + + switch (name.type) { // each case builds the lookup functor matching the wrapped representation + case StringType::OopStr: { + StringTableLookupOop lookup(thread, hash, name.oop_str); + _local_table->get(thread, lookup, stg, &rehash_warning); + break; + } + case StringType::UnicodeStr: { + StringTableLookupUnicode lookup(thread, hash, name.unicode_str, static_cast<int>(name.length)); + _local_table->get(thread, lookup, stg, &rehash_warning); + break; + } + case StringType::SymbolStr: { + StringTableLookupUTF8 lookup(thread, hash, get_symbol_utf8(name), name.length); + _local_table->get(thread, lookup, stg, &rehash_warning); + break; + } + case StringType::UTF8Str: { + StringTableLookupUTF8 lookup(thread, hash, name.utf8_str, name.length); + _local_table->get(thread, lookup, stg, &rehash_warning); + break; + } + default: + ShouldNotReachHere(); + } + update_needs_rehash(rehash_warning); return stg.get_res_oop(); } +// Converts and allocates to a unicode string and stores the unicode length in len +const jchar* StringTable::to_unicode(const StringWrapper& wrapped_str, int &len, TRAPS) { + switch (wrapped_str.type) { + case StringType::UnicodeStr: + len = static_cast<int>(wrapped_str.length); + return wrapped_str.unicode_str; + case StringType::OopStr: + return java_lang_String::as_unicode_string(wrapped_str.oop_str(), len, CHECK_NULL); + case StringType::SymbolStr: { + const char* utf8_str = get_symbol_utf8(wrapped_str); + int unicode_length = UTF8::unicode_length(utf8_str, wrapped_str.symbol_str->utf8_length()); + jchar* chars = NEW_RESOURCE_ARRAY(jchar, unicode_length); + UTF8::convert_to_unicode(utf8_str, chars, unicode_length); + len = unicode_length; + return chars; + } + case StringType::UTF8Str: { + int 
unicode_length = UTF8::unicode_length(wrapped_str.utf8_str); + jchar* chars = NEW_RESOURCE_ARRAY(jchar, unicode_length); + UTF8::convert_to_unicode(wrapped_str.utf8_str, chars, unicode_length); + len = unicode_length; + return chars; + } + default: + ShouldNotReachHere(); + } + return nullptr; +} + +Handle StringTable::handle_from_wrapped_string(const StringWrapper& wrapped_str, TRAPS) { + switch (wrapped_str.type) { + case StringType::OopStr: + return wrapped_str.oop_str; // already a Java string handle; returned as-is + case StringType::UnicodeStr: + return java_lang_String::create_from_unicode(wrapped_str.unicode_str, static_cast<int>(wrapped_str.length), THREAD); + case StringType::SymbolStr: + return java_lang_String::create_from_symbol(wrapped_str.symbol_str, THREAD); + case StringType::UTF8Str: + return java_lang_String::create_from_str(wrapped_str.utf8_str, THREAD); + default: + ShouldNotReachHere(); + } + return Handle(); +} + // Interning oop StringTable::intern(Symbol* symbol, TRAPS) { if (symbol == nullptr) return nullptr; - ResourceMark rm(THREAD); - int length; - jchar* chars = symbol->as_unicode(length); - Handle string; - oop result = intern(string, chars, length, CHECK_NULL); + int length = symbol->utf8_length(); + StringWrapper name(symbol, length); + oop result = intern(name, CHECK_NULL); return result; } oop StringTable::intern(oop string, TRAPS) { if (string == nullptr) return nullptr; - ResourceMark rm(THREAD); - int length; + int length = java_lang_String::length(string); Handle h_string (THREAD, string); - jchar* chars = java_lang_String::as_unicode_string(string, length, - CHECK_NULL); - oop result = intern(h_string, chars, length, CHECK_NULL); + StringWrapper name(h_string, length); + oop result = intern(name, CHECK_NULL); return result; } oop StringTable::intern(const char* utf8_string, TRAPS) { if (utf8_string == nullptr) return nullptr; - ResourceMark rm(THREAD); - int length = UTF8::unicode_length(utf8_string); - jchar* chars = NEW_RESOURCE_ARRAY(jchar, length); - 
UTF8::convert_to_unicode(utf8_string, chars, length); - Handle string; - oop result = intern(string, chars, length, CHECK_NULL); + size_t length = strlen(utf8_string); + StringWrapper name(utf8_string, length); + oop result = intern(name, CHECK_NULL); return result; } -oop StringTable::intern(Handle string_or_null_h, const jchar* name, int len, TRAPS) { +oop StringTable::intern(const StringWrapper& name, TRAPS) { // shared table always uses java_lang_String::hash_code - unsigned int hash = java_lang_String::hash_code(name, len); - oop found_string = lookup_shared(name, len, hash); + unsigned int hash = hash_wrapped_string(name); + oop found_string = lookup_shared(name, hash); if (found_string != nullptr) { return found_string; } + if (_alt_hash) { - hash = hash_string(name, len, true); + ResourceMark rm(THREAD); + // Convert to unicode for alt hashing + int unicode_length; + const jchar* chars = to_unicode(name, unicode_length, CHECK_NULL); + hash = hash_string(chars, unicode_length, true); } - found_string = do_lookup(name, len, hash); + + found_string = do_lookup(name, hash); if (found_string != nullptr) { return found_string; } - return do_intern(string_or_null_h, name, len, hash, THREAD); + return do_intern(name, hash, THREAD); } -oop StringTable::do_intern(Handle string_or_null_h, const jchar* name, - int len, uintx hash, TRAPS) { +oop StringTable::do_intern(const StringWrapper& name, uintx hash, TRAPS) { HandleMark hm(THREAD); // cleanup strings created - Handle string_h; + Handle string_h = handle_from_wrapped_string(name, CHECK_NULL); - if (!string_or_null_h.is_null()) { - string_h = string_or_null_h; - } else { - string_h = java_lang_String::create_from_unicode(name, len, CHECK_NULL); - } - - assert(java_lang_String::equals(string_h(), name, len), + assert(StringTable::wrapped_string_equals(string_h(), name), "string must be properly initialized"); - assert(len == java_lang_String::length(string_h()), "Must be same length"); // Notify deduplication support 
that the string is being interned. A string // must never be deduplicated after it has been interned. Doing so interferes @@ -410,7 +547,7 @@ oop StringTable::do_intern(Handle string_or_null_h, const jchar* name, bool rehash_warning; do { - // Callers have already looked up the String using the jchar* name, so just go to add. + // Callers have already looked up the String, so just go to add. WeakHandle wh(_oop_storage, string_h); // The hash table takes ownership of the WeakHandle, even if it's not inserted. if (_local_table->insert(THREAD, lookup, wh, &rehash_warning)) { @@ -775,14 +912,17 @@ size_t StringTable::shared_entry_count() { return _shared_table.entry_count(); } -oop StringTable::lookup_shared(const jchar* name, int len, unsigned int hash) { - assert(hash == java_lang_String::hash_code(name, len), +oop StringTable::lookup_shared(const StringWrapper& name, unsigned int hash) { + assert(hash == hash_wrapped_string(name), "hash must be computed using java_lang_String::hash_code"); - return _shared_table.lookup(name, hash, len); + // len is required but is already part of StringWrapper, so 0 is used + return _shared_table.lookup(name, hash, 0); } oop StringTable::lookup_shared(const jchar* name, int len) { - return _shared_table.lookup(name, java_lang_String::hash_code(name, len), len); + StringWrapper wrapped_name(name, len); + // len is required but is already part of StringWrapper, so 0 is used + return _shared_table.lookup(wrapped_name, java_lang_String::hash_code(name, len), 0); } // This is called BEFORE we enter the CDS safepoint. We can allocate heap objects. 
diff --git a/src/hotspot/share/classfile/stringTable.hpp b/src/hotspot/share/classfile/stringTable.hpp index 9f49e797182..38abb9c875c 100644 --- a/src/hotspot/share/classfile/stringTable.hpp +++ b/src/hotspot/share/classfile/stringTable.hpp @@ -56,6 +56,18 @@ class StringTable : AllStatic { static double get_load_factor(); static double get_dead_factor(size_t num_dead); +public: + typedef struct StringWrapperInternal StringWrapper; + + // Unnamed int needed to fit CompactHashtable's equals type signature + static bool wrapped_string_equals(oop java_string, const StringWrapper& wrapped_str, int = 0); + +private: + static const char* get_symbol_utf8(const StringWrapper& symbol_str); + static unsigned int hash_wrapped_string(const StringWrapper& wrapped_str); + static const jchar* to_unicode(const StringWrapper& wrapped_str, int &len, TRAPS); + static Handle handle_from_wrapped_string(const StringWrapper& wrapped_str, TRAPS); + // GC support // Callback for GC to notify of changes that might require cleaning or resize. 
@@ -65,9 +77,9 @@ class StringTable : AllStatic { static void item_added(); static void item_removed(); - static oop intern(Handle string_or_null_h, const jchar* name, int len, TRAPS); - static oop do_intern(Handle string_or_null, const jchar* name, int len, uintx hash, TRAPS); - static oop do_lookup(const jchar* name, int len, uintx hash); + static oop intern(const StringWrapper& name, TRAPS); + static oop do_intern(const StringWrapper& name, uintx hash, TRAPS); + static oop do_lookup(const StringWrapper& name, uintx hash); static void print_table_statistics(outputStream* st); @@ -87,7 +99,7 @@ class StringTable : AllStatic { // Interning static oop intern(Symbol* symbol, TRAPS); static oop intern(oop string, TRAPS); - static oop intern(const char *utf8_string, TRAPS); + static oop intern(const char* utf8_string, TRAPS); // Rehash the string table if it gets out of balance private: @@ -131,7 +143,7 @@ private: #endif // INCLUDE_CDS_JAVA_HEAP private: - static oop lookup_shared(const jchar* name, int len, unsigned int hash) NOT_CDS_JAVA_HEAP_RETURN_(nullptr); + static oop lookup_shared(const StringWrapper& name, unsigned int hash) NOT_CDS_JAVA_HEAP_RETURN_(nullptr); public: static oop lookup_shared(const jchar* name, int len) NOT_CDS_JAVA_HEAP_RETURN_(nullptr); static size_t shared_entry_count() NOT_CDS_JAVA_HEAP_RETURN_(0); diff --git a/test/hotspot/gtest/classfile/test_stringConversion.cpp b/test/hotspot/gtest/classfile/test_stringConversion.cpp new file mode 100644 index 00000000000..13553464229 --- /dev/null +++ b/test/hotspot/gtest/classfile/test_stringConversion.cpp @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "unittest.hpp" + +// Tests that string functions (hash code/equals) stay consistent when comparing equal strings and converting between string types + +// Simple ASCII string "Java(R)!!" 
+// Same length in both UTF8 and Unicode +static const char static_ascii_utf8_str[] = {0x4A, 0x61, 0x76, 0x61, 0x28, 0x52, 0x29, 0x21, 0x21}; +static const jchar static_ascii_unicode_str[] = {0x004A, 0x0061, 0x0076, 0x0061, 0x0028, 0x0052, 0x0029, 0x0021, 0x0021}; + +// Complex string "Jāvá®!☺☻", UTF-8 byte lengths per character 1+3+1+2+2+1+3+3 = 16 +static const unsigned char static_utf8_str[] = {0x4A, 0x61, 0xCC, 0x84, 0x76, 0xC3, 0xA1, 0xC2, 0xAE, 0x21, 0xE2, 0x98, 0xBA, 0xE2, 0x98, 0xBB}; +static const jchar static_unicode_str[] = { 0x004A, 0x0061, 0x0304, 0x0076, 0x00E1, 0x00AE, 0x0021, 0x263A, 0x263B}; + +static const int ASCII_LENGTH = 9; +static const size_t UTF8_LENGTH = 16; +static const int UNICODE_LENGTH = 9; + +void compare_utf8_utf8(const char* utf8_str1, const char* utf8_str2, size_t utf8_len) { + EXPECT_EQ(java_lang_String::hash_code(utf8_str1, utf8_len), java_lang_String::hash_code(utf8_str2, utf8_len)); + EXPECT_STREQ(utf8_str1, utf8_str2); +} + +void compare_utf8_unicode(const char* utf8_str, const jchar* unicode_str, size_t utf8_len, int unicode_len) { + EXPECT_EQ(java_lang_String::hash_code(utf8_str, utf8_len), java_lang_String::hash_code(unicode_str, unicode_len)); +} + +void compare_utf8_oop(const char* utf8_str, Handle oop_str, size_t utf8_len, int unicode_len) { + EXPECT_EQ(java_lang_String::hash_code(utf8_str, utf8_len), java_lang_String::hash_code(oop_str())); + EXPECT_TRUE(java_lang_String::equals(oop_str(), utf8_str, utf8_len)); +} + +void compare_unicode_unicode(const jchar* unicode_str1, const jchar* unicode_str2, int unicode_len) { + EXPECT_EQ(java_lang_String::hash_code(unicode_str1, unicode_len), java_lang_String::hash_code(unicode_str2, unicode_len)); + for (int i = 0; i < unicode_len; i++) { + EXPECT_EQ(unicode_str1[i], unicode_str2[i]); + } +} + +void compare_unicode_oop(const jchar* unicode_str, Handle oop_str, int unicode_len) { + EXPECT_EQ(java_lang_String::hash_code(unicode_str, unicode_len), java_lang_String::hash_code(oop_str())); + 
EXPECT_TRUE(java_lang_String::equals(oop_str(), unicode_str, unicode_len)); +} + +void compare_oop_oop(Handle oop_str1, Handle oop_str2) { + EXPECT_EQ(java_lang_String::hash_code(oop_str1()), java_lang_String::hash_code(oop_str2())); + EXPECT_TRUE(java_lang_String::equals(oop_str1(), oop_str2())); +} + +void test_utf8_convert(const char* utf8_str, size_t utf8_len, int unicode_len) { + EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str, strlen(utf8_str), false)); + + JavaThread* THREAD = JavaThread::current(); + ThreadInVMfromNative ThreadInVMfromNative(THREAD); + ResourceMark rm(THREAD); + HandleMark hm(THREAD); + + jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len); + UTF8::convert_to_unicode(utf8_str, unicode_str_from_utf8, unicode_len); + Handle oop_str_from_utf8 = java_lang_String::create_from_str(utf8_str, THREAD); + + compare_utf8_unicode(utf8_str, unicode_str_from_utf8, utf8_len, unicode_len); + compare_utf8_oop(utf8_str, oop_str_from_utf8, utf8_len, unicode_len); + + size_t length = unicode_len; + const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str_from_utf8, length); + const char* utf8_str_from_oop = java_lang_String::as_utf8_string(oop_str_from_utf8()); + + EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_unicode, strlen(utf8_str_from_unicode), false)); + EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_oop, strlen(utf8_str_from_oop), false)); + + compare_utf8_utf8(utf8_str, utf8_str_from_unicode, utf8_len); + compare_utf8_utf8(utf8_str, utf8_str_from_oop, utf8_len); +} + +void test_unicode_convert(const jchar* unicode_str, size_t utf8_len, int unicode_len) { + JavaThread* THREAD = JavaThread::current(); + ThreadInVMfromNative ThreadInVMfromNative(THREAD); + ResourceMark rm(THREAD); + HandleMark hm(THREAD); + + size_t length = unicode_len; + const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str, length); + Handle oop_str_from_unicode = 
java_lang_String::create_from_unicode(unicode_str, unicode_len, THREAD); + + EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_unicode, strlen(utf8_str_from_unicode), false)); + + compare_utf8_unicode(utf8_str_from_unicode, unicode_str, utf8_len, unicode_len); + compare_unicode_oop(unicode_str, oop_str_from_unicode, unicode_len); + + int _; + jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len); + UTF8::convert_to_unicode(utf8_str_from_unicode, unicode_str_from_utf8, unicode_len); + const jchar* unicode_str_from_oop = java_lang_String::as_unicode_string(oop_str_from_unicode(), _, THREAD); + + compare_unicode_unicode(unicode_str, unicode_str_from_utf8, unicode_len); + compare_unicode_unicode(unicode_str, unicode_str_from_oop, unicode_len); +} + +void test_utf8_unicode_cross(const char* utf8_str, const jchar* unicode_str, size_t utf8_len, int unicode_len) { + compare_utf8_unicode(utf8_str, unicode_str, utf8_len, unicode_len); + + JavaThread* THREAD = JavaThread::current(); + ThreadInVMfromNative ThreadInVMfromNative(THREAD); + ResourceMark rm(THREAD); + HandleMark hm(THREAD); + + size_t length = unicode_len; + const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str, length); + + jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len); + UTF8::convert_to_unicode(utf8_str, unicode_str_from_utf8, unicode_len); + + Handle oop_str_from_unicode = java_lang_String::create_from_unicode(unicode_str, unicode_len, THREAD); + Handle oop_str_from_utf8 = java_lang_String::create_from_str(utf8_str, THREAD); + + compare_utf8_utf8(utf8_str, utf8_str_from_unicode, utf8_len); + compare_utf8_oop(utf8_str, oop_str_from_unicode, utf8_len, unicode_len); + + compare_unicode_unicode(unicode_str, unicode_str_from_utf8, unicode_len); + compare_unicode_oop(unicode_str, oop_str_from_utf8, unicode_len); + + compare_utf8_oop(utf8_str_from_unicode, oop_str_from_utf8, utf8_len, unicode_len); + compare_unicode_oop(unicode_str_from_utf8, 
oop_str_from_unicode, unicode_len); + + compare_utf8_unicode(utf8_str_from_unicode, unicode_str_from_utf8, utf8_len, unicode_len); + compare_oop_oop(oop_str_from_utf8, oop_str_from_unicode); +} + +TEST_VM(StringConversion, fromUTF8_ascii) { + char utf8_str[ASCII_LENGTH + 1] = { }; // mutable, zero-filled buffer; the extra byte keeps the copy NUL-terminated + memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH); + test_utf8_convert(utf8_str, ASCII_LENGTH, ASCII_LENGTH); +} + +TEST_VM(StringConversion, fromUTF8_varlen) { + char utf8_str[UTF8_LENGTH + 1] = { }; + memcpy(utf8_str, static_utf8_str, UTF8_LENGTH); + test_utf8_convert(utf8_str, UTF8_LENGTH, UNICODE_LENGTH); +} + +TEST_VM(StringConversion, fromUnicode_ascii) { + jchar unicode_str[ASCII_LENGTH] = { }; + memcpy(unicode_str, static_ascii_unicode_str, ASCII_LENGTH * sizeof(jchar)); + test_unicode_convert(unicode_str, ASCII_LENGTH, ASCII_LENGTH); +} + +TEST_VM(StringConversion, fromUnicode_varlen) { + jchar unicode_str[UNICODE_LENGTH] = { }; + memcpy(unicode_str, static_unicode_str, UNICODE_LENGTH * sizeof(jchar)); + test_unicode_convert(unicode_str, UTF8_LENGTH, UNICODE_LENGTH); +} + +TEST_VM(StringConversion, cross_ascii) { + char utf8_str[ASCII_LENGTH + 1] = { }; + jchar unicode_str[ASCII_LENGTH] = { }; + memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH); + memcpy(unicode_str, static_ascii_unicode_str, ASCII_LENGTH * sizeof(jchar)); + + test_utf8_unicode_cross(utf8_str, unicode_str, ASCII_LENGTH, ASCII_LENGTH); +} + +TEST_VM(StringConversion, cross_varlen) { + char utf8_str[UTF8_LENGTH + 1] = { }; + jchar unicode_str[UNICODE_LENGTH] = { }; + memcpy(utf8_str, static_utf8_str, UTF8_LENGTH); + memcpy(unicode_str, static_unicode_str, UNICODE_LENGTH * sizeof(jchar)); + + test_utf8_unicode_cross(utf8_str, unicode_str, UTF8_LENGTH, UNICODE_LENGTH); +} diff --git a/test/hotspot/gtest/classfile/test_stringIntern.cpp b/test/hotspot/gtest/classfile/test_stringIntern.cpp new file mode 100644 index 
00000000000..69c0a5b2aa4 --- /dev/null +++ b/test/hotspot/gtest/classfile/test_stringIntern.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "classfile/stringTable.hpp" +#include "classfile/symbolTable.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "unittest.hpp" + +// Tests that strings are interned and that interning the same contents from different string types returns the same string + +// Simple ASCII string "Java(R)!!" 
+static const char static_ascii_utf8_str[] = {0x4A, 0x61, 0x76, 0x61, 0x28, 0x52, 0x29, 0x21, 0x21}; +static const size_t ASCII_LENGTH = 9; + +// Complex string "Jāvá®!☺☻", UTF-8 byte lengths per character 1+3+1+2+2+1+3+3 = 16 +static const unsigned char static_utf8_str[] = {0x4A, 0x61, 0xCC, 0x84, 0x76, 0xC3, 0xA1, 0xC2, 0xAE, 0x21, 0xE2, 0x98, 0xBA, 0xE2, 0x98, 0xBB}; +static const size_t COMPLEX_LENGTH = 16; + +void test_intern(const char* utf8_str, size_t utf8_length) { + JavaThread* THREAD = JavaThread::current(); + ThreadInVMfromNative ThreadInVMfromNative(THREAD); + HandleMark hm(THREAD); + + oop interned_string_from_utf8 = StringTable::intern(utf8_str, THREAD); + + EXPECT_TRUE(java_lang_String::equals(interned_string_from_utf8, utf8_str, utf8_length)); + EXPECT_EQ(java_lang_String::hash_code(utf8_str, utf8_length),java_lang_String::hash_code(interned_string_from_utf8)); + + Symbol* symbol_from_utf8 = SymbolTable::new_symbol(utf8_str, static_cast<int>(utf8_length)); + oop interned_string_from_symbol = StringTable::intern(symbol_from_utf8, THREAD); + + EXPECT_EQ(interned_string_from_utf8, interned_string_from_symbol); + + oop interned_string_from_oop1 = StringTable::intern(interned_string_from_utf8, THREAD); + + EXPECT_EQ(interned_string_from_utf8, interned_string_from_oop1); + +} + +TEST_VM(StringIntern, intern_ascii) { + char utf8_str[ASCII_LENGTH + 1] = { }; // mutable, zero-filled buffer; the extra byte keeps the copy NUL-terminated + memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH); + test_intern(utf8_str, ASCII_LENGTH); +} + +TEST_VM(StringIntern, intern_varlen) { + char utf8_str[COMPLEX_LENGTH + 1] = { }; + memcpy(utf8_str, static_utf8_str, COMPLEX_LENGTH); + test_intern(utf8_str, COMPLEX_LENGTH); +}