jdk-24/test/hotspot/gtest/classfile/test_stringConversion.cpp

195 lines
9.0 KiB
C++
Raw Normal View History

/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "precompiled.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "unittest.hpp"
// Tests that string functions (hash code/equals) stay consistent when comparing equal strings and converting between string types
// Simple ASCII string "Java(R)!!"
// Same length in both UTF8 and Unicode (9 elements each).
// Note: the UTF8 array below carries no trailing NUL byte.
static const char static_ascii_utf8_str[] = {0x4A, 0x61, 0x76, 0x61, 0x28, 0x52, 0x29, 0x21, 0x21};
static const jchar static_ascii_unicode_str[] = {0x004A, 0x0061, 0x0076, 0x0061, 0x0028, 0x0052, 0x0029, 0x0021, 0x0021};
// Complex string "Jāvá®!☺☻". Per visible character the UTF8 byte counts are
// 1 (J), 3 (a + combining macron U+0304), 1 (v), 2 (á), 2 (®), 1 (!), 3 (☺), 3 (☻) = 16 bytes.
// The Unicode form has 9 elements because "ā" is stored as two of them (0x0061, 0x0304).
// As above, the UTF8 array carries no trailing NUL byte.
static const unsigned char static_utf8_str[] = {0x4A, 0x61, 0xCC, 0x84, 0x76, 0xC3, 0xA1, 0xC2, 0xAE, 0x21, 0xE2, 0x98, 0xBA, 0xE2, 0x98, 0xBB};
static const jchar static_unicode_str[] = { 0x004A, 0x0061, 0x0304, 0x0076, 0x00E1, 0x00AE, 0x0021, 0x263A, 0x263B};
// Element count of both ASCII sample arrays.
static const int ASCII_LENGTH = 9;
// Byte count of static_utf8_str.
static const size_t UTF8_LENGTH = 16;
// Element count of static_unicode_str.
static const int UNICODE_LENGTH = 9;
// Expects two UTF8 strings to agree: the hash codes computed over utf8_len
// bytes must match, and the two C strings must compare equal.
// Both inputs must be NUL-terminated because EXPECT_STREQ is used.
void compare_utf8_utf8(const char* utf8_str1, const char* utf8_str2, size_t utf8_len) {
  const auto first_hash = java_lang_String::hash_code(utf8_str1, utf8_len);
  const auto second_hash = java_lang_String::hash_code(utf8_str2, utf8_len);
  EXPECT_EQ(first_hash, second_hash);

  EXPECT_STREQ(utf8_str1, utf8_str2);
}
// Expects the hash code of a UTF8 string (utf8_len bytes) to equal the hash
// code of its Unicode counterpart (unicode_len elements).
void compare_utf8_unicode(const char* utf8_str, const jchar* unicode_str, size_t utf8_len, int unicode_len) {
  const auto utf8_hash = java_lang_String::hash_code(utf8_str, utf8_len);
  const auto unicode_hash = java_lang_String::hash_code(unicode_str, unicode_len);
  EXPECT_EQ(utf8_hash, unicode_hash);
}
// Expects a UTF8 string and a Java String object to agree on both hash code
// and equality. unicode_len is accepted for signature symmetry with the other
// helpers but is not used here.
void compare_utf8_oop(const char* utf8_str, Handle oop_str, size_t utf8_len, int unicode_len) {
  const auto utf8_hash = java_lang_String::hash_code(utf8_str, utf8_len);
  const auto oop_hash = java_lang_String::hash_code(oop_str());
  EXPECT_EQ(utf8_hash, oop_hash);

  const bool same_contents = java_lang_String::equals(oop_str(), utf8_str, utf8_len);
  EXPECT_TRUE(same_contents);
}
// Expects two Unicode strings of unicode_len elements to have the same hash
// code and to match element by element.
void compare_unicode_unicode(const jchar* unicode_str1, const jchar* unicode_str2, int unicode_len) {
  const auto first_hash = java_lang_String::hash_code(unicode_str1, unicode_len);
  const auto second_hash = java_lang_String::hash_code(unicode_str2, unicode_len);
  EXPECT_EQ(first_hash, second_hash);

  // Every position is checked (no early exit) so all mismatches are reported.
  int idx = 0;
  while (idx < unicode_len) {
    EXPECT_EQ(unicode_str1[idx], unicode_str2[idx]);
    idx++;
  }
}
// Expects a Unicode string (unicode_len elements) and a Java String object to
// agree on both hash code and equality.
void compare_unicode_oop(const jchar* unicode_str, Handle oop_str, int unicode_len) {
  const auto unicode_hash = java_lang_String::hash_code(unicode_str, unicode_len);
  const auto oop_hash = java_lang_String::hash_code(oop_str());
  EXPECT_EQ(unicode_hash, oop_hash);

  const bool same_contents = java_lang_String::equals(oop_str(), unicode_str, unicode_len);
  EXPECT_TRUE(same_contents);
}
// Expects two Java String objects to agree on both hash code and equality.
void compare_oop_oop(Handle oop_str1, Handle oop_str2) {
  const auto first_hash = java_lang_String::hash_code(oop_str1());
  const auto second_hash = java_lang_String::hash_code(oop_str2());
  EXPECT_EQ(first_hash, second_hash);

  const bool same_contents = java_lang_String::equals(oop_str1(), oop_str2());
  EXPECT_TRUE(same_contents);
}
void test_utf8_convert(const char* utf8_str, size_t utf8_len, int unicode_len) {
EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str, strlen(utf8_str), false));
JavaThread* THREAD = JavaThread::current();
ThreadInVMfromNative ThreadInVMfromNative(THREAD);
ResourceMark rm(THREAD);
HandleMark hm(THREAD);
jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len);
UTF8::convert_to_unicode(utf8_str, unicode_str_from_utf8, unicode_len);
Handle oop_str_from_utf8 = java_lang_String::create_from_str(utf8_str, THREAD);
compare_utf8_unicode(utf8_str, unicode_str_from_utf8, utf8_len, unicode_len);
compare_utf8_oop(utf8_str, oop_str_from_utf8, utf8_len, unicode_len);
size_t length = unicode_len;
const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str_from_utf8, length);
const char* utf8_str_from_oop = java_lang_String::as_utf8_string(oop_str_from_utf8());
EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_unicode, strlen(utf8_str_from_unicode), false));
EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_oop, strlen(utf8_str_from_oop), false));
compare_utf8_utf8(utf8_str, utf8_str_from_unicode, utf8_len);
compare_utf8_utf8(utf8_str, utf8_str_from_oop, utf8_len);
}
void test_unicode_convert(const jchar* unicode_str, size_t utf8_len, int unicode_len) {
JavaThread* THREAD = JavaThread::current();
ThreadInVMfromNative ThreadInVMfromNative(THREAD);
ResourceMark rm(THREAD);
HandleMark hm(THREAD);
size_t length = unicode_len;
const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str, length);
Handle oop_str_from_unicode = java_lang_String::create_from_unicode(unicode_str, unicode_len, THREAD);
EXPECT_TRUE(UTF8::is_legal_utf8((unsigned char*)utf8_str_from_unicode, strlen(utf8_str_from_unicode), false));
compare_utf8_unicode(utf8_str_from_unicode, unicode_str, utf8_len, unicode_len);
compare_unicode_oop(unicode_str, oop_str_from_unicode, unicode_len);
int _;
jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len);
UTF8::convert_to_unicode(utf8_str_from_unicode, unicode_str_from_utf8, unicode_len);
const jchar* unicode_str_from_oop = java_lang_String::as_unicode_string(oop_str_from_unicode(), _, THREAD);
compare_unicode_unicode(unicode_str, unicode_str_from_utf8, unicode_len);
compare_unicode_unicode(unicode_str, unicode_str_from_oop, unicode_len);
}
void test_utf8_unicode_cross(const char* utf8_str, const jchar* unicode_str, size_t utf8_len, int unicode_len) {
compare_utf8_unicode(utf8_str, unicode_str, utf8_len, unicode_len);
JavaThread* THREAD = JavaThread::current();
ThreadInVMfromNative ThreadInVMfromNative(THREAD);
ResourceMark rm(THREAD);
HandleMark hm(THREAD);
size_t length = unicode_len;
const char* utf8_str_from_unicode = UNICODE::as_utf8(unicode_str, length);
jchar* unicode_str_from_utf8 = NEW_RESOURCE_ARRAY(jchar, unicode_len);
UTF8::convert_to_unicode(utf8_str, unicode_str_from_utf8, unicode_len);
Handle oop_str_from_unicode = java_lang_String::create_from_unicode(unicode_str, unicode_len, THREAD);
Handle oop_str_from_utf8 = java_lang_String::create_from_str(utf8_str, THREAD);
compare_utf8_utf8(utf8_str, utf8_str_from_unicode, utf8_len);
compare_utf8_oop(utf8_str, oop_str_from_unicode, utf8_len, unicode_len);
compare_unicode_unicode(unicode_str, unicode_str_from_utf8, unicode_len);
compare_unicode_oop(unicode_str, oop_str_from_utf8, unicode_len);
compare_utf8_oop(utf8_str_from_unicode, oop_str_from_utf8, utf8_len, unicode_len);
compare_unicode_oop(unicode_str_from_utf8, oop_str_from_unicode, unicode_len);
compare_utf8_unicode(utf8_str_from_unicode, unicode_str_from_utf8, utf8_len, unicode_len);
compare_oop_oop(oop_str_from_utf8, oop_str_from_unicode);
}
// Runs the UTF8-origin conversion checks on the ASCII sample.
TEST_VM(StringConversion, fromUTF8_ascii) {
  // Writable, zero-initialised buffer one byte longer than the sample, so the
  // copied bytes (which carry no terminator) end up NUL-terminated. The buffer
  // must not be const: it is filled by memcpy, and writing to a const object
  // is undefined behaviour.
  char utf8_str[ASCII_LENGTH + 1] = { };
  memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH);
  test_utf8_convert(utf8_str, ASCII_LENGTH, ASCII_LENGTH);
}
// Runs the UTF8-origin conversion checks on the multi-byte sample.
TEST_VM(StringConversion, fromUTF8_varlen) {
  // Writable, zero-initialised buffer one byte longer than the sample, so the
  // copied bytes (which carry no terminator) end up NUL-terminated. The buffer
  // must not be const: it is filled by memcpy, and writing to a const object
  // is undefined behaviour.
  char utf8_str[UTF8_LENGTH + 1] = { };
  memcpy(utf8_str, static_utf8_str, UTF8_LENGTH);
  test_utf8_convert(utf8_str, UTF8_LENGTH, UNICODE_LENGTH);
}
// Runs the Unicode-origin conversion checks on the ASCII sample.
TEST_VM(StringConversion, fromUnicode_ascii) {
  // Work on a local copy of the static sample data.
  jchar sample[ASCII_LENGTH] = { };
  memcpy(sample, static_ascii_unicode_str, sizeof(sample));
  test_unicode_convert(sample, ASCII_LENGTH, ASCII_LENGTH);
}
// Runs the Unicode-origin conversion checks on the multi-byte sample.
TEST_VM(StringConversion, fromUnicode_varlen) {
  // Work on a local copy of the static sample data.
  jchar sample[UNICODE_LENGTH] = { };
  memcpy(sample, static_unicode_str, sizeof(sample));
  test_unicode_convert(sample, UTF8_LENGTH, UNICODE_LENGTH);
}
// Cross-checks the UTF8 and Unicode forms of the ASCII sample.
TEST_VM(StringConversion, cross_ascii) {
  // The UTF8 buffer is writable and zero-initialised with one spare byte, so
  // the copied bytes end up NUL-terminated. It must not be const: it is filled
  // by memcpy, and writing to a const object is undefined behaviour.
  char utf8_str[ASCII_LENGTH + 1] = { };
  jchar unicode_str[ASCII_LENGTH] = { };
  memcpy(utf8_str, static_ascii_utf8_str, ASCII_LENGTH);
  memcpy(unicode_str, static_ascii_unicode_str, ASCII_LENGTH * sizeof(jchar));
  test_utf8_unicode_cross(utf8_str, unicode_str, ASCII_LENGTH, ASCII_LENGTH);
}
// Cross-checks the UTF8 and Unicode forms of the multi-byte sample.
TEST_VM(StringConversion, cross_varlen) {
  // The UTF8 buffer is writable and zero-initialised with one spare byte, so
  // the copied bytes end up NUL-terminated. It must not be const: it is filled
  // by memcpy, and writing to a const object is undefined behaviour.
  char utf8_str[UTF8_LENGTH + 1] = { };
  jchar unicode_str[UNICODE_LENGTH] = { };
  memcpy(utf8_str, static_utf8_str, UTF8_LENGTH);
  memcpy(unicode_str, static_unicode_str, UNICODE_LENGTH * sizeof(jchar));
  test_utf8_unicode_cross(utf8_str, unicode_str, UTF8_LENGTH, UNICODE_LENGTH);
}