8338257: UTF8 lengths should be size_t not int
Reviewed-by: stuefe, coleenp, dlong
This commit is contained in:
parent
777ed2b5d2
commit
a4962ace4d
@ -431,10 +431,10 @@ void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
|
||||
}
|
||||
|
||||
// NOTE: the content is NOT the same as
|
||||
// UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
|
||||
// UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, size_t buflen).
|
||||
// We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
|
||||
// parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
|
||||
void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
|
||||
void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, size_t utf8_length) {
|
||||
const char *c = utf8_string;
|
||||
const char *end = c + utf8_length;
|
||||
for (; c < end; c++) {
|
||||
|
@ -431,7 +431,7 @@ public:
|
||||
|
||||
int unescape(const char* from, const char* end, int count);
|
||||
void get_utf8(char* utf8_buffer, int utf8_length);
|
||||
static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
|
||||
static void put_utf8(outputStream* st, const char* utf8_string, size_t utf8_length);
|
||||
};
|
||||
|
||||
#endif // SHARE_CLASSFILE_COMPACTHASHTABLE_HPP
|
||||
|
@ -304,7 +304,8 @@ Handle java_lang_String::create_from_unicode(const jchar* unicode, int length, T
|
||||
#ifdef ASSERT
|
||||
{
|
||||
ResourceMark rm;
|
||||
char* expected = UNICODE::as_utf8(unicode, length);
|
||||
size_t utf8_len = static_cast<size_t>(length);
|
||||
char* expected = UNICODE::as_utf8(unicode, utf8_len);
|
||||
char* actual = as_utf8_string(h_obj());
|
||||
if (strcmp(expected, actual) != 0) {
|
||||
fatal("Unicode conversion failure: %s --> %s", expected, actual);
|
||||
@ -346,7 +347,7 @@ Handle java_lang_String::create_from_str(const char* utf8_str, TRAPS) {
|
||||
#ifdef ASSERT
|
||||
// This check is too strict when the input string is not a valid UTF8.
|
||||
// For example, it may be created with arbitrary content via jni_NewStringUTF.
|
||||
if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, (int)strlen(utf8_str), false)) {
|
||||
if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, strlen(utf8_str), false)) {
|
||||
ResourceMark rm;
|
||||
const char* expected = utf8_str;
|
||||
char* actual = as_utf8_string(h_obj());
|
||||
@ -554,7 +555,7 @@ char* java_lang_String::as_quoted_ascii(oop java_string) {
|
||||
if (length == 0) return nullptr;
|
||||
|
||||
char* result;
|
||||
int result_length;
|
||||
size_t result_length;
|
||||
if (!is_latin1) {
|
||||
jchar* base = value->char_at_addr(0);
|
||||
result_length = UNICODE::quoted_ascii_length(base, length) + 1;
|
||||
@ -566,8 +567,8 @@ char* java_lang_String::as_quoted_ascii(oop java_string) {
|
||||
result = NEW_RESOURCE_ARRAY(char, result_length);
|
||||
UNICODE::as_quoted_ascii(base, length, result, result_length);
|
||||
}
|
||||
assert(result_length >= length + 1, "must not be shorter");
|
||||
assert(result_length == (int)strlen(result) + 1, "must match");
|
||||
assert(result_length >= (size_t)length + 1, "must not be shorter");
|
||||
assert(result_length == strlen(result) + 1, "must match");
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -582,8 +583,9 @@ Symbol* java_lang_String::as_symbol(oop java_string) {
|
||||
} else {
|
||||
ResourceMark rm;
|
||||
jbyte* position = (length == 0) ? nullptr : value->byte_at_addr(0);
|
||||
const char* base = UNICODE::as_utf8(position, length);
|
||||
Symbol* sym = SymbolTable::new_symbol(base, length);
|
||||
size_t utf8_len = static_cast<size_t>(length);
|
||||
const char* base = UNICODE::as_utf8(position, utf8_len);
|
||||
Symbol* sym = SymbolTable::new_symbol(base, checked_cast<int>(utf8_len));
|
||||
return sym;
|
||||
}
|
||||
}
|
||||
@ -598,12 +600,13 @@ Symbol* java_lang_String::as_symbol_or_null(oop java_string) {
|
||||
} else {
|
||||
ResourceMark rm;
|
||||
jbyte* position = (length == 0) ? nullptr : value->byte_at_addr(0);
|
||||
const char* base = UNICODE::as_utf8(position, length);
|
||||
return SymbolTable::probe(base, length);
|
||||
size_t utf8_len = static_cast<size_t>(length);
|
||||
const char* base = UNICODE::as_utf8(position, utf8_len);
|
||||
return SymbolTable::probe(base, checked_cast<int>(utf8_len));
|
||||
}
|
||||
}
|
||||
|
||||
int java_lang_String::utf8_length(oop java_string, typeArrayOop value) {
|
||||
size_t java_lang_String::utf8_length(oop java_string, typeArrayOop value) {
|
||||
assert(value_equals(value, java_lang_String::value(java_string)),
|
||||
"value must be same as java_lang_String::value(java_string)");
|
||||
int length = java_lang_String::length(java_string, value);
|
||||
@ -617,18 +620,39 @@ int java_lang_String::utf8_length(oop java_string, typeArrayOop value) {
|
||||
}
|
||||
}
|
||||
|
||||
int java_lang_String::utf8_length(oop java_string) {
|
||||
size_t java_lang_String::utf8_length(oop java_string) {
|
||||
typeArrayOop value = java_lang_String::value(java_string);
|
||||
return utf8_length(java_string, value);
|
||||
}
|
||||
|
||||
// Legacy int-returning variant: looks up the string's value array and
// forwards to the two-argument overload.
int java_lang_String::utf8_length_as_int(oop java_string) {
  return utf8_length_as_int(java_string, java_lang_String::value(java_string));
}
|
||||
|
||||
// Legacy int-returning variant that takes the already-fetched value array.
// An empty string yields 0 without touching the array; otherwise the count
// is delegated to UNICODE::utf8_length_as_int, using the byte view for
// latin1 strings and the char view for all others.
int java_lang_String::utf8_length_as_int(oop java_string, typeArrayOop value) {
  assert(value_equals(value, java_lang_String::value(java_string)),
         "value must be same as java_lang_String::value(java_string)");
  const int num_chars = java_lang_String::length(java_string, value);
  if (num_chars == 0) {
    return 0;
  }
  if (java_lang_String::is_latin1(java_string)) {
    return UNICODE::utf8_length_as_int(value->byte_at_addr(0), num_chars);
  }
  return UNICODE::utf8_length_as_int(value->char_at_addr(0), num_chars);
}
|
||||
|
||||
char* java_lang_String::as_utf8_string(oop java_string) {
|
||||
int length;
|
||||
size_t length;
|
||||
return as_utf8_string(java_string, length);
|
||||
}
|
||||
|
||||
char* java_lang_String::as_utf8_string(oop java_string, int& length) {
|
||||
char* java_lang_String::as_utf8_string(oop java_string, size_t& length) {
|
||||
typeArrayOop value = java_lang_String::value(java_string);
|
||||
// `length` is used as the incoming number of characters to
|
||||
// convert, and then set as the number of bytes in the UTF8 sequence.
|
||||
length = java_lang_String::length(java_string, value);
|
||||
bool is_latin1 = java_lang_String::is_latin1(java_string);
|
||||
if (!is_latin1) {
|
||||
@ -642,7 +666,7 @@ char* java_lang_String::as_utf8_string(oop java_string, int& length) {
|
||||
|
||||
// Uses a provided buffer if it's sufficiently large, otherwise allocates
|
||||
// a resource array to fit
|
||||
char* java_lang_String::as_utf8_string_full(oop java_string, char* buf, int buflen, int& utf8_len) {
|
||||
char* java_lang_String::as_utf8_string_full(oop java_string, char* buf, size_t buflen, size_t& utf8_len) {
|
||||
typeArrayOop value = java_lang_String::value(java_string);
|
||||
int len = java_lang_String::length(java_string, value);
|
||||
bool is_latin1 = java_lang_String::is_latin1(java_string);
|
||||
@ -663,7 +687,7 @@ char* java_lang_String::as_utf8_string_full(oop java_string, char* buf, int bufl
|
||||
}
|
||||
}
|
||||
|
||||
char* java_lang_String::as_utf8_string(oop java_string, typeArrayOop value, char* buf, int buflen) {
|
||||
char* java_lang_String::as_utf8_string(oop java_string, typeArrayOop value, char* buf, size_t buflen) {
|
||||
assert(value_equals(value, java_lang_String::value(java_string)),
|
||||
"value must be same as java_lang_String::value(java_string)");
|
||||
int length = java_lang_String::length(java_string, value);
|
||||
@ -677,25 +701,28 @@ char* java_lang_String::as_utf8_string(oop java_string, typeArrayOop value, char
|
||||
}
|
||||
}
|
||||
|
||||
char* java_lang_String::as_utf8_string(oop java_string, char* buf, int buflen) {
|
||||
char* java_lang_String::as_utf8_string(oop java_string, char* buf, size_t buflen) {
|
||||
typeArrayOop value = java_lang_String::value(java_string);
|
||||
return as_utf8_string(java_string, value, buf, buflen);
|
||||
}
|
||||
|
||||
char* java_lang_String::as_utf8_string(oop java_string, int start, int len) {
|
||||
// `length` is used as the incoming number of characters to
|
||||
// convert, and then set as the number of bytes in the UTF8 sequence.
|
||||
size_t length = static_cast<size_t>(len);
|
||||
typeArrayOop value = java_lang_String::value(java_string);
|
||||
bool is_latin1 = java_lang_String::is_latin1(java_string);
|
||||
assert(start + len <= java_lang_String::length(java_string), "just checking");
|
||||
if (!is_latin1) {
|
||||
jchar* position = value->char_at_addr(start);
|
||||
return UNICODE::as_utf8(position, len);
|
||||
return UNICODE::as_utf8(position, length);
|
||||
} else {
|
||||
jbyte* position = value->byte_at_addr(start);
|
||||
return UNICODE::as_utf8(position, len);
|
||||
return UNICODE::as_utf8(position, length);
|
||||
}
|
||||
}
|
||||
|
||||
char* java_lang_String::as_utf8_string(oop java_string, typeArrayOop value, int start, int len, char* buf, int buflen) {
|
||||
char* java_lang_String::as_utf8_string(oop java_string, typeArrayOop value, int start, int len, char* buf, size_t buflen) {
|
||||
assert(value_equals(value, java_lang_String::value(java_string)),
|
||||
"value must be same as java_lang_String::value(java_string)");
|
||||
assert(start + len <= java_lang_String::length(java_string), "just checking");
|
||||
|
@ -131,17 +131,21 @@ class java_lang_String : AllStatic {
|
||||
static inline bool deduplication_requested(oop java_string);
|
||||
static inline int length(oop java_string);
|
||||
static inline int length(oop java_string, typeArrayOop string_value);
|
||||
static int utf8_length(oop java_string);
|
||||
static int utf8_length(oop java_string, typeArrayOop string_value);
|
||||
static size_t utf8_length(oop java_string);
|
||||
static size_t utf8_length(oop java_string, typeArrayOop string_value);
|
||||
// Legacy variants that truncate the length if needed
|
||||
static int utf8_length_as_int(oop java_string);
|
||||
static int utf8_length_as_int(oop java_string, typeArrayOop string_value);
|
||||
|
||||
// String converters
|
||||
static char* as_utf8_string(oop java_string);
|
||||
static char* as_utf8_string(oop java_string, int& length);
|
||||
static char* as_utf8_string_full(oop java_string, char* buf, int buflen, int& length);
|
||||
static char* as_utf8_string(oop java_string, char* buf, int buflen);
|
||||
// `length` is set to the length of the utf8 sequence.
|
||||
static char* as_utf8_string(oop java_string, size_t& length);
|
||||
static char* as_utf8_string_full(oop java_string, char* buf, size_t buflen, size_t& length);
|
||||
static char* as_utf8_string(oop java_string, char* buf, size_t buflen);
|
||||
static char* as_utf8_string(oop java_string, int start, int len);
|
||||
static char* as_utf8_string(oop java_string, typeArrayOop value, char* buf, int buflen);
|
||||
static char* as_utf8_string(oop java_string, typeArrayOop value, int start, int len, char* buf, int buflen);
|
||||
static char* as_utf8_string(oop java_string, typeArrayOop value, char* buf, size_t buflen);
|
||||
static char* as_utf8_string(oop java_string, typeArrayOop value, int start, int len, char* buf, size_t buflen);
|
||||
static char* as_platform_dependent_str(Handle java_string, TRAPS);
|
||||
static jchar* as_unicode_string(oop java_string, int& length, TRAPS);
|
||||
static jchar* as_unicode_string_or_null(oop java_string, int& length);
|
||||
|
@ -72,7 +72,9 @@ static char* get_module_name(oop module, int& len, TRAPS) {
|
||||
if (name_oop == nullptr) {
|
||||
THROW_MSG_NULL(vmSymbols::java_lang_NullPointerException(), "Null module name");
|
||||
}
|
||||
char* module_name = java_lang_String::as_utf8_string(name_oop, len);
|
||||
size_t utf8_len;
|
||||
char* module_name = java_lang_String::as_utf8_string(name_oop, utf8_len);
|
||||
len = checked_cast<int>(utf8_len); // module names are < 64K
|
||||
if (!verify_module_name(module_name, len)) {
|
||||
THROW_MSG_NULL(vmSymbols::java_lang_IllegalArgumentException(),
|
||||
err_msg("Invalid module name: %s", module_name));
|
||||
@ -84,9 +86,9 @@ static Symbol* as_symbol(jstring str_object) {
|
||||
if (str_object == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
int len;
|
||||
size_t len;
|
||||
char* str = java_lang_String::as_utf8_string(JNIHandles::resolve_non_null(str_object), len);
|
||||
return SymbolTable::new_symbol(str, len);
|
||||
return SymbolTable::new_symbol(str, checked_cast<int>(len));
|
||||
}
|
||||
|
||||
ModuleEntryTable* Modules::get_module_entry_table(Handle h_loader) {
|
||||
@ -142,8 +144,10 @@ bool Modules::is_package_defined(Symbol* package, Handle h_loader) {
|
||||
// Will use the provided buffer if it's sufficiently large, otherwise allocates
|
||||
// a resource array
|
||||
// The length of the resulting string will be assigned to utf8_len
|
||||
static const char* as_internal_package(oop package_string, char* buf, int buflen, int& utf8_len) {
|
||||
char* package_name = java_lang_String::as_utf8_string_full(package_string, buf, buflen, utf8_len);
|
||||
static const char* as_internal_package(oop package_string, char* buf, size_t buflen, int& utf8_len) {
|
||||
size_t full_utf8_len;
|
||||
char* package_name = java_lang_String::as_utf8_string_full(package_string, buf, buflen, full_utf8_len);
|
||||
utf8_len = checked_cast<int>(full_utf8_len); // package names are < 64K
|
||||
|
||||
// Turn all '/'s into '.'s
|
||||
for (int index = 0; index < utf8_len; index++) {
|
||||
|
@ -686,7 +686,7 @@ static void print_string(Thread* current, outputStream* st, oop s) {
|
||||
st->print("%d: ", length);
|
||||
} else {
|
||||
ResourceMark rm(current);
|
||||
int utf8_length = length;
|
||||
size_t utf8_length = length;
|
||||
char* utf8_string;
|
||||
|
||||
if (!is_latin1) {
|
||||
@ -697,7 +697,7 @@ static void print_string(Thread* current, outputStream* st, oop s) {
|
||||
utf8_string = UNICODE::as_utf8(bytes, utf8_length);
|
||||
}
|
||||
|
||||
st->print("%d: ", utf8_length);
|
||||
st->print("%zu: ", utf8_length);
|
||||
HashtableTextDump::put_utf8(st, utf8_string, utf8_length);
|
||||
}
|
||||
st->cr();
|
||||
|
@ -349,6 +349,7 @@ Symbol* SymbolTable::lookup_common(const char* name,
|
||||
// to be used for arbitrary strings. For debug builds we will assert if
|
||||
// a string is too long, whereas product builds will truncate it.
|
||||
static int check_length(const char* name, int len) {
|
||||
assert(len >= 0, "negative length %d suggests integer overflow in the caller", len);
|
||||
assert(len <= Symbol::max_length(),
|
||||
"String length %d exceeds the maximum Symbol length of %d", len, Symbol::max_length());
|
||||
if (len > Symbol::max_length()) {
|
||||
@ -461,33 +462,33 @@ Symbol* SymbolTable::lookup_only(const char* name, int len, unsigned int& hash)
|
||||
// and probing logic, so there is no need for convert_to_utf8 until
|
||||
// an actual new Symbol* is created.
|
||||
Symbol* SymbolTable::new_symbol(const jchar* name, int utf16_length) {
|
||||
int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
|
||||
size_t utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
|
||||
char stack_buf[ON_STACK_BUFFER_LENGTH];
|
||||
if (utf8_length < (int) sizeof(stack_buf)) {
|
||||
if (utf8_length < sizeof(stack_buf)) {
|
||||
char* chars = stack_buf;
|
||||
UNICODE::convert_to_utf8(name, utf16_length, chars);
|
||||
return new_symbol(chars, utf8_length);
|
||||
return new_symbol(chars, checked_cast<int>(utf8_length));
|
||||
} else {
|
||||
ResourceMark rm;
|
||||
char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);
|
||||
UNICODE::convert_to_utf8(name, utf16_length, chars);
|
||||
return new_symbol(chars, utf8_length);
|
||||
return new_symbol(chars, checked_cast<int>(utf8_length));
|
||||
}
|
||||
}
|
||||
|
||||
Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
|
||||
unsigned int& hash) {
|
||||
int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
|
||||
size_t utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
|
||||
char stack_buf[ON_STACK_BUFFER_LENGTH];
|
||||
if (utf8_length < (int) sizeof(stack_buf)) {
|
||||
if (utf8_length < sizeof(stack_buf)) {
|
||||
char* chars = stack_buf;
|
||||
UNICODE::convert_to_utf8(name, utf16_length, chars);
|
||||
return lookup_only(chars, utf8_length, hash);
|
||||
return lookup_only(chars, checked_cast<int>(utf8_length), hash);
|
||||
} else {
|
||||
ResourceMark rm;
|
||||
char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);
|
||||
UNICODE::convert_to_utf8(name, utf16_length, chars);
|
||||
return lookup_only(chars, utf8_length, hash);
|
||||
return lookup_only(chars, checked_cast<int>(utf8_length), hash);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -292,10 +292,10 @@ static const char* get_as_dcmd_arena_string(oop string) {
|
||||
char* str = nullptr;
|
||||
const typeArrayOop value = java_lang_String::value(string);
|
||||
if (value != nullptr) {
|
||||
const size_t length = static_cast<size_t>(java_lang_String::utf8_length(string, value)) + 1;
|
||||
const size_t length = java_lang_String::utf8_length(string, value) + 1;
|
||||
str = dcmd_arena_allocate(length);
|
||||
assert(str != nullptr, "invariant");
|
||||
java_lang_String::as_utf8_string(string, value, str, static_cast<int>(length));
|
||||
java_lang_String::as_utf8_string(string, value, str, length);
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -502,7 +502,7 @@ Klass* JfrJavaSupport::klass(const jobject handle) {
|
||||
return obj->klass();
|
||||
}
|
||||
|
||||
static char* allocate_string(bool c_heap, int length, Thread* thread) {
|
||||
static char* allocate_string(bool c_heap, size_t length, Thread* thread) {
|
||||
return c_heap ? NEW_C_HEAP_ARRAY(char, length, mtTracing) :
|
||||
NEW_RESOURCE_ARRAY_IN_THREAD(thread, char, length);
|
||||
}
|
||||
@ -511,7 +511,7 @@ const char* JfrJavaSupport::c_str(oop string, Thread* thread, bool c_heap /* fal
|
||||
char* str = nullptr;
|
||||
const typeArrayOop value = java_lang_String::value(string);
|
||||
if (value != nullptr) {
|
||||
const int length = java_lang_String::utf8_length(string, value);
|
||||
const size_t length = java_lang_String::utf8_length(string, value);
|
||||
str = allocate_string(c_heap, length + 1, thread);
|
||||
if (str == nullptr) {
|
||||
return nullptr;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -121,7 +121,10 @@ static const char* get_java_thread_name(const JavaThread* jt, int& length, oop v
|
||||
}
|
||||
assert(thread_obj != nullptr, "invariant");
|
||||
const oop name = java_lang_Thread::name(thread_obj);
|
||||
return name != nullptr ? java_lang_String::as_utf8_string(name, length) : nullptr;
|
||||
size_t utf8_len;
|
||||
const char* ret = name != nullptr ? java_lang_String::as_utf8_string(name, utf8_len) : nullptr;
|
||||
length = checked_cast<int>(utf8_len); // Thread names should be short
|
||||
return ret;
|
||||
}
|
||||
|
||||
const char* JfrThreadName::name(const Thread* t, int& length, oop vthread) {
|
||||
|
@ -166,7 +166,7 @@ void Symbol::print_symbol_on(outputStream* st) const {
|
||||
|
||||
char* Symbol::as_quoted_ascii() const {
|
||||
const char *ptr = (const char *)&_body[0];
|
||||
int quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
|
||||
size_t quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
|
||||
char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
|
||||
UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
|
||||
return result;
|
||||
|
@ -2223,7 +2223,7 @@ JNI_END
|
||||
JNI_ENTRY(jsize, jni_GetStringUTFLength(JNIEnv *env, jstring string))
|
||||
HOTSPOT_JNI_GETSTRINGUTFLENGTH_ENTRY(env, string);
|
||||
oop java_string = JNIHandles::resolve_non_null(string);
|
||||
jsize ret = java_lang_String::utf8_length(java_string);
|
||||
jsize ret = java_lang_String::utf8_length_as_int(java_string);
|
||||
HOTSPOT_JNI_GETSTRINGUTFLENGTH_RETURN(ret);
|
||||
return ret;
|
||||
JNI_END
|
||||
@ -2236,10 +2236,11 @@ JNI_ENTRY(const char*, jni_GetStringUTFChars(JNIEnv *env, jstring string, jboole
|
||||
typeArrayOop s_value = java_lang_String::value(java_string);
|
||||
if (s_value != nullptr) {
|
||||
size_t length = java_lang_String::utf8_length(java_string, s_value);
|
||||
/* JNI Specification states return null on OOM */
|
||||
// JNI Specification states return null on OOM.
|
||||
// The resulting sequence doesn't have to be NUL-terminated but we do.
|
||||
result = AllocateHeap(length + 1, mtInternal, AllocFailStrategy::RETURN_NULL);
|
||||
if (result != nullptr) {
|
||||
java_lang_String::as_utf8_string(java_string, s_value, result, (int) length + 1);
|
||||
java_lang_String::as_utf8_string(java_string, s_value, result, length + 1);
|
||||
if (isCopy != nullptr) {
|
||||
*isCopy = JNI_TRUE;
|
||||
}
|
||||
|
@ -1321,7 +1321,7 @@ JvmtiEnv::GetThreadInfo(jthread thread, jvmtiThreadInfo* info_ptr) {
|
||||
if (name() != nullptr) {
|
||||
n = java_lang_String::as_utf8_string(name());
|
||||
} else {
|
||||
int utf8_length = 0;
|
||||
size_t utf8_length = 0;
|
||||
n = UNICODE::as_utf8((jchar*) nullptr, utf8_length);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -45,7 +45,7 @@ static const char* allocate(oop string) {
|
||||
char* str = nullptr;
|
||||
const typeArrayOop value = java_lang_String::value(string);
|
||||
if (value != nullptr) {
|
||||
const int length = java_lang_String::utf8_length(string, value);
|
||||
const size_t length = java_lang_String::utf8_length(string, value);
|
||||
str = NEW_C_HEAP_ARRAY(char, length + 1, mtServiceability);
|
||||
java_lang_String::as_utf8_string(string, value, str, length + 1);
|
||||
}
|
||||
|
@ -98,15 +98,21 @@ char* UTF8::next_character(const char* str, jint* value) {
|
||||
return next_ch;
|
||||
}
|
||||
|
||||
// Count bytes of the form 10xxxxxx and deduct this count
|
||||
// The number of unicode characters in a utf8 sequence can be easily
|
||||
// determined by noting that bytes of the form 10xxxxxx are part of
|
||||
// a 2 or 3-byte multi-byte sequence, all others are either characters
|
||||
// themselves or else the start of a multi-byte character.
|
||||
|
||||
// Calculate the unicode length of a utf8 string of known size
|
||||
// by counting bytes of the form 10xxxxxx and deducting this count
|
||||
// from the total byte count. The utf8 string must be in
|
||||
// legal form which has been verified in the format checker.
|
||||
int UTF8::unicode_length(const char* str, int len, bool& is_latin1, bool& has_multibyte) {
|
||||
int num_chars = len;
|
||||
int UTF8::unicode_length(const char* str, size_t len, bool& is_latin1, bool& has_multibyte) {
|
||||
size_t num_chars = len;
|
||||
has_multibyte = false;
|
||||
is_latin1 = true;
|
||||
unsigned char prev = 0;
|
||||
for (int i = 0; i < len; i++) {
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
unsigned char c = str[i];
|
||||
if ((c & 0xC0) == 0x80) {
|
||||
// Multibyte, check if valid latin1 character.
|
||||
@ -118,12 +124,12 @@ int UTF8::unicode_length(const char* str, int len, bool& is_latin1, bool& has_mu
|
||||
}
|
||||
prev = c;
|
||||
}
|
||||
return num_chars;
|
||||
return checked_cast<int>(num_chars);
|
||||
}
|
||||
|
||||
// Count bytes of the utf8 string except those in form
|
||||
// 10xxxxxx which only appear in multibyte characters.
|
||||
// The utf8 string must be in legal form and has been
|
||||
// Calculate the unicode length of a nul-terminated utf8 string
|
||||
// by counting bytes of the utf8 string except those in the form
|
||||
// 10xxxxxx. The utf8 string must be in legal form and has been
|
||||
// verified in the format checker.
|
||||
int UTF8::unicode_length(const char* str, bool& is_latin1, bool& has_multibyte) {
|
||||
int num_chars = 0;
|
||||
@ -195,10 +201,10 @@ template void UTF8::convert_to_unicode<jchar>(const char* utf8_str, jchar* unico
|
||||
template void UTF8::convert_to_unicode<jbyte>(const char* utf8_str, jbyte* unicode_str, int unicode_length);
|
||||
|
||||
// returns the quoted ascii length of a utf8 string of the given length
|
||||
int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
|
||||
size_t UTF8::quoted_ascii_length(const char* utf8_str, size_t utf8_length) {
|
||||
const char *ptr = utf8_str;
|
||||
const char* end = ptr + utf8_length;
|
||||
int result = 0;
|
||||
size_t result = 0;
|
||||
while (ptr < end) {
|
||||
jchar c;
|
||||
ptr = UTF8::next(ptr, &c);
|
||||
@ -212,7 +218,7 @@ int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
|
||||
}
|
||||
|
||||
// converts a utf8 string to quoted ascii
|
||||
void UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen) {
|
||||
void UTF8::as_quoted_ascii(const char* utf8_str, size_t utf8_length, char* buf, size_t buflen) {
|
||||
const char *ptr = utf8_str;
|
||||
const char *utf8_end = ptr + utf8_length;
|
||||
char* p = buf;
|
||||
@ -248,7 +254,7 @@ const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {
|
||||
return quoted_ascii_str;
|
||||
}
|
||||
// everything up to this point was ok.
|
||||
int length = ptr - quoted_ascii_str;
|
||||
size_t length = ptr - quoted_ascii_str;
|
||||
char* buffer = nullptr;
|
||||
for (int round = 0; round < 2; round++) {
|
||||
while (*ptr != '\0') {
|
||||
@ -330,11 +336,11 @@ jint UTF8::get_supplementary_character(const unsigned char* str) {
|
||||
+ ((str[4] & 0x0f) << 6) + (str[5] & 0x3f);
|
||||
}
|
||||
|
||||
bool UTF8::is_legal_utf8(const unsigned char* buffer, int length,
|
||||
bool UTF8::is_legal_utf8(const unsigned char* buffer, size_t length,
|
||||
bool version_leq_47) {
|
||||
int i = 0;
|
||||
int count = length >> 2;
|
||||
for (int k=0; k<count; k++) {
|
||||
size_t i = 0;
|
||||
size_t count = length >> 2;
|
||||
for (size_t k = 0; k < count; k++) {
|
||||
unsigned char b0 = buffer[i];
|
||||
unsigned char b1 = buffer[i+1];
|
||||
unsigned char b2 = buffer[i+2];
|
||||
@ -405,7 +411,7 @@ static bool is_starting_byte(unsigned char b) {
|
||||
// To avoid that the caller can choose to check for validity first.
|
||||
// The incoming buffer is still expected to be NUL-terminated.
|
||||
// The incoming buffer is expected to be a realistic size - we assert if it is too small.
|
||||
void UTF8::truncate_to_legal_utf8(unsigned char* buffer, int length) {
|
||||
void UTF8::truncate_to_legal_utf8(unsigned char* buffer, size_t length) {
|
||||
assert(length > 5, "invalid length");
|
||||
assert(buffer[length - 1] == '\0', "Buffer should be NUL-terminated");
|
||||
|
||||
@ -433,7 +439,7 @@ void UTF8::truncate_to_legal_utf8(unsigned char* buffer, int length) {
|
||||
// then we insert NUL at that location to terminate the buffer. There is an added complexity with 6 byte
|
||||
// encodings as the first and fourth bytes are the same and overlap with the 3 byte encoding.
|
||||
|
||||
for (int index = length - 2; index > 0; index--) {
|
||||
for (size_t index = length - 2; index > 0; index--) {
|
||||
if (is_starting_byte(buffer[index])) {
|
||||
if (buffer[index] == 0xED) {
|
||||
// Could be first byte of 3 or 6, or fourth byte of 6.
|
||||
@ -441,7 +447,7 @@ void UTF8::truncate_to_legal_utf8(unsigned char* buffer, int length) {
|
||||
// surrogate value in the range EDA080 to EDAFBF. We only
|
||||
// need to check for EDA to establish this as the "missing"
|
||||
// values in EDAxxx would not be valid 3 byte encodings.
|
||||
if ((index - 3) >= 0 &&
|
||||
if (index >= 3 &&
|
||||
(buffer[index - 3] == 0xED) &&
|
||||
((buffer[index - 2] & 0xF0) == 0xA0)) {
|
||||
assert(buffer[index - 1] >= 0x80 && buffer[index - 1] <= 0xBF, "sanity check");
|
||||
@ -470,7 +476,7 @@ bool UNICODE::is_latin1(const jchar* base, int length) {
|
||||
return true;
|
||||
}
|
||||
|
||||
int UNICODE::utf8_size(jchar c) {
|
||||
size_t UNICODE::utf8_size(jchar c) {
|
||||
if ((0x0001 <= c) && (c <= 0x007F)) {
|
||||
// ASCII character
|
||||
return 1;
|
||||
@ -481,7 +487,7 @@ int UNICODE::utf8_size(jchar c) {
|
||||
}
|
||||
}
|
||||
|
||||
int UNICODE::utf8_size(jbyte c) {
|
||||
size_t UNICODE::utf8_size(jbyte c) {
|
||||
if (c >= 0x01) {
|
||||
// ASCII character. Check is equivalent to
|
||||
// (0x01 <= c) && (c <= 0x7F) because c is signed.
|
||||
@ -494,11 +500,23 @@ int UNICODE::utf8_size(jbyte c) {
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
int UNICODE::utf8_length(const T* base, int length) {
|
||||
size_t UNICODE::utf8_length(const T* base, int length) {
|
||||
size_t result = 0;
|
||||
for (int index = 0; index < length; index++) {
|
||||
result += utf8_size(base[index]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
int UNICODE::utf8_length_as_int(const T* base, int length) {
|
||||
size_t result = 0;
|
||||
for (int index = 0; index < length; index++) {
|
||||
T c = base[index];
|
||||
int sz = utf8_size(c);
|
||||
size_t sz = utf8_size(c);
|
||||
// If the length is > INT_MAX-1 we truncate at a completed
|
||||
// modified-UTF8 encoding. This allows for +1 to be added
|
||||
// by the caller for NUL-termination, without overflow.
|
||||
if (result + sz > INT_MAX-1) {
|
||||
break;
|
||||
}
|
||||
@ -508,41 +526,44 @@ int UNICODE::utf8_length(const T* base, int length) {
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
char* UNICODE::as_utf8(const T* base, int& length) {
|
||||
int utf8_len = utf8_length(base, length);
|
||||
char* UNICODE::as_utf8(const T* base, size_t& length) {
|
||||
// Incoming length must be <= INT_MAX
|
||||
size_t utf8_len = utf8_length(base, static_cast<int>(length));
|
||||
u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
|
||||
char* result = as_utf8(base, length, (char*) buf, utf8_len + 1);
|
||||
assert((int) strlen(result) == utf8_len, "length prediction must be correct");
|
||||
// Set string length to uft8 length
|
||||
char* result = as_utf8(base, static_cast<int>(length), (char*) buf, utf8_len + 1);
|
||||
assert(strlen(result) == utf8_len, "length prediction must be correct");
|
||||
// Set outgoing string length to utf8 length
|
||||
length = utf8_len;
|
||||
return (char*) result;
|
||||
}
|
||||
|
||||
char* UNICODE::as_utf8(const jchar* base, int length, char* buf, int buflen) {
|
||||
char* UNICODE::as_utf8(const jchar* base, int length, char* buf, size_t buflen) {
|
||||
assert(buflen > 0, "zero length output buffer");
|
||||
u_char* p = (u_char*)buf;
|
||||
for (int index = 0; index < length; index++) {
|
||||
jchar c = base[index];
|
||||
buflen -= utf8_size(c);
|
||||
if (buflen <= 0) break; // string is truncated
|
||||
size_t sz = utf8_size(c);
|
||||
if (sz >= buflen) break; // string is truncated
|
||||
buflen -= sz;
|
||||
p = utf8_write(p, c);
|
||||
}
|
||||
*p = '\0';
|
||||
return buf;
|
||||
}
|
||||
|
||||
char* UNICODE::as_utf8(const jbyte* base, int length, char* buf, int buflen) {
|
||||
char* UNICODE::as_utf8(const jbyte* base, int length, char* buf, size_t buflen) {
|
||||
assert(buflen > 0, "zero length output buffer");
|
||||
u_char* p = (u_char*)buf;
|
||||
for (int index = 0; index < length; index++) {
|
||||
jbyte c = base[index];
|
||||
int sz = utf8_size(c);
|
||||
size_t sz = utf8_size(c);
|
||||
if (sz >= buflen) break; // string is truncated
|
||||
buflen -= sz;
|
||||
if (buflen <= 0) break; // string is truncated
|
||||
if (sz == 1) {
|
||||
// Copy ASCII characters (UTF-8 is ASCII compatible)
|
||||
*p++ = c;
|
||||
} else {
|
||||
assert(sz == 2, "must be!");
|
||||
// Non-ASCII character or 0x00 which should
|
||||
// be encoded as 0xC080 in "modified" UTF8.
|
||||
p = utf8_write(p, ((jchar) c) & 0xff);
|
||||
@ -561,8 +582,8 @@ void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer)
|
||||
|
||||
// returns the quoted ascii length of a unicode string
|
||||
template<typename T>
|
||||
int UNICODE::quoted_ascii_length(const T* base, int length) {
|
||||
int result = 0;
|
||||
size_t UNICODE::quoted_ascii_length(const T* base, int length) {
|
||||
size_t result = 0;
|
||||
for (int i = 0; i < length; i++) {
|
||||
T c = base[i];
|
||||
if (c >= 32 && c < 127) {
|
||||
@ -576,7 +597,7 @@ int UNICODE::quoted_ascii_length(const T* base, int length) {
|
||||
|
||||
// converts a unicode string to quoted ascii
|
||||
template<typename T>
|
||||
void UNICODE::as_quoted_ascii(const T* base, int length, char* buf, int buflen) {
|
||||
void UNICODE::as_quoted_ascii(const T* base, int length, char* buf, size_t buflen) {
|
||||
char* p = buf;
|
||||
char* end = buf + buflen;
|
||||
for (int index = 0; index < length; index++) {
|
||||
@ -594,11 +615,13 @@ void UNICODE::as_quoted_ascii(const T* base, int length, char* buf, int buflen)
|
||||
}
|
||||
|
||||
// Explicit instantiation for all supported types.
|
||||
template int UNICODE::utf8_length(const jbyte* base, int length);
|
||||
template int UNICODE::utf8_length(const jchar* base, int length);
|
||||
template char* UNICODE::as_utf8(const jbyte* base, int& length);
|
||||
template char* UNICODE::as_utf8(const jchar* base, int& length);
|
||||
template int UNICODE::quoted_ascii_length<jbyte>(const jbyte* base, int length);
|
||||
template int UNICODE::quoted_ascii_length<jchar>(const jchar* base, int length);
|
||||
template void UNICODE::as_quoted_ascii<jbyte>(const jbyte* base, int length, char* buf, int buflen);
|
||||
template void UNICODE::as_quoted_ascii<jchar>(const jchar* base, int length, char* buf, int buflen);
|
||||
template size_t UNICODE::utf8_length(const jbyte* base, int length);
|
||||
template size_t UNICODE::utf8_length(const jchar* base, int length);
|
||||
template int UNICODE::utf8_length_as_int(const jbyte* base, int length);
|
||||
template int UNICODE::utf8_length_as_int(const jchar* base, int length);
|
||||
template char* UNICODE::as_utf8(const jbyte* base, size_t& length);
|
||||
template char* UNICODE::as_utf8(const jchar* base, size_t& length);
|
||||
template size_t UNICODE::quoted_ascii_length<jbyte>(const jbyte* base, int length);
|
||||
template size_t UNICODE::quoted_ascii_length<jchar>(const jchar* base, int length);
|
||||
template void UNICODE::as_quoted_ascii<jbyte>(const jbyte* base, int length, char* buf, size_t buflen);
|
||||
template void UNICODE::as_quoted_ascii<jchar>(const jchar* base, int length, char* buf, size_t buflen);
|
||||
|
@ -29,6 +29,45 @@
|
||||
#include "memory/allStatic.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
|
||||
/**
|
||||
|
||||
String handling within Java and the VM requires a bit of explanation.
|
||||
|
||||
Logically a java.lang.String is a sequence of 16-bit Unicode characters
|
||||
encoded in UTF-16. In the past a String contained a Java char[] and so
|
||||
could theoretically contain INT_MAX 16-bit characters. Then came JEP 254:
|
||||
Compact Strings.
|
||||
|
||||
With Compact Strings the Java char[] becomes a Java byte[], and that byte[]
|
||||
contains either latin-1 characters all of which fit in 8-bits, or else each
|
||||
pair of bytes represents a UTF-16 character. Consequently the maximum length
|
||||
in characters of a latin-1 string is INT_MAX, whilst for non-latin-1 it is INT_MAX/2.
|
||||
|
||||
In the code below if we have latin-1 content then we treat the String's data
|
||||
array as a jbyte[], else a jchar[]. The lengths of these arrays are specified
|
||||
as an int value, with a nominal maximum of INT_MAX.
|
||||
|
||||
The modified UTF-8 encoding specified for the VM nominally encodes characters
|
||||
in 1, 2, 3 or 6 bytes. The 6-byte representation is actually two 3-byte representations
|
||||
for two UTF-16 characters forming a surrogate pair. If we are dealing with
|
||||
a latin-1 string then each character will be encoded as either 1 or 2 bytes and so the
|
||||
maximum UTF8 length is 2*INT_MAX. This can't be stored in an int so utf8 buffers must
|
||||
use a size_t length. For non-latin-1 strings each UTF-16 character will encode as
|
||||
1, 2 or 3 bytes, so the maximum UTF8 length in that case is 3 * INT_MAX/2 i.e. 1.5*INT_MAX.
|
||||
|
||||
The "quoted ascii" form of a unicode string is at worst 6 times longer than its
|
||||
regular form, and so these lengths must always be size_t - though if we know we only
|
||||
ever do this to symbols (or small symbol combinations) then we could use int.
|
||||
|
||||
There is an additional assumption/expectation that our UTF8 APIs are never dealing with
|
||||
invalid UTF8, and more generally that all UTF8 sequences could form valid Strings.
|
||||
Consequently the Unicode length of a UTF8 sequence is assumed to always be representable
|
||||
by an int. However, there are APIs, such as JNI NewStringUTF, that do deal with invalid input
|
||||
and could potentially have an unrepresentable string. The long-standing position with JNI
|
||||
is that the user must supply valid input so we do not try to account for these cases.
|
||||
|
||||
*/
|
||||
|
||||
// Low-level interface for UTF8 strings
|
||||
|
||||
class UTF8 : AllStatic {
|
||||
@ -41,20 +80,20 @@ class UTF8 : AllStatic {
|
||||
static int unicode_length(const char* utf8_str, bool& is_latin1, bool& has_multibyte);
|
||||
|
||||
// returns the unicode length of a non-0-terminated utf8 string
|
||||
static int unicode_length(const char* utf8_str, int len) {
|
||||
static int unicode_length(const char* utf8_str, size_t len) {
|
||||
bool is_latin1, has_multibyte;
|
||||
return unicode_length(utf8_str, len, is_latin1, has_multibyte);
|
||||
}
|
||||
static int unicode_length(const char* utf8_str, int len, bool& is_latin1, bool& has_multibyte);
|
||||
static int unicode_length(const char* utf8_str, size_t len, bool& is_latin1, bool& has_multibyte);
|
||||
|
||||
// converts a utf8 string to a unicode string
|
||||
template<typename T> static void convert_to_unicode(const char* utf8_str, T* unicode_str, int unicode_length);
|
||||
|
||||
// returns the quoted ascii length of a utf8 string
|
||||
static int quoted_ascii_length(const char* utf8_str, int utf8_length);
|
||||
static size_t quoted_ascii_length(const char* utf8_str, size_t utf8_length);
|
||||
|
||||
// converts a utf8 string to quoted ascii
|
||||
static void as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen);
|
||||
static void as_quoted_ascii(const char* utf8_str, size_t utf8_length, char* buf, size_t buflen);
|
||||
|
||||
#ifndef PRODUCT
|
||||
// converts a quoted ascii string to utf8 string. returns the original
|
||||
@ -82,13 +121,13 @@ class UTF8 : AllStatic {
|
||||
while(--length >= 0 && base[length] != c);
|
||||
return (length < 0) ? nullptr : &base[length];
|
||||
}
|
||||
static bool equal(const jbyte* base1, int length1, const jbyte* base2,int length2);
|
||||
static bool equal(const jbyte* base1, int length1, const jbyte* base2, int length2);
|
||||
static bool is_supplementary_character(const unsigned char* str);
|
||||
static jint get_supplementary_character(const unsigned char* str);
|
||||
|
||||
static bool is_legal_utf8(const unsigned char* buffer, int length,
|
||||
static bool is_legal_utf8(const unsigned char* buffer, size_t length,
|
||||
bool version_leq_47);
|
||||
static void truncate_to_legal_utf8(unsigned char* buffer, int length);
|
||||
static void truncate_to_legal_utf8(unsigned char* buffer, size_t length);
|
||||
};
|
||||
|
||||
|
||||
@ -99,6 +138,12 @@ class UTF8 : AllStatic {
|
||||
// units, so a supplementary character uses two positions in a unicode string.
|
||||
|
||||
class UNICODE : AllStatic {
|
||||
|
||||
// returns the utf8 size of a unicode character
|
||||
// uses size_t for convenience in overflow checks
|
||||
static size_t utf8_size(jchar c);
|
||||
static size_t utf8_size(jbyte c);
|
||||
|
||||
public:
|
||||
// checks if the given unicode character can be encoded as latin1
|
||||
static bool is_latin1(jchar c);
|
||||
@ -106,28 +151,27 @@ class UNICODE : AllStatic {
|
||||
// checks if the given string can be encoded as latin1
|
||||
static bool is_latin1(const jchar* base, int length);
|
||||
|
||||
// returns the utf8 size of a unicode character
|
||||
static int utf8_size(jchar c);
|
||||
static int utf8_size(jbyte c);
|
||||
|
||||
// returns the utf8 length of a unicode string
|
||||
template<typename T> static int utf8_length(const T* base, int length);
|
||||
template<typename T> static size_t utf8_length(const T* base, int length);
|
||||
|
||||
// returns the utf8 length of a unicode string as an int - truncated if needed
|
||||
template<typename T> static int utf8_length_as_int(const T* base, int length);
|
||||
|
||||
// converts a unicode string to utf8 string
|
||||
static void convert_to_utf8(const jchar* base, int length, char* utf8_buffer);
|
||||
|
||||
// converts a unicode string to a utf8 string; result is allocated
|
||||
// in resource area unless a buffer is provided. The unicode 'length'
|
||||
// parameter is set to the length of the result utf8 string.
|
||||
template<typename T> static char* as_utf8(const T* base, int& length);
|
||||
static char* as_utf8(const jchar* base, int length, char* buf, int buflen);
|
||||
static char* as_utf8(const jbyte* base, int length, char* buf, int buflen);
|
||||
// parameter is set to the length of the resulting utf8 string.
|
||||
template<typename T> static char* as_utf8(const T* base, size_t& length);
|
||||
static char* as_utf8(const jchar* base, int length, char* buf, size_t buflen);
|
||||
static char* as_utf8(const jbyte* base, int length, char* buf, size_t buflen);
|
||||
|
||||
// returns the quoted ascii length of a unicode string
|
||||
template<typename T> static int quoted_ascii_length(const T* base, int length);
|
||||
template<typename T> static size_t quoted_ascii_length(const T* base, int length);
|
||||
|
||||
// converts a unicode string to quoted ascii
|
||||
template<typename T> static void as_quoted_ascii(const T* base, int length, char* buf, int buflen);
|
||||
template<typename T> static void as_quoted_ascii(const T* base, int length, char* buf, size_t buflen);
|
||||
};
|
||||
|
||||
#endif // SHARE_UTILITIES_UTF8_HPP
|
||||
|
Loading…
Reference in New Issue
Block a user