8338257: UTF8 lengths should be size_t not int

Reviewed-by: stuefe, coleenp, dlong
2024-08-29 20:38:52 +00:00 · 2024-08-29 20:38:52 +00:00 · a4962ace4d
commit a4962ace4d
parent 777ed2b5d2
16 changed files with 229 additions and 122 deletions
--- a/src/hotspot/share/classfile/compactHashtable.cpp
+++ b/src/hotspot/share/classfile/compactHashtable.cpp
@ -431,10 +431,10 @@ void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
 }
 // NOTE: the content is NOT the same as
-// UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
+// UTF8::as_quoted_ascii(const char* utf8_str, size_t utf8_length, char* buf, size_t buflen).
 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
-void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
+void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, size_t utf8_length) {
  const char *c = utf8_string;
  const char *end = c + utf8_length;
  for (; c < end; c++) {
--- a/src/hotspot/share/classfile/compactHashtable.hpp
+++ b/src/hotspot/share/classfile/compactHashtable.hpp
@ -431,7 +431,7 @@ public:
  int unescape(const char* from, const char* end, int count);
  void get_utf8(char* utf8_buffer, int utf8_length);
-  static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
+  static void put_utf8(outputStream* st, const char* utf8_string, size_t utf8_length);
 };
 #endif // SHARE_CLASSFILE_COMPACTHASHTABLE_HPP
--- a/src/hotspot/share/classfile/javaClasses.cpp
+++ b/src/hotspot/share/classfile/javaClasses.cpp
@ -304,7 +304,8 @@ Handle java_lang_String::create_from_unicode(const jchar* unicode, int length, T
 #ifdef ASSERT
  {
    ResourceMark rm;
-    char* expected = UNICODE::as_utf8(unicode, length);
+    size_t utf8_len = static_cast<size_t>(length);
    char* expected = UNICODE::as_utf8(unicode, utf8_len);
    char* actual = as_utf8_string(h_obj());
    if (strcmp(expected, actual) != 0) {
      fatal("Unicode conversion failure: %s --> %s", expected, actual);
@ -346,7 +347,7 @@ Handle java_lang_String::create_from_str(const char* utf8_str, TRAPS) {
 #ifdef ASSERT
  // This check is too strict when the input string is not a valid UTF8.
  // For example, it may be created with arbitrary content via jni_NewStringUTF.
-  if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, (int)strlen(utf8_str), false)) {
+  if (UTF8::is_legal_utf8((const unsigned char*)utf8_str, strlen(utf8_str), false)) {
    ResourceMark rm;
    const char* expected = utf8_str;
    char* actual = as_utf8_string(h_obj());
@ -554,7 +555,7 @@ char* java_lang_String::as_quoted_ascii(oop java_string) {
  if (length == 0) return nullptr;
  char* result;
-  int result_length;
+  size_t result_length;
  if (!is_latin1) {
    jchar* base = value->char_at_addr(0);
    result_length = UNICODE::quoted_ascii_length(base, length) + 1;
@ -566,8 +567,8 @@ char* java_lang_String::as_quoted_ascii(oop java_string) {
    result = NEW_RESOURCE_ARRAY(char, result_length);
    UNICODE::as_quoted_ascii(base, length, result, result_length);
  }
-  assert(result_length >= length + 1, "must not be shorter");
+  assert(result_length >= (size_t)length + 1, "must not be shorter");
-  assert(result_length == (int)strlen(result) + 1, "must match");
+  assert(result_length == strlen(result) + 1, "must match");
  return result;
 }
@ -582,8 +583,9 @@ Symbol* java_lang_String::as_symbol(oop java_string) {
  } else {
    ResourceMark rm;
    jbyte* position = (length == 0) ? nullptr : value->byte_at_addr(0);
-    const char* base = UNICODE::as_utf8(position, length);
+    size_t utf8_len = static_cast<size_t>(length);
-    Symbol* sym = SymbolTable::new_symbol(base, length);
+    const char* base = UNICODE::as_utf8(position, utf8_len);
    Symbol* sym = SymbolTable::new_symbol(base, checked_cast<int>(utf8_len));
    return sym;
  }
 }
@ -598,12 +600,13 @@ Symbol* java_lang_String::as_symbol_or_null(oop java_string) {
  } else {
    ResourceMark rm;
    jbyte* position = (length == 0) ? nullptr : value->byte_at_addr(0);
-    const char* base = UNICODE::as_utf8(position, length);
+    size_t utf8_len = static_cast<size_t>(length);
-    return SymbolTable::probe(base, length);
+    const char* base = UNICODE::as_utf8(position, utf8_len);
    return SymbolTable::probe(base, checked_cast<int>(utf8_len));
  }
 }
-int java_lang_String::utf8_length(oop java_string, typeArrayOop value) {
+size_t java_lang_String::utf8_length(oop java_string, typeArrayOop value) {
  assert(value_equals(value, java_lang_String::value(java_string)),
         "value must be same as java_lang_String::value(java_string)");
  int length = java_lang_String::length(java_string, value);
@ -617,18 +620,39 @@ int java_lang_String::utf8_length(oop java_string, typeArrayOop value) {
  }
 }
-int java_lang_String::utf8_length(oop java_string) {
+size_t java_lang_String::utf8_length(oop java_string) {
  typeArrayOop value = java_lang_String::value(java_string);
  return utf8_length(java_string, value);
 }
 // Legacy convenience overload: fetches the string's value array and returns
 // the result of the two-argument utf8_length_as_int(), i.e. an int rather
 // than the size_t produced by utf8_length().
 int java_lang_String::utf8_length_as_int(oop java_string) {
  typeArrayOop value = java_lang_String::value(java_string);
  return utf8_length_as_int(java_string, value);
 }
 // Returns the UTF8 length of java_string as an int. `value` must be the
 // string's own value array (asserted below). An empty string yields 0;
 // otherwise the computation is delegated to UNICODE::utf8_length_as_int()
 // on the jchar or jbyte view of the array, depending on is_latin1().
 int java_lang_String::utf8_length_as_int(oop java_string, typeArrayOop value) {
  assert(value_equals(value, java_lang_String::value(java_string)),
         "value must be same as java_lang_String::value(java_string)");
  int length = java_lang_String::length(java_string, value);
  if (length == 0) {
    // Nothing to measure; also avoids taking an element address in an empty array.
    return 0;
  }
  if (!java_lang_String::is_latin1(java_string)) {
    // Not latin1: elements are accessed as jchar.
    return UNICODE::utf8_length_as_int(value->char_at_addr(0), length);
  } else {
    // latin1: elements are accessed as jbyte.
    return UNICODE::utf8_length_as_int(value->byte_at_addr(0), length);
  }
 }
 char* java_lang_String::as_utf8_string(oop java_string) {
-  int length;
+  size_t length;
  return as_utf8_string(java_string, length);
 }
-char* java_lang_String::as_utf8_string(oop java_string, int& length) {
+char* java_lang_String::as_utf8_string(oop java_string, size_t& length) {
  typeArrayOop value = java_lang_String::value(java_string);
  // `length` is used as the incoming number of characters to
  // convert, and then set as the number of bytes in the UTF8 sequence.
  length             = java_lang_String::length(java_string, value);
  bool     is_latin1 = java_lang_String::is_latin1(java_string);
  if (!is_latin1) {
@ -642,7 +666,7 @@ char* java_lang_String::as_utf8_string(oop java_string, int& length) {
 // Uses a provided buffer if it's sufficiently large, otherwise allocates
 // a resource array to fit
-char* java_lang_String::as_utf8_string_full(oop java_string, char* buf, int buflen, int& utf8_len) {
+char* java_lang_String::as_utf8_string_full(oop java_string, char* buf, size_t buflen, size_t& utf8_len) {
  typeArrayOop value = java_lang_String::value(java_string);
  int            len = java_lang_String::length(java_string, value);
  bool     is_latin1 = java_lang_String::is_latin1(java_string);
@ -663,7 +687,7 @@ char* java_lang_String::as_utf8_string_full(oop java_string, char* buf, int bufl
  }
 }
-char* java_lang_String::as_utf8_string(oop java_string, typeArrayOop value, char* buf, int buflen) {
+char* java_lang_String::as_utf8_string(oop java_string, typeArrayOop value, char* buf, size_t buflen) {
  assert(value_equals(value, java_lang_String::value(java_string)),
         "value must be same as java_lang_String::value(java_string)");
  int     length = java_lang_String::length(java_string, value);
@ -677,25 +701,28 @@ char* java_lang_String::as_utf8_string(oop java_string, typeArrayOop value, char
  }
 }
-char* java_lang_String::as_utf8_string(oop java_string, char* buf, int buflen) {
+char* java_lang_String::as_utf8_string(oop java_string, char* buf, size_t buflen) {
  typeArrayOop value = java_lang_String::value(java_string);
  return as_utf8_string(java_string, value, buf, buflen);
 }
 char* java_lang_String::as_utf8_string(oop java_string, int start, int len) {
  // `length` is used as the incoming number of characters to
  // convert, and then set as the number of bytes in the UTF8 sequence.
  size_t  length = static_cast<size_t>(len);
  typeArrayOop value  = java_lang_String::value(java_string);
  bool      is_latin1 = java_lang_String::is_latin1(java_string);
  assert(start + len <= java_lang_String::length(java_string), "just checking");
  if (!is_latin1) {
    jchar* position = value->char_at_addr(start);
-    return UNICODE::as_utf8(position, len);
+    return UNICODE::as_utf8(position, length);
  } else {
    jbyte* position = value->byte_at_addr(start);
-    return UNICODE::as_utf8(position, len);
+    return UNICODE::as_utf8(position, length);
  }
 }
-char* java_lang_String::as_utf8_string(oop java_string, typeArrayOop value, int start, int len, char* buf, int buflen) {
+char* java_lang_String::as_utf8_string(oop java_string, typeArrayOop value, int start, int len, char* buf, size_t buflen) {
  assert(value_equals(value, java_lang_String::value(java_string)),
         "value must be same as java_lang_String::value(java_string)");
  assert(start + len <= java_lang_String::length(java_string), "just checking");
--- a/src/hotspot/share/classfile/javaClasses.hpp
+++ b/src/hotspot/share/classfile/javaClasses.hpp
@ -131,17 +131,21 @@ class java_lang_String : AllStatic {
  static inline bool deduplication_requested(oop java_string);
  static inline int length(oop java_string);
  static inline int length(oop java_string, typeArrayOop string_value);
-  static int utf8_length(oop java_string);
+  static size_t utf8_length(oop java_string);
-  static int utf8_length(oop java_string, typeArrayOop string_value);
+  static size_t utf8_length(oop java_string, typeArrayOop string_value);
  // Legacy variants that truncate the length if needed
  static int    utf8_length_as_int(oop java_string);
  static int    utf8_length_as_int(oop java_string, typeArrayOop string_value);
  // String converters
  static char*  as_utf8_string(oop java_string);
-  static char*  as_utf8_string(oop java_string, int& length);
+  // `length` is set to the length of the utf8 sequence.
-  static char*  as_utf8_string_full(oop java_string, char* buf, int buflen, int& length);
+  static char*  as_utf8_string(oop java_string, size_t& length);
-  static char*  as_utf8_string(oop java_string, char* buf, int buflen);
+  static char*  as_utf8_string_full(oop java_string, char* buf, size_t buflen, size_t& length);
  static char*  as_utf8_string(oop java_string, char* buf, size_t buflen);
  static char*  as_utf8_string(oop java_string, int start, int len);
-  static char*  as_utf8_string(oop java_string, typeArrayOop value, char* buf, int buflen);
+  static char*  as_utf8_string(oop java_string, typeArrayOop value, char* buf, size_t buflen);
-  static char*  as_utf8_string(oop java_string, typeArrayOop value, int start, int len, char* buf, int buflen);
+  static char*  as_utf8_string(oop java_string, typeArrayOop value, int start, int len, char* buf, size_t buflen);
  static char*  as_platform_dependent_str(Handle java_string, TRAPS);
  static jchar* as_unicode_string(oop java_string, int& length, TRAPS);
  static jchar* as_unicode_string_or_null(oop java_string, int& length);
--- a/src/hotspot/share/classfile/modules.cpp
+++ b/src/hotspot/share/classfile/modules.cpp
@ -72,7 +72,9 @@ static char* get_module_name(oop module, int& len, TRAPS) {
  if (name_oop == nullptr) {
    THROW_MSG_NULL(vmSymbols::java_lang_NullPointerException(), "Null module name");
  }
-  char* module_name = java_lang_String::as_utf8_string(name_oop, len);
+  size_t utf8_len;
  char* module_name = java_lang_String::as_utf8_string(name_oop, utf8_len);
  len = checked_cast<int>(utf8_len); // module names are < 64K
  if (!verify_module_name(module_name, len)) {
    THROW_MSG_NULL(vmSymbols::java_lang_IllegalArgumentException(),
                   err_msg("Invalid module name: %s", module_name));
@ -84,9 +86,9 @@ static Symbol* as_symbol(jstring str_object) {
  if (str_object == nullptr) {
    return nullptr;
  }
-  int len;
+  size_t len;
  char* str = java_lang_String::as_utf8_string(JNIHandles::resolve_non_null(str_object), len);
-  return SymbolTable::new_symbol(str, len);
+  return SymbolTable::new_symbol(str, checked_cast<int>(len));
 }
 ModuleEntryTable* Modules::get_module_entry_table(Handle h_loader) {
@ -142,8 +144,10 @@ bool Modules::is_package_defined(Symbol* package, Handle h_loader) {
 // Will use the provided buffer if it's sufficiently large, otherwise allocates
 // a resource array
 // The length of the resulting string will be assigned to utf8_len
-static const char* as_internal_package(oop package_string, char* buf, int buflen, int& utf8_len) {
+static const char* as_internal_package(oop package_string, char* buf, size_t buflen, int& utf8_len) {
-  char* package_name = java_lang_String::as_utf8_string_full(package_string, buf, buflen, utf8_len);
+  size_t full_utf8_len;
  char* package_name = java_lang_String::as_utf8_string_full(package_string, buf, buflen, full_utf8_len);
  utf8_len = checked_cast<int>(full_utf8_len); // package names are < 64K
  // Turn all '/'s into '.'s
  for (int index = 0; index < utf8_len; index++) {
--- a/src/hotspot/share/classfile/stringTable.cpp
+++ b/src/hotspot/share/classfile/stringTable.cpp
@ -686,7 +686,7 @@ static void print_string(Thread* current, outputStream* st, oop s) {
    st->print("%d: ", length);
  } else {
    ResourceMark rm(current);
-    int utf8_length = length;
+    size_t utf8_length = length;
    char* utf8_string;
    if (!is_latin1) {
@ -697,7 +697,7 @@ static void print_string(Thread* current, outputStream* st, oop s) {
      utf8_string = UNICODE::as_utf8(bytes, utf8_length);
    }
-    st->print("%d: ", utf8_length);
+    st->print("%zu: ", utf8_length);
    HashtableTextDump::put_utf8(st, utf8_string, utf8_length);
  }
  st->cr();
--- a/src/hotspot/share/classfile/symbolTable.cpp
+++ b/src/hotspot/share/classfile/symbolTable.cpp
@ -349,6 +349,7 @@ Symbol* SymbolTable::lookup_common(const char* name,
 // to be used for arbitrary strings. For debug builds we will assert if
 // a string is too long, whereas product builds will truncate it.
 static int check_length(const char* name, int len) {
  assert(len >= 0, "negative length %d suggests integer overflow in the caller", len);
  assert(len <= Symbol::max_length(),
         "String length %d exceeds the maximum Symbol length of %d", len, Symbol::max_length());
  if (len > Symbol::max_length()) {
@ -461,33 +462,33 @@ Symbol* SymbolTable::lookup_only(const char* name, int len, unsigned int& hash)
 // and probing logic, so there is no need for convert_to_utf8 until
 // an actual new Symbol* is created.
 Symbol* SymbolTable::new_symbol(const jchar* name, int utf16_length) {
-  int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
+  size_t utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
  char stack_buf[ON_STACK_BUFFER_LENGTH];
-  if (utf8_length < (int) sizeof(stack_buf)) {
+  if (utf8_length < sizeof(stack_buf)) {
    char* chars = stack_buf;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
-    return new_symbol(chars, utf8_length);
+    return new_symbol(chars, checked_cast<int>(utf8_length));
  } else {
    ResourceMark rm;
    char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);
    UNICODE::convert_to_utf8(name, utf16_length, chars);
-    return new_symbol(chars, utf8_length);
+    return new_symbol(chars, checked_cast<int>(utf8_length));
  }
 }
 Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
                                         unsigned int& hash) {
-  int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
+  size_t utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
  char stack_buf[ON_STACK_BUFFER_LENGTH];
-  if (utf8_length < (int) sizeof(stack_buf)) {
+  if (utf8_length < sizeof(stack_buf)) {
    char* chars = stack_buf;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
-    return lookup_only(chars, utf8_length, hash);
+    return lookup_only(chars, checked_cast<int>(utf8_length), hash);
  } else {
    ResourceMark rm;
    char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);
    UNICODE::convert_to_utf8(name, utf16_length, chars);
-    return lookup_only(chars, utf8_length, hash);
+    return lookup_only(chars, checked_cast<int>(utf8_length), hash);
  }
 }
--- a/src/hotspot/share/jfr/dcmd/jfrDcmds.cpp
+++ b/src/hotspot/share/jfr/dcmd/jfrDcmds.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -292,10 +292,10 @@ static const char* get_as_dcmd_arena_string(oop string) {
  char* str = nullptr;
  const typeArrayOop value = java_lang_String::value(string);
  if (value != nullptr) {
-    const size_t length = static_cast<size_t>(java_lang_String::utf8_length(string, value)) + 1;
+    const size_t length = java_lang_String::utf8_length(string, value) + 1;
    str = dcmd_arena_allocate(length);
    assert(str != nullptr, "invariant");
-    java_lang_String::as_utf8_string(string, value, str, static_cast<int>(length));
+    java_lang_String::as_utf8_string(string, value, str, length);
  }
  return str;
 }
--- a/src/hotspot/share/jfr/jni/jfrJavaSupport.cpp
+++ b/src/hotspot/share/jfr/jni/jfrJavaSupport.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -502,7 +502,7 @@ Klass* JfrJavaSupport::klass(const jobject handle) {
  return obj->klass();
 }
-static char* allocate_string(bool c_heap, int length, Thread* thread) {
+static char* allocate_string(bool c_heap, size_t length, Thread* thread) {
  return c_heap ? NEW_C_HEAP_ARRAY(char, length, mtTracing) :
                  NEW_RESOURCE_ARRAY_IN_THREAD(thread, char, length);
 }
@ -511,7 +511,7 @@ const char* JfrJavaSupport::c_str(oop string, Thread* thread, bool c_heap /* fal
  char* str = nullptr;
  const typeArrayOop value = java_lang_String::value(string);
  if (value != nullptr) {
-    const int length = java_lang_String::utf8_length(string, value);
+    const size_t length = java_lang_String::utf8_length(string, value);
    str = allocate_string(c_heap, length + 1, thread);
    if (str == nullptr) {
      return nullptr;
--- a/src/hotspot/share/jfr/recorder/checkpoint/types/jfrThreadState.cpp
+++ b/src/hotspot/share/jfr/recorder/checkpoint/types/jfrThreadState.cpp
@ -1,5 +1,5 @@
 /*
-* Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
+* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -121,7 +121,10 @@ static const char* get_java_thread_name(const JavaThread* jt, int& length, oop v
  }
  assert(thread_obj != nullptr, "invariant");
  const oop name = java_lang_Thread::name(thread_obj);
-  return name != nullptr ? java_lang_String::as_utf8_string(name, length) : nullptr;
+  // Start at 0 so that `length` is well-defined when the thread has no name:
+  // as_utf8_string() is the only writer of utf8_len and it is not called for
+  // a null name, which would otherwise feed an indeterminate value into
+  // checked_cast<int>().
+  size_t utf8_len = 0;
  const char* ret = name != nullptr ? java_lang_String::as_utf8_string(name, utf8_len) : nullptr;
  length = checked_cast<int>(utf8_len); // Thread names should be short
  return ret;
 }
 const char* JfrThreadName::name(const Thread* t, int& length, oop vthread) {
--- a/src/hotspot/share/oops/symbol.cpp
+++ b/src/hotspot/share/oops/symbol.cpp
@ -166,7 +166,7 @@ void Symbol::print_symbol_on(outputStream* st) const {
 char* Symbol::as_quoted_ascii() const {
  const char *ptr = (const char *)&_body[0];
-  int quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
+  size_t quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
  char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
  UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
  return result;
--- a/src/hotspot/share/prims/jni.cpp
+++ b/src/hotspot/share/prims/jni.cpp
@ -2223,7 +2223,7 @@ JNI_END
 JNI_ENTRY(jsize, jni_GetStringUTFLength(JNIEnv *env, jstring string))
 HOTSPOT_JNI_GETSTRINGUTFLENGTH_ENTRY(env, string);
  oop java_string = JNIHandles::resolve_non_null(string);
-  jsize ret = java_lang_String::utf8_length(java_string);
+  jsize ret = java_lang_String::utf8_length_as_int(java_string);
  HOTSPOT_JNI_GETSTRINGUTFLENGTH_RETURN(ret);
  return ret;
 JNI_END
@ -2236,10 +2236,11 @@ JNI_ENTRY(const char*, jni_GetStringUTFChars(JNIEnv *env, jstring string, jboole
  typeArrayOop s_value = java_lang_String::value(java_string);
  if (s_value != nullptr) {
    size_t length = java_lang_String::utf8_length(java_string, s_value);
-    /* JNI Specification states return null on OOM */
+    // JNI Specification states return null on OOM.
    // The resulting sequence doesn't have to be NUL-terminated but we do.
    result = AllocateHeap(length + 1, mtInternal, AllocFailStrategy::RETURN_NULL);
    if (result != nullptr) {
-      java_lang_String::as_utf8_string(java_string, s_value, result, (int) length + 1);
+      java_lang_String::as_utf8_string(java_string, s_value, result, length + 1);
      if (isCopy != nullptr) {
        *isCopy = JNI_TRUE;
      }
--- a/src/hotspot/share/prims/jvmtiEnv.cpp
+++ b/src/hotspot/share/prims/jvmtiEnv.cpp
@ -1321,7 +1321,7 @@ JvmtiEnv::GetThreadInfo(jthread thread, jvmtiThreadInfo* info_ptr) {
    if (name() != nullptr) {
      n = java_lang_String::as_utf8_string(name());
    } else {
-      int utf8_length = 0;
+      size_t utf8_length = 0;
      n = UNICODE::as_utf8((jchar*) nullptr, utf8_length);
    }
--- a/src/hotspot/share/services/finalizerService.cpp
+++ b/src/hotspot/share/services/finalizerService.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -45,7 +45,7 @@ static const char* allocate(oop string) {
  char* str = nullptr;
  const typeArrayOop value = java_lang_String::value(string);
  if (value != nullptr) {
-    const int length = java_lang_String::utf8_length(string, value);
+    const size_t length = java_lang_String::utf8_length(string, value);
    str = NEW_C_HEAP_ARRAY(char, length + 1, mtServiceability);
    java_lang_String::as_utf8_string(string, value, str, length + 1);
  }
--- a/src/hotspot/share/utilities/utf8.cpp
+++ b/src/hotspot/share/utilities/utf8.cpp
@ -98,15 +98,21 @@ char* UTF8::next_character(const char* str, jint* value) {
  return next_ch;
 }
-// Count bytes of the form 10xxxxxx and deduct this count
+// The number of unicode characters in a utf8 sequence can be easily
 // determined by noting that bytes of the form 10xxxxxx are part of
 // a 2 or 3-byte multi-byte sequence, all others are either characters
 // themselves or else the start of a multi-byte character.
 // Calculate the unicode length of a utf8 string of known size
 // by counting bytes of the form 10xxxxxx and deducting this count
 // from the total byte count.  The utf8 string must be in
 // legal form which has been verified in the format checker.
-int UTF8::unicode_length(const char* str, int len, bool& is_latin1, bool& has_multibyte) {
+int UTF8::unicode_length(const char* str, size_t len, bool& is_latin1, bool& has_multibyte) {
-  int num_chars = len;
+  size_t num_chars = len;
  has_multibyte = false;
  is_latin1 = true;
  unsigned char prev = 0;
-  for (int i = 0; i < len; i++) {
+  for (size_t i = 0; i < len; i++) {
    unsigned char c = str[i];
    if ((c & 0xC0) == 0x80) {
      // Multibyte, check if valid latin1 character.
@ -118,12 +124,12 @@ int UTF8::unicode_length(const char* str, int len, bool& is_latin1, bool& has_mu
    }
    prev = c;
  }
-  return num_chars;
+  return checked_cast<int>(num_chars);
 }
-// Count bytes of the utf8 string except those in form
+// Calculate the unicode length of a nul-terminated utf8 string
-// 10xxxxxx which only appear in multibyte characters.
+// by counting bytes of the utf8 string except those in the form
-// The utf8 string must be in legal form and has been
+// 10xxxxxx. The utf8 string must be in legal form and has been
 // verified in the format checker.
 int UTF8::unicode_length(const char* str, bool& is_latin1, bool& has_multibyte) {
  int num_chars = 0;
@ -195,10 +201,10 @@ template void UTF8::convert_to_unicode<jchar>(const char* utf8_str, jchar* unico
 template void UTF8::convert_to_unicode<jbyte>(const char* utf8_str, jbyte* unicode_str, int unicode_length);
 // returns the quoted ascii length of a 0-terminated utf8 string
-int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
+size_t UTF8::quoted_ascii_length(const char* utf8_str, size_t utf8_length) {
  const char *ptr = utf8_str;
  const char* end = ptr + utf8_length;
-  int result = 0;
+  size_t result = 0;
  while (ptr < end) {
    jchar c;
    ptr = UTF8::next(ptr, &c);
@ -212,7 +218,7 @@ int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
 }
 // converts a utf8 string to quoted ascii
-void UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen) {
+void UTF8::as_quoted_ascii(const char* utf8_str, size_t utf8_length, char* buf, size_t buflen) {
  const char *ptr = utf8_str;
  const char *utf8_end = ptr + utf8_length;
  char* p = buf;
@ -248,7 +254,7 @@ const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {
    return quoted_ascii_str;
  }
  // everything up to this point was ok.
-  int length = ptr - quoted_ascii_str;
+  size_t length = ptr - quoted_ascii_str;
  char* buffer = nullptr;
  for (int round = 0; round < 2; round++) {
    while (*ptr != '\0') {
@ -330,11 +336,11 @@ jint UTF8::get_supplementary_character(const unsigned char* str) {
                 + ((str[4] & 0x0f) << 6)  + (str[5] & 0x3f);
 }
-bool UTF8::is_legal_utf8(const unsigned char* buffer, int length,
+bool UTF8::is_legal_utf8(const unsigned char* buffer, size_t length,
                         bool version_leq_47) {
-  int i = 0;
+  size_t i = 0;
-  int count = length >> 2;
+  size_t count = length >> 2;
-  for (int k=0; k<count; k++) {
+  for (size_t k = 0; k < count; k++) {
    unsigned char b0 = buffer[i];
    unsigned char b1 = buffer[i+1];
    unsigned char b2 = buffer[i+2];
@ -405,7 +411,7 @@ static bool is_starting_byte(unsigned char b) {
 // To avoid that the caller can choose to check for validity first.
 // The incoming buffer is still expected to be NUL-terminated.
 // The incoming buffer is expected to be a realistic size - we assert if it is too small.
-void UTF8::truncate_to_legal_utf8(unsigned char* buffer, int length) {
+void UTF8::truncate_to_legal_utf8(unsigned char* buffer, size_t length) {
  assert(length > 5, "invalid length");
  assert(buffer[length - 1] == '\0', "Buffer should be NUL-terminated");
@ -433,7 +439,7 @@ void UTF8::truncate_to_legal_utf8(unsigned char* buffer, int length) {
  // then we insert NUL at that location to terminate the buffer. There is an added complexity with 6 byte
  // encodings as the first and fourth bytes are the same and overlap with the 3 byte encoding.
-  for (int index = length - 2; index > 0; index--) {
+  for (size_t index = length - 2; index > 0; index--) {
    if (is_starting_byte(buffer[index])) {
      if (buffer[index] == 0xED) {
        // Could be first byte of 3 or 6, or fourth byte of 6.
@ -441,7 +447,7 @@ void UTF8::truncate_to_legal_utf8(unsigned char* buffer, int length) {
        // surrogate value in the range EDA080 to EDAFBF. We only
        // need to check for EDA to establish this as the "missing"
        // values in EDAxxx would not be valid 3 byte encodings.
-        if ((index - 3) >= 0 &&
+        if (index >= 3 &&
            (buffer[index - 3] == 0xED) &&
            ((buffer[index - 2] & 0xF0) == 0xA0)) {
          assert(buffer[index - 1] >= 0x80 && buffer[index - 1] <= 0xBF, "sanity check");
@ -470,7 +476,7 @@ bool UNICODE::is_latin1(const jchar* base, int length) {
  return true;
 }
-int UNICODE::utf8_size(jchar c) {
+size_t UNICODE::utf8_size(jchar c) {
  if ((0x0001 <= c) && (c <= 0x007F)) {
    // ASCII character
    return 1;
@ -481,7 +487,7 @@ int UNICODE::utf8_size(jchar c) {
  }
 }
-int UNICODE::utf8_size(jbyte c) {
+size_t UNICODE::utf8_size(jbyte c) {
  if (c >= 0x01) {
    // ASCII character. Check is equivalent to
    // (0x01 <= c) && (c <= 0x7F) because c is signed.
@ -494,11 +500,23 @@ int UNICODE::utf8_size(jbyte c) {
 }
 template<typename T>
-int UNICODE::utf8_length(const T* base, int length) {
+size_t UNICODE::utf8_length(const T* base, int length) {
  size_t result = 0;
  for (int index = 0; index < length; index++) {
    result += utf8_size(base[index]);
  }
  return result;
 }
 template<typename T>
 int UNICODE::utf8_length_as_int(const T* base, int length) {
  size_t result = 0;
  for (int index = 0; index < length; index++) {
    T c = base[index];
-    int sz = utf8_size(c);
+    size_t sz = utf8_size(c);
    // If the length is > INT_MAX-1 we truncate at a completed
    // modified-UTF8 encoding. This allows for +1 to be added
    // by the caller for NUL-termination, without overflow.
    if (result + sz > INT_MAX-1) {
      break;
    }
@ -508,41 +526,44 @@ int UNICODE::utf8_length(const T* base, int length) {
 }
 template<typename T>
-char* UNICODE::as_utf8(const T* base, int& length) {
+char* UNICODE::as_utf8(const T* base, size_t& length) {
-  int utf8_len = utf8_length(base, length);
+  // Incoming length must be <= INT_MAX
  size_t utf8_len = utf8_length(base, static_cast<int>(length));
  u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
-  char* result = as_utf8(base, length, (char*) buf, utf8_len + 1);
+  char* result = as_utf8(base, static_cast<int>(length), (char*) buf, utf8_len + 1);
-  assert((int) strlen(result) == utf8_len, "length prediction must be correct");
+  assert(strlen(result) == utf8_len, "length prediction must be correct");
-  // Set string length to uft8 length
+  // Set outgoing string length to utf8 length
  length = utf8_len;
  return (char*) result;
 }
-char* UNICODE::as_utf8(const jchar* base, int length, char* buf, int buflen) {
+char* UNICODE::as_utf8(const jchar* base, int length, char* buf, size_t buflen) {
  assert(buflen > 0, "zero length output buffer");
  u_char* p = (u_char*)buf;
  for (int index = 0; index < length; index++) {
    jchar c = base[index];
-    buflen -= utf8_size(c);
+    size_t sz = utf8_size(c);
-    if (buflen <= 0) break; // string is truncated
+    if (sz >= buflen) break; // string is truncated
    buflen -= sz;
    p = utf8_write(p, c);
  }
  *p = '\0';
  return buf;
 }
-char* UNICODE::as_utf8(const jbyte* base, int length, char* buf, int buflen) {
+char* UNICODE::as_utf8(const jbyte* base, int length, char* buf, size_t buflen) {
  assert(buflen > 0, "zero length output buffer");
  u_char* p = (u_char*)buf;
  for (int index = 0; index < length; index++) {
    jbyte c = base[index];
-    int sz = utf8_size(c);
+    size_t sz = utf8_size(c);
    if (sz >= buflen) break; // string is truncated
    buflen -= sz;
    if (buflen <= 0) break; // string is truncated
    if (sz == 1) {
      // Copy ASCII characters (UTF-8 is ASCII compatible)
      *p++ = c;
    } else {
      assert(sz == 2, "must be!");
      // Non-ASCII character or 0x00 which should
      // be encoded as 0xC080 in "modified" UTF8.
      p = utf8_write(p, ((jchar) c) & 0xff);
@ -561,8 +582,8 @@ void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer)
 // returns the quoted ascii length of a unicode string
 template<typename T>
-int UNICODE::quoted_ascii_length(const T* base, int length) {
+size_t UNICODE::quoted_ascii_length(const T* base, int length) {
-  int result = 0;
+  size_t result = 0;
  for (int i = 0; i < length; i++) {
    T c = base[i];
    if (c >= 32 && c < 127) {
@ -576,7 +597,7 @@ int UNICODE::quoted_ascii_length(const T* base, int length) {
 // converts a unicode string to quoted ascii
 template<typename T>
-void UNICODE::as_quoted_ascii(const T* base, int length, char* buf, int buflen) {
+void UNICODE::as_quoted_ascii(const T* base, int length, char* buf, size_t buflen) {
  char* p = buf;
  char* end = buf + buflen;
  for (int index = 0; index < length; index++) {
@ -594,11 +615,13 @@ void UNICODE::as_quoted_ascii(const T* base, int length, char* buf, int buflen)
 }
 // Explicit instantiation for all supported types.
-template int UNICODE::utf8_length(const jbyte* base, int length);
+template size_t UNICODE::utf8_length(const jbyte* base, int length);
-template int UNICODE::utf8_length(const jchar* base, int length);
+template size_t UNICODE::utf8_length(const jchar* base, int length);
-template char* UNICODE::as_utf8(const jbyte* base, int& length);
+template int UNICODE::utf8_length_as_int(const jbyte* base, int length);
-template char* UNICODE::as_utf8(const jchar* base, int& length);
+template int UNICODE::utf8_length_as_int(const jchar* base, int length);
-template int UNICODE::quoted_ascii_length<jbyte>(const jbyte* base, int length);
+template char* UNICODE::as_utf8(const jbyte* base, size_t& length);
-template int UNICODE::quoted_ascii_length<jchar>(const jchar* base, int length);
+template char* UNICODE::as_utf8(const jchar* base, size_t& length);
-template void UNICODE::as_quoted_ascii<jbyte>(const jbyte* base, int length, char* buf, int buflen);
+template size_t UNICODE::quoted_ascii_length<jbyte>(const jbyte* base, int length);
-template void UNICODE::as_quoted_ascii<jchar>(const jchar* base, int length, char* buf, int buflen);
+template size_t UNICODE::quoted_ascii_length<jchar>(const jchar* base, int length);
 template void UNICODE::as_quoted_ascii<jbyte>(const jbyte* base, int length, char* buf, size_t buflen);
 template void UNICODE::as_quoted_ascii<jchar>(const jchar* base, int length, char* buf, size_t buflen);
--- a/src/hotspot/share/utilities/utf8.hpp
+++ b/src/hotspot/share/utilities/utf8.hpp
@ -29,6 +29,45 @@
 #include "memory/allStatic.hpp"
 #include "utilities/debug.hpp"
 /**
 String handling within Java and the VM requires a bit of explanation.
 Logically a java.lang.String is a sequence of 16-bit Unicode characters
 encoded in UTF-16. In the past a String contained a Java char[] and so
 could theoretically contain INT_MAX 16-bit characters. Then came JEP 254:
 Compact Strings.
 With Compact Strings the Java char[] becomes a Java byte[], and that byte[]
 contains either latin-1 characters all of which fit in 8-bits, or else each
 pair of bytes represents a UTF-16 character. Consequently the maximum length
 in characters of a latin-1 string is INT_MAX, whilst for non-latin-1 it is INT_MAX/2.
 In the code below if we have latin-1 content then we treat the String's data
 array as a jbyte[], else a jchar[]. The lengths of these arrays are specified
 as an int value, with a nominal maximum of INT_MAX.
 The modified UTF-8 encoding specified for the VM, nominally encodes characters
 in 1, 2, 3 or 6 bytes. The 6-byte representation is actually two 3-byte representations
 for two UTF-16 characters forming a surrogate pair. If we are dealing with
 a latin-1 string then each character will be encoded as either 1 or 2 bytes and so the
 maximum UTF8 length is 2*INT_MAX. This can't be stored in an int so utf8 buffers must
 use a size_t length. For non-latin-1 strings each UTF-16 character will encode as at most
 3 bytes, so the maximum UTF8 length in that case is 3 * INT_MAX/2 i.e. 1.5*INT_MAX.
 The "quoted ascii" form of a unicode string is at worst 6 times longer than its
 regular form, and so these lengths must always be size_t - though if we know we only
 ever do this to symbols (or small symbol combinations) then we could use int.
 There is an additional assumption/expectation that our UTF8 APIs are never dealing with
 invalid UTF8, and more generally that all UTF8 sequences could form valid Strings.
 Consequently the Unicode length of a UTF8 sequence is assumed to always be representable
 by an int. However, there are APIs, such as JNI NewStringUTF, that do deal with such input
 and could potentially have an unrepresentable string. The long-standing position with JNI
 is that the user must supply valid input so we do not try to account for these cases.
 */
 // Low-level interface for UTF8 strings
 class UTF8 : AllStatic {
@ -41,20 +80,20 @@ class UTF8 : AllStatic {
  static int unicode_length(const char* utf8_str, bool& is_latin1, bool& has_multibyte);
  // returns the unicode length of a non-0-terminated utf8 string
-  static int unicode_length(const char* utf8_str, int len) {
+  static int unicode_length(const char* utf8_str, size_t len) {
    bool is_latin1, has_multibyte;
    return unicode_length(utf8_str, len, is_latin1, has_multibyte);
  }
-  static int unicode_length(const char* utf8_str, int len, bool& is_latin1, bool& has_multibyte);
+  static int unicode_length(const char* utf8_str, size_t len, bool& is_latin1, bool& has_multibyte);
  // converts a utf8 string to a unicode string
  template<typename T> static void convert_to_unicode(const char* utf8_str, T* unicode_str, int unicode_length);
  // returns the quoted ascii length of a utf8 string
-  static int quoted_ascii_length(const char* utf8_str, int utf8_length);
+  static size_t quoted_ascii_length(const char* utf8_str, size_t utf8_length);
  // converts a utf8 string to quoted ascii
-  static void as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen);
+  static void as_quoted_ascii(const char* utf8_str, size_t utf8_length, char* buf, size_t buflen);
 #ifndef PRODUCT
  // converts a quoted ascii string to utf8 string.  returns the original
@ -82,13 +121,13 @@ class UTF8 : AllStatic {
    while(--length >= 0 && base[length] != c);
    return (length < 0) ? nullptr : &base[length];
  }
-  static bool   equal(const jbyte* base1, int length1, const jbyte* base2,int length2);
+  static bool   equal(const jbyte* base1, int length1, const jbyte* base2, int length2);
  static bool   is_supplementary_character(const unsigned char* str);
  static jint   get_supplementary_character(const unsigned char* str);
-  static bool   is_legal_utf8(const unsigned char* buffer, int length,
+  static bool   is_legal_utf8(const unsigned char* buffer, size_t length,
                              bool version_leq_47);
-  static void   truncate_to_legal_utf8(unsigned char* buffer, int length);
+  static void   truncate_to_legal_utf8(unsigned char* buffer, size_t length);
 };
@ -99,6 +138,12 @@ class UTF8 : AllStatic {
 // units, so a supplementary character uses two positions in a unicode string.
 class UNICODE : AllStatic {
+  // returns the utf8 size of a unicode character
+  // uses size_t for convenience in overflow checks
+  static size_t utf8_size(jchar c);
+  static size_t utf8_size(jbyte c);
 public:
  // checks if the given unicode character can be encoded as latin1
  static bool is_latin1(jchar c);
@ -106,28 +151,27 @@ class UNICODE : AllStatic {
  // checks if the given string can be encoded as latin1
  static bool is_latin1(const jchar* base, int length);
-  // returns the utf8 size of a unicode character
-  static int utf8_size(jchar c);
-  static int utf8_size(jbyte c);
  // returns the utf8 length of a unicode string
-  template<typename T> static int utf8_length(const T* base, int length);
+  template<typename T> static size_t utf8_length(const T* base, int length);
  // returns the utf8 length of a unicode string as an int - truncated if needed
  template<typename T> static int utf8_length_as_int(const T* base, int length);
  // converts a unicode string to utf8 string
  static void convert_to_utf8(const jchar* base, int length, char* utf8_buffer);
  // converts a unicode string to a utf8 string; result is allocated
  // in resource area unless a buffer is provided. The unicode 'length'
-  // parameter is set to the length of the result utf8 string.
+  // parameter is set to the length of the resulting utf8 string.
-  template<typename T> static char* as_utf8(const T* base, int& length);
+  template<typename T> static char* as_utf8(const T* base, size_t& length);
-  static char* as_utf8(const jchar* base, int length, char* buf, int buflen);
+  static char* as_utf8(const jchar* base, int length, char* buf, size_t buflen);
-  static char* as_utf8(const jbyte* base, int length, char* buf, int buflen);
+  static char* as_utf8(const jbyte* base, int length, char* buf, size_t buflen);
  // returns the quoted ascii length of a unicode string
-  template<typename T> static int quoted_ascii_length(const T* base, int length);
+  template<typename T> static size_t quoted_ascii_length(const T* base, int length);
  // converts a unicode string to quoted ascii
-  template<typename T> static void as_quoted_ascii(const T* base, int length, char* buf, int buflen);
+  template<typename T> static void as_quoted_ascii(const T* base, int length, char* buf, size_t buflen);
 };
 #endif // SHARE_UTILITIES_UTF8_HPP