8320500: [vectorapi] RISC-V: Optimize vector math operations with SLEEF
Reviewed-by: luhenry, ihse, erikj, fyang, rehn
This commit is contained in:
parent
4a12f5b26e
commit
580eb62dc0
@ -37,3 +37,21 @@ ifeq ($(call isTargetOs, linux windows)+$(call isTargetCpu, x86_64)+$(INCLUDE_CO
|
|||||||
|
|
||||||
TARGETS += $(BUILD_LIBJSVML)
|
TARGETS += $(BUILD_LIBJSVML)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
## Build libsleef
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2), true+true+true)
|
||||||
|
$(eval $(call SetupJdkLibrary, BUILD_LIBSLEEF, \
|
||||||
|
NAME := sleef, \
|
||||||
|
OPTIMIZATION := HIGH, \
|
||||||
|
SRC := libsleef/lib, \
|
||||||
|
EXTRA_SRC := libsleef/generated, \
|
||||||
|
DISABLED_WARNINGS_gcc := unused-function sign-compare tautological-compare ignored-qualifiers, \
|
||||||
|
DISABLED_WARNINGS_clang := unused-function sign-compare tautological-compare ignored-qualifiers, \
|
||||||
|
CFLAGS := -march=rv64gcv, \
|
||||||
|
))
|
||||||
|
|
||||||
|
TARGETS += $(BUILD_LIBSLEEF)
|
||||||
|
endif
|
||||||
|
@ -46,8 +46,10 @@
|
|||||||
class Argument {
|
class Argument {
|
||||||
public:
|
public:
|
||||||
enum {
|
enum {
|
||||||
n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
|
// check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
|
||||||
n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... )
|
n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
|
||||||
|
n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... )
|
||||||
|
n_vector_register_parameters_c = 16, // v8, v9, ... v23
|
||||||
|
|
||||||
n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...)
|
n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...)
|
||||||
n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...)
|
n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...)
|
||||||
|
@ -1972,12 +1972,16 @@ const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length)
|
|||||||
|
|
||||||
// Vector calling convention not yet implemented.
|
// Vector calling convention not yet implemented.
|
||||||
bool Matcher::supports_vector_calling_convention(void) {
|
bool Matcher::supports_vector_calling_convention(void) {
|
||||||
return false;
|
return EnableVectorSupport && UseVectorStubs;
|
||||||
}
|
}
|
||||||
|
|
||||||
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
|
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
|
||||||
Unimplemented();
|
assert(EnableVectorSupport && UseVectorStubs, "sanity");
|
||||||
return OptoRegPair(0, 0);
|
assert(ideal_reg == Op_VecA, "sanity");
|
||||||
|
// check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
|
||||||
|
int lo = V8_num;
|
||||||
|
int hi = V8_K_num;
|
||||||
|
return OptoRegPair(hi, lo);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Is this branch offset short enough that a short branch can be used?
|
// Is this branch offset short enough that a short branch can be used?
|
||||||
@ -10075,6 +10079,23 @@ instruct CallLeafDirect(method meth, rFlagsReg cr)
|
|||||||
ins_pipe(pipe_class_call);
|
ins_pipe(pipe_class_call);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// Call Runtime Instruction without safepoint and with vector arguments
|
||||||
|
|
||||||
|
instruct CallLeafDirectVector(method meth, rFlagsReg cr)
|
||||||
|
%{
|
||||||
|
match(CallLeafVector);
|
||||||
|
|
||||||
|
effect(USE meth, KILL cr);
|
||||||
|
|
||||||
|
ins_cost(BRANCH_COST);
|
||||||
|
|
||||||
|
format %{ "CALL, runtime leaf vector $meth" %}
|
||||||
|
|
||||||
|
ins_encode(riscv_enc_java_to_runtime(meth));
|
||||||
|
|
||||||
|
ins_pipe(pipe_class_call);
|
||||||
|
%}
|
||||||
|
|
||||||
// Call Runtime Instruction
|
// Call Runtime Instruction
|
||||||
|
|
||||||
instruct CallLeafNoFPDirect(method meth, rFlagsReg cr)
|
instruct CallLeafNoFPDirect(method meth, rFlagsReg cr)
|
||||||
|
@ -666,7 +666,20 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
|
|||||||
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
|
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
|
||||||
uint num_bits,
|
uint num_bits,
|
||||||
uint total_args_passed) {
|
uint total_args_passed) {
|
||||||
Unimplemented();
|
assert(total_args_passed <= Argument::n_vector_register_parameters_c, "unsupported");
|
||||||
|
assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported");
|
||||||
|
|
||||||
|
// check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
|
||||||
|
static const VectorRegister VEC_ArgReg[Argument::n_vector_register_parameters_c] = {
|
||||||
|
v8, v9, v10, v11, v12, v13, v14, v15,
|
||||||
|
v16, v17, v18, v19, v20, v21, v22, v23
|
||||||
|
};
|
||||||
|
|
||||||
|
const int next_reg_val = 3;
|
||||||
|
for (uint i = 0; i < total_args_passed; i++) {
|
||||||
|
VMReg vmreg = VEC_ArgReg[i]->as_VMReg();
|
||||||
|
regs[i].set_pair(vmreg->next(next_reg_val), vmreg);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6071,6 +6071,58 @@ static const int64_t right_3_bits = right_n_bits(3);
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void generate_vector_math_stubs() {
|
||||||
|
if (!UseRVV) {
|
||||||
|
log_info(library)("vector is not supported, skip loading vector math (sleef) library!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get native vector math stub routine addresses
|
||||||
|
void* libsleef = nullptr;
|
||||||
|
char ebuf[1024];
|
||||||
|
char dll_name[JVM_MAXPATHLEN];
|
||||||
|
if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) {
|
||||||
|
libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf);
|
||||||
|
}
|
||||||
|
if (libsleef == nullptr) {
|
||||||
|
log_info(library)("Failed to load native vector math (sleef) library, %s!", ebuf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method naming convention
|
||||||
|
// All the methods are named as <OP><T>_<U><suffix>
|
||||||
|
//
|
||||||
|
// Where:
|
||||||
|
// <OP> is the operation name, e.g. sin, cos
|
||||||
|
// <T> is to indicate float/double
|
||||||
|
// "fx/dx" for vector float/double operation
|
||||||
|
// <U> is the precision level
|
||||||
|
// "u10/u05" represents 1.0/0.5 ULP error bounds
|
||||||
|
// We use "u10" for all operations by default
|
||||||
|
// But for those functions do not have u10 support, we use "u05" instead
|
||||||
|
// <suffix> rvv, indicates riscv vector extension
|
||||||
|
//
|
||||||
|
// e.g. sinfx_u10rvv is the method for computing vector float sin using rvv instructions
|
||||||
|
//
|
||||||
|
log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef));
|
||||||
|
|
||||||
|
for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
|
||||||
|
int vop = VectorSupport::VECTOR_OP_MATH_START + op;
|
||||||
|
if (vop == VectorSupport::VECTOR_OP_TANH) { // skip tanh because of performance regression
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The native library does not support u10 level of "hypot".
|
||||||
|
const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
|
||||||
|
|
||||||
|
snprintf(ebuf, sizeof(ebuf), "%sfx_%srvv", VectorSupport::mathname[op], ulf);
|
||||||
|
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
|
||||||
|
|
||||||
|
snprintf(ebuf, sizeof(ebuf), "%sdx_%srvv", VectorSupport::mathname[op], ulf);
|
||||||
|
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // COMPILER2
|
#endif // COMPILER2
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -6291,6 +6343,8 @@ static const int64_t right_3_bits = right_n_bits(3);
|
|||||||
|
|
||||||
generate_string_indexof_stubs();
|
generate_string_indexof_stubs();
|
||||||
|
|
||||||
|
generate_vector_math_stubs();
|
||||||
|
|
||||||
#endif // COMPILER2
|
#endif // COMPILER2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4184,41 +4184,41 @@ void StubGenerator::generate_compiler_stubs() {
|
|||||||
|
|
||||||
log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml));
|
log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml));
|
||||||
if (UseAVX > 2) {
|
if (UseAVX > 2) {
|
||||||
for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
|
for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
|
||||||
int vop = VectorSupport::VECTOR_OP_SVML_START + op;
|
int vop = VectorSupport::VECTOR_OP_MATH_START + op;
|
||||||
if ((!VM_Version::supports_avx512dq()) &&
|
if ((!VM_Version::supports_avx512dq()) &&
|
||||||
(vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
|
(vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::svmlname[op]);
|
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::mathname[op]);
|
||||||
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
||||||
|
|
||||||
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::svmlname[op]);
|
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::mathname[op]);
|
||||||
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
|
const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
|
||||||
for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
|
for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
|
||||||
int vop = VectorSupport::VECTOR_OP_SVML_START + op;
|
int vop = VectorSupport::VECTOR_OP_MATH_START + op;
|
||||||
if (vop == VectorSupport::VECTOR_OP_POW) {
|
if (vop == VectorSupport::VECTOR_OP_POW) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
|
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
|
||||||
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
||||||
|
|
||||||
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
|
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
|
||||||
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
||||||
|
|
||||||
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
|
snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::mathname[op], avx_sse_str);
|
||||||
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
||||||
|
|
||||||
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
|
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::mathname[op], avx_sse_str);
|
||||||
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
||||||
|
|
||||||
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
|
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::mathname[op], avx_sse_str);
|
||||||
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
||||||
|
|
||||||
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
|
snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
|
||||||
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -755,7 +755,7 @@ Node *CallNode::match( const ProjNode *proj, const Matcher *match ) {
|
|||||||
|
|
||||||
if (Opcode() == Op_CallLeafVector) {
|
if (Opcode() == Op_CallLeafVector) {
|
||||||
// If the return is in vector, compute appropriate regmask taking into account the whole range
|
// If the return is in vector, compute appropriate regmask taking into account the whole range
|
||||||
if(ideal_reg >= Op_VecS && ideal_reg <= Op_VecZ) {
|
if(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ) {
|
||||||
if(OptoReg::is_valid(regs.second())) {
|
if(OptoReg::is_valid(regs.second())) {
|
||||||
for (OptoReg::Name r = regs.first(); r <= regs.second(); r = OptoReg::add(r, 1)) {
|
for (OptoReg::Name r = regs.first(); r <= regs.second(); r = OptoReg::add(r, 1)) {
|
||||||
rm.Insert(r);
|
rm.Insert(r);
|
||||||
|
@ -374,7 +374,7 @@ class LibraryCallKit : public GraphKit {
|
|||||||
bool inline_index_vector();
|
bool inline_index_vector();
|
||||||
bool inline_index_partially_in_upper_range();
|
bool inline_index_partially_in_upper_range();
|
||||||
|
|
||||||
Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
|
Node* gen_call_to_vector_math(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
|
||||||
|
|
||||||
enum VectorMaskUseType {
|
enum VectorMaskUseType {
|
||||||
VecMaskUseLoad = 1 << 0,
|
VecMaskUseLoad = 1 << 0,
|
||||||
|
@ -468,11 +468,11 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
|
|||||||
Node* operation = nullptr;
|
Node* operation = nullptr;
|
||||||
if (opc == Op_CallLeafVector) {
|
if (opc == Op_CallLeafVector) {
|
||||||
assert(UseVectorStubs, "sanity");
|
assert(UseVectorStubs, "sanity");
|
||||||
operation = gen_call_to_svml(opr->get_con(), elem_bt, num_elem, opd1, opd2);
|
operation = gen_call_to_vector_math(opr->get_con(), elem_bt, num_elem, opd1, opd2);
|
||||||
if (operation == nullptr) {
|
if (operation == nullptr) {
|
||||||
log_if_needed(" ** svml call failed for %s_%s_%d",
|
log_if_needed(" ** Vector math call failed for %s_%s_%d",
|
||||||
(elem_bt == T_FLOAT)?"float":"double",
|
(elem_bt == T_FLOAT) ? "float" : "double",
|
||||||
VectorSupport::svmlname[opr->get_con() - VectorSupport::VECTOR_OP_SVML_START],
|
VectorSupport::mathname[opr->get_con() - VectorSupport::VECTOR_OP_MATH_START],
|
||||||
num_elem * type2aelembytes(elem_bt));
|
num_elem * type2aelembytes(elem_bt));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -2071,12 +2071,12 @@ bool LibraryCallKit::inline_vector_rearrange() {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static address get_svml_address(int vop, int bits, BasicType bt, char* name_ptr, int name_len) {
|
static address get_vector_math_address(int vop, int bits, BasicType bt, char* name_ptr, int name_len) {
|
||||||
address addr = nullptr;
|
address addr = nullptr;
|
||||||
assert(UseVectorStubs, "sanity");
|
assert(UseVectorStubs, "sanity");
|
||||||
assert(name_ptr != nullptr, "unexpected");
|
assert(name_ptr != nullptr, "unexpected");
|
||||||
assert((vop >= VectorSupport::VECTOR_OP_SVML_START) && (vop <= VectorSupport::VECTOR_OP_SVML_END), "unexpected");
|
assert((vop >= VectorSupport::VECTOR_OP_MATH_START) && (vop <= VectorSupport::VECTOR_OP_MATH_END), "unexpected");
|
||||||
int op = vop - VectorSupport::VECTOR_OP_SVML_START;
|
int op = vop - VectorSupport::VECTOR_OP_MATH_START;
|
||||||
|
|
||||||
switch(bits) {
|
switch(bits) {
|
||||||
case 64: //fallthough
|
case 64: //fallthough
|
||||||
@ -2084,21 +2084,34 @@ static address get_svml_address(int vop, int bits, BasicType bt, char* name_ptr,
|
|||||||
case 256: //fallthough
|
case 256: //fallthough
|
||||||
case 512:
|
case 512:
|
||||||
if (bt == T_FLOAT) {
|
if (bt == T_FLOAT) {
|
||||||
snprintf(name_ptr, name_len, "vector_%s_float%d", VectorSupport::svmlname[op], bits);
|
snprintf(name_ptr, name_len, "vector_%s_float_%dbits_fixed", VectorSupport::mathname[op], bits);
|
||||||
addr = StubRoutines::_vector_f_math[exact_log2(bits/64)][op];
|
addr = StubRoutines::_vector_f_math[exact_log2(bits/64)][op];
|
||||||
} else {
|
} else {
|
||||||
assert(bt == T_DOUBLE, "must be FP type only");
|
assert(bt == T_DOUBLE, "must be FP type only");
|
||||||
snprintf(name_ptr, name_len, "vector_%s_double%d", VectorSupport::svmlname[op], bits);
|
snprintf(name_ptr, name_len, "vector_%s_double_%dbits_fixed", VectorSupport::mathname[op], bits);
|
||||||
addr = StubRoutines::_vector_d_math[exact_log2(bits/64)][op];
|
addr = StubRoutines::_vector_d_math[exact_log2(bits/64)][op];
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
snprintf(name_ptr, name_len, "invalid");
|
if (!Matcher::supports_scalable_vector() || !Matcher::vector_size_supported(bt, bits/type2aelembytes(bt)) ) {
|
||||||
addr = nullptr;
|
snprintf(name_ptr, name_len, "invalid");
|
||||||
Unimplemented();
|
addr = nullptr;
|
||||||
|
Unimplemented();
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (addr == nullptr && Matcher::supports_scalable_vector()) {
|
||||||
|
if (bt == T_FLOAT) {
|
||||||
|
snprintf(name_ptr, name_len, "vector_%s_float_%dbits_scalable", VectorSupport::mathname[op], bits);
|
||||||
|
addr = StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op];
|
||||||
|
} else {
|
||||||
|
assert(bt == T_DOUBLE, "must be FP type only");
|
||||||
|
snprintf(name_ptr, name_len, "vector_%s_double_%dbits_scalable", VectorSupport::mathname[op], bits);
|
||||||
|
addr = StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return addr;
|
return addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2246,16 +2259,16 @@ bool LibraryCallKit::inline_vector_select_from() {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Node* LibraryCallKit::gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2) {
|
Node* LibraryCallKit::gen_call_to_vector_math(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2) {
|
||||||
assert(UseVectorStubs, "sanity");
|
assert(UseVectorStubs, "sanity");
|
||||||
assert(vector_api_op_id >= VectorSupport::VECTOR_OP_SVML_START && vector_api_op_id <= VectorSupport::VECTOR_OP_SVML_END, "need valid op id");
|
assert(vector_api_op_id >= VectorSupport::VECTOR_OP_MATH_START && vector_api_op_id <= VectorSupport::VECTOR_OP_MATH_END, "need valid op id");
|
||||||
assert(opd1 != nullptr, "must not be null");
|
assert(opd1 != nullptr, "must not be null");
|
||||||
const TypeVect* vt = TypeVect::make(bt, num_elem);
|
const TypeVect* vt = TypeVect::make(bt, num_elem);
|
||||||
const TypeFunc* call_type = OptoRuntime::Math_Vector_Vector_Type(opd2 != nullptr ? 2 : 1, vt, vt);
|
const TypeFunc* call_type = OptoRuntime::Math_Vector_Vector_Type(opd2 != nullptr ? 2 : 1, vt, vt);
|
||||||
char name[100] = "";
|
char name[100] = "";
|
||||||
|
|
||||||
// Get address for svml method.
|
// Get address for vector math method.
|
||||||
address addr = get_svml_address(vector_api_op_id, vt->length_in_bytes() * BitsPerByte, bt, name, 100);
|
address addr = get_vector_math_address(vector_api_op_id, vt->length_in_bytes() * BitsPerByte, bt, name, 100);
|
||||||
|
|
||||||
if (addr == nullptr) {
|
if (addr == nullptr) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -43,7 +43,7 @@
|
|||||||
#endif // COMPILER2
|
#endif // COMPILER2
|
||||||
|
|
||||||
#ifdef COMPILER2
|
#ifdef COMPILER2
|
||||||
const char* VectorSupport::svmlname[VectorSupport::NUM_SVML_OP] = {
|
const char* VectorSupport::mathname[VectorSupport::NUM_VECTOR_OP_MATH] = {
|
||||||
"tan",
|
"tan",
|
||||||
"tanh",
|
"tanh",
|
||||||
"sin",
|
"sin",
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -121,9 +121,9 @@ class VectorSupport : AllStatic {
|
|||||||
VECTOR_OP_EXPM1 = 117,
|
VECTOR_OP_EXPM1 = 117,
|
||||||
VECTOR_OP_HYPOT = 118,
|
VECTOR_OP_HYPOT = 118,
|
||||||
|
|
||||||
VECTOR_OP_SVML_START = VECTOR_OP_TAN,
|
VECTOR_OP_MATH_START = VECTOR_OP_TAN,
|
||||||
VECTOR_OP_SVML_END = VECTOR_OP_HYPOT,
|
VECTOR_OP_MATH_END = VECTOR_OP_HYPOT,
|
||||||
NUM_SVML_OP = VECTOR_OP_SVML_END - VECTOR_OP_SVML_START + 1
|
NUM_VECTOR_OP_MATH = VECTOR_OP_MATH_END - VECTOR_OP_MATH_START + 1
|
||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
@ -131,7 +131,8 @@ class VectorSupport : AllStatic {
|
|||||||
VEC_SIZE_128 = 1,
|
VEC_SIZE_128 = 1,
|
||||||
VEC_SIZE_256 = 2,
|
VEC_SIZE_256 = 2,
|
||||||
VEC_SIZE_512 = 3,
|
VEC_SIZE_512 = 3,
|
||||||
NUM_VEC_SIZES = 4
|
VEC_SIZE_SCALABLE = 4,
|
||||||
|
NUM_VEC_SIZES = 5
|
||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
@ -139,7 +140,7 @@ class VectorSupport : AllStatic {
|
|||||||
MODE_BITS_COERCED_LONG_TO_MASK = 1
|
MODE_BITS_COERCED_LONG_TO_MASK = 1
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* svmlname[VectorSupport::NUM_SVML_OP];
|
static const char* mathname[VectorSupport::NUM_VECTOR_OP_MATH];
|
||||||
|
|
||||||
static int vop2ideal(jint vop, BasicType bt);
|
static int vop2ideal(jint vop, BasicType bt);
|
||||||
|
|
||||||
|
@ -176,8 +176,8 @@ address StubRoutines::_dtanh = nullptr;
|
|||||||
address StubRoutines::_f2hf = nullptr;
|
address StubRoutines::_f2hf = nullptr;
|
||||||
address StubRoutines::_hf2f = nullptr;
|
address StubRoutines::_hf2f = nullptr;
|
||||||
|
|
||||||
address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}};
|
address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH] = {{nullptr}, {nullptr}};
|
||||||
address StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}};
|
address StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH] = {{nullptr}, {nullptr}};
|
||||||
|
|
||||||
address StubRoutines::_method_entry_barrier = nullptr;
|
address StubRoutines::_method_entry_barrier = nullptr;
|
||||||
address StubRoutines::_array_sort = nullptr;
|
address StubRoutines::_array_sort = nullptr;
|
||||||
|
@ -294,8 +294,8 @@ class StubRoutines: AllStatic {
|
|||||||
static address _cont_returnBarrierExc;
|
static address _cont_returnBarrierExc;
|
||||||
|
|
||||||
// Vector Math Routines
|
// Vector Math Routines
|
||||||
static address _vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP];
|
static address _vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH];
|
||||||
static address _vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP];
|
static address _vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH];
|
||||||
|
|
||||||
static address _upcall_stub_exception_handler;
|
static address _upcall_stub_exception_handler;
|
||||||
static address _upcall_stub_load_target;
|
static address _upcall_stub_load_target;
|
||||||
|
@ -0,0 +1,120 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Rivos Inc. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// On riscv, sleef vector apis depend on native vector intrinsic, which is supported on
|
||||||
|
// some compiler, e.g. gcc 14+.
|
||||||
|
// __riscv_v_intrinsic is used to tell if the compiler supports vector intrinsic.
|
||||||
|
//
|
||||||
|
// At compile-time, if the current compiler does support vector intrinsics, bridge
|
||||||
|
// functions will be built in the library. In case the current compiler doesn't support
|
||||||
|
// vector intrinsics (gcc < 14), then the bridge functions won't be compiled.
|
||||||
|
// At run-time, if the library is found and the bridge functions are available in the
|
||||||
|
// library, then the java vector API will call into the bridge functions and sleef.
|
||||||
|
|
||||||
|
#ifdef __riscv_v_intrinsic
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
|
||||||
|
#include "../generated/misc.h"
|
||||||
|
#include "../generated/sleefinline_rvvm1.h"
|
||||||
|
|
||||||
|
#include <jni.h>
|
||||||
|
|
||||||
|
// We maintain an invariant in java world that default dynamic rounding mode is RNE,
|
||||||
|
// please check JDK-8330094, JDK-8330266 for more details.
|
||||||
|
// Currently, sleef source on riscv does not change rounding mode to others except
|
||||||
|
// of RNE. But we still think it's safer to make sure that after calling into sleef
|
||||||
|
// the dynamic rounding mode is always RNE.
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
#define CHECK_FRM __asm__ __volatile__ ( \
|
||||||
|
" frrm t0 \n\t" \
|
||||||
|
" beqz t0, 2f \n\t" \
|
||||||
|
" csrrw x0, cycle, x0 \n\t" \
|
||||||
|
"2: \n\t" \
|
||||||
|
: : : "memory" );
|
||||||
|
#else
|
||||||
|
#define CHECK_FRM
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define DEFINE_VECTOR_MATH_UNARY_RVV(op, type) \
|
||||||
|
JNIEXPORT \
|
||||||
|
type op##rvv(type input) { \
|
||||||
|
type res = Sleef_##op##rvvm1(input); \
|
||||||
|
CHECK_FRM \
|
||||||
|
return res; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define DEFINE_VECTOR_MATH_BINARY_RVV(op, type) \
|
||||||
|
JNIEXPORT \
|
||||||
|
type op##rvv(type input1, type input2) { \
|
||||||
|
type res = Sleef_##op##rvvm1(input1, input2); \
|
||||||
|
CHECK_FRM \
|
||||||
|
return res; \
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(tanfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(sinfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(sinhfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(cosfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(coshfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(asinfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(acosfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(atanfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(cbrtfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(logfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(log10fx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(log1pfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(expfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(expm1fx_u10, vfloat_rvvm1_sleef)
|
||||||
|
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(tandx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(sindx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(sinhdx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(cosdx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(coshdx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(asindx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(acosdx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(atandx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(cbrtdx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(logdx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(log10dx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(log1pdx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(expdx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_UNARY_RVV(expm1dx_u10, vdouble_rvvm1_sleef)
|
||||||
|
|
||||||
|
DEFINE_VECTOR_MATH_BINARY_RVV(atan2fx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_BINARY_RVV(powfx_u10, vfloat_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_BINARY_RVV(hypotfx_u05, vfloat_rvvm1_sleef)
|
||||||
|
|
||||||
|
DEFINE_VECTOR_MATH_BINARY_RVV(atan2dx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_BINARY_RVV(powdx_u10, vdouble_rvvm1_sleef)
|
||||||
|
DEFINE_VECTOR_MATH_BINARY_RVV(hypotdx_u05, vdouble_rvvm1_sleef)
|
||||||
|
|
||||||
|
#undef DEFINE_VECTOR_MATH_UNARY_RVV
|
||||||
|
|
||||||
|
#undef DEFINE_VECTOR_MATH_BINARY_RVV
|
||||||
|
|
||||||
|
#endif /* __riscv_v_intrinsic */
|
Loading…
Reference in New Issue
Block a user