8293409: [vectorapi] Intrinsify VectorSupport.indexVector
Reviewed-by: eliu, jbhateja
This commit is contained in:
parent
3f3d63d02a
commit
857b0f9b05
@ -457,22 +457,31 @@ instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{
|
||||
|
||||
// vector load const
|
||||
|
||||
instruct vloadconB(vReg dst, immI0 src) %{
|
||||
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
|
||||
instruct vloadcon(vReg dst, immI0 src) %{
|
||||
match(Set dst (VectorLoadConst src));
|
||||
format %{ "vloadconB $dst, $src\t# load/generate iota indices" %}
|
||||
format %{ "vloadcon $dst, $src\t# load/generate iota indices" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
if (UseSVE == 0) {
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
assert(length_in_bytes <= 16, "must be");
|
||||
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
|
||||
// The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 16.
|
||||
int offset = exact_log2(type2aelembytes(bt)) << 4;
|
||||
if (is_floating_point_type(bt)) {
|
||||
offset += 32;
|
||||
}
|
||||
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices() + offset));
|
||||
if (length_in_bytes == 16) {
|
||||
__ ldrq($dst$$FloatRegister, rscratch1);
|
||||
} else {
|
||||
__ ldrd($dst$$FloatRegister, rscratch1);
|
||||
}
|
||||
} else {
|
||||
__ sve_index($dst$$FloatRegister, __ B, 0, 1);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_index($dst$$FloatRegister, size, 0, 1);
|
||||
if (is_floating_point_type(bt)) {
|
||||
__ sve_scvtf($dst$$FloatRegister, size, ptrue, $dst$$FloatRegister, size);
|
||||
}
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
|
@ -396,22 +396,31 @@ instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{
|
||||
|
||||
// vector load const
|
||||
|
||||
instruct vloadconB(vReg dst, immI0 src) %{
|
||||
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
|
||||
instruct vloadcon(vReg dst, immI0 src) %{
|
||||
match(Set dst (VectorLoadConst src));
|
||||
format %{ "vloadconB $dst, $src\t# load/generate iota indices" %}
|
||||
format %{ "vloadcon $dst, $src\t# load/generate iota indices" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
if (UseSVE == 0) {
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
assert(length_in_bytes <= 16, "must be");
|
||||
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
|
||||
// The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 16.
|
||||
int offset = exact_log2(type2aelembytes(bt)) << 4;
|
||||
if (is_floating_point_type(bt)) {
|
||||
offset += 32;
|
||||
}
|
||||
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices() + offset));
|
||||
if (length_in_bytes == 16) {
|
||||
__ ldrq($dst$$FloatRegister, rscratch1);
|
||||
} else {
|
||||
__ ldrd($dst$$FloatRegister, rscratch1);
|
||||
}
|
||||
} else {
|
||||
__ sve_index($dst$$FloatRegister, __ B, 0, 1);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_index($dst$$FloatRegister, size, 0, 1);
|
||||
if (is_floating_point_type(bt)) {
|
||||
__ sve_scvtf($dst$$FloatRegister, size, ptrue, $dst$$FloatRegister, size);
|
||||
}
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
|
@ -630,8 +630,24 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
address start = __ pc();
|
||||
// B
|
||||
__ emit_data64(0x0706050403020100, relocInfo::none);
|
||||
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
|
||||
// H
|
||||
__ emit_data64(0x0003000200010000, relocInfo::none);
|
||||
__ emit_data64(0x0007000600050004, relocInfo::none);
|
||||
// S
|
||||
__ emit_data64(0x0000000100000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000300000002, relocInfo::none);
|
||||
// D
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000001, relocInfo::none);
|
||||
// S - FP
|
||||
__ emit_data64(0x3F80000000000000, relocInfo::none); // 0.0f, 1.0f
|
||||
__ emit_data64(0x4040000040000000, relocInfo::none); // 2.0f, 3.0f
|
||||
// D - FP
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none); // 0.0d
|
||||
__ emit_data64(0x3FF0000000000000, relocInfo::none); // 1.0d
|
||||
return start;
|
||||
}
|
||||
|
||||
@ -7846,7 +7862,9 @@ class StubGenerator: public StubCodeGenerator {
|
||||
SharedRuntime::
|
||||
throw_NullPointerException_at_call));
|
||||
|
||||
StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices");
|
||||
if (UseSVE == 0) {
|
||||
StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices");
|
||||
}
|
||||
|
||||
// arraycopy stubs used by compilers
|
||||
generate_arraycopy_stubs();
|
||||
|
@ -1691,8 +1691,13 @@ void C2_MacroAssembler::load_constant_vector(BasicType bt, XMMRegister dst, Inte
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, int vlen_in_bytes) {
|
||||
ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
|
||||
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt) {
|
||||
// The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 64.
|
||||
int offset = exact_log2(type2aelembytes(bt)) << 6;
|
||||
if (is_floating_point_type(bt)) {
|
||||
offset += 128;
|
||||
}
|
||||
ExternalAddress addr(StubRoutines::x86::vector_iota_indices() + offset);
|
||||
if (vlen_in_bytes <= 4) {
|
||||
movdl(dst, addr);
|
||||
} else if (vlen_in_bytes == 8) {
|
||||
|
@ -159,7 +159,7 @@ public:
|
||||
void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);
|
||||
|
||||
void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
|
||||
void load_iota_indices(XMMRegister dst, int vlen_in_bytes);
|
||||
void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt);
|
||||
|
||||
// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
|
||||
|
||||
|
@ -811,7 +811,7 @@ address StubGenerator::generate_iota_indices(const char *stub_name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
address start = __ pc();
|
||||
|
||||
// B
|
||||
__ emit_data64(0x0706050403020100, relocInfo::none);
|
||||
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
|
||||
__ emit_data64(0x1716151413121110, relocInfo::none);
|
||||
@ -820,7 +820,51 @@ address StubGenerator::generate_iota_indices(const char *stub_name) {
|
||||
__ emit_data64(0x2F2E2D2C2B2A2928, relocInfo::none);
|
||||
__ emit_data64(0x3736353433323130, relocInfo::none);
|
||||
__ emit_data64(0x3F3E3D3C3B3A3938, relocInfo::none);
|
||||
|
||||
// W
|
||||
__ emit_data64(0x0003000200010000, relocInfo::none);
|
||||
__ emit_data64(0x0007000600050004, relocInfo::none);
|
||||
__ emit_data64(0x000B000A00090008, relocInfo::none);
|
||||
__ emit_data64(0x000F000E000D000C, relocInfo::none);
|
||||
__ emit_data64(0x0013001200110010, relocInfo::none);
|
||||
__ emit_data64(0x0017001600150014, relocInfo::none);
|
||||
__ emit_data64(0x001B001A00190018, relocInfo::none);
|
||||
__ emit_data64(0x001F001E001D001C, relocInfo::none);
|
||||
// D
|
||||
__ emit_data64(0x0000000100000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000300000002, relocInfo::none);
|
||||
__ emit_data64(0x0000000500000004, relocInfo::none);
|
||||
__ emit_data64(0x0000000700000006, relocInfo::none);
|
||||
__ emit_data64(0x0000000900000008, relocInfo::none);
|
||||
__ emit_data64(0x0000000B0000000A, relocInfo::none);
|
||||
__ emit_data64(0x0000000D0000000C, relocInfo::none);
|
||||
__ emit_data64(0x0000000F0000000E, relocInfo::none);
|
||||
// Q
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000001, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000002, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000003, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000004, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000005, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000006, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000007, relocInfo::none);
|
||||
// D - FP
|
||||
__ emit_data64(0x3F80000000000000, relocInfo::none); // 0.0f, 1.0f
|
||||
__ emit_data64(0x4040000040000000, relocInfo::none); // 2.0f, 3.0f
|
||||
__ emit_data64(0x40A0000040800000, relocInfo::none); // 4.0f, 5.0f
|
||||
__ emit_data64(0x40E0000040C00000, relocInfo::none); // 6.0f, 7.0f
|
||||
__ emit_data64(0x4110000041000000, relocInfo::none); // 8.0f, 9.0f
|
||||
__ emit_data64(0x4130000041200000, relocInfo::none); // 10.0f, 11.0f
|
||||
__ emit_data64(0x4150000041400000, relocInfo::none); // 12.0f, 13.0f
|
||||
__ emit_data64(0x4170000041600000, relocInfo::none); // 14.0f, 15.0f
|
||||
// Q - FP
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none); // 0.0d
|
||||
__ emit_data64(0x3FF0000000000000, relocInfo::none); // 1.0d
|
||||
__ emit_data64(0x4000000000000000, relocInfo::none); // 2.0d
|
||||
__ emit_data64(0x4008000000000000, relocInfo::none); // 3.0d
|
||||
__ emit_data64(0x4010000000000000, relocInfo::none); // 4.0d
|
||||
__ emit_data64(0x4014000000000000, relocInfo::none); // 5.0d
|
||||
__ emit_data64(0x4018000000000000, relocInfo::none); // 6.0d
|
||||
__ emit_data64(0x401c000000000000, relocInfo::none); // 7.0d
|
||||
return start;
|
||||
}
|
||||
|
||||
|
@ -8400,12 +8400,12 @@ instruct vmaskcast_avx(vec dst, vec src) %{
|
||||
//-------------------------------- Load Iota Indices ----------------------------------
|
||||
|
||||
instruct loadIotaIndices(vec dst, immI_0 src) %{
|
||||
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
|
||||
match(Set dst (VectorLoadConst src));
|
||||
format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
|
||||
ins_encode %{
|
||||
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -8417,14 +8417,11 @@ instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
|
||||
format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
assert($src2$$constant == 1, "required");
|
||||
int vlen = Matcher::vector_length(this);
|
||||
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
BasicType elem_bt = Matcher::vector_element_basic_type(this);
|
||||
__ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
|
||||
__ load_iota_indices($dst$$XMMRegister, vlen);
|
||||
if (elem_bt != T_BYTE) {
|
||||
__ vconvert_b2x(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
}
|
||||
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
|
||||
__ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
@ -8436,14 +8433,11 @@ instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
|
||||
format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
assert($src2$$constant == 1, "required");
|
||||
int vlen = Matcher::vector_length(this);
|
||||
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
BasicType elem_bt = Matcher::vector_element_basic_type(this);
|
||||
__ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
|
||||
__ load_iota_indices($dst$$XMMRegister, vlen);
|
||||
if (elem_bt != T_BYTE) {
|
||||
__ vconvert_b2x(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
}
|
||||
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
|
||||
__ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
|
@ -1178,6 +1178,18 @@ class methodHandle;
|
||||
"Ljdk/internal/vm/vector/VectorSupport$CompressExpandOperation;)" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
|
||||
do_name(vector_compress_expand_op_name, "compressExpandOp") \
|
||||
\
|
||||
do_intrinsic(_IndexVector, jdk_internal_vm_vector_VectorSupport, index_vector_op_name, index_vector_op_sig, F_S) \
|
||||
do_signature(index_vector_op_sig, "(Ljava/lang/Class;" \
|
||||
"Ljava/lang/Class;" \
|
||||
"I" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
|
||||
"I" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$IndexOperation;)" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
|
||||
do_name(index_vector_op_name, "indexVector") \
|
||||
\
|
||||
/* (2) Bytecode intrinsics */ \
|
||||
\
|
||||
do_intrinsic(_park, jdk_internal_misc_Unsafe, park_name, park_signature, F_RN) \
|
||||
@ -1286,7 +1298,7 @@ enum class vmIntrinsicID : int {
|
||||
__IGNORE_CLASS, __IGNORE_NAME, __IGNORE_SIGNATURE, __IGNORE_ALIAS)
|
||||
|
||||
ID_LIMIT,
|
||||
LAST_COMPILER_INLINE = _VectorCompressExpand,
|
||||
LAST_COMPILER_INLINE = _IndexVector,
|
||||
FIRST_MH_SIG_POLY = _invokeGeneric,
|
||||
FIRST_MH_STATIC = _linkToVirtual,
|
||||
LAST_MH_SIG_POLY = _linkToNative,
|
||||
|
@ -776,6 +776,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
||||
case vmIntrinsics::_VectorInsert:
|
||||
case vmIntrinsics::_VectorExtract:
|
||||
case vmIntrinsics::_VectorMaskOp:
|
||||
case vmIntrinsics::_IndexVector:
|
||||
return EnableVectorSupport;
|
||||
case vmIntrinsics::_blackhole:
|
||||
break;
|
||||
|
@ -716,6 +716,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
||||
return inline_vector_extract();
|
||||
case vmIntrinsics::_VectorCompressExpand:
|
||||
return inline_vector_compress_expand();
|
||||
case vmIntrinsics::_IndexVector:
|
||||
return inline_index_vector();
|
||||
|
||||
case vmIntrinsics::_getObjectSize:
|
||||
return inline_getObjectSize();
|
||||
|
@ -348,6 +348,7 @@ class LibraryCallKit : public GraphKit {
|
||||
bool inline_vector_extract();
|
||||
bool inline_vector_insert();
|
||||
bool inline_vector_compress_expand();
|
||||
bool inline_index_vector();
|
||||
|
||||
Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
|
||||
|
||||
|
@ -2844,3 +2844,155 @@ bool LibraryCallKit::inline_vector_compress_expand() {
|
||||
C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
|
||||
return true;
|
||||
}
|
||||
|
||||
// public static
|
||||
// <V extends Vector<E>,
|
||||
// E,
|
||||
// S extends VectorSpecies<E>>
|
||||
// V indexVector(Class<? extends V> vClass, Class<E> eClass,
|
||||
// int length,
|
||||
// V v, int step, S s,
|
||||
// IndexOperation<V, S> defaultImpl)
|
||||
bool LibraryCallKit::inline_index_vector() {
|
||||
const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
|
||||
const TypeInstPtr* elem_klass = gvn().type(argument(1))->isa_instptr();
|
||||
const TypeInt* vlen = gvn().type(argument(2))->isa_int();
|
||||
|
||||
if (vector_klass == NULL || elem_klass == NULL || vlen == NULL ||
|
||||
vector_klass->const_oop() == NULL || !vlen->is_con() ||
|
||||
elem_klass->const_oop() == NULL) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** missing constant: vclass=%s etype=%s vlen=%s",
|
||||
NodeClassNames[argument(0)->Opcode()],
|
||||
NodeClassNames[argument(1)->Opcode()],
|
||||
NodeClassNames[argument(2)->Opcode()]);
|
||||
}
|
||||
return false; // not enough info for intrinsification
|
||||
}
|
||||
|
||||
if (!is_klass_initialized(vector_klass)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** klass argument not initialized");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
|
||||
if (!elem_type->is_primitive_type()) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** not a primitive bt=%d", elem_type->basic_type());
|
||||
}
|
||||
return false; // should be primitive type
|
||||
}
|
||||
|
||||
int num_elem = vlen->get_con();
|
||||
BasicType elem_bt = elem_type->basic_type();
|
||||
|
||||
// Check whether the iota index generation op is supported by the current hardware
|
||||
if (!arch_supports_vector(Op_VectorLoadConst, num_elem, elem_bt, VecMaskNotUsed)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** not supported: vlen=%d etype=%s", num_elem, type2name(elem_bt));
|
||||
}
|
||||
return false; // not supported
|
||||
}
|
||||
|
||||
int mul_op = VectorSupport::vop2ideal(VectorSupport::VECTOR_OP_MUL, elem_bt);
|
||||
int vmul_op = VectorNode::opcode(mul_op, elem_bt);
|
||||
bool needs_mul = true;
|
||||
Node* scale = argument(4);
|
||||
const TypeInt* scale_type = gvn().type(scale)->isa_int();
|
||||
// Multiply is not needed if the scale is a constant "1".
|
||||
if (scale_type && scale_type->is_con() && scale_type->get_con() == 1) {
|
||||
needs_mul = false;
|
||||
} else {
|
||||
// Check whether the vector multiply op is supported by the current hardware
|
||||
if (!arch_supports_vector(vmul_op, num_elem, elem_bt, VecMaskNotUsed)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** not supported: vlen=%d etype=%s", num_elem, type2name(elem_bt));
|
||||
}
|
||||
return false; // not supported
|
||||
}
|
||||
|
||||
// Check whether the scalar cast op is supported by the current hardware
|
||||
if (is_floating_point_type(elem_bt) || elem_bt == T_LONG) {
|
||||
int cast_op = elem_bt == T_LONG ? Op_ConvI2L :
|
||||
elem_bt == T_FLOAT? Op_ConvI2F : Op_ConvI2D;
|
||||
if (!Matcher::match_rule_supported(cast_op)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** Rejected op (%s) because architecture does not support it",
|
||||
NodeClassNames[cast_op]);
|
||||
}
|
||||
return false; // not supported
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
|
||||
const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
|
||||
Node* opd = unbox_vector(argument(3), vbox_type, elem_bt, num_elem);
|
||||
if (opd == NULL) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** unbox failed vector=%s",
|
||||
NodeClassNames[argument(3)->Opcode()]);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int add_op = VectorSupport::vop2ideal(VectorSupport::VECTOR_OP_ADD, elem_bt);
|
||||
int vadd_op = VectorNode::opcode(add_op, elem_bt);
|
||||
bool needs_add = true;
|
||||
// The addition is not needed if all the element values of "opd" are zero
|
||||
if (VectorNode::is_all_zeros_vector(opd)) {
|
||||
needs_add = false;
|
||||
} else {
|
||||
// Check whether the vector addition op is supported by the current hardware
|
||||
if (!arch_supports_vector(vadd_op, num_elem, elem_bt, VecMaskNotUsed)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** not supported: vlen=%d etype=%s", num_elem, type2name(elem_bt));
|
||||
}
|
||||
return false; // not supported
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the iota indice vector
|
||||
const TypeVect* vt = TypeVect::make(elem_bt, num_elem);
|
||||
Node* index = gvn().transform(new VectorLoadConstNode(gvn().makecon(TypeInt::ZERO), vt));
|
||||
|
||||
// Broadcast the "scale" to a vector, and multiply the "scale" with iota indice vector.
|
||||
if (needs_mul) {
|
||||
switch (elem_bt) {
|
||||
case T_BOOLEAN: // fall-through
|
||||
case T_BYTE: // fall-through
|
||||
case T_SHORT: // fall-through
|
||||
case T_CHAR: // fall-through
|
||||
case T_INT: {
|
||||
// no conversion needed
|
||||
break;
|
||||
}
|
||||
case T_LONG: {
|
||||
scale = gvn().transform(new ConvI2LNode(scale));
|
||||
break;
|
||||
}
|
||||
case T_FLOAT: {
|
||||
scale = gvn().transform(new ConvI2FNode(scale));
|
||||
break;
|
||||
}
|
||||
case T_DOUBLE: {
|
||||
scale = gvn().transform(new ConvI2DNode(scale));
|
||||
break;
|
||||
}
|
||||
default: fatal("%s", type2name(elem_bt));
|
||||
}
|
||||
scale = gvn().transform(VectorNode::scalar2vector(scale, num_elem, Type::get_const_basic_type(elem_bt)));
|
||||
index = gvn().transform(VectorNode::make(vmul_op, index, scale, vt));
|
||||
}
|
||||
|
||||
// Add "opd" if addition is needed.
|
||||
if (needs_add) {
|
||||
index = gvn().transform(VectorNode::make(vadd_op, opd, index, vt));
|
||||
}
|
||||
Node* vbox = box_vector(index, vbox_type, elem_bt, num_elem);
|
||||
set_result(vbox);
|
||||
C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
|
||||
return true;
|
||||
}
|
||||
|
@ -240,7 +240,7 @@ public class VectorSupport {
|
||||
V index(V v, int step, S s);
|
||||
}
|
||||
|
||||
//FIXME @IntrinsicCandidate
|
||||
@IntrinsicCandidate
|
||||
public static
|
||||
<V extends Vector<E>,
|
||||
E,
|
||||
|
@ -0,0 +1,105 @@
|
||||
//
|
||||
// Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License version 2 only, as
|
||||
// published by the Free Software Foundation.
|
||||
//
|
||||
// This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
// version 2 for more details (a copy is included in the LICENSE file that
|
||||
// accompanied this code).
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License version
|
||||
// 2 along with this work; if not, write to the Free Software Foundation,
|
||||
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
//
|
||||
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
// or visit www.oracle.com if you need additional information or have any
|
||||
// questions.
|
||||
//
|
||||
//
|
||||
package org.openjdk.bench.jdk.incubator.vector;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import jdk.incubator.vector.*;
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
@State(Scope.Benchmark)
|
||||
@Warmup(iterations = 3, time = 1)
|
||||
@Measurement(iterations = 5, time = 1)
|
||||
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
public class IndexVectorBenchmark {
|
||||
@Param({"1024"})
|
||||
private int size;
|
||||
|
||||
private byte[] ba;
|
||||
private short[] sa;
|
||||
private int[] ia;
|
||||
private long[] la;
|
||||
private float[] fa;
|
||||
private double[] da;
|
||||
|
||||
private static final VectorSpecies<Byte> bspecies = VectorSpecies.ofLargestShape(byte.class);
|
||||
private static final VectorSpecies<Short> sspecies = VectorSpecies.ofLargestShape(short.class);
|
||||
private static final VectorSpecies<Integer> ispecies = VectorSpecies.ofLargestShape(int.class);
|
||||
private static final VectorSpecies<Long> lspecies = VectorSpecies.ofLargestShape(long.class);
|
||||
private static final VectorSpecies<Float> fspecies = VectorSpecies.ofLargestShape(float.class);
|
||||
private static final VectorSpecies<Double> dspecies = VectorSpecies.ofLargestShape(double.class);
|
||||
|
||||
@Setup(Level.Trial)
|
||||
public void Setup() {
|
||||
ba = new byte[size];
|
||||
sa = new short[size];
|
||||
ia = new int[size];
|
||||
la = new long[size];
|
||||
fa = new float[size];
|
||||
da = new double[size];
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void byteIndexVector() {
|
||||
for (int i = 0; i < size; i += bspecies.length()) {
|
||||
((ByteVector) bspecies.broadcast(0).addIndex(i % 2)).intoArray(ba, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void shortIndexVector() {
|
||||
for (int i = 0; i < size; i += sspecies.length()) {
|
||||
((ShortVector) sspecies.broadcast(0).addIndex(i % 5)).intoArray(sa, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void intIndexVector() {
|
||||
for (int i = 0; i < size; i += ispecies.length()) {
|
||||
((IntVector) ispecies.broadcast(0).addIndex(i % 5)).intoArray(ia, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void longIndexVector() {
|
||||
for (int i = 0; i < size; i += lspecies.length()) {
|
||||
((LongVector) lspecies.broadcast(0).addIndex(i % 5)).intoArray(la, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void floatIndexVector() {
|
||||
for (int i = 0; i < size; i += fspecies.length()) {
|
||||
((FloatVector) fspecies.broadcast(0).addIndex(i % 5)).intoArray(fa, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void doubleIndexVector() {
|
||||
for (int i = 0; i < size; i += dspecies.length()) {
|
||||
((DoubleVector) dspecies.broadcast(0).addIndex(i % 5)).intoArray(da, i);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user