8293409: [vectorapi] Intrinsify VectorSupport.indexVector

Reviewed-by: eliu, jbhateja
This commit is contained in:
Xiaohong Gong 2022-10-19 09:24:05 +00:00
parent 3f3d63d02a
commit 857b0f9b05
14 changed files with 382 additions and 30 deletions

View File

@ -457,22 +457,31 @@ instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{
// vector load const
instruct vloadconB(vReg dst, immI0 src) %{
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
instruct vloadcon(vReg dst, immI0 src) %{
match(Set dst (VectorLoadConst src));
format %{ "vloadconB $dst, $src\t# load/generate iota indices" %}
format %{ "vloadcon $dst, $src\t# load/generate iota indices" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
if (UseSVE == 0) {
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
assert(length_in_bytes <= 16, "must be");
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
// The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 16.
int offset = exact_log2(type2aelembytes(bt)) << 4;
if (is_floating_point_type(bt)) {
offset += 32;
}
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices() + offset));
if (length_in_bytes == 16) {
__ ldrq($dst$$FloatRegister, rscratch1);
} else {
__ ldrd($dst$$FloatRegister, rscratch1);
}
} else {
__ sve_index($dst$$FloatRegister, __ B, 0, 1);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_index($dst$$FloatRegister, size, 0, 1);
if (is_floating_point_type(bt)) {
__ sve_scvtf($dst$$FloatRegister, size, ptrue, $dst$$FloatRegister, size);
}
}
%}
ins_pipe(pipe_slow);

View File

@ -396,22 +396,31 @@ instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{
// vector load const
instruct vloadconB(vReg dst, immI0 src) %{
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
instruct vloadcon(vReg dst, immI0 src) %{
match(Set dst (VectorLoadConst src));
format %{ "vloadconB $dst, $src\t# load/generate iota indices" %}
format %{ "vloadcon $dst, $src\t# load/generate iota indices" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
if (UseSVE == 0) {
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
assert(length_in_bytes <= 16, "must be");
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
// The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 16.
int offset = exact_log2(type2aelembytes(bt)) << 4;
if (is_floating_point_type(bt)) {
offset += 32;
}
__ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices() + offset));
if (length_in_bytes == 16) {
__ ldrq($dst$$FloatRegister, rscratch1);
} else {
__ ldrd($dst$$FloatRegister, rscratch1);
}
} else {
__ sve_index($dst$$FloatRegister, __ B, 0, 1);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_index($dst$$FloatRegister, size, 0, 1);
if (is_floating_point_type(bt)) {
__ sve_scvtf($dst$$FloatRegister, size, ptrue, $dst$$FloatRegister, size);
}
}
%}
ins_pipe(pipe_slow);

View File

@ -630,8 +630,24 @@ class StubGenerator: public StubCodeGenerator {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
// B
__ emit_data64(0x0706050403020100, relocInfo::none);
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
// H
__ emit_data64(0x0003000200010000, relocInfo::none);
__ emit_data64(0x0007000600050004, relocInfo::none);
// S
__ emit_data64(0x0000000100000000, relocInfo::none);
__ emit_data64(0x0000000300000002, relocInfo::none);
// D
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0x0000000000000001, relocInfo::none);
// S - FP
__ emit_data64(0x3F80000000000000, relocInfo::none); // 0.0f, 1.0f
__ emit_data64(0x4040000040000000, relocInfo::none); // 2.0f, 3.0f
// D - FP
__ emit_data64(0x0000000000000000, relocInfo::none); // 0.0d
__ emit_data64(0x3FF0000000000000, relocInfo::none); // 1.0d
return start;
}
@ -7846,7 +7862,9 @@ class StubGenerator: public StubCodeGenerator {
SharedRuntime::
throw_NullPointerException_at_call));
StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices");
if (UseSVE == 0) {
StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices");
}
// arraycopy stubs used by compilers
generate_arraycopy_stubs();

View File

@ -1691,8 +1691,13 @@ void C2_MacroAssembler::load_constant_vector(BasicType bt, XMMRegister dst, Inte
}
}
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, int vlen_in_bytes) {
ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt) {
// The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 64.
int offset = exact_log2(type2aelembytes(bt)) << 6;
if (is_floating_point_type(bt)) {
offset += 128;
}
ExternalAddress addr(StubRoutines::x86::vector_iota_indices() + offset);
if (vlen_in_bytes <= 4) {
movdl(dst, addr);
} else if (vlen_in_bytes == 8) {

View File

@ -159,7 +159,7 @@ public:
void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);
void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
void load_iota_indices(XMMRegister dst, int vlen_in_bytes);
void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt);
// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

View File

@ -811,7 +811,7 @@ address StubGenerator::generate_iota_indices(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
// B
__ emit_data64(0x0706050403020100, relocInfo::none);
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
__ emit_data64(0x1716151413121110, relocInfo::none);
@ -820,7 +820,51 @@ address StubGenerator::generate_iota_indices(const char *stub_name) {
__ emit_data64(0x2F2E2D2C2B2A2928, relocInfo::none);
__ emit_data64(0x3736353433323130, relocInfo::none);
__ emit_data64(0x3F3E3D3C3B3A3938, relocInfo::none);
// W
__ emit_data64(0x0003000200010000, relocInfo::none);
__ emit_data64(0x0007000600050004, relocInfo::none);
__ emit_data64(0x000B000A00090008, relocInfo::none);
__ emit_data64(0x000F000E000D000C, relocInfo::none);
__ emit_data64(0x0013001200110010, relocInfo::none);
__ emit_data64(0x0017001600150014, relocInfo::none);
__ emit_data64(0x001B001A00190018, relocInfo::none);
__ emit_data64(0x001F001E001D001C, relocInfo::none);
// D
__ emit_data64(0x0000000100000000, relocInfo::none);
__ emit_data64(0x0000000300000002, relocInfo::none);
__ emit_data64(0x0000000500000004, relocInfo::none);
__ emit_data64(0x0000000700000006, relocInfo::none);
__ emit_data64(0x0000000900000008, relocInfo::none);
__ emit_data64(0x0000000B0000000A, relocInfo::none);
__ emit_data64(0x0000000D0000000C, relocInfo::none);
__ emit_data64(0x0000000F0000000E, relocInfo::none);
// Q
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0x0000000000000001, relocInfo::none);
__ emit_data64(0x0000000000000002, relocInfo::none);
__ emit_data64(0x0000000000000003, relocInfo::none);
__ emit_data64(0x0000000000000004, relocInfo::none);
__ emit_data64(0x0000000000000005, relocInfo::none);
__ emit_data64(0x0000000000000006, relocInfo::none);
__ emit_data64(0x0000000000000007, relocInfo::none);
// D - FP
__ emit_data64(0x3F80000000000000, relocInfo::none); // 0.0f, 1.0f
__ emit_data64(0x4040000040000000, relocInfo::none); // 2.0f, 3.0f
__ emit_data64(0x40A0000040800000, relocInfo::none); // 4.0f, 5.0f
__ emit_data64(0x40E0000040C00000, relocInfo::none); // 6.0f, 7.0f
__ emit_data64(0x4110000041000000, relocInfo::none); // 8.0f, 9.0f
__ emit_data64(0x4130000041200000, relocInfo::none); // 10.0f, 11.0f
__ emit_data64(0x4150000041400000, relocInfo::none); // 12.0f, 13.0f
__ emit_data64(0x4170000041600000, relocInfo::none); // 14.0f, 15.0f
// Q - FP
__ emit_data64(0x0000000000000000, relocInfo::none); // 0.0d
__ emit_data64(0x3FF0000000000000, relocInfo::none); // 1.0d
__ emit_data64(0x4000000000000000, relocInfo::none); // 2.0d
__ emit_data64(0x4008000000000000, relocInfo::none); // 3.0d
__ emit_data64(0x4010000000000000, relocInfo::none); // 4.0d
__ emit_data64(0x4014000000000000, relocInfo::none); // 5.0d
__ emit_data64(0x4018000000000000, relocInfo::none); // 6.0d
__ emit_data64(0x401c000000000000, relocInfo::none); // 7.0d
return start;
}

View File

@ -8400,12 +8400,12 @@ instruct vmaskcast_avx(vec dst, vec src) %{
//-------------------------------- Load Iota Indices ----------------------------------
instruct loadIotaIndices(vec dst, immI_0 src) %{
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
match(Set dst (VectorLoadConst src));
format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
ins_encode %{
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes);
BasicType bt = Matcher::vector_element_basic_type(this);
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
%}
ins_pipe( pipe_slow );
%}
@ -8417,14 +8417,11 @@ instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
ins_encode %{
assert($src2$$constant == 1, "required");
int vlen = Matcher::vector_length(this);
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = Matcher::vector_element_basic_type(this);
__ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
__ load_iota_indices($dst$$XMMRegister, vlen);
if (elem_bt != T_BYTE) {
__ vconvert_b2x(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
__ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
@ -8436,14 +8433,11 @@ instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
ins_encode %{
assert($src2$$constant == 1, "required");
int vlen = Matcher::vector_length(this);
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = Matcher::vector_element_basic_type(this);
__ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
__ load_iota_indices($dst$$XMMRegister, vlen);
if (elem_bt != T_BYTE) {
__ vconvert_b2x(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
__ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );

View File

@ -1178,6 +1178,18 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$CompressExpandOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_compress_expand_op_name, "compressExpandOp") \
\
do_intrinsic(_IndexVector, jdk_internal_vm_vector_VectorSupport, index_vector_op_name, index_vector_op_sig, F_S) \
do_signature(index_vector_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$IndexOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(index_vector_op_name, "indexVector") \
\
/* (2) Bytecode intrinsics */ \
\
do_intrinsic(_park, jdk_internal_misc_Unsafe, park_name, park_signature, F_RN) \
@ -1286,7 +1298,7 @@ enum class vmIntrinsicID : int {
__IGNORE_CLASS, __IGNORE_NAME, __IGNORE_SIGNATURE, __IGNORE_ALIAS)
ID_LIMIT,
LAST_COMPILER_INLINE = _VectorCompressExpand,
LAST_COMPILER_INLINE = _IndexVector,
FIRST_MH_SIG_POLY = _invokeGeneric,
FIRST_MH_STATIC = _linkToVirtual,
LAST_MH_SIG_POLY = _linkToNative,

View File

@ -776,6 +776,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_VectorInsert:
case vmIntrinsics::_VectorExtract:
case vmIntrinsics::_VectorMaskOp:
case vmIntrinsics::_IndexVector:
return EnableVectorSupport;
case vmIntrinsics::_blackhole:
break;

View File

@ -716,6 +716,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
return inline_vector_extract();
case vmIntrinsics::_VectorCompressExpand:
return inline_vector_compress_expand();
case vmIntrinsics::_IndexVector:
return inline_index_vector();
case vmIntrinsics::_getObjectSize:
return inline_getObjectSize();

View File

@ -348,6 +348,7 @@ class LibraryCallKit : public GraphKit {
bool inline_vector_extract();
bool inline_vector_insert();
bool inline_vector_compress_expand();
bool inline_index_vector();
Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);

View File

@ -2844,3 +2844,155 @@ bool LibraryCallKit::inline_vector_compress_expand() {
C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
return true;
}
// C2 intrinsic for the Java method below. The arguments are read positionally:
// argument(0) = vClass, argument(1) = eClass, argument(2) = length,
// argument(3) = v, argument(4) = step.
//
// public static
// <V extends Vector<E>,
// E,
// S extends VectorSpecies<E>>
// V indexVector(Class<? extends V> vClass, Class<E> eClass,
// int length,
// V v, int step, S s,
// IndexOperation<V, S> defaultImpl)
//
// The generated graph computes "v + iota * step", where "iota" is a
// VectorLoadConst node. The multiply is omitted when "step" is the constant 1
// and the add is omitted when "v" is recognized as an all-zeros vector.
//
// Returns true after installing the boxed result via set_result(); returns
// false (intrinsic not applied) when a required argument is not a compile-time
// constant, the vector class is not initialized, the element type is not
// primitive, a required op is not supported, or unboxing "v" fails.
bool LibraryCallKit::inline_index_vector() {
const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
const TypeInstPtr* elem_klass = gvn().type(argument(1))->isa_instptr();
const TypeInt* vlen = gvn().type(argument(2))->isa_int();
// The vector class, element class and vector length must all be constants.
if (vector_klass == NULL || elem_klass == NULL || vlen == NULL ||
vector_klass->const_oop() == NULL || !vlen->is_con() ||
elem_klass->const_oop() == NULL) {
if (C->print_intrinsics()) {
tty->print_cr(" ** missing constant: vclass=%s etype=%s vlen=%s",
NodeClassNames[argument(0)->Opcode()],
NodeClassNames[argument(1)->Opcode()],
NodeClassNames[argument(2)->Opcode()]);
}
return false; // not enough info for intrinsification
}
if (!is_klass_initialized(vector_klass)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** klass argument not initialized");
}
return false;
}
ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
if (!elem_type->is_primitive_type()) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not a primitive bt=%d", elem_type->basic_type());
}
return false; // should be primitive type
}
int num_elem = vlen->get_con();
BasicType elem_bt = elem_type->basic_type();
// Check whether the iota index generation op is supported by the current hardware
if (!arch_supports_vector(Op_VectorLoadConst, num_elem, elem_bt, VecMaskNotUsed)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: vlen=%d etype=%s", num_elem, type2name(elem_bt));
}
return false; // not supported
}
// Resolve the vector multiply opcode up front; it is only used when needs_mul stays true.
int mul_op = VectorSupport::vop2ideal(VectorSupport::VECTOR_OP_MUL, elem_bt);
int vmul_op = VectorNode::opcode(mul_op, elem_bt);
bool needs_mul = true;
// "scale" is the Java-level "step" argument (an int).
Node* scale = argument(4);
const TypeInt* scale_type = gvn().type(scale)->isa_int();
// Multiply is not needed if the scale is a constant "1".
if (scale_type && scale_type->is_con() && scale_type->get_con() == 1) {
needs_mul = false;
} else {
// Check whether the vector multiply op is supported by the current hardware
if (!arch_supports_vector(vmul_op, num_elem, elem_bt, VecMaskNotUsed)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: vlen=%d etype=%s", num_elem, type2name(elem_bt));
}
return false; // not supported
}
// Check whether the scalar cast op is supported by the current hardware
// (only long/float/double need the int "scale" converted before broadcasting).
if (is_floating_point_type(elem_bt) || elem_bt == T_LONG) {
int cast_op = elem_bt == T_LONG ? Op_ConvI2L :
elem_bt == T_FLOAT? Op_ConvI2F : Op_ConvI2D;
if (!Matcher::match_rule_supported(cast_op)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** Rejected op (%s) because architecture does not support it",
NodeClassNames[cast_op]);
}
return false; // not supported
}
}
}
// Unbox the input vector "v" using the exact box type derived from the constant vector class.
ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
Node* opd = unbox_vector(argument(3), vbox_type, elem_bt, num_elem);
if (opd == NULL) {
if (C->print_intrinsics()) {
tty->print_cr(" ** unbox failed vector=%s",
NodeClassNames[argument(3)->Opcode()]);
}
return false;
}
// Resolve the vector add opcode; it is only used when needs_add stays true.
int add_op = VectorSupport::vop2ideal(VectorSupport::VECTOR_OP_ADD, elem_bt);
int vadd_op = VectorNode::opcode(add_op, elem_bt);
bool needs_add = true;
// The addition is not needed if all the element values of "opd" are zero
if (VectorNode::is_all_zeros_vector(opd)) {
needs_add = false;
} else {
// Check whether the vector addition op is supported by the current hardware
if (!arch_supports_vector(vadd_op, num_elem, elem_bt, VecMaskNotUsed)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: vlen=%d etype=%s", num_elem, type2name(elem_bt));
}
return false; // not supported
}
}
// All bail-out checks are done; from here on only graph nodes are created.
// Compute the iota index vector
const TypeVect* vt = TypeVect::make(elem_bt, num_elem);
Node* index = gvn().transform(new VectorLoadConstNode(gvn().makecon(TypeInt::ZERO), vt));
// Broadcast the "scale" to a vector, and multiply the "scale" with the iota index vector.
if (needs_mul) {
// Convert the int "scale" to the element type where a conversion node is required.
switch (elem_bt) {
case T_BOOLEAN: // fall-through
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_CHAR: // fall-through
case T_INT: {
// no conversion needed
break;
}
case T_LONG: {
scale = gvn().transform(new ConvI2LNode(scale));
break;
}
case T_FLOAT: {
scale = gvn().transform(new ConvI2FNode(scale));
break;
}
case T_DOUBLE: {
scale = gvn().transform(new ConvI2DNode(scale));
break;
}
default: fatal("%s", type2name(elem_bt));
}
scale = gvn().transform(VectorNode::scalar2vector(scale, num_elem, Type::get_const_basic_type(elem_bt)));
index = gvn().transform(VectorNode::make(vmul_op, index, scale, vt));
}
// Add "opd" if addition is needed.
if (needs_add) {
index = gvn().transform(VectorNode::make(vadd_op, opd, index, vt));
}
// Box the resulting vector and publish it as the intrinsic's result.
Node* vbox = box_vector(index, vbox_type, elem_bt, num_elem);
set_result(vbox);
// Record the vector size in bytes used by this compilation (keeps the maximum seen so far).
C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
return true;
}

View File

@ -240,7 +240,7 @@ public class VectorSupport {
V index(V v, int step, S s);
}
//FIXME @IntrinsicCandidate
@IntrinsicCandidate
public static
<V extends Vector<E>,
E,

View File

@ -0,0 +1,105 @@
//
// Copyright (c) 2022, Arm Limited. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//
package org.openjdk.bench.jdk.incubator.vector;
import java.util.concurrent.TimeUnit;
import jdk.incubator.vector.*;
import org.openjdk.jmh.annotations.*;
// JMH throughput benchmark that, for each primitive element type, repeatedly
// builds a vector via broadcast(0).addIndex(scale) and stores it into an array.
// NOTE(review): presumably addIndex() is the API path that reaches the
// VectorSupport.indexVector intrinsic -- confirm against the Vector API sources.
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 3, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public class IndexVectorBenchmark {
// Number of elements in every destination array (a JMH parameter).
@Param({"1024"})
private int size;
// Destination arrays, one per element type; allocated in Setup().
private byte[] ba;
private short[] sa;
private int[] ia;
private long[] la;
private float[] fa;
private double[] da;
// One species per element type, obtained from VectorSpecies.ofLargestShape().
private static final VectorSpecies<Byte> bspecies = VectorSpecies.ofLargestShape(byte.class);
private static final VectorSpecies<Short> sspecies = VectorSpecies.ofLargestShape(short.class);
private static final VectorSpecies<Integer> ispecies = VectorSpecies.ofLargestShape(int.class);
private static final VectorSpecies<Long> lspecies = VectorSpecies.ofLargestShape(long.class);
private static final VectorSpecies<Float> fspecies = VectorSpecies.ofLargestShape(float.class);
private static final VectorSpecies<Double> dspecies = VectorSpecies.ofLargestShape(double.class);
// Allocates the destination arrays once per trial, each with "size" elements.
@Setup(Level.Trial)
public void Setup() {
ba = new byte[size];
sa = new short[size];
ia = new int[size];
la = new long[size];
fa = new float[size];
da = new double[size];
}
// Each benchmark below walks its array in steps of the species length and
// stores one index vector per step at offset i.
// NOTE(review): the loops assume "size" is a multiple of the species length
// so the last store fits in the array -- confirm for the targeted hardware.

// Byte variant; the addIndex argument is i % 2.
// NOTE(review): the other variants use i % 5 -- confirm the difference is intentional.
@Benchmark
public void byteIndexVector() {
for (int i = 0; i < size; i += bspecies.length()) {
((ByteVector) bspecies.broadcast(0).addIndex(i % 2)).intoArray(ba, i);
}
}
// Short variant; the addIndex argument is i % 5.
@Benchmark
public void shortIndexVector() {
for (int i = 0; i < size; i += sspecies.length()) {
((ShortVector) sspecies.broadcast(0).addIndex(i % 5)).intoArray(sa, i);
}
}
// Int variant; the addIndex argument is i % 5.
@Benchmark
public void intIndexVector() {
for (int i = 0; i < size; i += ispecies.length()) {
((IntVector) ispecies.broadcast(0).addIndex(i % 5)).intoArray(ia, i);
}
}
// Long variant; the addIndex argument is i % 5.
@Benchmark
public void longIndexVector() {
for (int i = 0; i < size; i += lspecies.length()) {
((LongVector) lspecies.broadcast(0).addIndex(i % 5)).intoArray(la, i);
}
}
// Float variant; the addIndex argument is i % 5.
@Benchmark
public void floatIndexVector() {
for (int i = 0; i < size; i += fspecies.length()) {
((FloatVector) fspecies.broadcast(0).addIndex(i % 5)).intoArray(fa, i);
}
}
// Double variant; the addIndex argument is i % 5.
@Benchmark
public void doubleIndexVector() {
for (int i = 0; i < size; i += dspecies.length()) {
((DoubleVector) dspecies.broadcast(0).addIndex(i % 5)).intoArray(da, i);
}
}
}