8289186: Support predicated vector load/store operations over X86 AVX2 targets.
Reviewed-by: xgong, kvn
parent 3c08e6b311
commit 81ee7d28f8
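
Context: before this change, LoadVectorMasked/StoreVectorMasked only matched on AVX-512 targets (kReg masks); with it, masked loads and stores of non-subword types also match on AVX2 via vpmaskmov/vmaskmov. A minimal sketch of the Java-level pattern that benefits (class and method names are illustrative, not from the patch; assumes jdk.incubator.vector is available via --add-modules=jdk.incubator.vector):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorSpecies;

    public class MaskedTailCopy {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_256;

        // Copy in[] to out[]; the tail iteration uses a masked load/store,
        // which C2 can now keep vectorized on AVX2-only hardware.
        static void copy(int[] in, int[] out) {
            int i = 0;
            for (; i < SPECIES.loopBound(in.length); i += SPECIES.length()) {
                IntVector.fromArray(SPECIES, in, i).intoArray(out, i);
            }
            VectorMask<Integer> m = SPECIES.indexInRange(i, in.length); // lanes past the end stay inactive
            IntVector.fromArray(SPECIES, in, i, m).intoArray(out, i, m);
        }
    }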
@@ -3038,6 +3038,60 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
  emit_operand(src, dst);
}

void Assembler::vpmaskmovd(XMMRegister dst, XMMRegister mask, Address src, int vector_len) {
  assert((VM_Version::supports_avx2() && vector_len == AVX_256bit), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x8C);
  emit_operand(dst, src);
}

void Assembler::vpmaskmovq(XMMRegister dst, XMMRegister mask, Address src, int vector_len) {
  assert((VM_Version::supports_avx2() && vector_len == AVX_256bit), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x8C);
  emit_operand(dst, src);
}

void Assembler::vmaskmovps(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x2C);
  emit_operand(dst, src);
}

void Assembler::vmaskmovpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x2D);
  emit_operand(dst, src);
}

void Assembler::vmaskmovps(Address dst, XMMRegister src, XMMRegister mask, int vector_len) {
  assert(UseAVX > 0, "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(dst, mask->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x2E);
  emit_operand(src, dst);
}

void Assembler::vmaskmovpd(Address dst, XMMRegister src, XMMRegister mask, int vector_len) {
  assert(UseAVX > 0, "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(dst, mask->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x2F);
  emit_operand(src, dst);
}

// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
void Assembler::evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
@@ -4394,14 +4448,6 @@ void Assembler::vmovmskpd(Register dst, XMMRegister src, int vec_enc) {
  emit_int16(0x50, (0xC0 | encode));
}

void Assembler::vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert((VM_Version::supports_avx2() && vector_len == AVX_256bit), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x8C);
  emit_operand(dst, src);
}

void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
@@ -1804,6 +1804,13 @@ private:
  void vmovmskps(Register dst, XMMRegister src, int vec_enc);
  void vmovmskpd(Register dst, XMMRegister src, int vec_enc);
  void vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpmaskmovq(XMMRegister dst, XMMRegister mask, Address src, int vector_len);


  void vmaskmovps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
  void vmaskmovpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
  void vmaskmovps(Address dst, XMMRegister src, XMMRegister mask, int vector_len);
  void vmaskmovpd(Address dst, XMMRegister src, XMMRegister mask, int vector_len);

  // SSE 4.1 extract
  void pextrd(Register dst, XMMRegister src, int imm8);
@@ -2025,6 +2025,39 @@ void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, X
  MacroAssembler::evmovdqu(type, kmask, dst, src, merge, vector_len);
}

void C2_MacroAssembler::vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask,
                                 int vec_enc) {
  switch(elem_bt) {
    case T_INT:
    case T_FLOAT:
      vmaskmovps(dst, src, mask, vec_enc);
      break;
    case T_LONG:
    case T_DOUBLE:
      vmaskmovpd(dst, src, mask, vec_enc);
      break;
    default:
      fatal("Unsupported type %s", type2name(elem_bt));
      break;
  }
}

void C2_MacroAssembler::vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask,
                                 int vec_enc) {
  switch(elem_bt) {
    case T_INT:
    case T_FLOAT:
      vmaskmovps(dst, src, mask, vec_enc);
      break;
    case T_LONG:
    case T_DOUBLE:
      vmaskmovpd(dst, src, mask, vec_enc);
      break;
    default:
      fatal("Unsupported type %s", type2name(elem_bt));
      break;
  }
}

void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
                                          XMMRegister dst, XMMRegister src,
@@ -442,4 +442,9 @@ public:

  void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
                          KRegister ktmp1, int vec_enc);

  void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc);

  void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc);

#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
@@ -1589,8 +1589,6 @@ const bool Matcher::match_rule_supported(int opcode) {

    case Op_VectorCmpMasked:
    case Op_VectorMaskGen:
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
@@ -1753,8 +1751,6 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
    case Op_ClearArray:
    case Op_VectorMaskGen:
    case Op_VectorCmpMasked:
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      if (!is_LP64 || !VM_Version::supports_avx512bw()) {
        return false;
      }
@@ -1762,6 +1758,12 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
        return false;
      }
      break;
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
        return false;
      }
      break;
    case Op_CMoveVD:
      if (vlen != 4) {
        return false; // implementation limitation (only vcmov4D_reg is present)
@@ -9082,9 +9084,59 @@ instruct vprorate(vec dst, vec src, vec shift) %{
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// ---------------------------------- Masked Operations ------------------------------------
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
@@ -9111,17 +9163,6 @@ instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kR
%}


instruct vmasked_load64(vec dst, memory mem, kReg mask) %{
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_gen(kReg dst, rRegL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp);
@@ -9143,18 +9184,6 @@ instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  ins_pipe( pipe_slow );
%}

instruct vmasked_store64(memory mem, vec src, kReg mask) %{
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
@@ -1217,8 +1217,7 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
  int mem_num_elem = using_byte_array ? num_elem * type2aelembytes(elem_bt) : num_elem;
  BasicType mem_elem_bt = using_byte_array ? T_BYTE : elem_bt;
  bool supports_predicate = arch_supports_vector(is_store ? Op_StoreVectorMasked : Op_LoadVectorMasked,
                                                 mem_num_elem, mem_elem_bt,
                                                 (VectorMaskUseType) (VecMaskUseLoad | VecMaskUsePred));
                                                 mem_num_elem, mem_elem_bt, VecMaskUseLoad);

  // If current arch does not support the predicated operations, we have to bail
  // out when current case uses the predicate feature.
@@ -922,7 +922,6 @@ class StoreVectorMaskedNode : public StoreVectorNode {
 public:
  StoreVectorMaskedNode(Node* c, Node* mem, Node* dst, Node* src, const TypePtr* at, Node* mask)
    : StoreVectorNode(c, mem, dst, at, src) {
    assert(mask->bottom_type()->isa_vectmask(), "sanity");
    init_class_id(Class_StoreVectorMasked);
    set_mismatched_access();
    add_req(mask);
@@ -943,7 +942,6 @@ class LoadVectorMaskedNode : public LoadVectorNode {
  LoadVectorMaskedNode(Node* c, Node* mem, Node* src, const TypePtr* at, const TypeVect* vt, Node* mask,
                       ControlDependency control_dependency = LoadNode::DependsOnlyOnTest)
    : LoadVectorNode(c, mem, src, at, vt, control_dependency) {
    assert(mask->bottom_type()->isa_vectmask(), "sanity");
    init_class_id(Class_LoadVectorMasked);
    set_mismatched_access();
    add_req(mask);
@@ -0,0 +1,134 @@
/*
 * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package org.openjdk.bench.jdk.incubator.vector;

import java.util.concurrent.TimeUnit;
import jdk.incubator.vector.*;
import org.openjdk.jmh.annotations.*;

@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 3, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public class StoreMaskedIOOBEBenchmark {
    @Param({"1024"})
    private int inSize;

    @Param({"1022"})
    private int outSize;

    private byte[] byteIn;
    private byte[] byteOut;
    private short[] shortIn;
    private short[] shortOut;
    private int[] intIn;
    private int[] intOut;
    private long[] longIn;
    private long[] longOut;
    private float[] floatIn;
    private float[] floatOut;
    private double[] doubleIn;
    private double[] doubleOut;

    private static final VectorSpecies<Byte> bspecies = VectorSpecies.ofLargestShape(byte.class);
    private static final VectorSpecies<Short> sspecies = VectorSpecies.ofLargestShape(short.class);
    private static final VectorSpecies<Integer> ispecies = VectorSpecies.ofLargestShape(int.class);
    private static final VectorSpecies<Long> lspecies = VectorSpecies.ofLargestShape(long.class);
    private static final VectorSpecies<Float> fspecies = VectorSpecies.ofLargestShape(float.class);
    private static final VectorSpecies<Double> dspecies = VectorSpecies.ofLargestShape(double.class);

    @Setup(Level.Trial)
    public void Setup() {
        byteIn = new byte[inSize];
        byteOut = new byte[outSize];
        shortIn = new short[inSize];
        shortOut = new short[outSize];
        intIn = new int[inSize];
        intOut = new int[outSize];
        longIn = new long[inSize];
        longOut = new long[outSize];
        floatIn = new float[inSize];
        floatOut = new float[outSize];
        doubleIn = new double[inSize];
        doubleOut = new double[outSize];

        for (int i = 0; i < inSize; i++) {
            byteIn[i] = (byte) i;
            shortIn[i] = (short) i;
            intIn[i] = i;
            longIn[i] = i;
            floatIn[i] = (float) i;
            doubleIn[i] = (double) i;
        }
    }

    @Benchmark
    public void byteStoreArrayMaskIOOBE() {
        VectorMask<Byte> mask = VectorMask.fromLong(bspecies, (1 << (bspecies.length() - 2)) - 1);
        for (int i = 0; i < inSize; i += bspecies.length()) {
            ByteVector.fromArray(bspecies, byteIn, i, mask).intoArray(byteOut, i, mask);
        }
    }

    @Benchmark
    public void shortStoreArrayMaskIOOBE() {
        VectorMask<Short> mask = VectorMask.fromLong(sspecies, (1 << (sspecies.length() - 2)) - 1);
        for (int i = 0; i < inSize; i += sspecies.length()) {
            ShortVector.fromArray(sspecies, shortIn, i).intoArray(shortOut, i, mask);
        }
    }

    @Benchmark
    public void intStoreArrayMaskIOOBE() {
        VectorMask<Integer> mask = VectorMask.fromLong(ispecies, (1 << (ispecies.length() - 2)) - 1);
        for (int i = 0; i < inSize; i += ispecies.length()) {
            IntVector.fromArray(ispecies, intIn, i).intoArray(intOut, i, mask);
        }
    }

    @Benchmark
    public void longStoreArrayMaskIOOBE() {
        VectorMask<Long> mask = VectorMask.fromLong(lspecies, (1 << (lspecies.length() - 2)) - 1);
        for (int i = 0; i < inSize; i += lspecies.length()) {
            LongVector.fromArray(lspecies, longIn, i).intoArray(longOut, i, mask);
        }
    }

    @Benchmark
    public void floatStoreArrayMaskIOOBE() {
        VectorMask<Float> mask = VectorMask.fromLong(fspecies, (1 << (fspecies.length() - 2)) - 1);
        for (int i = 0; i < inSize; i += fspecies.length()) {
            FloatVector.fromArray(fspecies, floatIn, i).intoArray(floatOut, i, mask);
        }
    }

    @Benchmark
    public void doubleStoreArrayMaskIOOBE() {
        VectorMask<Double> mask = VectorMask.fromLong(dspecies, (1 << (dspecies.length() - 2)) - 1);
        for (int i = 0; i < inSize; i += dspecies.length()) {
            DoubleVector.fromArray(dspecies, doubleIn, i).intoArray(doubleOut, i, mask);
        }
    }
}
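
Note on the benchmark's "IOOBE" naming: with inSize 1024 and outSize 1022, the last iteration's top two lanes fall outside the destination array, and only the mask keeps the store in bounds. A hypothetical minimal illustration of that masked-tail behavior (class and variable names are not from the patch; run with --add-modules=jdk.incubator.vector):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorSpecies;

    public class MaskedStoreTail {
        public static void main(String[] args) {
            VectorSpecies<Integer> s = IntVector.SPECIES_256; // 8 int lanes
            int[] src = new int[8];
            int[] dst = new int[6];                           // 2 lanes short of a full vector
            VectorMask<Integer> m = s.indexInRange(0, dst.length);
            // Masked store: inactive lanes 6..7 are never written, so no
            // IndexOutOfBoundsException despite dst being shorter than the vector.
            IntVector.fromArray(s, src, 0).intoArray(dst, 0, m);
        }
    }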