8276985: AArch64: [vectorapi] Backend support of VectorMaskToLongNode
Reviewed-by: njian, aph
This commit is contained in:
parent
08aad8506e
commit
6dae52f8e3
@ -5617,3 +5617,35 @@ instruct vmask_lasttrue16B(iRegINoSp dst, vecX src) %{
|
|||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// VectorMaskToLong for a 64-bit (vecD) boolean vector: produces a long whose
// low 8 bits mirror the 8 mask lanes.
instruct vmask_tolong8B(iRegLNoSp dst, vecD src) %{
  match(Set dst (VectorMaskToLong src));
  ins_cost(5 * INSN_COST);
  format %{ "vmask_tolong $dst, $src\t# convert mask to long (8B)" %}
  ins_encode %{
    // "src" carries one boolean per byte lane; each byte is 0x00 or 0x01.
    Register dst_reg = as_Register($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);

    // Move the 8 mask bytes into the general-purpose destination, then
    // squeeze bit 0 of every byte into the low 8 bits.
    __ fmovd(dst_reg, src_reg);
    __ bytemask_compress(dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
|
||||||
|
|
||||||
|
// VectorMaskToLong for a 128-bit (vecX) boolean vector: produces a long whose
// low 16 bits mirror the 16 mask lanes. Uses rscratch1 as a scratch register.
instruct vmask_tolong16B(iRegLNoSp dst, vecX src) %{
  match(Set dst (VectorMaskToLong src));
  ins_cost(11 * INSN_COST);
  format %{ "vmask_tolong $dst, $src\t# convert mask to long (16B)" %}
  ins_encode %{
    // "src" carries one boolean per byte lane; each byte is 0x00 or 0x01.
    Register dst_reg = as_Register($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);

    // Fetch the two 64-bit halves: D element 0 into dst, D element 1 into
    // rscratch1.
    __ umov(dst_reg, src_reg, __ D, 0);
    __ umov(rscratch1, src_reg, __ D, 1);

    // Reduce each half to an 8-bit mask.
    __ bytemask_compress(dst_reg);
    __ bytemask_compress(rscratch1);

    // Merge: the mask from element 1 lands in bits 8..15 of the result.
    __ orr(dst_reg, dst_reg, rscratch1, Assembler::LSL, 8);
  %}
  ins_pipe(pipe_slow);
%}
|
||||||
|
@ -2481,3 +2481,35 @@ instruct vmask_lasttrue16B(iRegINoSp dst, vecX src) %{
|
|||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// VectorMaskToLong for a 64-bit (vecD) boolean vector: produces a long whose
// low 8 bits mirror the 8 mask lanes.
instruct vmask_tolong8B(iRegLNoSp dst, vecD src) %{
  match(Set dst (VectorMaskToLong src));
  ins_cost(5 * INSN_COST);
  format %{ "vmask_tolong $dst, $src\t# convert mask to long (8B)" %}
  ins_encode %{
    // "src" carries one boolean per byte lane; each byte is 0x00 or 0x01.
    Register dst_reg = as_Register($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);

    // Move the 8 mask bytes into the general-purpose destination, then
    // squeeze bit 0 of every byte into the low 8 bits.
    __ fmovd(dst_reg, src_reg);
    __ bytemask_compress(dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
|
||||||
|
|
||||||
|
// VectorMaskToLong for a 128-bit (vecX) boolean vector: produces a long whose
// low 16 bits mirror the 16 mask lanes. Uses rscratch1 as a scratch register.
instruct vmask_tolong16B(iRegLNoSp dst, vecX src) %{
  match(Set dst (VectorMaskToLong src));
  ins_cost(11 * INSN_COST);
  format %{ "vmask_tolong $dst, $src\t# convert mask to long (16B)" %}
  ins_encode %{
    // "src" carries one boolean per byte lane; each byte is 0x00 or 0x01.
    Register dst_reg = as_Register($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);

    // Fetch the two 64-bit halves: D element 0 into dst, D element 1 into
    // rscratch1.
    __ umov(dst_reg, src_reg, __ D, 0);
    __ umov(rscratch1, src_reg, __ D, 1);

    // Reduce each half to an 8-bit mask.
    __ bytemask_compress(dst_reg);
    __ bytemask_compress(rscratch1);

    // Merge: the mask from element 1 lands in bits 8..15 of the result.
    __ orr(dst_reg, dst_reg, rscratch1, Assembler::LSL, 8);
  %}
  ins_pipe(pipe_slow);
%}
|
||||||
|
@ -5746,6 +5746,22 @@ instruct vmask_lasttrue_partial(iRegINoSp dst, pReg src, pReg ptmp, rFlagsReg cr
|
|||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// VectorMaskToLong for an SVE predicate mask. Only matches when the mask has
// at most 64 lanes; the packing itself is done by sve_vmask_tolong.
instruct vmask_tolong(iRegLNoSp dst, pReg src, vReg vtmp1, vReg vtmp2, pRegGov pgtmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length() <= 64);
  match(Set dst (VectorMaskToLong src));
  effect(TEMP vtmp1, TEMP vtmp2, TEMP pgtmp, KILL cr);
  ins_cost(13 * SVE_COST);
  format %{ "vmask_tolong $dst, $src\t# vector mask tolong (sve)" %}
  ins_encode %{
    // Element type and lane count of the mask operand select the code
    // sequence emitted by the macro assembler.
    const BasicType mask_bt = Matcher::vector_element_basic_type(this, $src);
    const int lane_cnt = Matcher::vector_length(this, $src);

    Register dst_reg = as_Register($dst$$reg);
    PRegister mask_reg = as_PRegister($src$$reg);
    FloatRegister tmp1_reg = as_FloatRegister($vtmp1$$reg);
    FloatRegister tmp2_reg = as_FloatRegister($vtmp2$$reg);
    PRegister pg_tmp_reg = as_PRegister($pgtmp$$reg);

    __ sve_vmask_tolong(dst_reg, mask_reg, mask_bt, lane_cnt,
                        tmp1_reg, tmp2_reg, pg_tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
|
||||||
// ---------------------------- Vector mask generation ---------------------------
|
// ---------------------------- Vector mask generation ---------------------------
|
||||||
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
|
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
|
||||||
predicate(UseSVE > 0);
|
predicate(UseSVE > 0);
|
||||||
|
@ -3176,6 +3176,23 @@ instruct vmask_lasttrue_partial(iRegINoSp dst, pReg src, pReg ptmp, rFlagsReg cr
|
|||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// VectorMaskToLong for an SVE predicate mask. Only matches when the mask has
// at most 64 lanes; the packing itself is done by sve_vmask_tolong.
instruct vmask_tolong(iRegLNoSp dst, pReg src, vReg vtmp1, vReg vtmp2, pRegGov pgtmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length() <= 64);
  match(Set dst (VectorMaskToLong src));
  effect(TEMP vtmp1, TEMP vtmp2, TEMP pgtmp, KILL cr);
  ins_cost(13 * SVE_COST);
  format %{ "vmask_tolong $dst, $src\t# vector mask tolong (sve)" %}
  ins_encode %{
    // Element type and lane count of the mask operand select the code
    // sequence emitted by the macro assembler.
    const BasicType mask_bt = Matcher::vector_element_basic_type(this, $src);
    const int lane_cnt = Matcher::vector_length(this, $src);

    Register dst_reg = as_Register($dst$$reg);
    PRegister mask_reg = as_PRegister($src$$reg);
    FloatRegister tmp1_reg = as_FloatRegister($vtmp1$$reg);
    FloatRegister tmp2_reg = as_FloatRegister($vtmp2$$reg);
    PRegister pg_tmp_reg = as_PRegister($pgtmp$$reg);

    __ sve_vmask_tolong(dst_reg, mask_reg, mask_bt, lane_cnt,
                        tmp1_reg, tmp2_reg, pg_tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}dnl
|
||||||
|
|
||||||
// ---------------------------- Vector mask generation ---------------------------
|
// ---------------------------- Vector mask generation ---------------------------
|
||||||
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
|
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
|
||||||
predicate(UseSVE > 0);
|
predicate(UseSVE > 0);
|
||||||
|
@ -946,6 +946,48 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compress the least significant bit of each byte to the rightmost and clear
|
||||||
|
// the higher garbage bits.
|
||||||
|
void C2_MacroAssembler::bytemask_compress(Register dst) {
|
||||||
|
// Example input, dst = 0x01 00 00 00 01 01 00 01
|
||||||
|
// The "??" bytes are garbage.
|
||||||
|
orr(dst, dst, dst, Assembler::LSR, 7); // dst = 0x?? 02 ?? 00 ?? 03 ?? 01
|
||||||
|
orr(dst, dst, dst, Assembler::LSR, 14); // dst = 0x????????08 ??????0D
|
||||||
|
orr(dst, dst, dst, Assembler::LSR, 28); // dst = 0x????????????????8D
|
||||||
|
andr(dst, dst, 0xff); // dst = 0x8D
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pack the lowest-numbered bit of each mask element in src into a long value
|
||||||
|
// in dst, at most the first 64 lane elements.
|
||||||
|
// Clobbers: rscratch1
|
||||||
|
void C2_MacroAssembler::sve_vmask_tolong(Register dst, PRegister src, BasicType bt, int lane_cnt,
|
||||||
|
FloatRegister vtmp1, FloatRegister vtmp2, PRegister pgtmp) {
|
||||||
|
assert(pgtmp->is_governing(), "This register has to be a governing predicate register.");
|
||||||
|
assert(lane_cnt <= 64 && is_power_of_2(lane_cnt), "Unsupported lane count");
|
||||||
|
assert_different_registers(dst, rscratch1);
|
||||||
|
|
||||||
|
Assembler::SIMD_RegVariant size = elemType_to_regVariant(bt);
|
||||||
|
|
||||||
|
// Pack the mask into vector with sequential bytes.
|
||||||
|
sve_cpy(vtmp1, size, src, 1, false);
|
||||||
|
if (bt != T_BYTE) {
|
||||||
|
sve_vector_narrow(vtmp1, B, vtmp1, size, vtmp2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compress the lowest 8 bytes.
|
||||||
|
fmovd(dst, vtmp1);
|
||||||
|
bytemask_compress(dst);
|
||||||
|
if (lane_cnt <= 8) return;
|
||||||
|
|
||||||
|
// Repeat on higher bytes and join the results.
|
||||||
|
// Compress 8 bytes in each iteration.
|
||||||
|
for (int idx = 1; idx < (lane_cnt / 8); idx++) {
|
||||||
|
idx == 1 ? fmovhid(rscratch1, vtmp1) : sve_extract(rscratch1, D, pgtmp, vtmp1, idx);
|
||||||
|
bytemask_compress(rscratch1);
|
||||||
|
orr(dst, dst, rscratch1, Assembler::LSL, idx << 3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void C2_MacroAssembler::sve_compare(PRegister pd, BasicType bt, PRegister pg,
|
void C2_MacroAssembler::sve_compare(PRegister pd, BasicType bt, PRegister pg,
|
||||||
FloatRegister zn, FloatRegister zm, int cond) {
|
FloatRegister zn, FloatRegister zm, int cond) {
|
||||||
assert(pg->is_governing(), "This register has to be a governing predicate register");
|
assert(pg->is_governing(), "This register has to be a governing predicate register");
|
||||||
@ -1021,6 +1063,7 @@ void C2_MacroAssembler::sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst
|
|||||||
FloatRegister src, SIMD_RegVariant src_size,
|
FloatRegister src, SIMD_RegVariant src_size,
|
||||||
FloatRegister tmp) {
|
FloatRegister tmp) {
|
||||||
assert(dst_size < src_size && dst_size <= S && src_size <= D, "invalid element size");
|
assert(dst_size < src_size && dst_size <= S && src_size <= D, "invalid element size");
|
||||||
|
assert_different_registers(src, tmp);
|
||||||
sve_dup(tmp, src_size, 0);
|
sve_dup(tmp, src_size, 0);
|
||||||
if (src_size == D) {
|
if (src_size == D) {
|
||||||
switch (dst_size) {
|
switch (dst_size) {
|
||||||
|
@ -55,6 +55,15 @@
|
|||||||
FloatRegister ztmp1, FloatRegister ztmp2,
|
FloatRegister ztmp1, FloatRegister ztmp2,
|
||||||
PRegister pgtmp, PRegister ptmp, bool isL);
|
PRegister pgtmp, PRegister ptmp, bool isL);
|
||||||
|
|
||||||
|
// Compress the least significant bit of each byte to the rightmost and clear
|
||||||
|
// the higher garbage bits.
// On return only the low 8 bits of dst can be set: result bit k is bit 0 of
// input byte k (e.g. 0x01 00 00 00 01 01 00 01 becomes 0x8D).
|
||||||
|
void bytemask_compress(Register dst);
|
||||||
|
|
||||||
|
// Pack the lowest-numbered bit of each mask element in src into a long value
|
||||||
|
// in dst, at most the first 64 lane elements.
// Clobbers: rscratch1 (dst must therefore differ from rscratch1).
// bt and lane_cnt give the mask element type and lane count; lane_cnt must be
// a power of two no greater than 64. vtmp1 and vtmp2 are vector temporaries,
// and pgtmp must be a governing predicate register.
|
||||||
|
void sve_vmask_tolong(Register dst, PRegister src, BasicType bt, int lane_cnt,
|
||||||
|
FloatRegister vtmp1, FloatRegister vtmp2, PRegister pgtmp);
|
||||||
|
|
||||||
// SIMD&FP comparison
|
// SIMD&FP comparison
|
||||||
void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
|
void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
|
||||||
FloatRegister src2, int cond, bool isQ);
|
FloatRegister src2, int cond, bool isQ);
|
||||||
|
Loading…
Reference in New Issue
Block a user