8265263: AArch64: Combine vneg with right shift count
Reviewed-by: adinn, dlong
This commit is contained in:
parent
ea19114e66
commit
49245131e9
@ -1311,6 +1311,9 @@ public:
|
||||
// predicate controlling translation of CompareAndSwapX
|
||||
bool needs_acquiring_load_exclusive(const Node *load);
|
||||
|
||||
// Assert that the given node is not a variable shift.
|
||||
bool assert_not_var_shift(const Node* n);
|
||||
|
||||
// predicate controlling addressing modes
|
||||
bool size_fits_all_mem_uses(AddPNode* addp, int shift);
|
||||
%}
|
||||
@ -1725,6 +1728,12 @@ bool needs_acquiring_load_exclusive(const Node *n)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Assert that the given node is not a variable shift.
|
||||
bool assert_not_var_shift(const Node* n) {
|
||||
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
|
||||
return true;
|
||||
}
|
||||
|
||||
#define __ _masm.
|
||||
|
||||
// advance declarations for helper functions to convert register
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
// Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2020, 2021, Arm Limited. All rights reserved.
|
||||
// Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2020, 2022, Arm Limited. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -1972,223 +1972,277 @@ VLOGICAL(xor, eor, xor, Xor, 16, B, X)
|
||||
|
||||
// ------------------------------ Shift ---------------------------------------
|
||||
dnl
|
||||
define(`VSHIFTCNT', `
|
||||
instruct vshiftcnt$3$4`'(vec$5 dst, iRegIorL2I cnt) %{
|
||||
predicate(UseSVE == 0 && (ifelse($3, 8, n->as_Vector()->length_in_bytes() == 4 ||`
|
||||
')n->as_Vector()->length_in_bytes() == $3));
|
||||
define(`VSLCNT', `
|
||||
instruct vslcnt$1$2`'(vec$3 dst, iRegIorL2I cnt) %{
|
||||
predicate(UseSVE == 0 && ifelse($1, 8,
|
||||
(n->as_Vector()->length_in_bytes() == 4 ||`
|
||||
'n->as_Vector()->length_in_bytes() == $1),
|
||||
n->as_Vector()->length_in_bytes() == $1));
|
||||
match(Set dst (LShiftCntV cnt));
|
||||
match(Set dst (RShiftCntV cnt));
|
||||
format %{ "$1 $dst, $cnt\t# shift count vector ($3$4)" %}
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "dup $dst, $cnt\t# shift count vector ($1$2)" %}
|
||||
ins_encode %{
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$4, as_Register($cnt$$reg));
|
||||
__ dup(as_FloatRegister($dst$$reg), __ T$1$2, as_Register($cnt$$reg));
|
||||
%}
|
||||
ins_pipe(vdup_reg_reg`'ifelse($5, D, 64, 128));
|
||||
ins_pipe(vdup_reg_reg`'ifelse($3, D, 64, 128));
|
||||
%}')dnl
|
||||
dnl $1 $2 $3 $4 $5
|
||||
VSHIFTCNT(dup, dup, 8, B, D)
|
||||
VSHIFTCNT(dup, dup, 16, B, X)
|
||||
dnl
|
||||
define(`VSRCNT', `
|
||||
instruct vsrcnt$1$2`'(vec$3 dst, iRegIorL2I cnt) %{
|
||||
predicate(UseSVE == 0 && ifelse($1, 8,
|
||||
(n->as_Vector()->length_in_bytes() == 4 ||`
|
||||
'n->as_Vector()->length_in_bytes() == $1),
|
||||
n->as_Vector()->length_in_bytes() == $1));
|
||||
match(Set dst (RShiftCntV cnt));
|
||||
ins_cost(INSN_COST * 2);
|
||||
format %{ "negw rscratch1, $cnt\t"
|
||||
"dup $dst, rscratch1\t# shift count vector ($1$2)" %}
|
||||
ins_encode %{
|
||||
__ negw(rscratch1, as_Register($cnt$$reg));
|
||||
__ dup(as_FloatRegister($dst$$reg), __ T$1$2, rscratch1);
|
||||
%}
|
||||
ins_pipe(vdup_reg_reg`'ifelse($3, D, 64, 128));
|
||||
%}')dnl
|
||||
dnl
|
||||
|
||||
// Vector shift count
|
||||
// Note-1: Low 8 bits of each element are used, so it doesn't matter if we
|
||||
// treat it as ints or bytes here.
|
||||
// Note-2: Shift value is negated for RShiftCntV additionally. See the comments
|
||||
// on vsra8B rule for more details.
|
||||
dnl $1 $2 $3
|
||||
VSLCNT(8, B, D)
|
||||
VSLCNT(16, B, X)
|
||||
VSRCNT(8, B, D)
|
||||
VSRCNT(16, B, X)
|
||||
dnl
|
||||
define(`PREDICATE',
|
||||
`ifelse($1, 8B,
|
||||
ifelse($3, `', `predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);',
|
||||
`predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&`
|
||||
'$3);'),
|
||||
$1, 4S,
|
||||
ifelse($3, `', `predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);',
|
||||
`predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&`
|
||||
'$3);'),
|
||||
ifelse($3, `', `predicate(n->as_Vector()->length() == $2);',
|
||||
`predicate(n->as_Vector()->length() == $2 && $3);'))')dnl
|
||||
dnl
|
||||
define(`VSLL', `
|
||||
instruct vsll$3$4`'(vec$6 dst, vec$6 src, vec$6 shift) %{
|
||||
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
|
||||
',
|
||||
$3$4, 4S, n->as_Vector()->length() == 2 ||`
|
||||
')n->as_Vector()->length() == $3);
|
||||
match(Set dst (LShiftV$4 src shift));
|
||||
instruct vsll$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
|
||||
PREDICATE(`$1$2', $1, )
|
||||
match(Set dst (LShiftV$2 src shift));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "$1 $dst,$src,$shift\t# vector ($3$5)" %}
|
||||
format %{ "sshl $dst,$src,$shift\t# vector ($1$3)" %}
|
||||
ins_encode %{
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
__ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($shift$$reg));
|
||||
%}
|
||||
ins_pipe(vshift`'ifelse($6, D, 64, 128));
|
||||
ins_pipe(vshift`'ifelse($4, D, 64, 128));
|
||||
%}')dnl
|
||||
dnl
|
||||
define(`VSRA', `
|
||||
instruct vsra$3$4`'(vec$6 dst, vec$6 src, vec$6 shift, vec$6 tmp) %{
|
||||
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
|
||||
',
|
||||
$3$4, 4S, n->as_Vector()->length() == 2 ||`
|
||||
')n->as_Vector()->length() == $3);
|
||||
match(Set dst (RShiftV$4 src shift));
|
||||
instruct vsra$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
|
||||
PREDICATE(`$1$2', $1, !n->as_ShiftV()->is_var_shift())
|
||||
match(Set dst (RShiftV$2 src shift));
|
||||
ins_cost(INSN_COST);
|
||||
effect(TEMP tmp);
|
||||
format %{ "$1 $tmp,$shift\t"
|
||||
"$2 $dst,$src,$tmp\t# vector ($3$5)" %}
|
||||
format %{ "sshl $dst,$src,$shift\t# vector ($1$3)" %}
|
||||
ins_encode %{
|
||||
__ $1(as_FloatRegister($tmp$$reg), __ T`'ifelse($6, D, 8B, 16B),
|
||||
as_FloatRegister($shift$$reg));
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
__ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($tmp$$reg));
|
||||
as_FloatRegister($shift$$reg));
|
||||
%}
|
||||
ins_pipe(vshift`'ifelse($6, D, 64, 128));
|
||||
ins_pipe(vshift`'ifelse($4, D, 64, 128));
|
||||
%}')dnl
|
||||
dnl
|
||||
define(`VSRA_VAR', `
|
||||
instruct vsra$1$2_var`'(vec$4 dst, vec$4 src, vec$4 shift) %{
|
||||
PREDICATE(`$1$2', $1, n->as_ShiftV()->is_var_shift())
|
||||
match(Set dst (RShiftV$2 src shift));
|
||||
ins_cost(INSN_COST * 2);
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "negr $dst,$shift\t"
|
||||
"sshl $dst,$src,$dst\t# vector ($1$3)" %}
|
||||
ins_encode %{
|
||||
__ negr(as_FloatRegister($dst$$reg), __ T`'ifelse($4, D, 8B, 16B),
|
||||
as_FloatRegister($shift$$reg));
|
||||
__ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($dst$$reg));
|
||||
%}
|
||||
ins_pipe(vshift`'ifelse($4, D, 64, 128));
|
||||
%}')dnl
|
||||
dnl
|
||||
define(`VSRL', `
|
||||
instruct vsrl$3$4`'(vec$6 dst, vec$6 src, vec$6 shift, vec$6 tmp) %{
|
||||
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
|
||||
',
|
||||
$3$4, 4S, n->as_Vector()->length() == 2 ||`
|
||||
')n->as_Vector()->length() == $3);
|
||||
match(Set dst (URShiftV$4 src shift));
|
||||
instruct vsrl$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
|
||||
PREDICATE(`$1$2', $1, !n->as_ShiftV()->is_var_shift())
|
||||
match(Set dst (URShiftV$2 src shift));
|
||||
ins_cost(INSN_COST);
|
||||
effect(TEMP tmp);
|
||||
format %{ "$1 $tmp,$shift\t"
|
||||
"$2 $dst,$src,$tmp\t# vector ($3$5)" %}
|
||||
format %{ "ushl $dst,$src,$shift\t# vector ($1$3)" %}
|
||||
ins_encode %{
|
||||
__ $1(as_FloatRegister($tmp$$reg), __ T`'ifelse($6, D, 8B, 16B),
|
||||
as_FloatRegister($shift$$reg));
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
__ ushl(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($tmp$$reg));
|
||||
as_FloatRegister($shift$$reg));
|
||||
%}
|
||||
ins_pipe(vshift`'ifelse($6, D, 64, 128));
|
||||
ins_pipe(vshift`'ifelse($4, D, 64, 128));
|
||||
%}')dnl
|
||||
dnl
|
||||
define(`VSRL_VAR', `
|
||||
instruct vsrl$1$2_var`'(vec$4 dst, vec$4 src, vec$4 shift) %{
|
||||
PREDICATE(`$1$2', $1, n->as_ShiftV()->is_var_shift())
|
||||
match(Set dst (URShiftV$2 src shift));
|
||||
ins_cost(INSN_COST * 2);
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "negr $dst,$shift\t"
|
||||
"ushl $dst,$src,$dst\t# vector ($1$3)" %}
|
||||
ins_encode %{
|
||||
__ negr(as_FloatRegister($dst$$reg), __ T`'ifelse($4, D, 8B, 16B),
|
||||
as_FloatRegister($shift$$reg));
|
||||
__ ushl(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($dst$$reg));
|
||||
%}
|
||||
ins_pipe(vshift`'ifelse($4, D, 64, 128));
|
||||
%}')dnl
|
||||
dnl
|
||||
define(`VSLL_IMM', `
|
||||
instruct vsll$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{
|
||||
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
|
||||
',
|
||||
$3$4, 4S, n->as_Vector()->length() == 2 ||`
|
||||
')n->as_Vector()->length() == $3);
|
||||
match(Set dst (LShiftV$4 src (LShiftCntV shift)));
|
||||
instruct vsll$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
|
||||
PREDICATE(`$1$2', $1, assert_not_var_shift(n))
|
||||
match(Set dst (LShiftV$2 src (LShiftCntV shift)));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %}
|
||||
ins_encode %{ifelse($4, B,`
|
||||
format %{ "shl $dst, $src, $shift\t# vector ($1$3)" %}
|
||||
ins_encode %{ifelse($2, B,`
|
||||
int sh = (int)$shift$$constant;
|
||||
if (sh >= 8) {
|
||||
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B),
|
||||
__ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($src$$reg));
|
||||
} else {
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
__ shl(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg), sh);
|
||||
}', $4, S,`
|
||||
}', $2, S,`
|
||||
int sh = (int)$shift$$constant;
|
||||
if (sh >= 16) {
|
||||
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B),
|
||||
__ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($src$$reg));
|
||||
} else {
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
__ shl(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg), sh);
|
||||
}', `
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
__ shl(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg),
|
||||
(int)$shift$$constant);')
|
||||
%}
|
||||
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm);
|
||||
ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
|
||||
%}')dnl
|
||||
dnl
|
||||
define(`VSRA_IMM', `
|
||||
instruct vsra$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{
|
||||
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
|
||||
',
|
||||
$3$4, 4S, n->as_Vector()->length() == 2 ||`
|
||||
')n->as_Vector()->length() == $3);
|
||||
match(Set dst (RShiftV$4 src (RShiftCntV shift)));
|
||||
instruct vsra$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
|
||||
PREDICATE(`$1$2', $1, assert_not_var_shift(n))
|
||||
match(Set dst (RShiftV$2 src (RShiftCntV shift)));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %}
|
||||
ins_encode %{ifelse($4, B,`
|
||||
format %{ "sshr $dst, $src, $shift\t# vector ($1$3)" %}
|
||||
ins_encode %{ifelse($2, B,`
|
||||
int sh = (int)$shift$$constant;
|
||||
if (sh >= 8) sh = 7;
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
as_FloatRegister($src$$reg), sh);', $4, S,`
|
||||
__ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg), sh);', $2, S,`
|
||||
int sh = (int)$shift$$constant;
|
||||
if (sh >= 16) sh = 15;
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
as_FloatRegister($src$$reg), sh);', `
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
__ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg), sh);', `
|
||||
__ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg),
|
||||
(int)$shift$$constant);')
|
||||
%}
|
||||
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm);
|
||||
ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
|
||||
%}')dnl
|
||||
dnl
|
||||
define(`VSRL_IMM', `
|
||||
instruct vsrl$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{
|
||||
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||`
|
||||
',
|
||||
$3$4, 4S, n->as_Vector()->length() == 2 ||`
|
||||
')n->as_Vector()->length() == $3);
|
||||
match(Set dst (URShiftV$4 src (RShiftCntV shift)));
|
||||
instruct vsrl$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
|
||||
PREDICATE(`$1$2', $1, assert_not_var_shift(n))
|
||||
match(Set dst (URShiftV$2 src (RShiftCntV shift)));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %}
|
||||
ins_encode %{ifelse($4, B,`
|
||||
format %{ "ushr $dst, $src, $shift\t# vector ($1$3)" %}
|
||||
ins_encode %{ifelse($2, B,`
|
||||
int sh = (int)$shift$$constant;
|
||||
if (sh >= 8) {
|
||||
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B),
|
||||
__ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($src$$reg));
|
||||
} else {
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
as_FloatRegister($src$$reg), sh);
|
||||
}', $4, S,`
|
||||
__ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg), sh);
|
||||
}', $2, S,`
|
||||
int sh = (int)$shift$$constant;
|
||||
if (sh >= 16) {
|
||||
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B),
|
||||
__ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($src$$reg));
|
||||
} else {
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
as_FloatRegister($src$$reg), sh);
|
||||
__ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg), sh);
|
||||
}', `
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
__ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg),
|
||||
(int)$shift$$constant);')
|
||||
%}
|
||||
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm);
|
||||
ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
|
||||
%}')dnl
|
||||
dnl
|
||||
define(`VSRLA_IMM', `
|
||||
instruct vsrla$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{
|
||||
predicate(n->as_Vector()->length() == $3);
|
||||
match(Set dst (AddV$4 dst (URShiftV$4 src (RShiftCntV shift))));
|
||||
instruct vsrla$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
|
||||
predicate(n->as_Vector()->length() == $1);
|
||||
match(Set dst (AddV$2 dst (URShiftV$2 src (RShiftCntV shift))));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %}
|
||||
ins_encode %{ifelse($4, B,`
|
||||
format %{ "usra $dst, $src, $shift\t# vector ($1$3)" %}
|
||||
ins_encode %{ifelse($2, B,`
|
||||
int sh = (int)$shift$$constant;
|
||||
if (sh < 8) {
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
as_FloatRegister($src$$reg), sh);
|
||||
}', $4, S,`
|
||||
__ usra(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg), sh);
|
||||
}', $2, S,`
|
||||
int sh = (int)$shift$$constant;
|
||||
if (sh < 16) {
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
as_FloatRegister($src$$reg), sh);
|
||||
__ usra(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg), sh);
|
||||
}', `
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
__ usra(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg),
|
||||
(int)$shift$$constant);')
|
||||
%}
|
||||
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm);
|
||||
ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
|
||||
%}')dnl
|
||||
dnl
|
||||
define(`VSRAA_IMM', `
|
||||
instruct vsraa$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{
|
||||
predicate(n->as_Vector()->length() == $3);
|
||||
match(Set dst (AddV$4 dst (RShiftV$4 src (RShiftCntV shift))));
|
||||
instruct vsraa$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
|
||||
predicate(n->as_Vector()->length() == $1);
|
||||
match(Set dst (AddV$2 dst (RShiftV$2 src (RShiftCntV shift))));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %}
|
||||
ins_encode %{ifelse($4, B,`
|
||||
format %{ "ssra $dst, $src, $shift\t# vector ($1$3)" %}
|
||||
ins_encode %{ifelse($2, B,`
|
||||
int sh = (int)$shift$$constant;
|
||||
if (sh >= 8) sh = 7;
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
as_FloatRegister($src$$reg), sh);', $4, S,`
|
||||
__ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg), sh);', $2, S,`
|
||||
int sh = (int)$shift$$constant;
|
||||
if (sh >= 16) sh = 15;
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
as_FloatRegister($src$$reg), sh);', `
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
|
||||
__ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg), sh);', `
|
||||
__ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
|
||||
as_FloatRegister($src$$reg),
|
||||
(int)$shift$$constant);')
|
||||
%}
|
||||
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm);
|
||||
ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
|
||||
%}')dnl
|
||||
dnl $1 $2 $3 $4 $5 $6
|
||||
VSLL(sshl, sshl, 8, B, B, D)
|
||||
VSLL(sshl, sshl, 16, B, B, X)
|
||||
dnl
|
||||
undefine(PREDICATE)dnl
|
||||
dnl
|
||||
dnl $1 $2 $3 $4
|
||||
VSLL(8, B, B, D)
|
||||
VSLL(16, B, B, X)
|
||||
|
||||
// Right shifts with vector shift count on aarch64 SIMD are implemented
|
||||
// as left shift by negative shift count.
|
||||
@ -2199,8 +2253,6 @@ VSLL(sshl, sshl, 16, B, B, X)
|
||||
// LoadVector RShiftCntV
|
||||
// | /
|
||||
// RShiftVI
|
||||
// Note: In inner loop, multiple neg instructions are used, which can be
|
||||
// moved to outer loop and merge into one neg instruction.
|
||||
//
|
||||
// Case 2: The vector shift count is from loading.
|
||||
// This case isn't supported by middle-end now. But it's supported by
|
||||
@ -2210,61 +2262,83 @@ VSLL(sshl, sshl, 16, B, B, X)
|
||||
// | /
|
||||
// RShiftVI
|
||||
//
|
||||
dnl $1 $2 $3 $4 $5 $6
|
||||
VSRA(negr, sshl, 8, B, B, D)
|
||||
VSRA(negr, sshl, 16, B, B, X)
|
||||
VSRL(negr, ushl, 8, B, B, D)
|
||||
VSRL(negr, ushl, 16, B, B, X)
|
||||
VSLL_IMM(shl, shl, 8, B, B, D)
|
||||
VSLL_IMM(shl, shl, 16, B, B, X)
|
||||
VSRA_IMM(sshr, sshr, 8, B, B, D)
|
||||
VSRA_IMM(sshr, sshr, 16, B, B, X)
|
||||
VSRL_IMM(ushr, ushr, 8, B, B, D)
|
||||
VSRL_IMM(ushr, ushr, 16, B, B, X)
|
||||
VSLL(sshl, sshl, 4, S, H, D)
|
||||
VSLL(sshl, sshl, 8, S, H, X)
|
||||
VSRA(negr, sshl, 4, S, H, D)
|
||||
VSRA(negr, sshl, 8, S, H, X)
|
||||
VSRL(negr, ushl, 4, S, H, D)
|
||||
VSRL(negr, ushl, 8, S, H, X)
|
||||
VSLL_IMM(shl, shl, 4, S, H, D)
|
||||
VSLL_IMM(shl, shl, 8, S, H, X)
|
||||
VSRA_IMM(sshr, sshr, 4, S, H, D)
|
||||
VSRA_IMM(sshr, sshr, 8, S, H, X)
|
||||
VSRL_IMM(ushr, ushr, 4, S, H, D)
|
||||
VSRL_IMM(ushr, ushr, 8, S, H, X)
|
||||
VSLL(sshl, sshl, 2, I, S, D)
|
||||
VSLL(sshl, sshl, 4, I, S, X)
|
||||
VSRA(negr, sshl, 2, I, S, D)
|
||||
VSRA(negr, sshl, 4, I, S, X)
|
||||
VSRL(negr, ushl, 2, I, S, D)
|
||||
VSRL(negr, ushl, 4, I, S, X)
|
||||
VSLL_IMM(shl, shl, 2, I, S, D)
|
||||
VSLL_IMM(shl, shl, 4, I, S, X)
|
||||
VSRA_IMM(sshr, sshr, 2, I, S, D)
|
||||
VSRA_IMM(sshr, sshr, 4, I, S, X)
|
||||
VSRL_IMM(ushr, ushr, 2, I, S, D)
|
||||
VSRL_IMM(ushr, ushr, 4, I, S, X)
|
||||
VSLL(sshl, sshl, 2, L, D, X)
|
||||
VSRA(negr, sshl, 2, L, D, X)
|
||||
VSRL(negr, ushl, 2, L, D, X)
|
||||
VSLL_IMM(shl, shl, 2, L, D, X)
|
||||
VSRA_IMM(sshr, sshr, 2, L, D, X)
|
||||
VSRL_IMM(ushr, ushr, 2, L, D, X)
|
||||
VSRAA_IMM(ssra, ssra, 8, B, B, D)
|
||||
VSRAA_IMM(ssra, ssra, 16, B, B, X)
|
||||
VSRAA_IMM(ssra, ssra, 4, S, H, D)
|
||||
VSRAA_IMM(ssra, ssra, 8, S, H, X)
|
||||
VSRAA_IMM(ssra, ssra, 2, I, S, D)
|
||||
VSRAA_IMM(ssra, ssra, 4, I, S, X)
|
||||
VSRAA_IMM(ssra, ssra, 2, L, D, X)
|
||||
VSRLA_IMM(usra, usra, 8, B, B, D)
|
||||
VSRLA_IMM(usra, usra, 16, B, B, X)
|
||||
VSRLA_IMM(usra, usra, 4, S, H, D)
|
||||
VSRLA_IMM(usra, usra, 8, S, H, X)
|
||||
VSRLA_IMM(usra, usra, 2, I, S, D)
|
||||
VSRLA_IMM(usra, usra, 4, I, S, X)
|
||||
VSRLA_IMM(usra, usra, 2, L, D, X)
|
||||
// The negate is conducted in RShiftCntV rule for case 1, whereas it's done in
|
||||
// RShiftV* rules for case 2. Because there exists an optimization opportunity
|
||||
// for case 1, that is, multiple neg instructions in inner loop can be hoisted
|
||||
// to outer loop and merged into one neg instruction.
|
||||
//
|
||||
// Note that ShiftVNode::is_var_shift() indicates whether the vector shift
|
||||
// count is a variable vector(case 2) or not(a vector generated by RShiftCntV,
|
||||
// i.e. case 1).
|
||||
dnl $1 $2 $3 $4
|
||||
VSRA(8, B, B, D)
|
||||
VSRA_VAR(8, B, B, D)
|
||||
VSRA(16, B, B, X)
|
||||
VSRA_VAR(16, B, B, X)
|
||||
VSRL(8, B, B, D)
|
||||
VSRL_VAR(8, B, B, D)
|
||||
VSRL(16, B, B, X)
|
||||
VSRL_VAR(16, B, B, X)
|
||||
VSLL_IMM(8, B, B, D)
|
||||
VSLL_IMM(16, B, B, X)
|
||||
VSRA_IMM(8, B, B, D)
|
||||
VSRA_IMM(16, B, B, X)
|
||||
VSRL_IMM(8, B, B, D)
|
||||
VSRL_IMM(16, B, B, X)
|
||||
VSLL(4, S, H, D)
|
||||
VSLL(8, S, H, X)
|
||||
VSRA(4, S, H, D)
|
||||
VSRA_VAR(4, S, H, D)
|
||||
VSRA(8, S, H, X)
|
||||
VSRA_VAR(8, S, H, X)
|
||||
VSRL(4, S, H, D)
|
||||
VSRL_VAR(4, S, H, D)
|
||||
VSRL(8, S, H, X)
|
||||
VSRL_VAR(8, S, H, X)
|
||||
VSLL_IMM(4, S, H, D)
|
||||
VSLL_IMM(8, S, H, X)
|
||||
VSRA_IMM(4, S, H, D)
|
||||
VSRA_IMM(8, S, H, X)
|
||||
VSRL_IMM(4, S, H, D)
|
||||
VSRL_IMM(8, S, H, X)
|
||||
VSLL(2, I, S, D)
|
||||
VSLL(4, I, S, X)
|
||||
VSRA(2, I, S, D)
|
||||
VSRA_VAR(2, I, S, D)
|
||||
VSRA(4, I, S, X)
|
||||
VSRA_VAR(4, I, S, X)
|
||||
VSRL(2, I, S, D)
|
||||
VSRL_VAR(2, I, S, D)
|
||||
VSRL(4, I, S, X)
|
||||
VSRL_VAR(4, I, S, X)
|
||||
VSLL_IMM(2, I, S, D)
|
||||
VSLL_IMM(4, I, S, X)
|
||||
VSRA_IMM(2, I, S, D)
|
||||
VSRA_IMM(4, I, S, X)
|
||||
VSRL_IMM(2, I, S, D)
|
||||
VSRL_IMM(4, I, S, X)
|
||||
VSLL(2, L, D, X)
|
||||
VSRA(2, L, D, X)
|
||||
VSRA_VAR(2, L, D, X)
|
||||
VSRL(2, L, D, X)
|
||||
VSRL_VAR(2, L, D, X)
|
||||
VSLL_IMM(2, L, D, X)
|
||||
VSRA_IMM(2, L, D, X)
|
||||
VSRL_IMM(2, L, D, X)
|
||||
VSRAA_IMM(8, B, B, D)
|
||||
VSRAA_IMM(16, B, B, X)
|
||||
VSRAA_IMM(4, S, H, D)
|
||||
VSRAA_IMM(8, S, H, X)
|
||||
VSRAA_IMM(2, I, S, D)
|
||||
VSRAA_IMM(4, I, S, X)
|
||||
VSRAA_IMM(2, L, D, X)
|
||||
VSRLA_IMM(8, B, B, D)
|
||||
VSRLA_IMM(16, B, B, X)
|
||||
VSRLA_IMM(4, S, H, D)
|
||||
VSRLA_IMM(8, S, H, X)
|
||||
VSRLA_IMM(2, I, S, D)
|
||||
VSRLA_IMM(4, I, S, X)
|
||||
VSRLA_IMM(2, L, D, X)
|
||||
dnl
|
||||
define(`VMINMAX', `
|
||||
instruct v$1$3`'ifelse($5, S, F, D)`'(vec$6 dst, vec$6 src1, vec$6 src2)
|
||||
|
129
test/micro/org/openjdk/bench/vm/compiler/VectorShiftRight.java
Normal file
129
test/micro/org/openjdk/bench/vm/compiler/VectorShiftRight.java
Normal file
@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
import org.openjdk.jmh.infra.*;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Random;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
public class VectorShiftRight {
|
||||
@Param({"1024"})
|
||||
public int SIZE;
|
||||
|
||||
private byte[] bytesA, bytesB;
|
||||
private short[] shortsA, shortsB;
|
||||
private char[] charsA, charsB;
|
||||
private int[] intsA, intsB;
|
||||
private long[] longsA, longsB;
|
||||
|
||||
@Param("0")
|
||||
private int seed;
|
||||
private Random r = new Random(seed);
|
||||
|
||||
@Param("3")
|
||||
private int shiftCount;
|
||||
|
||||
@Setup
|
||||
public void init() {
|
||||
bytesA = new byte[SIZE];
|
||||
shortsA = new short[SIZE];
|
||||
charsA = new char[SIZE];
|
||||
intsA = new int[SIZE];
|
||||
longsA = new long[SIZE];
|
||||
|
||||
bytesB = new byte[SIZE];
|
||||
shortsB = new short[SIZE];
|
||||
charsB = new char[SIZE];
|
||||
intsB = new int[SIZE];
|
||||
longsB = new long[SIZE];
|
||||
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
bytesA[i] = (byte) r.nextInt();
|
||||
shortsA[i] = (short) r.nextInt();
|
||||
charsA[i] = (char) r.nextInt();
|
||||
intsA[i] = r.nextInt();
|
||||
longsA[i] = r.nextLong();
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void rShiftByte() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
bytesB[i] = (byte) (bytesA[i] >> shiftCount);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void urShiftByte() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
bytesB[i] = (byte) (bytesA[i] >>> shiftCount);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void rShiftShort() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
shortsB[i] = (short) (shortsA[i] >> shiftCount);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void urShiftChar() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
charsB[i] = (char) (charsA[i] >>> shiftCount);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void rShiftInt() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
intsB[i] = intsA[i] >> shiftCount;
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void urShiftInt() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
intsB[i] = intsA[i] >>> shiftCount;
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void rShiftLong() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
longsB[i] = longsA[i] >> shiftCount;
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void urShiftLong() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
longsB[i] = longsA[i] >>> shiftCount;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user