8265263: AArch64: Combine vneg with right shift count

Reviewed-by: adinn, dlong
This commit is contained in:
Hao Sun 2022-03-09 00:52:01 +00:00 committed by Pengfei Li
parent ea19114e66
commit 49245131e9
4 changed files with 818 additions and 383 deletions

View File

@ -1311,6 +1311,9 @@ public:
// predicate controlling translation of CompareAndSwapX // predicate controlling translation of CompareAndSwapX
bool needs_acquiring_load_exclusive(const Node *load); bool needs_acquiring_load_exclusive(const Node *load);
// Assert that the given node is not a variable shift.
bool assert_not_var_shift(const Node* n);
// predicate controlling addressing modes // predicate controlling addressing modes
bool size_fits_all_mem_uses(AddPNode* addp, int shift); bool size_fits_all_mem_uses(AddPNode* addp, int shift);
%} %}
@ -1725,6 +1728,12 @@ bool needs_acquiring_load_exclusive(const Node *n)
return true; return true;
} }
// Assert that the given node is not a variable shift.
//
// The node is viewed as a ShiftV node and the check itself is performed by
// assert(); the function then unconditionally returns true, so it can be
// used as one operand of a boolean predicate expression without affecting
// the outcome of that predicate.
bool assert_not_var_shift(const Node* n) {
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
return true;
}
#define __ _masm. #define __ _masm.
// advance declarations for helper functions to convert register // advance declarations for helper functions to convert register

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
// Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. // Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2020, 2021, Arm Limited. All rights reserved. // Copyright (c) 2020, 2022, Arm Limited. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
// //
// This code is free software; you can redistribute it and/or modify it // This code is free software; you can redistribute it and/or modify it
@ -1972,223 +1972,277 @@ VLOGICAL(xor, eor, xor, Xor, 16, B, X)
// ------------------------------ Shift --------------------------------------- // ------------------------------ Shift ---------------------------------------
dnl dnl
define(`VSHIFTCNT', ` define(`VSLCNT', `
instruct vshiftcnt$3$4`'(vec$5 dst, iRegIorL2I cnt) %{ instruct vslcnt$1$2`'(vec$3 dst, iRegIorL2I cnt) %{
predicate(UseSVE == 0 && (ifelse($3, 8, n->as_Vector()->length_in_bytes() == 4 ||` predicate(UseSVE == 0 && ifelse($1, 8,
')n->as_Vector()->length_in_bytes() == $3)); (n->as_Vector()->length_in_bytes() == 4 ||`
'n->as_Vector()->length_in_bytes() == $1),
n->as_Vector()->length_in_bytes() == $1));
match(Set dst (LShiftCntV cnt)); match(Set dst (LShiftCntV cnt));
match(Set dst (RShiftCntV cnt)); ins_cost(INSN_COST);
format %{ "$1 $dst, $cnt\t# shift count vector ($3$4)" %} format %{ "dup $dst, $cnt\t# shift count vector ($1$2)" %}
ins_encode %{ ins_encode %{
__ $2(as_FloatRegister($dst$$reg), __ T$3$4, as_Register($cnt$$reg)); __ dup(as_FloatRegister($dst$$reg), __ T$1$2, as_Register($cnt$$reg));
%} %}
ins_pipe(vdup_reg_reg`'ifelse($5, D, 64, 128)); ins_pipe(vdup_reg_reg`'ifelse($3, D, 64, 128));
%}')dnl %}')dnl
dnl $1 $2 $3 $4 $5 dnl
VSHIFTCNT(dup, dup, 8, B, D) define(`VSRCNT', `
VSHIFTCNT(dup, dup, 16, B, X) instruct vsrcnt$1$2`'(vec$3 dst, iRegIorL2I cnt) %{
predicate(UseSVE == 0 && ifelse($1, 8,
(n->as_Vector()->length_in_bytes() == 4 ||`
'n->as_Vector()->length_in_bytes() == $1),
n->as_Vector()->length_in_bytes() == $1));
match(Set dst (RShiftCntV cnt));
ins_cost(INSN_COST * 2);
format %{ "negw rscratch1, $cnt\t"
"dup $dst, rscratch1\t# shift count vector ($1$2)" %}
ins_encode %{
__ negw(rscratch1, as_Register($cnt$$reg));
__ dup(as_FloatRegister($dst$$reg), __ T$1$2, rscratch1);
%}
ins_pipe(vdup_reg_reg`'ifelse($3, D, 64, 128));
%}')dnl
dnl
// Vector shift count
// Note-1: Only the low 8 bits of each element are used, so it doesn't matter
// whether we treat the count as ints or bytes here.
// Note-2: For RShiftCntV the shift value is additionally negated. See the
// comments on the vsra8B rule for more details.
dnl $1 $2 $3
VSLCNT(8, B, D)
VSLCNT(16, B, X)
VSRCNT(8, B, D)
VSRCNT(16, B, X)
dnl
define(`PREDICATE',
`ifelse($1, 8B,
ifelse($3, `', `predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);',
`predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&`
'$3);'),
$1, 4S,
ifelse($3, `', `predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);',
`predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&`
'$3);'),
ifelse($3, `', `predicate(n->as_Vector()->length() == $2);',
`predicate(n->as_Vector()->length() == $2 && $3);'))')dnl
dnl dnl
define(`VSLL', ` define(`VSLL', `
instruct vsll$3$4`'(vec$6 dst, vec$6 src, vec$6 shift) %{ instruct vsll$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` PREDICATE(`$1$2', $1, )
', match(Set dst (LShiftV$2 src shift));
$3$4, 4S, n->as_Vector()->length() == 2 ||`
')n->as_Vector()->length() == $3);
match(Set dst (LShiftV$4 src shift));
ins_cost(INSN_COST); ins_cost(INSN_COST);
format %{ "$1 $dst,$src,$shift\t# vector ($3$5)" %} format %{ "sshl $dst,$src,$shift\t# vector ($1$3)" %}
ins_encode %{ ins_encode %{
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($shift$$reg)); as_FloatRegister($shift$$reg));
%} %}
ins_pipe(vshift`'ifelse($6, D, 64, 128)); ins_pipe(vshift`'ifelse($4, D, 64, 128));
%}')dnl %}')dnl
dnl dnl
define(`VSRA', ` define(`VSRA', `
instruct vsra$3$4`'(vec$6 dst, vec$6 src, vec$6 shift, vec$6 tmp) %{ instruct vsra$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` PREDICATE(`$1$2', $1, !n->as_ShiftV()->is_var_shift())
', match(Set dst (RShiftV$2 src shift));
$3$4, 4S, n->as_Vector()->length() == 2 ||`
')n->as_Vector()->length() == $3);
match(Set dst (RShiftV$4 src shift));
ins_cost(INSN_COST); ins_cost(INSN_COST);
effect(TEMP tmp); format %{ "sshl $dst,$src,$shift\t# vector ($1$3)" %}
format %{ "$1 $tmp,$shift\t"
"$2 $dst,$src,$tmp\t# vector ($3$5)" %}
ins_encode %{ ins_encode %{
__ $1(as_FloatRegister($tmp$$reg), __ T`'ifelse($6, D, 8B, 16B), __ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($shift$$reg));
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($tmp$$reg)); as_FloatRegister($shift$$reg));
%} %}
ins_pipe(vshift`'ifelse($6, D, 64, 128)); ins_pipe(vshift`'ifelse($4, D, 64, 128));
%}')dnl
dnl
define(`VSRA_VAR', `
instruct vsra$1$2_var`'(vec$4 dst, vec$4 src, vec$4 shift) %{
PREDICATE(`$1$2', $1, n->as_ShiftV()->is_var_shift())
match(Set dst (RShiftV$2 src shift));
ins_cost(INSN_COST * 2);
effect(TEMP_DEF dst);
format %{ "negr $dst,$shift\t"
"sshl $dst,$src,$dst\t# vector ($1$3)" %}
ins_encode %{
__ negr(as_FloatRegister($dst$$reg), __ T`'ifelse($4, D, 8B, 16B),
as_FloatRegister($shift$$reg));
__ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg),
as_FloatRegister($dst$$reg));
%}
ins_pipe(vshift`'ifelse($4, D, 64, 128));
%}')dnl %}')dnl
dnl dnl
define(`VSRL', ` define(`VSRL', `
instruct vsrl$3$4`'(vec$6 dst, vec$6 src, vec$6 shift, vec$6 tmp) %{ instruct vsrl$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` PREDICATE(`$1$2', $1, !n->as_ShiftV()->is_var_shift())
', match(Set dst (URShiftV$2 src shift));
$3$4, 4S, n->as_Vector()->length() == 2 ||`
')n->as_Vector()->length() == $3);
match(Set dst (URShiftV$4 src shift));
ins_cost(INSN_COST); ins_cost(INSN_COST);
effect(TEMP tmp); format %{ "ushl $dst,$src,$shift\t# vector ($1$3)" %}
format %{ "$1 $tmp,$shift\t"
"$2 $dst,$src,$tmp\t# vector ($3$5)" %}
ins_encode %{ ins_encode %{
__ $1(as_FloatRegister($tmp$$reg), __ T`'ifelse($6, D, 8B, 16B), __ ushl(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($shift$$reg));
__ $2(as_FloatRegister($dst$$reg), __ T$3$5,
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($tmp$$reg)); as_FloatRegister($shift$$reg));
%} %}
ins_pipe(vshift`'ifelse($6, D, 64, 128)); ins_pipe(vshift`'ifelse($4, D, 64, 128));
%}')dnl
dnl
define(`VSRL_VAR', `
instruct vsrl$1$2_var`'(vec$4 dst, vec$4 src, vec$4 shift) %{
PREDICATE(`$1$2', $1, n->as_ShiftV()->is_var_shift())
match(Set dst (URShiftV$2 src shift));
ins_cost(INSN_COST * 2);
effect(TEMP_DEF dst);
format %{ "negr $dst,$shift\t"
"ushl $dst,$src,$dst\t# vector ($1$3)" %}
ins_encode %{
__ negr(as_FloatRegister($dst$$reg), __ T`'ifelse($4, D, 8B, 16B),
as_FloatRegister($shift$$reg));
__ ushl(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg),
as_FloatRegister($dst$$reg));
%}
ins_pipe(vshift`'ifelse($4, D, 64, 128));
%}')dnl %}')dnl
dnl dnl
define(`VSLL_IMM', ` define(`VSLL_IMM', `
instruct vsll$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{ instruct vsll$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` PREDICATE(`$1$2', $1, assert_not_var_shift(n))
', match(Set dst (LShiftV$2 src (LShiftCntV shift)));
$3$4, 4S, n->as_Vector()->length() == 2 ||`
')n->as_Vector()->length() == $3);
match(Set dst (LShiftV$4 src (LShiftCntV shift)));
ins_cost(INSN_COST); ins_cost(INSN_COST);
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %} format %{ "shl $dst, $src, $shift\t# vector ($1$3)" %}
ins_encode %{ifelse($4, B,` ins_encode %{ifelse($2, B,`
int sh = (int)$shift$$constant; int sh = (int)$shift$$constant;
if (sh >= 8) { if (sh >= 8) {
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B), __ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($src$$reg)); as_FloatRegister($src$$reg));
} else { } else {
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ shl(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), sh); as_FloatRegister($src$$reg), sh);
}', $4, S,` }', $2, S,`
int sh = (int)$shift$$constant; int sh = (int)$shift$$constant;
if (sh >= 16) { if (sh >= 16) {
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B), __ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($src$$reg)); as_FloatRegister($src$$reg));
} else { } else {
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ shl(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), sh); as_FloatRegister($src$$reg), sh);
}', ` }', `
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ shl(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
(int)$shift$$constant);') (int)$shift$$constant);')
%} %}
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm); ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl %}')dnl
dnl
define(`VSRA_IMM', ` define(`VSRA_IMM', `
instruct vsra$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{ instruct vsra$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` PREDICATE(`$1$2', $1, assert_not_var_shift(n))
', match(Set dst (RShiftV$2 src (RShiftCntV shift)));
$3$4, 4S, n->as_Vector()->length() == 2 ||`
')n->as_Vector()->length() == $3);
match(Set dst (RShiftV$4 src (RShiftCntV shift)));
ins_cost(INSN_COST); ins_cost(INSN_COST);
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %} format %{ "sshr $dst, $src, $shift\t# vector ($1$3)" %}
ins_encode %{ifelse($4, B,` ins_encode %{ifelse($2, B,`
int sh = (int)$shift$$constant; int sh = (int)$shift$$constant;
if (sh >= 8) sh = 7; if (sh >= 8) sh = 7;
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), sh);', $4, S,` as_FloatRegister($src$$reg), sh);', $2, S,`
int sh = (int)$shift$$constant; int sh = (int)$shift$$constant;
if (sh >= 16) sh = 15; if (sh >= 16) sh = 15;
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), sh);', ` as_FloatRegister($src$$reg), sh);', `
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
(int)$shift$$constant);') (int)$shift$$constant);')
%} %}
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm); ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl %}')dnl
dnl dnl
define(`VSRL_IMM', ` define(`VSRL_IMM', `
instruct vsrl$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{ instruct vsrl$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` PREDICATE(`$1$2', $1, assert_not_var_shift(n))
', match(Set dst (URShiftV$2 src (RShiftCntV shift)));
$3$4, 4S, n->as_Vector()->length() == 2 ||`
')n->as_Vector()->length() == $3);
match(Set dst (URShiftV$4 src (RShiftCntV shift)));
ins_cost(INSN_COST); ins_cost(INSN_COST);
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %} format %{ "ushr $dst, $src, $shift\t# vector ($1$3)" %}
ins_encode %{ifelse($4, B,` ins_encode %{ifelse($2, B,`
int sh = (int)$shift$$constant; int sh = (int)$shift$$constant;
if (sh >= 8) { if (sh >= 8) {
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B), __ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($src$$reg)); as_FloatRegister($src$$reg));
} else { } else {
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), sh); as_FloatRegister($src$$reg), sh);
}', $4, S,` }', $2, S,`
int sh = (int)$shift$$constant; int sh = (int)$shift$$constant;
if (sh >= 16) { if (sh >= 16) {
__ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B), __ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($src$$reg)); as_FloatRegister($src$$reg));
} else { } else {
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), sh); as_FloatRegister($src$$reg), sh);
}', ` }', `
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
(int)$shift$$constant);') (int)$shift$$constant);')
%} %}
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm); ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl %}')dnl
dnl dnl
define(`VSRLA_IMM', ` define(`VSRLA_IMM', `
instruct vsrla$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{ instruct vsrla$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
predicate(n->as_Vector()->length() == $3); predicate(n->as_Vector()->length() == $1);
match(Set dst (AddV$4 dst (URShiftV$4 src (RShiftCntV shift)))); match(Set dst (AddV$2 dst (URShiftV$2 src (RShiftCntV shift))));
ins_cost(INSN_COST); ins_cost(INSN_COST);
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %} format %{ "usra $dst, $src, $shift\t# vector ($1$3)" %}
ins_encode %{ifelse($4, B,` ins_encode %{ifelse($2, B,`
int sh = (int)$shift$$constant; int sh = (int)$shift$$constant;
if (sh < 8) { if (sh < 8) {
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ usra(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), sh); as_FloatRegister($src$$reg), sh);
}', $4, S,` }', $2, S,`
int sh = (int)$shift$$constant; int sh = (int)$shift$$constant;
if (sh < 16) { if (sh < 16) {
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ usra(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), sh); as_FloatRegister($src$$reg), sh);
}', ` }', `
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ usra(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
(int)$shift$$constant);') (int)$shift$$constant);')
%} %}
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm); ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl %}')dnl
dnl dnl
define(`VSRAA_IMM', ` define(`VSRAA_IMM', `
instruct vsraa$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{ instruct vsraa$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
predicate(n->as_Vector()->length() == $3); predicate(n->as_Vector()->length() == $1);
match(Set dst (AddV$4 dst (RShiftV$4 src (RShiftCntV shift)))); match(Set dst (AddV$2 dst (RShiftV$2 src (RShiftCntV shift))));
ins_cost(INSN_COST); ins_cost(INSN_COST);
format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %} format %{ "ssra $dst, $src, $shift\t# vector ($1$3)" %}
ins_encode %{ifelse($4, B,` ins_encode %{ifelse($2, B,`
int sh = (int)$shift$$constant; int sh = (int)$shift$$constant;
if (sh >= 8) sh = 7; if (sh >= 8) sh = 7;
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), sh);', $4, S,` as_FloatRegister($src$$reg), sh);', $2, S,`
int sh = (int)$shift$$constant; int sh = (int)$shift$$constant;
if (sh >= 16) sh = 15; if (sh >= 16) sh = 15;
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), sh);', ` as_FloatRegister($src$$reg), sh);', `
__ $2(as_FloatRegister($dst$$reg), __ T$3$5, __ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg),
(int)$shift$$constant);') (int)$shift$$constant);')
%} %}
ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm); ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl %}')dnl
dnl $1 $2 $3 $4 $5 $6 dnl
VSLL(sshl, sshl, 8, B, B, D) undefine(PREDICATE)dnl
VSLL(sshl, sshl, 16, B, B, X) dnl
dnl $1 $2 $3 $4
VSLL(8, B, B, D)
VSLL(16, B, B, X)
// Right shifts with vector shift count on aarch64 SIMD are implemented // Right shifts with vector shift count on aarch64 SIMD are implemented
// as left shift by negative shift count. // as left shift by negative shift count.
@ -2199,8 +2253,6 @@ VSLL(sshl, sshl, 16, B, B, X)
// LoadVector RShiftCntV // LoadVector RShiftCntV
// | / // | /
// RShiftVI // RShiftVI
// Note: In inner loop, multiple neg instructions are used, which can be
// moved to outer loop and merge into one neg instruction.
// //
// Case 2: The vector shift count is from loading. // Case 2: The vector shift count is from loading.
// This case isn't supported by middle-end now. But it's supported by // This case isn't supported by middle-end now. But it's supported by
@ -2210,61 +2262,83 @@ VSLL(sshl, sshl, 16, B, B, X)
// | / // | /
// RShiftVI // RShiftVI
// //
dnl $1 $2 $3 $4 $5 $6 // The negate is conducted in RShiftCntV rule for case 1, whereas it's done in
VSRA(negr, sshl, 8, B, B, D) // RShiftV* rules for case 2. Because there exists an optimization opportunity
VSRA(negr, sshl, 16, B, B, X) // for case 1, that is, multiple neg instructions in inner loop can be hoisted
VSRL(negr, ushl, 8, B, B, D) // to outer loop and merged into one neg instruction.
VSRL(negr, ushl, 16, B, B, X) //
VSLL_IMM(shl, shl, 8, B, B, D) // Note that ShiftVNode::is_var_shift() indicates whether the vector shift
VSLL_IMM(shl, shl, 16, B, B, X) // count is a variable vector(case 2) or not(a vector generated by RShiftCntV,
VSRA_IMM(sshr, sshr, 8, B, B, D) // i.e. case 1).
VSRA_IMM(sshr, sshr, 16, B, B, X) dnl $1 $2 $3 $4
VSRL_IMM(ushr, ushr, 8, B, B, D) VSRA(8, B, B, D)
VSRL_IMM(ushr, ushr, 16, B, B, X) VSRA_VAR(8, B, B, D)
VSLL(sshl, sshl, 4, S, H, D) VSRA(16, B, B, X)
VSLL(sshl, sshl, 8, S, H, X) VSRA_VAR(16, B, B, X)
VSRA(negr, sshl, 4, S, H, D) VSRL(8, B, B, D)
VSRA(negr, sshl, 8, S, H, X) VSRL_VAR(8, B, B, D)
VSRL(negr, ushl, 4, S, H, D) VSRL(16, B, B, X)
VSRL(negr, ushl, 8, S, H, X) VSRL_VAR(16, B, B, X)
VSLL_IMM(shl, shl, 4, S, H, D) VSLL_IMM(8, B, B, D)
VSLL_IMM(shl, shl, 8, S, H, X) VSLL_IMM(16, B, B, X)
VSRA_IMM(sshr, sshr, 4, S, H, D) VSRA_IMM(8, B, B, D)
VSRA_IMM(sshr, sshr, 8, S, H, X) VSRA_IMM(16, B, B, X)
VSRL_IMM(ushr, ushr, 4, S, H, D) VSRL_IMM(8, B, B, D)
VSRL_IMM(ushr, ushr, 8, S, H, X) VSRL_IMM(16, B, B, X)
VSLL(sshl, sshl, 2, I, S, D) VSLL(4, S, H, D)
VSLL(sshl, sshl, 4, I, S, X) VSLL(8, S, H, X)
VSRA(negr, sshl, 2, I, S, D) VSRA(4, S, H, D)
VSRA(negr, sshl, 4, I, S, X) VSRA_VAR(4, S, H, D)
VSRL(negr, ushl, 2, I, S, D) VSRA(8, S, H, X)
VSRL(negr, ushl, 4, I, S, X) VSRA_VAR(8, S, H, X)
VSLL_IMM(shl, shl, 2, I, S, D) VSRL(4, S, H, D)
VSLL_IMM(shl, shl, 4, I, S, X) VSRL_VAR(4, S, H, D)
VSRA_IMM(sshr, sshr, 2, I, S, D) VSRL(8, S, H, X)
VSRA_IMM(sshr, sshr, 4, I, S, X) VSRL_VAR(8, S, H, X)
VSRL_IMM(ushr, ushr, 2, I, S, D) VSLL_IMM(4, S, H, D)
VSRL_IMM(ushr, ushr, 4, I, S, X) VSLL_IMM(8, S, H, X)
VSLL(sshl, sshl, 2, L, D, X) VSRA_IMM(4, S, H, D)
VSRA(negr, sshl, 2, L, D, X) VSRA_IMM(8, S, H, X)
VSRL(negr, ushl, 2, L, D, X) VSRL_IMM(4, S, H, D)
VSLL_IMM(shl, shl, 2, L, D, X) VSRL_IMM(8, S, H, X)
VSRA_IMM(sshr, sshr, 2, L, D, X) VSLL(2, I, S, D)
VSRL_IMM(ushr, ushr, 2, L, D, X) VSLL(4, I, S, X)
VSRAA_IMM(ssra, ssra, 8, B, B, D) VSRA(2, I, S, D)
VSRAA_IMM(ssra, ssra, 16, B, B, X) VSRA_VAR(2, I, S, D)
VSRAA_IMM(ssra, ssra, 4, S, H, D) VSRA(4, I, S, X)
VSRAA_IMM(ssra, ssra, 8, S, H, X) VSRA_VAR(4, I, S, X)
VSRAA_IMM(ssra, ssra, 2, I, S, D) VSRL(2, I, S, D)
VSRAA_IMM(ssra, ssra, 4, I, S, X) VSRL_VAR(2, I, S, D)
VSRAA_IMM(ssra, ssra, 2, L, D, X) VSRL(4, I, S, X)
VSRLA_IMM(usra, usra, 8, B, B, D) VSRL_VAR(4, I, S, X)
VSRLA_IMM(usra, usra, 16, B, B, X) VSLL_IMM(2, I, S, D)
VSRLA_IMM(usra, usra, 4, S, H, D) VSLL_IMM(4, I, S, X)
VSRLA_IMM(usra, usra, 8, S, H, X) VSRA_IMM(2, I, S, D)
VSRLA_IMM(usra, usra, 2, I, S, D) VSRA_IMM(4, I, S, X)
VSRLA_IMM(usra, usra, 4, I, S, X) VSRL_IMM(2, I, S, D)
VSRLA_IMM(usra, usra, 2, L, D, X) VSRL_IMM(4, I, S, X)
VSLL(2, L, D, X)
VSRA(2, L, D, X)
VSRA_VAR(2, L, D, X)
VSRL(2, L, D, X)
VSRL_VAR(2, L, D, X)
VSLL_IMM(2, L, D, X)
VSRA_IMM(2, L, D, X)
VSRL_IMM(2, L, D, X)
VSRAA_IMM(8, B, B, D)
VSRAA_IMM(16, B, B, X)
VSRAA_IMM(4, S, H, D)
VSRAA_IMM(8, S, H, X)
VSRAA_IMM(2, I, S, D)
VSRAA_IMM(4, I, S, X)
VSRAA_IMM(2, L, D, X)
VSRLA_IMM(8, B, B, D)
VSRLA_IMM(16, B, B, X)
VSRLA_IMM(4, S, H, D)
VSRLA_IMM(8, S, H, X)
VSRLA_IMM(2, I, S, D)
VSRLA_IMM(4, I, S, X)
VSRLA_IMM(2, L, D, X)
dnl dnl
define(`VMINMAX', ` define(`VMINMAX', `
instruct v$1$3`'ifelse($5, S, F, D)`'(vec$6 dst, vec$6 src1, vec$6 src2) instruct v$1$3`'ifelse($5, S, F, D)`'(vec$6 dst, vec$6 src1, vec$6 src2)

View File

@ -0,0 +1,129 @@
/*
* Copyright (c) 2022, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.*;
import java.util.concurrent.TimeUnit;
import java.util.Random;
/**
 * JMH micro-benchmark for signed ({@code >>}) and unsigned ({@code >>>})
 * right shifts of array elements by a loop-invariant, non-constant shift count.
 *
 * <p>Each benchmark method reads every element of an "A" array, shifts it by
 * {@link #shiftCount} and stores the result into the matching "B" array.
 * The loops are presumably meant to be auto-vectorized by the JIT compiler;
 * that is not something this class can enforce.
 *
 * <p>Parameters:
 * <ul>
 *   <li>{@code SIZE} - number of elements in every array</li>
 *   <li>{@code seed} - seed of the random generator that fills the inputs</li>
 *   <li>{@code shiftCount} - shift distance applied by every benchmark</li>
 * </ul>
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
public class VectorShiftRight {
    @Param({"1024"})
    public int SIZE;

    // Input ("A") and output ("B") arrays, one pair per element type.
    private byte[] bytesA, bytesB;
    private short[] shortsA, shortsB;
    private char[] charsA, charsB;
    private int[] intsA, intsB;
    private long[] longsA, longsB;

    @Param("0")
    private int seed;

    // Deliberately NOT initialized here: a field initializer runs while the
    // state object is being constructed, i.e. before JMH has injected the
    // @Param value into 'seed', so the generator would always be seeded
    // with 0. It is created in init() instead.
    private Random r;

    @Param("3")
    private int shiftCount;

    /**
     * Allocates all arrays and fills the input arrays with pseudo-random
     * values derived from {@code seed}. The output arrays stay zeroed.
     */
    @Setup
    public void init() {
        // 'seed' has been injected by now, so the parameter takes effect.
        r = new Random(seed);

        bytesA = new byte[SIZE];
        shortsA = new short[SIZE];
        charsA = new char[SIZE];
        intsA = new int[SIZE];
        longsA = new long[SIZE];

        bytesB = new byte[SIZE];
        shortsB = new short[SIZE];
        charsB = new char[SIZE];
        intsB = new int[SIZE];
        longsB = new long[SIZE];

        // Keep the draw order (byte, short, char, int, long per index) so the
        // generated data for a given seed stays the same as before.
        for (int i = 0; i < SIZE; i++) {
            bytesA[i] = (byte) r.nextInt();
            shortsA[i] = (short) r.nextInt();
            charsA[i] = (char) r.nextInt();
            intsA[i] = r.nextInt();
            longsA[i] = r.nextLong();
        }
    }

    /** Signed right shift of bytes. */
    @Benchmark
    public void rShiftByte() {
        for (int i = 0; i < SIZE; i++) {
            bytesB[i] = (byte) (bytesA[i] >> shiftCount);
        }
    }

    /** Unsigned right shift of bytes (operand is promoted to int first). */
    @Benchmark
    public void urShiftByte() {
        for (int i = 0; i < SIZE; i++) {
            bytesB[i] = (byte) (bytesA[i] >>> shiftCount);
        }
    }

    /** Signed right shift of shorts. */
    @Benchmark
    public void rShiftShort() {
        for (int i = 0; i < SIZE; i++) {
            shortsB[i] = (short) (shortsA[i] >> shiftCount);
        }
    }

    /** Unsigned right shift of chars. */
    @Benchmark
    public void urShiftChar() {
        for (int i = 0; i < SIZE; i++) {
            charsB[i] = (char) (charsA[i] >>> shiftCount);
        }
    }

    /** Signed right shift of ints. */
    @Benchmark
    public void rShiftInt() {
        for (int i = 0; i < SIZE; i++) {
            intsB[i] = intsA[i] >> shiftCount;
        }
    }

    /** Unsigned right shift of ints. */
    @Benchmark
    public void urShiftInt() {
        for (int i = 0; i < SIZE; i++) {
            intsB[i] = intsA[i] >>> shiftCount;
        }
    }

    /** Signed right shift of longs. */
    @Benchmark
    public void rShiftLong() {
        for (int i = 0; i < SIZE; i++) {
            longsB[i] = longsA[i] >> shiftCount;
        }
    }

    /** Unsigned right shift of longs. */
    @Benchmark
    public void urShiftLong() {
        for (int i = 0; i < SIZE; i++) {
            longsB[i] = longsA[i] >>> shiftCount;
        }
    }
}