8269559: AArch64: Implement string_compare intrinsic in SVE

Reviewed-by: ngasson, aph
This commit is contained in:
TatWai Chong 2021-10-14 05:27:12 +00:00 committed by Nick Gasson
parent d9e03e42af
commit 8b1b6f9fb3
11 changed files with 516 additions and 96 deletions

View File

@ -1199,6 +1199,9 @@ reg_class gov_pr (
// P7, non-allocatable, preserved with all elements preset to TRUE.
);
// Singleton classes for the SVE predicate registers P0 and P1
reg_class p0_reg(P0);
reg_class p1_reg(P1);
// Singleton class for condition codes
reg_class int_flags(RFLAGS);
@ -5537,6 +5540,24 @@ operand pRegGov()
interface(REG_INTER);
%}
// Predicate register operand (matches RegVectMask) whose allocation is
// constrained to the register class p0_reg.
operand pRegGov_P0()
%{
constraint(ALLOC_IN_RC(p0_reg));
match(RegVectMask);
op_cost(0);
format %{ %}
interface(REG_INTER);
%}
// Predicate register operand (matches RegVectMask) whose allocation is
// constrained to the register class p1_reg.
operand pRegGov_P1()
%{
constraint(ALLOC_IN_RC(p1_reg));
match(RegVectMask);
op_cost(0);
format %{ %}
interface(REG_INTER);
%}
// Flags register, used as output of signed compare instructions
// note that on AArch64 we also use this register as the output for
@ -16491,7 +16512,7 @@ instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp,
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
predicate((UseSVE == 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -16501,7 +16522,7 @@ instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 c
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register,
fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
fnoreg, fnoreg, fnoreg, pnoreg, pnoreg, StrIntrinsicNode::UU);
%}
ins_pipe(pipe_class_memory);
%}
@ -16509,7 +16530,7 @@ instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 c
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
predicate((UseSVE == 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -16518,7 +16539,7 @@ instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 c
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register,
fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
fnoreg, fnoreg, fnoreg, pnoreg, pnoreg, StrIntrinsicNode::LL);
%}
ins_pipe(pipe_class_memory);
%}
@ -16527,7 +16548,7 @@ instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
predicate((UseSVE == 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -16537,8 +16558,8 @@ instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
$vtmp3$$FloatRegister, StrIntrinsicNode::UL);
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
pnoreg, pnoreg, StrIntrinsicNode::UL);
%}
ins_pipe(pipe_class_memory);
%}
@ -16547,7 +16568,7 @@ instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
predicate((UseSVE == 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -16557,8 +16578,8 @@ instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
$vtmp3$$FloatRegister,StrIntrinsicNode::LU);
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
pnoreg, pnoreg, StrIntrinsicNode::LU);
%}
ins_pipe(pipe_class_memory);
%}

View File

@ -3810,6 +3810,105 @@ instruct stringU_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
ins_pipe(pipe_class_memory);
%}
// Intrinsics for String.compareTo()
// Note that Z registers alias the corresponding NEON registers. We declare the vector operands of
// these string_compare variants as NEON register types for convenience, so that the prototype of
// string_compare can be shared by all variants.
// StrComp rule for the LL encoding, selected only when UseSVE > 0.
// tmp1, tmp2, vtmp1, vtmp2, pgtmp1 and pgtmp2 are temporaries; the string
// pointers and counts are used and killed, and the flags are killed.
instruct string_compareLL_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
vRegD_V0 vtmp1, vRegD_V1 vtmp2, pRegGov_P0 pgtmp1,
pRegGov_P1 pgtmp2, rFlagsReg cr)
%{
predicate((UseSVE > 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp1, TEMP pgtmp2,
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # USE sve" %}
ins_encode %{
// Count is in 8-bit bytes; non-Compact chars are 16 bits.
// This rule declares no third vector temporary, so fnoreg is passed in the
// vtmp3 position of string_compare.
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, fnoreg,
as_PRegister($pgtmp1$$reg), as_PRegister($pgtmp2$$reg),
StrIntrinsicNode::LL);
%}
ins_pipe(pipe_class_memory);
%}
// StrComp rule for the LU encoding, selected only when UseSVE > 0.
// tmp1, tmp2, vtmp1, vtmp2, pgtmp1 and pgtmp2 are temporaries; the string
// pointers and counts are used and killed, and the flags are killed.
instruct string_compareLU_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
vRegD_V0 vtmp1, vRegD_V1 vtmp2, pRegGov_P0 pgtmp1,
pRegGov_P1 pgtmp2, rFlagsReg cr)
%{
predicate((UseSVE > 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp1, TEMP pgtmp2,
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # USE sve" %}
ins_encode %{
// Count is in 8-bit bytes; non-Compact chars are 16 bits.
// This rule declares no third vector temporary, so fnoreg is passed in the
// vtmp3 position of string_compare.
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, fnoreg,
as_PRegister($pgtmp1$$reg), as_PRegister($pgtmp2$$reg),
StrIntrinsicNode::LU);
%}
ins_pipe(pipe_class_memory);
%}
// StrComp rule for the UL encoding, selected only when UseSVE > 0.
// tmp1, tmp2, vtmp1, vtmp2, pgtmp1 and pgtmp2 are temporaries; the string
// pointers and counts are used and killed, and the flags are killed.
instruct string_compareUL_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
vRegD_V0 vtmp1, vRegD_V1 vtmp2, pRegGov_P0 pgtmp1,
pRegGov_P1 pgtmp2, rFlagsReg cr)
%{
predicate((UseSVE > 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp1, TEMP pgtmp2,
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # USE sve" %}
ins_encode %{
// Count is in 8-bit bytes; non-Compact chars are 16 bits.
// This rule declares no third vector temporary, so fnoreg is passed in the
// vtmp3 position of string_compare.
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, fnoreg,
as_PRegister($pgtmp1$$reg), as_PRegister($pgtmp2$$reg),
StrIntrinsicNode::UL);
%}
ins_pipe(pipe_class_memory);
%}
// StrComp rule for the UU encoding, selected only when UseSVE > 0.
// tmp1, tmp2, vtmp1, vtmp2, pgtmp1 and pgtmp2 are temporaries; the string
// pointers and counts are used and killed, and the flags are killed.
instruct string_compareUU_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
vRegD_V0 vtmp1, vRegD_V1 vtmp2, pRegGov_P0 pgtmp1,
pRegGov_P1 pgtmp2, rFlagsReg cr)
%{
predicate((UseSVE > 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp1, TEMP pgtmp2,
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # USE sve" %}
ins_encode %{
// Count is in 8-bit bytes; non-Compact chars are 16 bits.
// This rule declares no third vector temporary, so fnoreg is passed in the
// vtmp3 position of string_compare.
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, fnoreg,
as_PRegister($pgtmp1$$reg), as_PRegister($pgtmp2$$reg),
StrIntrinsicNode::UU);
%}
ins_pipe(pipe_class_memory);
%}
// ---------------------------- Vector mask reductions ---------------------------
instruct vmask_truecount(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{

View File

@ -2513,6 +2513,43 @@ dnl $1 $2 $3
STRING_INDEXOF_CHAR(L, Latin1, true)
STRING_INDEXOF_CHAR(U, UTF16, false)
// Intrinsics for String.compareTo()
// Note that Z registers alias the corresponding NEON registers. We declare the vector operands of
// these string_compare variants as NEON register types for convenience, so that the prototype of
// string_compare can be shared by all variants.
dnl
dnl STRING_COMPARETO($1      )
dnl STRING_COMPARETO(encoding)
dnl Expands to one instruct named string_compare<encoding>_sve. The encoding
dnl argument is also used as the StrIntrinsicNode constant in the predicate
dnl and in the string_compare call.
define(`STRING_COMPARETO', `
instruct string_compare$1_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
vRegD_V0 vtmp1, vRegD_V1 vtmp2, pRegGov_P0 pgtmp1,
pRegGov_P1 pgtmp2, rFlagsReg cr)
%{
predicate((UseSVE > 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::$1));
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp1, TEMP pgtmp2,
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # USE sve" %}
ins_encode %{
// Count is in 8-bit bytes; non-Compact chars are 16 bits.
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, fnoreg,
as_PRegister($pgtmp1$$reg), as_PRegister($pgtmp2$$reg),
StrIntrinsicNode::$1);
%}
ins_pipe(pipe_class_memory);
%}')dnl
dnl $1
STRING_COMPARETO(LL)
STRING_COMPARETO(LU)
STRING_COMPARETO(UL)
STRING_COMPARETO(UU)
dnl
dnl
dnl VMASK_REDUCTION($1, $2, $3 )
dnl VMASK_REDUCTION(suffix, op_name, cost)

View File

@ -676,7 +676,8 @@ void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
// Compare strings.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
PRegister pgtmp1, PRegister pgtmp2, int ae) {
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
DIFF, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
SHORT_LOOP_START, TAIL_CHECK;

View File

@ -32,7 +32,8 @@
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
Register tmp1, Register tmp2, FloatRegister vtmp1,
FloatRegister vtmp2, FloatRegister vtmp3, int ae);
FloatRegister vtmp2, FloatRegister vtmp3,
PRegister pgtmp1, PRegister pgtmp2, int ae);
void string_indexof(Register str1, Register str2,
Register cnt1, Register cnt2,

View File

@ -263,6 +263,8 @@ class PRegisterImpl: public AbstractRegisterImpl {
};
// The predicate registers of SVE.
CONSTANT_REGISTER_DECLARATION(PRegister, pnoreg, (-1));
CONSTANT_REGISTER_DECLARATION(PRegister, p0, ( 0));
CONSTANT_REGISTER_DECLARATION(PRegister, p1, ( 1));
CONSTANT_REGISTER_DECLARATION(PRegister, p2, ( 2));

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -188,6 +188,8 @@ REGISTER_DEFINITION(FloatRegister, z29);
REGISTER_DEFINITION(FloatRegister, z30);
REGISTER_DEFINITION(FloatRegister, z31);
REGISTER_DEFINITION(PRegister, pnoreg);
REGISTER_DEFINITION(PRegister, p0);
REGISTER_DEFINITION(PRegister, p1);
REGISTER_DEFINITION(PRegister, p2);

View File

@ -4879,6 +4879,11 @@ class StubGenerator: public StubCodeGenerator {
// r4 = cnt2
// r10 = tmp1
// r11 = tmp2
// r12 = tmp3
// r14 = tmp4
// v0 = vtmp1
// v1 = vtmp2
// v2 = vtmp3
address generate_compare_long_string_different_encoding(bool isLU) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", isLU
@ -5031,6 +5036,97 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Encoding pair handled by generate_compare_long_string_sve. The first letter
// applies to str1 and the second to str2: the stub loads an L operand with
// ld1b (bytes) and a U operand with ld1h (halfwords).
enum string_compare_mode {
LL,
LU,
UL,
UU,
};
// Generates the SVE stub that compares two long strings for the given
// encoding pair and returns the stub entry address.
//
// The following registers are declared in aarch64.ad
// r0  = result
// r1  = str1
// r2  = cnt1
// r3  = str2
// r4  = cnt2
// r10 = tmp1
// r11 = tmp2
// z0  = ztmp1
// z1  = ztmp2
// p0  = pgtmp1
// p1  = pgtmp2
//
// On entry cnt2 holds the minimum of the two string lengths, and result
// already holds the value to return when no differing character is found.
// rscratch1 and rscratch2 are overwritten on the mismatch path.
address generate_compare_long_string_sve(string_compare_mode mode) {
  __ align(CodeEntryAlignment);

  // Register the stub under a mode-specific name, as the other long-string
  // compare generators do with their own StubCodeMark.
  const char* stubname = NULL;
  switch (mode) {
    case LL: stubname = "compare_long_string_same_encoding LL";      break;
    case LU: stubname = "compare_long_string_different_encoding LU"; break;
    case UL: stubname = "compare_long_string_different_encoding UL"; break;
    case UU: stubname = "compare_long_string_same_encoding UU";      break;
    default: ShouldNotReachHere();
  }
  StubCodeMark mark(this, "StubRoutines", stubname);

  address entry = __ pc();
  Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
           tmp1 = r10, tmp2 = r11;

  Label LOOP, MISMATCH, DONE;
  Register vec_len = tmp1;
  Register idx = tmp2;
  // The minimum of the string lengths has been stored in cnt2.
  Register cnt = cnt2;
  FloatRegister ztmp1 = z0, ztmp2 = z1;
  PRegister pgtmp1 = p0, pgtmp2 = p1;

  // Number of elements processed per iteration: byte lanes for LL,
  // halfword lanes for every mode that involves a UTF16 operand.
  if (mode == LL) {
    __ sve_cntb(vec_len);
  } else {
    __ sve_cnth(vec_len);
  }

  // pgtmp1 governs the elements [idx, cnt) that fit in one vector.
  __ mov(idx, 0);
  __ sve_whilelt(pgtmp1, mode == LL ? __ B : __ H, idx, cnt);

  __ bind(LOOP);
    // Load the next chunk of both strings. Latin1 operands are loaded with
    // ld1b and a byte index, UTF16 operands with ld1h and the index scaled
    // by two.
    switch (mode) {
      case LL:
        __ sve_ld1b(ztmp1, __ B, pgtmp1, Address(str1, idx));
        __ sve_ld1b(ztmp2, __ B, pgtmp1, Address(str2, idx));
        break;
      case LU:
        __ sve_ld1b(ztmp1, __ H, pgtmp1, Address(str1, idx));
        __ sve_ld1h(ztmp2, __ H, pgtmp1, Address(str2, idx, Address::lsl(1)));
        break;
      case UL:
        __ sve_ld1h(ztmp1, __ H, pgtmp1, Address(str1, idx, Address::lsl(1)));
        __ sve_ld1b(ztmp2, __ H, pgtmp1, Address(str2, idx));
        break;
      case UU:
        __ sve_ld1h(ztmp1, __ H, pgtmp1, Address(str1, idx, Address::lsl(1)));
        __ sve_ld1h(ztmp2, __ H, pgtmp1, Address(str2, idx, Address::lsl(1)));
        break;
      default: ShouldNotReachHere();
    }
    __ add(idx, idx, vec_len);

    // Compare strings. pgtmp2 marks the governed elements that differ.
    __ sve_cmp(Assembler::NE, pgtmp2, mode == LL ? __ B : __ H, pgtmp1, ztmp1, ztmp2);
    __ br(__ NE, MISMATCH);
    // Build the predicate for the next chunk and keep looping while it
    // still covers elements below cnt.
    __ sve_whilelt(pgtmp1, mode == LL ? __ B : __ H, idx, cnt);
    __ br(__ LT, LOOP);

  // The result has been computed in the caller prior to entering this stub.
  __ b(DONE);

  __ bind(MISMATCH);

  // Crop the vector to find its location.
  __ sve_brkb(pgtmp2, pgtmp1, pgtmp2, false /* isMerge */);
  // Extract the first different characters of each string.
  __ sve_lasta(rscratch1, mode == LL ? __ B : __ H, pgtmp2, ztmp1);
  __ sve_lasta(rscratch2, mode == LL ? __ B : __ H, pgtmp2, ztmp2);

  // Compute the difference of the first different characters.
  __ sub(result, rscratch1, rscratch2);

  __ bind(DONE);
  __ ret(lr);

  return entry;
}
// r0 = result
// r1 = str1
// r2 = cnt1
@ -5153,6 +5249,7 @@ class StubGenerator: public StubCodeGenerator {
}
void generate_compare_long_strings() {
if (!UseSVE) {
StubRoutines::aarch64::_compare_long_string_LL
= generate_compare_long_string_same_encoding(true);
StubRoutines::aarch64::_compare_long_string_UU
@ -5161,6 +5258,16 @@ class StubGenerator: public StubCodeGenerator {
= generate_compare_long_string_different_encoding(true);
StubRoutines::aarch64::_compare_long_string_UL
= generate_compare_long_string_different_encoding(false);
} else {
StubRoutines::aarch64::_compare_long_string_LL
= generate_compare_long_string_sve(LL);
StubRoutines::aarch64::_compare_long_string_UU
= generate_compare_long_string_sve(UU);
StubRoutines::aarch64::_compare_long_string_LU
= generate_compare_long_string_sve(LU);
StubRoutines::aarch64::_compare_long_string_UL
= generate_compare_long_string_sve(UL);
}
}
// R0 = result

View File

@ -1567,6 +1567,8 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
["ld1b", "__ sve_ld1b(z0, __ S, p2, Address(sp, r8));", "ld1b\t{z0.s}, p2/z, [sp, x8]"],
["ld1b", "__ sve_ld1b(z0, __ D, p3, Address(sp, 7));", "ld1b\t{z0.d}, p3/z, [sp, #7, MUL VL]"],
["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
["ld1h",
"__ sve_ld1h(z10, __ H, p0, Address(r4, r2, Address::lsl(1)));", "ld1h\t{z10.h}, p0/z, [x4, x2, LSL 1]"],
["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
["ld1b", "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));", "ld1b\t{z30.b}, p3/z, [sp, x8]"],
["ld1w", "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));", "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"],

View File

@ -756,6 +756,7 @@
__ sve_ld1b(z0, __ S, p2, Address(sp, r8)); // ld1b {z0.s}, p2/z, [sp, x8]
__ sve_ld1b(z0, __ D, p3, Address(sp, 7)); // ld1b {z0.d}, p3/z, [sp, #7, MUL VL]
__ sve_ld1h(z10, __ H, p1, Address(sp, -8)); // ld1h {z10.h}, p1/z, [sp, #-8, MUL VL]
__ sve_ld1h(z10, __ H, p0, Address(r4, r2, Address::lsl(1))); // ld1h {z10.h}, p0/z, [x4, x2, LSL 1]
__ sve_ld1w(z20, __ S, p2, Address(r0, 7)); // ld1w {z20.s}, p2/z, [x0, #7, MUL VL]
__ sve_ld1b(z30, __ B, p3, Address(sp, r8)); // ld1b {z30.b}, p3/z, [sp, x8]
__ sve_ld1w(z0, __ S, p4, Address(sp, r28)); // ld1w {z0.s}, p4/z, [sp, x28, LSL #2]
@ -1042,30 +1043,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x1400034c, 0x94000000,
0x97ffffd4, 0x94000349, 0x3400000a, 0x34fffa2a,
0x340068ca, 0x35000008, 0x35fff9c8, 0x35006868,
0xb400000b, 0xb4fff96b, 0xb400680b, 0xb500001d,
0xb5fff91d, 0xb50067bd, 0x10000013, 0x10fff8b3,
0x10006753, 0x90000013, 0x36300016, 0x3637f836,
0x363066d6, 0x3758000c, 0x375ff7cc, 0x3758666c,
0x14000000, 0x17ffffd7, 0x1400034d, 0x94000000,
0x97ffffd4, 0x9400034a, 0x3400000a, 0x34fffa2a,
0x340068ea, 0x35000008, 0x35fff9c8, 0x35006888,
0xb400000b, 0xb4fff96b, 0xb400682b, 0xb500001d,
0xb5fff91d, 0xb50067dd, 0x10000013, 0x10fff8b3,
0x10006773, 0x90000013, 0x36300016, 0x3637f836,
0x363066f6, 0x3758000c, 0x375ff7cc, 0x3758668c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54006440, 0x54000001, 0x54fff541, 0x540063e1,
0x54000002, 0x54fff4e2, 0x54006382, 0x54000002,
0x54fff482, 0x54006322, 0x54000003, 0x54fff423,
0x540062c3, 0x54000003, 0x54fff3c3, 0x54006263,
0x54000004, 0x54fff364, 0x54006204, 0x54000005,
0x54fff305, 0x540061a5, 0x54000006, 0x54fff2a6,
0x54006146, 0x54000007, 0x54fff247, 0x540060e7,
0x54000008, 0x54fff1e8, 0x54006088, 0x54000009,
0x54fff189, 0x54006029, 0x5400000a, 0x54fff12a,
0x54005fca, 0x5400000b, 0x54fff0cb, 0x54005f6b,
0x5400000c, 0x54fff06c, 0x54005f0c, 0x5400000d,
0x54fff00d, 0x54005ead, 0x5400000e, 0x54ffefae,
0x54005e4e, 0x5400000f, 0x54ffef4f, 0x54005def,
0x54006460, 0x54000001, 0x54fff541, 0x54006401,
0x54000002, 0x54fff4e2, 0x540063a2, 0x54000002,
0x54fff482, 0x54006342, 0x54000003, 0x54fff423,
0x540062e3, 0x54000003, 0x54fff3c3, 0x54006283,
0x54000004, 0x54fff364, 0x54006224, 0x54000005,
0x54fff305, 0x540061c5, 0x54000006, 0x54fff2a6,
0x54006166, 0x54000007, 0x54fff247, 0x54006107,
0x54000008, 0x54fff1e8, 0x540060a8, 0x54000009,
0x54fff189, 0x54006049, 0x5400000a, 0x54fff12a,
0x54005fea, 0x5400000b, 0x54fff0cb, 0x54005f8b,
0x5400000c, 0x54fff06c, 0x54005f2c, 0x5400000d,
0x54fff00d, 0x54005ecd, 0x5400000e, 0x54ffefae,
0x54005e6e, 0x5400000f, 0x54ffef4f, 0x54005e0f,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
@ -1097,7 +1098,7 @@
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x58004e3b, 0x1800000b, 0xf8945060,
0xbd1b1869, 0x58004e5b, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
@ -1193,67 +1194,66 @@
0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2,
0x25f8f007, 0x2538dfea, 0x25b8dfeb, 0xa400a3e0,
0xa420a7e0, 0xa4484be0, 0xa467afe0, 0xa4a8a7ea,
0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b,
0xe400fbf6, 0xe408ffff, 0xe420e7e0, 0xe4484be0,
0xe460efe0, 0xe547e400, 0xe4014be0, 0xe4a84fe0,
0xe5f15000, 0x858043e0, 0x85a043ff, 0xe59f5d08,
0x0420e3e9, 0x0460e3ea, 0x04a0e3eb, 0x04e0e3ec,
0x25104042, 0x25104871, 0x25904861, 0x25904c92,
0x05344020, 0x05744041, 0x05b44062, 0x05f44083,
0x252c8840, 0x253c1420, 0x25681572, 0x25a21ce3,
0x25ea1e34, 0x0522c020, 0x05e6c0a4, 0x2401a001,
0x2443a051, 0x24858881, 0x24c78cd1, 0x24850891,
0x24c70cc1, 0x250f9001, 0x25508051, 0x25802491,
0x25df28c1, 0x25850c81, 0x251e10d1, 0x65816001,
0x65c36051, 0x65854891, 0x65c74cc1, 0x05733820,
0x05b238a4, 0x05f138e6, 0x0570396a, 0x65d0a001,
0x65d6a443, 0x65d4a826, 0x6594ac26, 0x6554ac26,
0x6556ac26, 0x6552ac26, 0x65cbac85, 0x65caac01,
0x65dea833, 0x659ca509, 0x65d8a801, 0x65dcac01,
0x655cb241, 0x0520a1e0, 0x0521a601, 0x052281e0,
0x05238601, 0x04a14026, 0x0568aca7, 0x05b23230,
0x853040af, 0xc5b040af, 0xe57080af, 0xe5b080af,
0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000,
0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000,
0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
0xf8208193, 0xf83101b6, 0xf83c13fe, 0xf821239a,
0xf824309e, 0xf826535e, 0xf8304109, 0xf82c7280,
0xf8216058, 0xf8a08309, 0xf8ba03d0, 0xf8a312ea,
0xf8aa21e4, 0xf8a2310b, 0xf8aa522f, 0xf8a2418a,
0xf8ac71af, 0xf8a26287, 0xf8fa8090, 0xf8e20184,
0xf8f01215, 0xf8f022ab, 0xf8f7334c, 0xf8f751dc,
0xf8eb4038, 0xf8ec715f, 0xf8f06047, 0xf863826d,
0xf8710070, 0xf86113cb, 0xf86521e8, 0xf87d301e,
0xf8745287, 0xf87742bc, 0xf87b70b9, 0xf8616217,
0xb83f8185, 0xb82901fc, 0xb83d13f6, 0xb83320bf,
0xb82e33f0, 0xb830529b, 0xb830416c, 0xb82973c6,
0xb831639b, 0xb8be8147, 0xb8b4008a, 0xb8b81231,
0xb8b623a3, 0xb8af3276, 0xb8b35056, 0xb8af4186,
0xb8b071ab, 0xb8b763c1, 0xb8f38225, 0xb8e202d0,
0xb8ed12aa, 0xb8fd219b, 0xb8fb3023, 0xb8ff5278,
0xb8f14389, 0xb8fb70ef, 0xb8f563f7, 0xb87983e2,
0xb87b0150, 0xb8771073, 0xb8702320, 0xb87a3057,
0xb870508c, 0xb87c43be, 0xb87070db, 0xb86961fd,
0xce273c87, 0xce080ac9, 0xce7e8e9b, 0xce808b45,
0xce79806e, 0xce758768, 0xcec0835a, 0xce608ad8,
0x043100c4, 0x046105e3, 0x65c900a6, 0x65d60a87,
0x65c80545, 0x0416a63e, 0x04001f8b, 0x0450979a,
0x04dabe0d, 0x045381a5, 0x04918b4f, 0x049006cb,
0x0497a264, 0x045eadd1, 0x04881062, 0x040a04d7,
0x04810f71, 0x04dca450, 0x65c084c3, 0x65cd8d93,
0x65c69a68, 0x65878ae0, 0x65c29db3, 0x049da0e6,
0x6582b911, 0x65c0b6d6, 0x65c1a1e2, 0x65cda494,
0x65c18107, 0x65af1493, 0x65e52b36, 0x65ab4ed0,
0x65f06a8d, 0x0451448f, 0x049c7c86, 0x0429335d,
0x04bc3162, 0x047a3027, 0x04e831d1, 0x05a56b15,
0x05b66e35, 0x041a367d, 0x041832e4, 0x04d926f3,
0x04482113, 0x04ca3a2e, 0x658727d5, 0x6586358a,
0x65d82709, 0x044138c4,
0xa4a2408a, 0xa547a814, 0xa4084ffe, 0xa55c53e0,
0xa5e1540b, 0xe400fbf6, 0xe408ffff, 0xe420e7e0,
0xe4484be0, 0xe460efe0, 0xe547e400, 0xe4014be0,
0xe4a84fe0, 0xe5f15000, 0x858043e0, 0x85a043ff,
0xe59f5d08, 0x0420e3e9, 0x0460e3ea, 0x04a0e3eb,
0x04e0e3ec, 0x25104042, 0x25104871, 0x25904861,
0x25904c92, 0x05344020, 0x05744041, 0x05b44062,
0x05f44083, 0x252c8840, 0x253c1420, 0x25681572,
0x25a21ce3, 0x25ea1e34, 0x0522c020, 0x05e6c0a4,
0x2401a001, 0x2443a051, 0x24858881, 0x24c78cd1,
0x24850891, 0x24c70cc1, 0x250f9001, 0x25508051,
0x25802491, 0x25df28c1, 0x25850c81, 0x251e10d1,
0x65816001, 0x65c36051, 0x65854891, 0x65c74cc1,
0x05733820, 0x05b238a4, 0x05f138e6, 0x0570396a,
0x65d0a001, 0x65d6a443, 0x65d4a826, 0x6594ac26,
0x6554ac26, 0x6556ac26, 0x6552ac26, 0x65cbac85,
0x65caac01, 0x65dea833, 0x659ca509, 0x65d8a801,
0x65dcac01, 0x655cb241, 0x0520a1e0, 0x0521a601,
0x052281e0, 0x05238601, 0x04a14026, 0x0568aca7,
0x05b23230, 0x853040af, 0xc5b040af, 0xe57080af,
0xe5b080af, 0x1e601000, 0x1e603000, 0x1e621000,
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
0x1e7e3000, 0xf8208193, 0xf83101b6, 0xf83c13fe,
0xf821239a, 0xf824309e, 0xf826535e, 0xf8304109,
0xf82c7280, 0xf8216058, 0xf8a08309, 0xf8ba03d0,
0xf8a312ea, 0xf8aa21e4, 0xf8a2310b, 0xf8aa522f,
0xf8a2418a, 0xf8ac71af, 0xf8a26287, 0xf8fa8090,
0xf8e20184, 0xf8f01215, 0xf8f022ab, 0xf8f7334c,
0xf8f751dc, 0xf8eb4038, 0xf8ec715f, 0xf8f06047,
0xf863826d, 0xf8710070, 0xf86113cb, 0xf86521e8,
0xf87d301e, 0xf8745287, 0xf87742bc, 0xf87b70b9,
0xf8616217, 0xb83f8185, 0xb82901fc, 0xb83d13f6,
0xb83320bf, 0xb82e33f0, 0xb830529b, 0xb830416c,
0xb82973c6, 0xb831639b, 0xb8be8147, 0xb8b4008a,
0xb8b81231, 0xb8b623a3, 0xb8af3276, 0xb8b35056,
0xb8af4186, 0xb8b071ab, 0xb8b763c1, 0xb8f38225,
0xb8e202d0, 0xb8ed12aa, 0xb8fd219b, 0xb8fb3023,
0xb8ff5278, 0xb8f14389, 0xb8fb70ef, 0xb8f563f7,
0xb87983e2, 0xb87b0150, 0xb8771073, 0xb8702320,
0xb87a3057, 0xb870508c, 0xb87c43be, 0xb87070db,
0xb86961fd, 0xce273c87, 0xce080ac9, 0xce7e8e9b,
0xce808b45, 0xce79806e, 0xce758768, 0xcec0835a,
0xce608ad8, 0x043100c4, 0x046105e3, 0x65c900a6,
0x65d60a87, 0x65c80545, 0x0416a63e, 0x04001f8b,
0x0450979a, 0x04dabe0d, 0x045381a5, 0x04918b4f,
0x049006cb, 0x0497a264, 0x045eadd1, 0x04881062,
0x040a04d7, 0x04810f71, 0x04dca450, 0x65c084c3,
0x65cd8d93, 0x65c69a68, 0x65878ae0, 0x65c29db3,
0x049da0e6, 0x6582b911, 0x65c0b6d6, 0x65c1a1e2,
0x65cda494, 0x65c18107, 0x65af1493, 0x65e52b36,
0x65ab4ed0, 0x65f06a8d, 0x0451448f, 0x049c7c86,
0x0429335d, 0x04bc3162, 0x047a3027, 0x04e831d1,
0x05a56b15, 0x05b66e35, 0x041a367d, 0x041832e4,
0x04d926f3, 0x04482113, 0x04ca3a2e, 0x658727d5,
0x6586358a, 0x65d82709, 0x044138c4,
};
// END Generated code -- do not edit

View File

@ -0,0 +1,148 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, BELLSOFT. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.arm.benchmarks.intrinsics;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.CompilerControl;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
/**
 * JMH benchmark for {@code String.compareTo()} on strings of different lengths.
 * It is modified from
 * test/hotspot/jtreg/compiler/intrinsics/string/TestStringCompareToDifferentLength.java
 * and measures the (Latin1, Latin1), (Latin1, UTF16), (UTF16, Latin1) and
 * (UTF16, UTF16) comparisons.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Measurement(iterations = 3, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
@Warmup(iterations = 3, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public class StringCompareToDifferentLength {

    /** Benchmark inputs: a short string of {@code size} chars and a long one of {@code size + delta}. */
    @State(Scope.Benchmark)
    public static class Input {
        @Param({"24", "36", "72", "128", "256", "512"})
        public int size;

        @Param({"2"})
        public int delta;

        // Number of compareTo() calls performed per benchmark invocation.
        int count = 100000;

        String longLatin1;
        String shortLatin1;
        String longUTF16FirstChar;
        String shortUTF16FirstChar;
        String longUTF16LastChar;
        String shortUTF16LastChar;

        /**
         * Builds the test strings once per trial. The UTF16 variants are the
         * ASCII strings with either the first or the last character replaced
         * by a non-Latin1 character.
         */
        @Setup(Level.Trial)
        public void setup() throws Exception {
            final int longLength = size + delta;

            // Repeating 'a'..'z' pattern, ASCII only.
            char[] ascii = new char[longLength];
            for (int pos = 0; pos < ascii.length; pos++) {
                ascii[pos] = (char) ('a' + (pos % 26));
            }

            longLatin1 = new String(ascii);
            shortLatin1 = longLatin1.substring(0, size);

            longUTF16FirstChar = '\ubeef' + longLatin1.substring(1);
            shortUTF16FirstChar = longUTF16FirstChar.substring(0, size);

            longUTF16LastChar = longLatin1.substring(0, longLength - 1) + '\ubeef';
            shortUTF16LastChar = shortLatin1.substring(0, size - 1) + '\ubeef';
        }
    }

    /**
     * Calls {@code compareTo()} with {@code str1} as the receiver and
     * {@code str2} as the argument. Note the parameter order is (str2, str1).
     */
    private int runCompareTo(String str2, String str1) {
        int outcome = str1.compareTo(str2);
        return outcome;
    }

    /**
     * Latin1 receiver, Latin1 argument.
     */
    @Benchmark
    public void compareToLL(Input in, Blackhole blackhole) {
        int sum = 0;
        for (int round = 0; round < in.count; round++) {
            sum += runCompareTo(in.longLatin1, in.shortLatin1);
        }
        blackhole.consume(sum);
    }

    /**
     * UTF16 receiver, UTF16 argument.
     */
    @Benchmark
    public void compareToUU(Input in, Blackhole blackhole) {
        int sum = 0;
        for (int round = 0; round < in.count; round++) {
            sum += runCompareTo(in.longUTF16FirstChar, in.shortUTF16FirstChar);
        }
        blackhole.consume(sum);
    }

    /**
     * Latin1 receiver, UTF16 argument.
     */
    @Benchmark
    public void compareToLU(Input in, Blackhole blackhole) {
        int sum = 0;
        for (int round = 0; round < in.count; round++) {
            sum += runCompareTo(in.longUTF16LastChar, in.shortLatin1);
        }
        blackhole.consume(sum);
    }

    /**
     * UTF16 receiver, Latin1 argument.
     */
    @Benchmark
    public void compareToUL(Input in, Blackhole blackhole) {
        int sum = 0;
        for (int round = 0; round < in.count; round++) {
            sum += runCompareTo(in.longLatin1, in.shortUTF16LastChar);
        }
        blackhole.consume(sum);
    }
}