From b5526e5e5935658ed1d39938441ae1a3417c0545 Mon Sep 17 00:00:00 2001 From: Ningsheng Jian Date: Wed, 18 May 2022 01:34:14 +0000 Subject: [PATCH] 8281712: [REDO] AArch64: Implement string_compare intrinsic in SVE Co-authored-by: Tat Wai Chong Reviewed-by: thartmann, ngasson --- src/hotspot/cpu/aarch64/aarch64.ad | 37 ++++- src/hotspot/cpu/aarch64/aarch64_sve.ad | 101 ++++++++++++ src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 | 38 +++++ .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 3 +- .../cpu/aarch64/c2_MacroAssembler_aarch64.hpp | 3 +- .../cpu/aarch64/stubGenerator_aarch64.cpp | 123 +++++++++++++++ .../lang/StringCompareToDifferentLength.java | 148 ++++++++++++++++++ 7 files changed, 443 insertions(+), 10 deletions(-) create mode 100644 test/micro/org/openjdk/bench/java/lang/StringCompareToDifferentLength.java diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index b713e4c822b..9f718d71933 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1199,6 +1199,9 @@ reg_class gov_pr ( // P7, non-allocatable, preserved with all elements preset to TRUE. 
); +reg_class p0_reg(P0); +reg_class p1_reg(P1); + // Singleton class for condition codes reg_class int_flags(RFLAGS); @@ -5731,6 +5734,24 @@ operand pRegGov() interface(REG_INTER); %} +operand pRegGov_P0() +%{ + constraint(ALLOC_IN_RC(p0_reg)); + match(RegVectMask); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand pRegGov_P1() +%{ + constraint(ALLOC_IN_RC(p1_reg)); + match(RegVectMask); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + // Flags register, used as output of signed compare instructions // note that on AArch64 we also use this register as the output for @@ -16911,7 +16932,7 @@ instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2, iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr) %{ - predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); + predicate((UseSVE == 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU)); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -16921,7 +16942,7 @@ instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 c __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, $result$$Register, $tmp1$$Register, $tmp2$$Register, - fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU); + fnoreg, fnoreg, fnoreg, pnoreg, pnoreg, StrIntrinsicNode::UU); %} ins_pipe(pipe_class_memory); %} @@ -16929,7 +16950,7 @@ instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 c instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2, iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr) %{ - predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); + predicate((UseSVE == 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL)); match(Set result 
(StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -16938,7 +16959,7 @@ instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 c __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, $result$$Register, $tmp1$$Register, $tmp2$$Register, - fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL); + fnoreg, fnoreg, fnoreg, pnoreg, pnoreg, StrIntrinsicNode::LL); %} ins_pipe(pipe_class_memory); %} @@ -16947,7 +16968,7 @@ instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr) %{ - predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); + predicate((UseSVE == 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL)); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -16958,7 +16979,7 @@ instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 $cnt1$$Register, $cnt2$$Register, $result$$Register, $tmp1$$Register, $tmp2$$Register, $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, - $vtmp3$$FloatRegister, StrIntrinsicNode::UL); + $vtmp3$$FloatRegister, pnoreg, pnoreg, StrIntrinsicNode::UL); %} ins_pipe(pipe_class_memory); %} @@ -16967,7 +16988,7 @@ instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr) %{ - predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); + predicate((UseSVE == 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU)); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL 
vtmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -16978,7 +16999,7 @@ instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 $cnt1$$Register, $cnt2$$Register, $result$$Register, $tmp1$$Register, $tmp2$$Register, $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, - $vtmp3$$FloatRegister,StrIntrinsicNode::LU); + $vtmp3$$FloatRegister, pnoreg, pnoreg, StrIntrinsicNode::LU); %} ins_pipe(pipe_class_memory); %} diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad index be96098afbe..835fd566143 100644 --- a/src/hotspot/cpu/aarch64/aarch64_sve.ad +++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad @@ -5397,6 +5397,8 @@ instruct populateindex(vReg dst, iRegIorL2I src1, immI src2) %{ ins_pipe(pipe_slow); %} +// ----------------------------- String Intrinsics ------------------------------- + // Intrisics for String.indexOf(char) @@ -5436,6 +5438,105 @@ instruct stringU_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, ins_pipe(pipe_class_memory); %} +// Intrinsics for String.compareTo() + +// Note that Z registers alias the corresponding NEON registers; we declare the vector operands of +// these string_compare variants as NEON register type for convenience so that the prototype of +// string_compare can be shared with all variants.
+ + +instruct string_compareLL_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2, + iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, + vRegD_V0 vtmp1, vRegD_V1 vtmp2, pRegGov_P0 pgtmp1, + pRegGov_P1 pgtmp2, rFlagsReg cr) +%{ + predicate((UseSVE > 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL)); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp1, TEMP pgtmp2, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # USE sve" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits. + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, fnoreg, + as_PRegister($pgtmp1$$reg), as_PRegister($pgtmp2$$reg), + StrIntrinsicNode::LL); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_compareLU_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2, + iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, + vRegD_V0 vtmp1, vRegD_V1 vtmp2, pRegGov_P0 pgtmp1, + pRegGov_P1 pgtmp2, rFlagsReg cr) +%{ + predicate((UseSVE > 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU)); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp1, TEMP pgtmp2, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # USE sve" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+ __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, fnoreg, + as_PRegister($pgtmp1$$reg), as_PRegister($pgtmp2$$reg), + StrIntrinsicNode::LU); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_compareUL_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2, + iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, + vRegD_V0 vtmp1, vRegD_V1 vtmp2, pRegGov_P0 pgtmp1, + pRegGov_P1 pgtmp2, rFlagsReg cr) +%{ + predicate((UseSVE > 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL)); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp1, TEMP pgtmp2, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # USE sve" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+ __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, fnoreg, + as_PRegister($pgtmp1$$reg), as_PRegister($pgtmp2$$reg), + StrIntrinsicNode::UL); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_compareUU_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2, + iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, + vRegD_V0 vtmp1, vRegD_V1 vtmp2, pRegGov_P0 pgtmp1, + pRegGov_P1 pgtmp2, rFlagsReg cr) +%{ + predicate((UseSVE > 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU)); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp1, TEMP pgtmp2, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # USE sve" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+ __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, fnoreg, + as_PRegister($pgtmp1$$reg), as_PRegister($pgtmp2$$reg), + StrIntrinsicNode::UU); + %} + ins_pipe(pipe_class_memory); +%} + // ---------------------------- Vector mask reductions --------------------------- instruct vmask_truecount(iRegINoSp dst, pReg src) %{ predicate(UseSVE > 0); diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 index 67216def0b5..ff86c2df51e 100644 --- a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 @@ -2979,6 +2979,8 @@ instruct populateindex(vReg dst, iRegIorL2I src1, immI src2) %{ ins_pipe(pipe_slow); %} +// ----------------------------- String Intrinsics ------------------------------- + // Intrisics for String.indexOf(char) dnl @@ -3004,6 +3006,42 @@ dnl $1 $2 $3 STRING_INDEXOF_CHAR(L, Latin1, true) STRING_INDEXOF_CHAR(U, UTF16, false) +// Intrinsics for String.compareTo() + +// Note that Z registers alias the corresponding NEON registers; we declare the vector operands of +// these string_compare variants as NEON register type for convenience so that the prototype of +// string_compare can be shared with all variants.
+ +dnl +define(`STRING_COMPARETO', ` +instruct string_compare$1_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2, + iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, + vRegD_V0 vtmp1, vRegD_V1 vtmp2, pRegGov_P0 pgtmp1, + pRegGov_P1 pgtmp2, rFlagsReg cr) +%{ + predicate((UseSVE > 0) && (((StrCompNode*)n)->encoding() == StrIntrinsicNode::$1)); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp1, TEMP pgtmp2, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # USE sve" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits. + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, fnoreg, + as_PRegister($pgtmp1$$reg), as_PRegister($pgtmp2$$reg), + StrIntrinsicNode::$1); + %} + ins_pipe(pipe_class_memory); +%}')dnl +dnl $1 +STRING_COMPARETO(LL) +STRING_COMPARETO(LU) +STRING_COMPARETO(UL) +STRING_COMPARETO(UU) + // ---------------------------- Vector mask reductions --------------------------- instruct vmask_truecount(iRegINoSp dst, pReg src) %{ predicate(UseSVE > 0); diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 4f701a49cac..b096e2bf645 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -676,7 +676,8 @@ void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, // Compare strings.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
     Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
-    FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
+    FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
+    PRegister pgtmp1, PRegister pgtmp2, int ae) {
   Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
       DIFF, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
       SHORT_LOOP_START, TAIL_CHECK;
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
index 1abd30a1bfb..2673cc0e83b 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -32,7 +32,8 @@
   void string_compare(Register str1, Register str2,
                       Register cnt1, Register cnt2, Register result,
                       Register tmp1, Register tmp2, FloatRegister vtmp1,
-                      FloatRegister vtmp2, FloatRegister vtmp3, int ae);
+                      FloatRegister vtmp2, FloatRegister vtmp3,
+                      PRegister pgtmp1, PRegister pgtmp2, int ae);
 
   void string_indexof(Register str1, Register str2,
                       Register cnt1, Register cnt2,
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index 327cb31f10d..34a4c8688f2 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -5324,7 +5324,120 @@ class StubGenerator: public StubCodeGenerator {
     return entry;
   }
 
+  enum string_compare_mode {
+    LL,
+    LU,
+    UL,
+    UU,
+  };
+
+  // SVE stub comparing the first cnt2 characters of two long strings. Fixed register assignment:
+  // r0  = result
+  // r1  = str1
+  // r2  = cnt1
+  // r3  = str2
+  // r4  = cnt2
+  // r10 = tmp1
+  // r11 = tmp2
+  // z0  = ztmp1
+  // z1  = ztmp2
+  // p0  = pgtmp1
+  // p1  = pgtmp2
+  address generate_compare_long_string_sve(string_compare_mode mode) {
+    __ align(CodeEntryAlignment);
+    address entry = __ pc();
+    Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
+             tmp1 = r10, tmp2 = r11;
+
+    Label LOOP, DONE, MISMATCH;
+    Register vec_len = tmp1;
+    Register idx = tmp2;
+    // The minimum of the string lengths has been stored in cnt2.
+    Register cnt = cnt2;
+    FloatRegister ztmp1 = z0, ztmp2 = z1;
+    PRegister pgtmp1 = p0, pgtmp2 = p1;
+
+#define LOAD_PAIR(ztmp1, ztmp2, pgtmp1, src1, src2, idx)                       \
+    switch (mode) {                                                            \
+      case LL:                                                                 \
+        __ sve_ld1b(ztmp1, __ B, pgtmp1, Address(src1, idx));                  \
+        __ sve_ld1b(ztmp2, __ B, pgtmp1, Address(src2, idx));                  \
+        break;                                                                 \
+      case LU:                                                                 \
+        __ sve_ld1b(ztmp1, __ H, pgtmp1, Address(src1, idx));                  \
+        __ sve_ld1h(ztmp2, __ H, pgtmp1, Address(src2, idx, Address::lsl(1))); \
+        break;                                                                 \
+      case UL:                                                                 \
+        __ sve_ld1h(ztmp1, __ H, pgtmp1, Address(src1, idx, Address::lsl(1))); \
+        __ sve_ld1b(ztmp2, __ H, pgtmp1, Address(src2, idx));                  \
+        break;                                                                 \
+      case UU:                                                                 \
+        __ sve_ld1h(ztmp1, __ H, pgtmp1, Address(src1, idx, Address::lsl(1))); \
+        __ sve_ld1h(ztmp2, __ H, pgtmp1, Address(src2, idx, Address::lsl(1))); \
+        break;                                                                 \
+      default:                                                                 \
+        ShouldNotReachHere();                                                  \
+    }
+
+    const char* stubname;
+    switch (mode) {
+      case LL: stubname = "compare_long_string_same_encoding LL";      break;
+      case LU: stubname = "compare_long_string_different_encoding LU"; break;
+      case UL: stubname = "compare_long_string_different_encoding UL"; break;
+      case UU: stubname = "compare_long_string_same_encoding UU";      break;
+      default: ShouldNotReachHere();
+    }
+
+    StubCodeMark mark(this, "StubRoutines", stubname);
+
+    __ mov(idx, 0);
+    __ sve_whilelt(pgtmp1, mode == LL ? __ B : __ H, idx, cnt);
+
+    if (mode == LL) {
+      __ sve_cntb(vec_len);
+    } else {
+      __ sve_cnth(vec_len);
+    }
+
+    __ sub(rscratch1, cnt, vec_len);
+
+    __ bind(LOOP);
+
+      // main loop: one vector of characters per iteration while idx < cnt - vec_len
+      LOAD_PAIR(ztmp1, ztmp2, pgtmp1, str1, str2, idx);
+      __ add(idx, idx, vec_len);
+      // Compare strings.
+      __ sve_cmp(Assembler::NE, pgtmp2, mode == LL ? __ B : __ H, pgtmp1, ztmp1, ztmp2);
+      __ br(__ NE, MISMATCH);
+      __ cmp(idx, rscratch1);
+      __ br(__ LT, LOOP);
+
+    // post loop, last iteration: the predicate is recomputed from idx and cnt first
+    __ sve_whilelt(pgtmp1, mode == LL ? __ B : __ H, idx, cnt);
+
+    LOAD_PAIR(ztmp1, ztmp2, pgtmp1, str1, str2, idx);
+    __ sve_cmp(Assembler::NE, pgtmp2, mode == LL ? __ B : __ H, pgtmp1, ztmp1, ztmp2);
+    __ br(__ EQ, DONE);
+
+    __ bind(MISMATCH);
+
+    // Crop the vector to find its location.
+    __ sve_brkb(pgtmp2, pgtmp1, pgtmp2, false /* isMerge */);
+    // Extract the first different characters of each string.
+    __ sve_lasta(rscratch1, mode == LL ? __ B : __ H, pgtmp2, ztmp1);
+    __ sve_lasta(rscratch2, mode == LL ? __ B : __ H, pgtmp2, ztmp2);
+
+    // Compute the difference of the first different characters (result is not written otherwise).
+    __ sub(result, rscratch1, rscratch2);
+
+    __ bind(DONE);
+    __ ret(lr);
+#undef LOAD_PAIR
+    return entry;
+  }
+
   void generate_compare_long_strings() {
+    if (UseSVE == 0) {
       StubRoutines::aarch64::_compare_long_string_LL
           = generate_compare_long_string_same_encoding(true);
       StubRoutines::aarch64::_compare_long_string_UU
@@ -5333,6 +5446,16 @@ class StubGenerator: public StubCodeGenerator {
           = generate_compare_long_string_different_encoding(true);
       StubRoutines::aarch64::_compare_long_string_UL
           = generate_compare_long_string_different_encoding(false);
+    } else {
+      StubRoutines::aarch64::_compare_long_string_LL
+          = generate_compare_long_string_sve(LL);
+      StubRoutines::aarch64::_compare_long_string_UU
+          = generate_compare_long_string_sve(UU);
+      StubRoutines::aarch64::_compare_long_string_LU
+          = generate_compare_long_string_sve(LU);
+      StubRoutines::aarch64::_compare_long_string_UL
+          = generate_compare_long_string_sve(UL);
+    }
   }
 
   // R0 = result
diff --git a/test/micro/org/openjdk/bench/java/lang/StringCompareToDifferentLength.java b/test/micro/org/openjdk/bench/java/lang/StringCompareToDifferentLength.java
new file mode 100644
index 00000000000..2ebee9a6006
--- /dev/null
+++
b/test/micro/org/openjdk/bench/java/lang/StringCompareToDifferentLength.java
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, BELLSOFT. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package org.openjdk.bench.java.lang;
+
+import java.util.concurrent.TimeUnit;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.CompilerControl;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * This benchmark is modified from test/hotspot/jtreg/compiler/intrinsics/string/TestStringCompareToDifferentLength.java.
+ * It can be used to measure the performance of compareTo() in
+ * (Latin1, Latin1), (Latin1, UTF16), (UTF16, Latin1), and (UTF16, UTF16)
+ * comparisons, where the receiver is {@code delta} characters shorter than the argument.
+ */
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Benchmark)
+@Measurement(iterations = 3, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
+@Warmup(iterations = 3, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
+@CompilerControl(CompilerControl.Mode.DONT_INLINE)
+public class StringCompareToDifferentLength {
+
+    @State(Scope.Benchmark)
+    public static class Input {
+        @Param({"24", "36", "72", "128", "256", "512"})
+        public int size;
+
+        @Param({"2"})
+        public int delta;
+
+        int count = 100000;
+        String longLatin1;
+        String shortLatin1;
+        String longUTF16FirstChar;
+        String shortUTF16FirstChar;
+        String longUTF16LastChar;
+        String shortUTF16LastChar;
+
+        /**
+         * Builds the input strings: "long" ones have size + delta characters, "short" ones have size characters.
+         */
+        @Setup(Level.Trial)
+        public void setup() throws Exception {
+            char[] strsrc = new char[size + delta];
+            // generate ASCII string
+            for (int i = 0; i < size + delta; i++) {
+                strsrc[i] = (char) ('a' + (i % 26));
+            }
+
+            longLatin1 = new String(strsrc);
+            shortLatin1 = longLatin1.substring(0, size);
+            longUTF16LastChar = longLatin1.substring(0, longLatin1.length() - 1) + '\ubeef';
+            longUTF16FirstChar = '\ubeef' + longLatin1.substring(1, longLatin1.length());
+            shortUTF16LastChar = shortLatin1.substring(0, shortLatin1.length() - 1) + '\ubeef';
+            shortUTF16FirstChar = longUTF16FirstChar.substring(0, size);
+        }
+    }
+
+    private int runCompareTo(String str2, String str1) {
+        return str1.compareTo(str2);
+    }
+
+    /**
+     * latin1-latin1: short Latin1 receiver compared with the long Latin1 string.
+     */
+    @Benchmark
+    public void compareToLL(Input in, Blackhole blackhole) {
+        int res = 0;
+        for (int i = 0; i < in.count; ++i) {
+            res += runCompareTo(in.longLatin1, in.shortLatin1);
+        }
+        blackhole.consume(res);
+    }
+
+    /**
+     * UTF16-UTF16: short UTF16 receiver compared with the long UTF16 string.
+     */
+    @Benchmark
+    public void compareToUU(Input in, Blackhole blackhole) {
+        int res = 0;
+        for (int i = 0; i < in.count; ++i) {
+            res += runCompareTo(in.longUTF16FirstChar, in.shortUTF16FirstChar);
+        }
+        blackhole.consume(res);
+    }
+
+    /**
+     * latin1-UTF16: short Latin1 receiver compared with the long UTF16 string.
+     */
+    @Benchmark
+    public void compareToLU(Input in, Blackhole blackhole) {
+        int res = 0;
+        for (int i = 0; i < in.count; ++i) {
+            res += runCompareTo(in.longUTF16LastChar, in.shortLatin1);
+        }
+        blackhole.consume(res);
+    }
+
+    /**
+     * UTF16-latin1: short UTF16 receiver compared with the long Latin1 string.
+     */
+    @Benchmark
+    public void compareToUL(Input in, Blackhole blackhole) {
+        int res = 0;
+        for (int i = 0; i < in.count; ++i) {
+            res += runCompareTo(in.longLatin1, in.shortUTF16LastChar);
+        }
+        blackhole.consume(res);
+    }
+}
+