7016474: string compare intrinsic improvements
Reviewed-by: kvn
This commit is contained in:
parent
477cbff4b6
commit
4b3ada699d
@ -2349,6 +2349,17 @@ void Assembler::prefix(Prefix p) {
|
|||||||
a_byte(p);
|
a_byte(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// POR - Bitwise logical OR of two XMM registers (register-to-register form).
// Emits, in order: the 0x66 prefix byte, whatever prefix bytes
// prefix_and_encode() produces for this register pair, the two opcode bytes
// 0x0F 0xEB, and a final byte combining 0xC0 with the register encoding.
// dst - destination XMM register
// src - source XMM register
void Assembler::por(XMMRegister dst, XMMRegister src) {
|
||||||
|
NOT_LP64(assert(VM_Version::supports_sse2(), "")); // only checked when NOT_LP64 expands its argument
|
||||||
|
|
||||||
|
emit_byte(0x66); // emitted before prefix_and_encode() so it precedes any prefix that call emits
|
||||||
|
int encode = prefix_and_encode(dst->encoding(), src->encoding()); // combined dst/src register bits for the final byte
|
||||||
|
emit_byte(0x0F); // first opcode byte
|
||||||
|
|
||||||
|
emit_byte(0xEB); // second opcode byte
|
||||||
|
emit_byte(0xC0 | encode); // presumably the ModRM byte in register-direct form (0xC0 = mod 11) -- confirm against the ISA manual
|
||||||
|
}
|
||||||
|
|
||||||
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
|
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
|
||||||
assert(isByte(mode), "invalid value");
|
assert(isByte(mode), "invalid value");
|
||||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||||
@ -8655,7 +8666,7 @@ void MacroAssembler::string_indexof(Register str1, Register str2,
|
|||||||
// Compare strings.
|
// Compare strings.
|
||||||
void MacroAssembler::string_compare(Register str1, Register str2,
|
void MacroAssembler::string_compare(Register str1, Register str2,
|
||||||
Register cnt1, Register cnt2, Register result,
|
Register cnt1, Register cnt2, Register result,
|
||||||
XMMRegister vec1, XMMRegister vec2) {
|
XMMRegister vec1) {
|
||||||
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
|
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
|
||||||
|
|
||||||
// Compute the minimum of the string lengths and the
|
// Compute the minimum of the string lengths and the
|
||||||
@ -8702,62 +8713,85 @@ void MacroAssembler::string_compare(Register str1, Register str2,
|
|||||||
bind(LSkip2);
|
bind(LSkip2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Advance to next character
|
Address::ScaleFactor scale = Address::times_2;
|
||||||
addptr(str1, 2);
|
int stride = 8;
|
||||||
addptr(str2, 2);
|
|
||||||
|
// Advance to next element
|
||||||
|
addptr(str1, 16/stride);
|
||||||
|
addptr(str2, 16/stride);
|
||||||
|
|
||||||
if (UseSSE42Intrinsics) {
|
if (UseSSE42Intrinsics) {
|
||||||
// With SSE4.2, use double quad vector compare
|
Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
|
||||||
Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
|
int pcmpmask = 0x19;
|
||||||
// Setup to compare 16-byte vectors
|
// Setup to compare 16-byte vectors
|
||||||
movl(cnt1, cnt2);
|
movl(result, cnt2);
|
||||||
andl(cnt2, 0xfffffff8); // cnt2 holds the vector count
|
andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
|
||||||
andl(cnt1, 0x00000007); // cnt1 holds the tail count
|
|
||||||
testl(cnt2, cnt2);
|
|
||||||
jccb(Assembler::zero, COMPARE_TAIL);
|
jccb(Assembler::zero, COMPARE_TAIL);
|
||||||
|
|
||||||
lea(str2, Address(str2, cnt2, Address::times_2));
|
lea(str1, Address(str1, result, scale));
|
||||||
lea(str1, Address(str1, cnt2, Address::times_2));
|
lea(str2, Address(str2, result, scale));
|
||||||
negptr(cnt2);
|
negptr(result);
|
||||||
|
|
||||||
bind(COMPARE_VECTORS);
|
// pcmpestri
|
||||||
movdqu(vec1, Address(str1, cnt2, Address::times_2));
|
// inputs:
|
||||||
movdqu(vec2, Address(str2, cnt2, Address::times_2));
|
// vec1- substring
|
||||||
pxor(vec1, vec2);
|
// rax - negative string length (elements count)
|
||||||
ptest(vec1, vec1);
|
// mem - scanned string
|
||||||
jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
|
// rdx - string length (elements count)
|
||||||
addptr(cnt2, 8);
|
// pcmpmask - cmp mode: 11000 (string compare with negated result)
|
||||||
jcc(Assembler::notZero, COMPARE_VECTORS);
|
// + 00 (unsigned bytes) or + 01 (unsigned shorts)
|
||||||
jmpb(COMPARE_TAIL);
|
// outputs:
|
||||||
|
// rcx - first mismatched element index
|
||||||
|
assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
|
||||||
|
|
||||||
|
bind(COMPARE_WIDE_VECTORS);
|
||||||
|
movdqu(vec1, Address(str1, result, scale));
|
||||||
|
pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
|
||||||
|
// After pcmpestri cnt1(rcx) contains mismatched element index
|
||||||
|
|
||||||
|
jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1
|
||||||
|
addptr(result, stride);
|
||||||
|
subptr(cnt2, stride);
|
||||||
|
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
|
||||||
|
|
||||||
|
// compare wide vectors tail
|
||||||
|
testl(result, result);
|
||||||
|
jccb(Assembler::zero, LENGTH_DIFF_LABEL);
|
||||||
|
|
||||||
|
movl(cnt2, stride);
|
||||||
|
movl(result, stride);
|
||||||
|
negptr(result);
|
||||||
|
movdqu(vec1, Address(str1, result, scale));
|
||||||
|
pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
|
||||||
|
jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
|
||||||
|
|
||||||
// Mismatched characters in the vectors
|
// Mismatched characters in the vectors
|
||||||
bind(VECTOR_NOT_EQUAL);
|
bind(VECTOR_NOT_EQUAL);
|
||||||
lea(str1, Address(str1, cnt2, Address::times_2));
|
addptr(result, cnt1);
|
||||||
lea(str2, Address(str2, cnt2, Address::times_2));
|
movptr(cnt2, result);
|
||||||
movl(cnt1, 8);
|
load_unsigned_short(result, Address(str1, cnt2, scale));
|
||||||
|
load_unsigned_short(cnt1, Address(str2, cnt2, scale));
|
||||||
|
subl(result, cnt1);
|
||||||
|
jmpb(POP_LABEL);
|
||||||
|
|
||||||
// Compare tail (< 8 chars), or rescan last vectors to
|
bind(COMPARE_TAIL); // cnt2 (vector count) is zero
|
||||||
// find 1st mismatched characters
|
movl(cnt2, result);
|
||||||
bind(COMPARE_TAIL);
|
|
||||||
testl(cnt1, cnt1);
|
|
||||||
jccb(Assembler::zero, LENGTH_DIFF_LABEL);
|
|
||||||
movl(cnt2, cnt1);
|
|
||||||
// Fallthru to tail compare
|
// Fallthru to tail compare
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shift str2 and str1 to the end of the arrays, negate min
|
// Shift str2 and str1 to the end of the arrays, negate min
|
||||||
lea(str1, Address(str1, cnt2, Address::times_2, 0));
|
lea(str1, Address(str1, cnt2, scale, 0));
|
||||||
lea(str2, Address(str2, cnt2, Address::times_2, 0));
|
lea(str2, Address(str2, cnt2, scale, 0));
|
||||||
negptr(cnt2);
|
negptr(cnt2);
|
||||||
|
|
||||||
// Compare the rest of the characters
|
// Compare the rest of the elements
|
||||||
bind(WHILE_HEAD_LABEL);
|
bind(WHILE_HEAD_LABEL);
|
||||||
load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0));
|
load_unsigned_short(result, Address(str1, cnt2, scale, 0));
|
||||||
load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0));
|
load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
|
||||||
subl(result, cnt1);
|
subl(result, cnt1);
|
||||||
jccb(Assembler::notZero, POP_LABEL);
|
jccb(Assembler::notZero, POP_LABEL);
|
||||||
increment(cnt2);
|
increment(cnt2);
|
||||||
jcc(Assembler::notZero, WHILE_HEAD_LABEL);
|
jccb(Assembler::notZero, WHILE_HEAD_LABEL);
|
||||||
|
|
||||||
// Strings are equal up to min length. Return the length difference.
|
// Strings are equal up to min length. Return the length difference.
|
||||||
bind(LENGTH_DIFF_LABEL);
|
bind(LENGTH_DIFF_LABEL);
|
||||||
@ -8766,7 +8800,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
|
|||||||
|
|
||||||
// Discard the stored length difference
|
// Discard the stored length difference
|
||||||
bind(POP_LABEL);
|
bind(POP_LABEL);
|
||||||
addptr(rsp, wordSize);
|
pop(cnt1);
|
||||||
|
|
||||||
// That's it
|
// That's it
|
||||||
bind(DONE_LABEL);
|
bind(DONE_LABEL);
|
||||||
@ -8814,6 +8848,7 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist
|
|||||||
if (UseSSE42Intrinsics) {
|
if (UseSSE42Intrinsics) {
|
||||||
// With SSE4.2, use double quad vector compare
|
// With SSE4.2, use double quad vector compare
|
||||||
Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
|
Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
|
||||||
|
|
||||||
// Compare 16-byte vectors
|
// Compare 16-byte vectors
|
||||||
andl(result, 0x0000000e); // tail count (in bytes)
|
andl(result, 0x0000000e); // tail count (in bytes)
|
||||||
andl(limit, 0xfffffff0); // vector count (in bytes)
|
andl(limit, 0xfffffff0); // vector count (in bytes)
|
||||||
@ -8827,11 +8862,23 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist
|
|||||||
movdqu(vec1, Address(ary1, limit, Address::times_1));
|
movdqu(vec1, Address(ary1, limit, Address::times_1));
|
||||||
movdqu(vec2, Address(ary2, limit, Address::times_1));
|
movdqu(vec2, Address(ary2, limit, Address::times_1));
|
||||||
pxor(vec1, vec2);
|
pxor(vec1, vec2);
|
||||||
|
|
||||||
ptest(vec1, vec1);
|
ptest(vec1, vec1);
|
||||||
jccb(Assembler::notZero, FALSE_LABEL);
|
jccb(Assembler::notZero, FALSE_LABEL);
|
||||||
addptr(limit, 16);
|
addptr(limit, 16);
|
||||||
jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
|
jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
|
||||||
|
|
||||||
|
testl(result, result);
|
||||||
|
jccb(Assembler::zero, TRUE_LABEL);
|
||||||
|
|
||||||
|
movdqu(vec1, Address(ary1, result, Address::times_1, -16));
|
||||||
|
movdqu(vec2, Address(ary2, result, Address::times_1, -16));
|
||||||
|
pxor(vec1, vec2);
|
||||||
|
|
||||||
|
ptest(vec1, vec1);
|
||||||
|
jccb(Assembler::notZero, FALSE_LABEL);
|
||||||
|
jmpb(TRUE_LABEL);
|
||||||
|
|
||||||
bind(COMPARE_TAIL); // limit is zero
|
bind(COMPARE_TAIL); // limit is zero
|
||||||
movl(limit, result);
|
movl(limit, result);
|
||||||
// Fallthru to tail compare
|
// Fallthru to tail compare
|
||||||
|
@ -1277,6 +1277,9 @@ private:
|
|||||||
void prefetcht2(Address src);
|
void prefetcht2(Address src);
|
||||||
void prefetchw(Address src);
|
void prefetchw(Address src);
|
||||||
|
|
||||||
|
// POR - Bitwise logical OR
|
||||||
|
void por(XMMRegister dst, XMMRegister src);
|
||||||
|
|
||||||
// Shuffle Packed Doublewords
|
// Shuffle Packed Doublewords
|
||||||
void pshufd(XMMRegister dst, XMMRegister src, int mode);
|
void pshufd(XMMRegister dst, XMMRegister src, int mode);
|
||||||
void pshufd(XMMRegister dst, Address src, int mode);
|
void pshufd(XMMRegister dst, Address src, int mode);
|
||||||
@ -2294,7 +2297,7 @@ public:
|
|||||||
// Compare strings.
|
// Compare strings.
|
||||||
void string_compare(Register str1, Register str2,
|
void string_compare(Register str1, Register str2,
|
||||||
Register cnt1, Register cnt2, Register result,
|
Register cnt1, Register cnt2, Register result,
|
||||||
XMMRegister vec1, XMMRegister vec2);
|
XMMRegister vec1);
|
||||||
|
|
||||||
// Compare char[] arrays.
|
// Compare char[] arrays.
|
||||||
void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
|
void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||||
|
@ -12629,16 +12629,16 @@ instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlag
|
|||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2,
|
instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
|
||||||
eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{
|
eAXRegI result, regXD tmp1, eFlagsReg cr) %{
|
||||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||||
|
|
||||||
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1, $tmp2" %}
|
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ string_compare($str1$$Register, $str2$$Register,
|
__ string_compare($str1$$Register, $str2$$Register,
|
||||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister);
|
$tmp1$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
@ -11583,17 +11583,17 @@ instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
|
|||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rbx_RegI cnt2,
|
instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
|
||||||
rax_RegI result, regD tmp1, regD tmp2, rFlagsReg cr)
|
rax_RegI result, regD tmp1, rFlagsReg cr)
|
||||||
%{
|
%{
|
||||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||||
|
|
||||||
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1, $tmp2" %}
|
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ string_compare($str1$$Register, $str2$$Register,
|
__ string_compare($str1$$Register, $str2$$Register,
|
||||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister);
|
$tmp1$$XMMRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user